@chialab/pdfjs-lib 1.0.0-alpha.28 → 1.0.0-alpha.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/index.js +454 -582
- package/dist/index.d.ts +7 -1
- package/dist/lib/Canvas.d.ts +0 -5
- package/dist/lib/TextLayer.d.ts +12 -2
- package/dist/lib/TextLayer_v2.d.ts +2 -2
- package/dist/node/index.js +453 -567
- package/package.json +1 -1
package/dist/node/index.js
CHANGED
|
@@ -39173,79 +39173,6 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
|
|
|
39173
39173
|
return makeSerializable(annotations);
|
|
39174
39174
|
};
|
|
39175
39175
|
|
|
39176
|
-
// src/lib/WasmFactory.ts
|
|
39177
|
-
var WasmFactory = class {
|
|
39178
|
-
async fetch({
|
|
39179
|
-
filename
|
|
39180
|
-
}) {
|
|
39181
|
-
switch (filename) {
|
|
39182
|
-
case "openjpeg.wasm":
|
|
39183
|
-
return import("./openjpeg-TRZ4ANDN.js").then(
|
|
39184
|
-
(module) => module.default
|
|
39185
|
-
);
|
|
39186
|
-
case "qcms_bg":
|
|
39187
|
-
return import("./qcms_bg-PRPVJQFC.js").then(
|
|
39188
|
-
(module) => module.default
|
|
39189
|
-
);
|
|
39190
|
-
}
|
|
39191
|
-
return Uint8Array.from([]);
|
|
39192
|
-
}
|
|
39193
|
-
};
|
|
39194
|
-
|
|
39195
|
-
// src/lib/StandardFontDataFactory.ts
|
|
39196
|
-
var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
|
|
39197
|
-
constructor() {
|
|
39198
|
-
super({
|
|
39199
|
-
baseUrl: null
|
|
39200
|
-
});
|
|
39201
|
-
}
|
|
39202
|
-
/**
|
|
39203
|
-
* Fetch the corresponding standard font data.
|
|
39204
|
-
* We need to use specific dynamic imports for each font file for the bundler to include them.
|
|
39205
|
-
*/
|
|
39206
|
-
async fetch({
|
|
39207
|
-
filename
|
|
39208
|
-
}) {
|
|
39209
|
-
switch (filename) {
|
|
39210
|
-
case "FoxitDingbats.pfb":
|
|
39211
|
-
return import("./FoxitDingbats-UZC34T4F.js").then((module) => module.default);
|
|
39212
|
-
case "FoxitFixed.pfb":
|
|
39213
|
-
return import("./FoxitFixed-DSJVG3IJ.js").then(
|
|
39214
|
-
(module) => module.default
|
|
39215
|
-
);
|
|
39216
|
-
case "FoxitFixedBold.pfb":
|
|
39217
|
-
return import("./FoxitFixedBold-YSX4X5NA.js").then((module) => module.default);
|
|
39218
|
-
case "FoxitFixedBoldItalic.pfb":
|
|
39219
|
-
return import("./FoxitFixedBoldItalic-IFYYA6RR.js").then((module) => module.default);
|
|
39220
|
-
case "FoxitFixedItalic.pfb":
|
|
39221
|
-
return import("./FoxitFixedItalic-4PEFPCQV.js").then((module) => module.default);
|
|
39222
|
-
case "FoxitSerif.pfb":
|
|
39223
|
-
return import("./FoxitSerif-7SSP2H2U.js").then(
|
|
39224
|
-
(module) => module.default
|
|
39225
|
-
);
|
|
39226
|
-
case "FoxitSerifBold.pfb":
|
|
39227
|
-
return import("./FoxitSerifBold-KLMURJ6N.js").then((module) => module.default);
|
|
39228
|
-
case "FoxitSerifBoldItalic.pfb":
|
|
39229
|
-
return import("./FoxitSerifBoldItalic-VSYHWLRM.js").then((module) => module.default);
|
|
39230
|
-
case "FoxitSerifItalic.pfb":
|
|
39231
|
-
return import("./FoxitSerifItalic-NTOPMQDD.js").then((module) => module.default);
|
|
39232
|
-
case "FoxitSymbol.pfb":
|
|
39233
|
-
return import("./FoxitSymbol-QPUM74UN.js").then(
|
|
39234
|
-
(module) => module.default
|
|
39235
|
-
);
|
|
39236
|
-
case "LiberationSans-Bold.ttf":
|
|
39237
|
-
return import("./LiberationSans-Bold-BXFYN4PV.js").then((module) => module.default);
|
|
39238
|
-
case "LiberationSans-BoldItalic.ttf":
|
|
39239
|
-
return import("./LiberationSans-BoldItalic-DQQC5TNJ.js").then((module) => module.default);
|
|
39240
|
-
case "LiberationSans-Italic.ttf":
|
|
39241
|
-
return import("./LiberationSans-Italic-Z4MFN6PY.js").then((module) => module.default);
|
|
39242
|
-
case "LiberationSans-Regular.ttf":
|
|
39243
|
-
return import("./LiberationSans-Regular-MACKS2VL.js").then((module) => module.default);
|
|
39244
|
-
}
|
|
39245
|
-
return Uint8Array.from([]);
|
|
39246
|
-
}
|
|
39247
|
-
};
|
|
39248
|
-
|
|
39249
39176
|
// src/lib/AnnotationData.ts
|
|
39250
39177
|
function isTextAnnotation(annotation) {
|
|
39251
39178
|
return annotation.subtype === "Text";
|
|
@@ -39335,59 +39262,6 @@ function isRedactAnnotation(annotation) {
|
|
|
39335
39262
|
return annotation.subtype === "Redact";
|
|
39336
39263
|
}
|
|
39337
39264
|
|
|
39338
|
-
// src/lib/CanvasGraphics.ts
|
|
39339
|
-
var {
|
|
39340
|
-
beginDrawing,
|
|
39341
|
-
beginText,
|
|
39342
|
-
endText,
|
|
39343
|
-
beginMarkedContent,
|
|
39344
|
-
beginMarkedContentProps,
|
|
39345
|
-
endMarkedContent
|
|
39346
|
-
} = CanvasGraphics.prototype;
|
|
39347
|
-
CanvasGraphics.prototype.beginDrawing = function(options) {
|
|
39348
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39349
|
-
options.transparency = false;
|
|
39350
|
-
}
|
|
39351
|
-
return beginDrawing.call(this, options);
|
|
39352
|
-
};
|
|
39353
|
-
CanvasGraphics.prototype.beginText = function(opIdx) {
|
|
39354
|
-
beginText.call(this, opIdx);
|
|
39355
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39356
|
-
this.ctx.beginText();
|
|
39357
|
-
}
|
|
39358
|
-
};
|
|
39359
|
-
CanvasGraphics.prototype.endText = function(opIdx) {
|
|
39360
|
-
endText.call(this, opIdx);
|
|
39361
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39362
|
-
this.ctx.endText();
|
|
39363
|
-
}
|
|
39364
|
-
};
|
|
39365
|
-
CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
|
|
39366
|
-
beginMarkedContent.call(this, opIdx, this.ctx);
|
|
39367
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39368
|
-
this.ctx.beginMarkedContent(type);
|
|
39369
|
-
}
|
|
39370
|
-
};
|
|
39371
|
-
CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
|
|
39372
|
-
beginMarkedContentProps.call(this, opIdx, type, props);
|
|
39373
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39374
|
-
this.ctx.beginMarkedContent(type, props);
|
|
39375
|
-
}
|
|
39376
|
-
};
|
|
39377
|
-
CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
|
|
39378
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
39379
|
-
this.ctx.endMarkedContent();
|
|
39380
|
-
}
|
|
39381
|
-
endMarkedContent.call(this, opIdx);
|
|
39382
|
-
};
|
|
39383
|
-
Object.assign(CanvasGraphics.prototype, {
|
|
39384
|
-
[OPS.beginText]: CanvasGraphics.prototype.beginText,
|
|
39385
|
-
[OPS.endText]: CanvasGraphics.prototype.endText,
|
|
39386
|
-
[OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
|
|
39387
|
-
[OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
|
|
39388
|
-
[OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
|
|
39389
|
-
});
|
|
39390
|
-
|
|
39391
39265
|
// src/lib/TextLayer.ts
|
|
39392
39266
|
function isTextNode(node) {
|
|
39393
39267
|
return node.role === "text";
|
|
@@ -39446,6 +39320,7 @@ async function loadTextLayerFonts(document2) {
|
|
|
39446
39320
|
});
|
|
39447
39321
|
return loadedFontsPromise;
|
|
39448
39322
|
}
|
|
39323
|
+
// Second name bound to the same function object as `loadTextLayerFonts`.
var loadDefaultFonts2 = loadTextLayerFonts;
|
|
39449
39324
|
var normalizeMarkedContentId = (idRef) => {
|
|
39450
39325
|
if (idRef == null) {
|
|
39451
39326
|
return null;
|
|
@@ -39458,10 +39333,321 @@ var normalizeMarkedContentId = (idRef) => {
|
|
|
39458
39333
|
}
|
|
39459
39334
|
return `${idRef}`;
|
|
39460
39335
|
};
|
|
39336
|
+
/**
 * Collect the plain text carried by a text-layer node.
 *
 * - Text nodes: `node.text` is returned as-is when it is a string, otherwise
 *   the `text` field of every entry in the `node.text` array is concatenated.
 * - Element nodes: the contents of all children are concatenated in order
 *   (string children are used verbatim, node children are resolved recursively).
 * - Anything else yields an empty string.
 *
 * @param {object} node Text-layer node to read.
 * @returns {string} The concatenated text.
 */
var getNodeContents = (node) => {
  if (isTextNode(node)) {
    const { text } = node;
    if (typeof text === "string") {
      return text;
    }
    const pieces = text.map((chunk) => chunk.text);
    return pieces.join("");
  }
  if (!isElementNode(node)) {
    return "";
  }
  const parts = node.children.map(
    (child) => typeof child === "string" ? child : getNodeContents(child)
  );
  return parts.join("");
};
|
|
39350
|
+
/**
 * Depth-first search for the first descendant accepted by `callback`.
 *
 * Children are visited from the LAST to the FIRST; for each non-string child
 * the callback is tried on the child itself before its own subtree is searched.
 * `root.children` is re-read on every step, so a callback that replaces the
 * array and then returns a truthy value is safe.
 *
 * @param {object} root Node whose `children` are searched.
 * @param {(child: object, parent: object) => unknown} callback Predicate; a truthy result selects `child`.
 * @returns {object | null} The matching node, or `null` when nothing matches.
 */
var findNode = (root, callback) => {
  let index = root.children.length;
  while (index-- > 0) {
    const candidate = root.children[index];
    if (typeof candidate === "string") {
      continue;
    }
    if (callback(candidate, root)) {
      return candidate;
    }
    const nested = isElementNode(candidate) ? findNode(candidate, callback) : null;
    if (nested) {
      return nested;
    }
  }
  return null;
};
|
|
39368
|
+
/**
 * Collect every descendant of `node` accepted by `callback`, in document
 * (pre-order) order. String children are ignored.
 *
 * @param {object} node Node whose `children` are searched.
 * @param {(child: object, parent: object) => unknown} callback Predicate; a truthy result keeps `child`.
 * @returns {object[]} All matching descendants.
 */
var findNodes = (node, callback) => {
  const matches = [];
  node.children.forEach((child) => {
    if (typeof child === "string") {
      return;
    }
    if (callback(child, node)) {
      matches.push(child);
    }
    if (isElementNode(child)) {
      // Descendants are appended right after their ancestor.
      matches.push(...findNodes(child, callback));
    }
  });
  return matches;
};
|
|
39382
|
+
/**
 * Flatten a subtree into a pre-order list of nodes.
 *
 * Strings contribute nothing; every other node contributes itself, followed
 * (for element nodes) by the flattened contents of each child.
 *
 * @param {object | string} node Subtree root.
 * @returns {object[]} The node and all of its non-string descendants.
 */
var flattenNodes = (node) => {
  if (typeof node === "string") {
    return [];
  }
  const flattened = [node];
  if (isElementNode(node)) {
    node.children.forEach((child) => {
      for (const descendant of flattenNodes(child)) {
        flattened.push(descendant);
      }
    });
  }
  return flattened;
};
|
|
39461
39391
|
var MAX_TEXT_DIVS_TO_RENDER2 = 1e5;
|
|
39462
39392
|
var DEFAULT_FONT_SIZE3 = 30;
|
|
39463
39393
|
var DEFAULT_FONT_ASCENT = 0.8;
|
|
39464
39394
|
var HYPHEN_REGEX = /-\n+$/;
|
|
39395
|
+
/**
 * Rebuild one node of a PDF structure tree inside the text-layer tree.
 *
 * Structure elements (nodes with a `role`) are mapped to output elements that
 * are appended to the current parent; content leaves (nodes without a `role`)
 * are looked up by id inside `rootContainer`, detached from wherever they
 * currently sit and re-attached under the current parent.
 *
 * Role mapping (case-insensitive):
 * - root / document / art: no element; `attrs` are merged into the parent.
 * - part / sect: `section` (any heading still on the ancestor chain is closed first).
 * - lbl / lbody / span: transparent, children are attached to the current parent.
 * - link: `a` when the first child is an object reference to a link annotation.
 * - p: `p` (skipped when empty, transparent when it only wraps a `Table`).
 * - l: `ul`; table parts, h1–h6, li, td, th: element with the same role.
 * - figure: one `figure` per child whose marked content is found in `graphics`.
 * - anything else: `div`.
 *
 * @param {object} node Structure-tree node (`role` + `children`) or content leaf (`id`).
 * @param {object} rootContainer Root of the text-layer tree; searched for content leaves.
 * @param {object | null | undefined} graphics SVG tree used to resolve figures; figures are skipped when falsy.
 * @param {Array<object> | null | undefined} annotations Page annotations, matched to link references by `id`.
 * @param {object[]} [parents=[]] Ancestor chain of output elements. NOTE: the
 *   part/sect branch pops entries from this array in place, which callers that
 *   share the array with later siblings will observe.
 * @returns {void}
 */
var decorateStructTree = (node, rootContainer, graphics, annotations, parents = []) => {
  const HEADING_ROLES = ["h1", "h2", "h3", "h4", "h5", "h6"];
  let parent = parents.at(-1) || rootContainer;

  // Recurse into `children` (iterating over a snapshot) with `chain` as ancestor chain.
  const descend = (children, chain) => {
    for (const child of [...children]) {
      decorateStructTree(child, rootContainer, graphics, annotations, chain);
    }
  };

  // Append `element` to the current parent, then rebuild `children` inside it.
  // Every child gets its own copy of the chain so siblings cannot affect each other.
  const nest = (element, children) => {
    parent.children.push(element);
    for (const child of [...children]) {
      decorateStructTree(child, rootContainer, graphics, annotations, [
        ...parents,
        element
      ]);
    }
  };

  if ("role" in node) {
    const role = node.role.toLowerCase();
    switch (role) {
      case "root":
      case "document":
      case "art":
        parent.attrs = parent.attrs ?? {};
        Object.assign(parent.attrs, node.attrs);
        descend(node.children, parents);
        break;
      case "part":
      case "sect": {
        // A section cannot live inside a heading: unwind the chain until no
        // heading is left on it (this intentionally mutates `parents`).
        while (parents.some((p) => HEADING_ROLES.includes(p.role))) {
          parents.pop();
          parent = parents.at(-1) || rootContainer;
        }
        nest({ role: "section", children: [], attrs: node.attrs }, node.children);
        break;
      }
      case "lbl":
      case "lbody":
      case "span":
        descend(node.children, parents);
        break;
      case "link": {
        const linkChildren = [...node.children];
        const [ref] = linkChildren;
        if (!ref) {
          // Empty Link element: nothing to render (previously a TypeError).
          break;
        }
        if (ref.type !== "object") {
          console.warn(
            `Unsupported link type: ${ref.type}`
          );
          // The first child is not an object reference, so it is regular
          // content and must be rebuilt together with its siblings.
          descend(linkChildren, parents);
          break;
        }
        const content = linkChildren.slice(1);
        const annotation = annotations?.find(
          (note) => note.id === ref.id
        );
        if (!annotation) {
          console.warn("Link not found", ref);
          descend(content, parents);
          break;
        }
        if (isLinkAnnotation(annotation)) {
          nest({ role: "a", href: annotation.url, children: [] }, content);
        } else {
          console.warn(`Unsupported annotation subtype: ${annotation.subtype}`);
          descend(content, parents);
        }
        break;
      }
      case "p": {
        if (node.children.length === 0) {
          break;
        }
        if (node.children.length === 1 && node.children[0].role === "Table") {
          // A paragraph that only wraps a table is transparent.
          decorateStructTree(
            node.children[0],
            rootContainer,
            graphics,
            annotations,
            parents
          );
          break;
        }
        nest({ role: "p", children: [], attrs: node.attrs }, node.children);
        break;
      }
      case "l":
        nest({ role: "ul", children: [], attrs: node.attrs }, node.children);
        break;
      case "table":
      case "thead":
      case "tbody":
      case "tfoot":
      case "tr":
      case "h1":
      case "h2":
      case "h3":
      case "h4":
      case "h5":
      case "h6":
      case "li":
      case "td":
      case "th":
        nest({ role, children: [], attrs: node.attrs }, node.children);
        break;
      case "figure": {
        if (graphics) {
          for (const child of [...node.children]) {
            if (!("id" in child)) {
              continue;
            }
            const contentId = normalizeMarkedContentId(child.id);
            const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
            const graphic = findSvgNode(
              graphics,
              (node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
            );
            if (!graphic) {
              continue;
            }
            parent.children.push({
              role: "figure",
              href: graphic.attrs.href,
              x: graphic.attrs.x || 0,
              y: graphic.attrs.y || 0,
              width: graphic.attrs.width || 0,
              height: graphic.attrs.height || 0,
              alt: node.alt
            });
          }
        }
        break;
      }
      default:
        nest({ role: "div", children: [], attrs: node.attrs }, node.children);
        break;
    }
  } else {
    // Content leaf: move the matching marked-content container under `parent`.
    const contentId = normalizeMarkedContentId(node.id);
    findNode(rootContainer, (child, previousParent) => {
      if (child.id !== contentId) {
        return;
      }
      // Detach from the old location before re-attaching.
      previousParent.children = previousParent.children.filter(
        (c) => c !== child
      );
      if (isElementNode(child)) {
        // Containers are unwrapped: only their children are moved.
        const children = child.children;
        const lastTextItem = parent.children.at(-1);
        if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && getNodeContents(lastTextItem).trim() === "") {
          // The parent already ends with whitespace: drop leading blank text
          // nodes so blanks are not doubled at the seam.
          while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && getNodeContents(children[0]).trim() === "") {
            children.shift();
          }
        }
        parent.children.push(...children);
      } else {
        parent.children.push(child);
      }
      return true;
    });
  }
};
|
|
39465
39651
|
async function createTextLayer(page, {
|
|
39466
39652
|
canvasFactory,
|
|
39467
39653
|
viewport = page.getViewport({ scale: 1 }),
|
|
@@ -39473,7 +39659,6 @@ async function createTextLayer(page, {
|
|
|
39473
39659
|
const ascentCache = /* @__PURE__ */ new Map();
|
|
39474
39660
|
const canvasCache = /* @__PURE__ */ new Map();
|
|
39475
39661
|
const textDivs = [];
|
|
39476
|
-
const markedContent = /* @__PURE__ */ new Map();
|
|
39477
39662
|
const [tree, contentSource] = await Promise.all([
|
|
39478
39663
|
page.getStructTree(),
|
|
39479
39664
|
page.getTextContent({ includeMarkedContent: true })
|
|
@@ -39519,10 +39704,6 @@ async function createTextLayer(page, {
|
|
|
39519
39704
|
const id2 = normalizeMarkedContentId(item);
|
|
39520
39705
|
if (id2 != null) {
|
|
39521
39706
|
container.id = id2;
|
|
39522
|
-
markedContent.set(id2, {
|
|
39523
|
-
node: container,
|
|
39524
|
-
parent
|
|
39525
|
-
});
|
|
39526
39707
|
}
|
|
39527
39708
|
} else if (item.type === "endMarkedContent") {
|
|
39528
39709
|
container = parents.pop();
|
|
@@ -39685,215 +39866,6 @@ async function createTextLayer(page, {
|
|
|
39685
39866
|
ascentCache.set(fontFamily, ratio);
|
|
39686
39867
|
return ratio;
|
|
39687
39868
|
};
|
|
39688
|
-
const renderStructTreeNode = (node, parents) => {
|
|
39689
|
-
let parent = parents[parents.length - 1] || rootContainer;
|
|
39690
|
-
if ("role" in node) {
|
|
39691
|
-
const role = node.role.toLowerCase();
|
|
39692
|
-
switch (role) {
|
|
39693
|
-
case "root":
|
|
39694
|
-
case "document":
|
|
39695
|
-
case "art":
|
|
39696
|
-
parent.attrs ?? (parent.attrs = {});
|
|
39697
|
-
Object.assign(parent.attrs, node.attrs);
|
|
39698
|
-
for (const child of [...node.children]) {
|
|
39699
|
-
renderStructTreeNode(child, parents);
|
|
39700
|
-
}
|
|
39701
|
-
break;
|
|
39702
|
-
case "sect": {
|
|
39703
|
-
const section = {
|
|
39704
|
-
role: "section",
|
|
39705
|
-
children: [],
|
|
39706
|
-
attrs: node.attrs
|
|
39707
|
-
};
|
|
39708
|
-
while (parents.find(
|
|
39709
|
-
(p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
|
|
39710
|
-
)) {
|
|
39711
|
-
parents.pop();
|
|
39712
|
-
parent = parents.at(-1) || rootContainer;
|
|
39713
|
-
}
|
|
39714
|
-
parent.children.push(section);
|
|
39715
|
-
for (const child of [...node.children]) {
|
|
39716
|
-
renderStructTreeNode(child, [...parents, section]);
|
|
39717
|
-
}
|
|
39718
|
-
break;
|
|
39719
|
-
}
|
|
39720
|
-
case "lbl":
|
|
39721
|
-
case "lbody":
|
|
39722
|
-
case "span": {
|
|
39723
|
-
for (const child of [...node.children]) {
|
|
39724
|
-
renderStructTreeNode(child, parents);
|
|
39725
|
-
}
|
|
39726
|
-
break;
|
|
39727
|
-
}
|
|
39728
|
-
case "link": {
|
|
39729
|
-
const [ref, ...children] = node.children;
|
|
39730
|
-
if (ref.type !== "object") {
|
|
39731
|
-
console.warn(
|
|
39732
|
-
`Unsupported link type: ${ref.type}`
|
|
39733
|
-
);
|
|
39734
|
-
for (const child of children) {
|
|
39735
|
-
renderStructTreeNode(child, parents);
|
|
39736
|
-
}
|
|
39737
|
-
return;
|
|
39738
|
-
}
|
|
39739
|
-
const annotation = annotations?.find(
|
|
39740
|
-
(note) => note.id === ref.id
|
|
39741
|
-
);
|
|
39742
|
-
if (!annotation) {
|
|
39743
|
-
console.warn("Link not found", ref);
|
|
39744
|
-
for (const child of children) {
|
|
39745
|
-
renderStructTreeNode(child, parents);
|
|
39746
|
-
}
|
|
39747
|
-
return;
|
|
39748
|
-
}
|
|
39749
|
-
if (isLinkAnnotation(annotation)) {
|
|
39750
|
-
const anchor = {
|
|
39751
|
-
role: "a",
|
|
39752
|
-
href: annotation.url,
|
|
39753
|
-
children: []
|
|
39754
|
-
};
|
|
39755
|
-
parent.children.push(anchor);
|
|
39756
|
-
for (const child of children) {
|
|
39757
|
-
renderStructTreeNode(child, [...parents, anchor]);
|
|
39758
|
-
}
|
|
39759
|
-
} else {
|
|
39760
|
-
console.warn(
|
|
39761
|
-
`Unsupported annotation subtype: ${annotation.subtype}`
|
|
39762
|
-
);
|
|
39763
|
-
for (const child of children) {
|
|
39764
|
-
renderStructTreeNode(child, parents);
|
|
39765
|
-
}
|
|
39766
|
-
}
|
|
39767
|
-
break;
|
|
39768
|
-
}
|
|
39769
|
-
case "p": {
|
|
39770
|
-
if (node.children.length === 0) {
|
|
39771
|
-
break;
|
|
39772
|
-
}
|
|
39773
|
-
if (node.children.length === 1 && node.children[0].role === "Table") {
|
|
39774
|
-
renderStructTreeNode(node.children[0], parents);
|
|
39775
|
-
break;
|
|
39776
|
-
}
|
|
39777
|
-
const paragraph = {
|
|
39778
|
-
role: "p",
|
|
39779
|
-
children: [],
|
|
39780
|
-
attrs: node.attrs
|
|
39781
|
-
};
|
|
39782
|
-
parent.children.push(paragraph);
|
|
39783
|
-
for (const child of [...node.children]) {
|
|
39784
|
-
renderStructTreeNode(child, [...parents, paragraph]);
|
|
39785
|
-
}
|
|
39786
|
-
break;
|
|
39787
|
-
}
|
|
39788
|
-
case "l": {
|
|
39789
|
-
const list = {
|
|
39790
|
-
role: "ul",
|
|
39791
|
-
children: [],
|
|
39792
|
-
attrs: node.attrs
|
|
39793
|
-
};
|
|
39794
|
-
parent.children.push(list);
|
|
39795
|
-
for (const child of [...node.children]) {
|
|
39796
|
-
renderStructTreeNode(child, [...parents, list]);
|
|
39797
|
-
}
|
|
39798
|
-
break;
|
|
39799
|
-
}
|
|
39800
|
-
case "table":
|
|
39801
|
-
case "thead":
|
|
39802
|
-
case "tbody":
|
|
39803
|
-
case "tfoot":
|
|
39804
|
-
case "tr":
|
|
39805
|
-
case "h1":
|
|
39806
|
-
case "h2":
|
|
39807
|
-
case "h3":
|
|
39808
|
-
case "h4":
|
|
39809
|
-
case "h5":
|
|
39810
|
-
case "h6":
|
|
39811
|
-
case "li":
|
|
39812
|
-
case "td":
|
|
39813
|
-
case "th": {
|
|
39814
|
-
const block = {
|
|
39815
|
-
role,
|
|
39816
|
-
children: [],
|
|
39817
|
-
attrs: node.attrs
|
|
39818
|
-
};
|
|
39819
|
-
parent.children.push(block);
|
|
39820
|
-
for (const child of [...node.children]) {
|
|
39821
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
39822
|
-
}
|
|
39823
|
-
break;
|
|
39824
|
-
}
|
|
39825
|
-
case "figure": {
|
|
39826
|
-
const ids = [...node.children].map((child) => {
|
|
39827
|
-
const id2 = normalizeMarkedContentId(child);
|
|
39828
|
-
if (!id2) {
|
|
39829
|
-
return null;
|
|
39830
|
-
}
|
|
39831
|
-
const span = markedContent.get(id2);
|
|
39832
|
-
if (!span) {
|
|
39833
|
-
return id2;
|
|
39834
|
-
}
|
|
39835
|
-
if (span.parent) {
|
|
39836
|
-
span.parent.children = span.parent.children.filter(
|
|
39837
|
-
(child2) => child2 !== span.node
|
|
39838
|
-
);
|
|
39839
|
-
}
|
|
39840
|
-
return id2;
|
|
39841
|
-
}).filter((id2) => id2 !== null);
|
|
39842
|
-
if (graphics) {
|
|
39843
|
-
for (const markedId of ids) {
|
|
39844
|
-
const figureId = normalizeMarkedContentId(markedId);
|
|
39845
|
-
if (!figureId) {
|
|
39846
|
-
continue;
|
|
39847
|
-
}
|
|
39848
|
-
const useId = `#${id("marked_content" /* MarkedContent */, figureId, graphics.attrs.id)}`;
|
|
39849
|
-
const graphic = findSvgNode(
|
|
39850
|
-
graphics,
|
|
39851
|
-
(node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
|
|
39852
|
-
);
|
|
39853
|
-
if (!graphic) {
|
|
39854
|
-
continue;
|
|
39855
|
-
}
|
|
39856
|
-
const figure = {
|
|
39857
|
-
role: "figure",
|
|
39858
|
-
href: graphic.attrs.href,
|
|
39859
|
-
x: graphic.attrs.x || 0,
|
|
39860
|
-
y: graphic.attrs.y || 0,
|
|
39861
|
-
width: graphic.attrs.width || 0,
|
|
39862
|
-
height: graphic.attrs.height || 0,
|
|
39863
|
-
alt: node.alt
|
|
39864
|
-
};
|
|
39865
|
-
parent.children.push(figure);
|
|
39866
|
-
}
|
|
39867
|
-
}
|
|
39868
|
-
break;
|
|
39869
|
-
}
|
|
39870
|
-
default: {
|
|
39871
|
-
const block = {
|
|
39872
|
-
role: "div",
|
|
39873
|
-
children: [],
|
|
39874
|
-
attrs: node.attrs
|
|
39875
|
-
};
|
|
39876
|
-
parent.children.push(block);
|
|
39877
|
-
for (const child of [...node.children]) {
|
|
39878
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
39879
|
-
}
|
|
39880
|
-
break;
|
|
39881
|
-
}
|
|
39882
|
-
}
|
|
39883
|
-
} else {
|
|
39884
|
-
const span = markedContent.get(`${node.id}`);
|
|
39885
|
-
if (!span) {
|
|
39886
|
-
return;
|
|
39887
|
-
}
|
|
39888
|
-
if (span.parent) {
|
|
39889
|
-
span.parent.children = span.parent.children.filter(
|
|
39890
|
-
(child) => child !== span.node
|
|
39891
|
-
);
|
|
39892
|
-
}
|
|
39893
|
-
parent.children.push(span.node);
|
|
39894
|
-
}
|
|
39895
|
-
};
|
|
39896
|
-
await loadDefaultFonts();
|
|
39897
39869
|
const reader = textContentSource.getReader();
|
|
39898
39870
|
while (true) {
|
|
39899
39871
|
const { value, done } = await reader.read();
|
|
@@ -39906,7 +39878,7 @@ async function createTextLayer(page, {
|
|
|
39906
39878
|
const root = tree?.children[0]?.children[0];
|
|
39907
39879
|
if (root?.children) {
|
|
39908
39880
|
for (const child of root.children) {
|
|
39909
|
-
|
|
39881
|
+
decorateStructTree(child, rootContainer, graphics, annotations);
|
|
39910
39882
|
}
|
|
39911
39883
|
}
|
|
39912
39884
|
ascentCache.clear();
|
|
@@ -39917,22 +39889,134 @@ async function createTextLayer(page, {
|
|
|
39917
39889
|
return rootContainer;
|
|
39918
39890
|
}
|
|
39919
39891
|
|
|
39920
|
-
// src/lib/
|
|
39921
|
-
var
|
|
39922
|
-
|
|
39923
|
-
|
|
39924
|
-
|
|
39925
|
-
|
|
39926
|
-
|
|
39927
|
-
|
|
39928
|
-
|
|
39929
|
-
|
|
39892
|
+
// src/lib/WasmFactory.ts
|
|
39893
|
+
/**
 * Factory that resolves WebAssembly payloads from modules shipped next to
 * this bundle instead of fetching them from a URL.
 */
var WasmFactory = class {
  /**
   * Resolve the payload registered for `filename`.
   *
   * The import specifiers are kept as literal strings so that each chunk can
   * be statically discovered.
   *
   * @param {{ filename: string }} request Name of the requested file.
   * @returns {Promise<unknown>} The default export of the matching module, or
   *   an empty `Uint8Array` when the name is not recognised.
   */
  async fetch({
    filename
  }) {
    if (filename === "openjpeg.wasm") {
      const bundled = await import("./openjpeg-TRZ4ANDN.js");
      return bundled.default;
    }
    if (filename === "qcms_bg") {
      const bundled = await import("./qcms_bg-PRPVJQFC.js");
      return bundled.default;
    }
    return new Uint8Array(0);
  }
};
|
|
39910
|
+
|
|
39911
|
+
// src/lib/StandardFontDataFactory.ts
|
|
39912
|
+
/**
 * Standard-font factory that resolves font data from modules shipped next to
 * this bundle; the base class is configured with no base URL.
 */
var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
  constructor() {
    super({
      baseUrl: null
    });
  }
  /**
   * Fetch the corresponding standard font data.
   * Every font file needs its own literal dynamic import for the bundler to include it.
   *
   * @param {{ filename: string }} request Name of the requested font file.
   * @returns {Promise<unknown>} The default export of the matching module, or
   *   an empty `Uint8Array` when the name is not recognised.
   */
  async fetch({
    filename
  }) {
    const loaders = new Map([
      ["FoxitDingbats.pfb", () => import("./FoxitDingbats-UZC34T4F.js")],
      ["FoxitFixed.pfb", () => import("./FoxitFixed-DSJVG3IJ.js")],
      ["FoxitFixedBold.pfb", () => import("./FoxitFixedBold-YSX4X5NA.js")],
      ["FoxitFixedBoldItalic.pfb", () => import("./FoxitFixedBoldItalic-IFYYA6RR.js")],
      ["FoxitFixedItalic.pfb", () => import("./FoxitFixedItalic-4PEFPCQV.js")],
      ["FoxitSerif.pfb", () => import("./FoxitSerif-7SSP2H2U.js")],
      ["FoxitSerifBold.pfb", () => import("./FoxitSerifBold-KLMURJ6N.js")],
      ["FoxitSerifBoldItalic.pfb", () => import("./FoxitSerifBoldItalic-VSYHWLRM.js")],
      ["FoxitSerifItalic.pfb", () => import("./FoxitSerifItalic-NTOPMQDD.js")],
      ["FoxitSymbol.pfb", () => import("./FoxitSymbol-QPUM74UN.js")],
      ["LiberationSans-Bold.ttf", () => import("./LiberationSans-Bold-BXFYN4PV.js")],
      ["LiberationSans-BoldItalic.ttf", () => import("./LiberationSans-BoldItalic-DQQC5TNJ.js")],
      ["LiberationSans-Italic.ttf", () => import("./LiberationSans-Italic-Z4MFN6PY.js")],
      ["LiberationSans-Regular.ttf", () => import("./LiberationSans-Regular-MACKS2VL.js")]
    ]);
    const load = loaders.get(filename);
    if (load === undefined) {
      return Uint8Array.from([]);
    }
    const bundled = await load();
    return bundled.default;
  }
};
|
|
39964
|
+
|
|
39965
|
+
// src/lib/CanvasGraphics.ts
|
|
39966
|
+
// Capture the original prototype methods before they are replaced, so every
// override below can delegate to the implementation it wraps.
var {
  beginDrawing,
  beginText,
  endText,
  beginMarkedContent,
  beginMarkedContentProps,
  endMarkedContent
} = CanvasGraphics.prototype;

// When drawing into an SvgCanvasContext, force `transparency` off. The caller's
// options object is copied rather than mutated.
CanvasGraphics.prototype.beginDrawing = function(options) {
  if (this.ctx instanceof SvgCanvasContext) {
    return beginDrawing.call(this, { ...options, transparency: false });
  }
  return beginDrawing.call(this, options);
};

// Run the original text operators, then mirror them on an SvgCanvasContext.
CanvasGraphics.prototype.beginText = function(opIdx) {
  beginText.call(this, opIdx);
  if (this.ctx instanceof SvgCanvasContext) {
    this.ctx.beginText();
  }
};
CanvasGraphics.prototype.endText = function(opIdx) {
  endText.call(this, opIdx);
  if (this.ctx instanceof SvgCanvasContext) {
    this.ctx.endText();
  }
};

// Marked-content operators: the original runs first on begin, and last on end,
// so the SVG context is notified while the marked content is still open.
CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
  // Forward the tag itself; the canvas context was previously passed here by mistake.
  beginMarkedContent.call(this, opIdx, type);
  if (this.ctx instanceof SvgCanvasContext) {
    this.ctx.beginMarkedContent(type);
  }
};
CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
  beginMarkedContentProps.call(this, opIdx, type, props);
  if (this.ctx instanceof SvgCanvasContext) {
    this.ctx.beginMarkedContent(type, props);
  }
};
CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
  if (this.ctx instanceof SvgCanvasContext) {
    this.ctx.endMarkedContent();
  }
  endMarkedContent.call(this, opIdx);
};

// Also install the overrides under their OPS codes, so lookups by operator
// code resolve to the patched functions.
Object.assign(CanvasGraphics.prototype, {
  [OPS.beginText]: CanvasGraphics.prototype.beginText,
  [OPS.endText]: CanvasGraphics.prototype.endText,
  [OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
  [OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
  [OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
});
|
|
40017
|
+
|
|
40018
|
+
// src/lib/TextLayer_v2.ts
|
|
40019
|
+
var import_opentype = __toESM(require_opentype(), 1);
|
|
39936
40020
|
var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
|
|
39937
40021
|
let promise = null;
|
|
39938
40022
|
return () => {
|
|
@@ -39948,12 +40032,6 @@ var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
|
|
|
39948
40032
|
return promise;
|
|
39949
40033
|
};
|
|
39950
40034
|
})();
|
|
39951
|
-
var computeText = (node) => {
|
|
39952
|
-
if (typeof node.text === "string") {
|
|
39953
|
-
return node.text;
|
|
39954
|
-
}
|
|
39955
|
-
return node.text.map((t) => t.text).join("");
|
|
39956
|
-
};
|
|
39957
40035
|
async function createTextLayerV2(page, {
|
|
39958
40036
|
graphics,
|
|
39959
40037
|
annotations: _annotations
|
|
@@ -40059,210 +40137,6 @@ async function createTextLayerV2(page, {
|
|
|
40059
40137
|
});
|
|
40060
40138
|
resetTextItem();
|
|
40061
40139
|
};
|
|
40062
|
-
const renderStructTreeNode = (node, parents) => {
|
|
40063
|
-
let parent = parents.at(-1) || rootContainer;
|
|
40064
|
-
if ("role" in node) {
|
|
40065
|
-
const role = node.role.toLowerCase();
|
|
40066
|
-
switch (role) {
|
|
40067
|
-
case "root":
|
|
40068
|
-
case "document":
|
|
40069
|
-
case "art":
|
|
40070
|
-
parent.attrs ?? (parent.attrs = {});
|
|
40071
|
-
Object.assign(parent.attrs, node.attrs);
|
|
40072
|
-
for (const child of [...node.children]) {
|
|
40073
|
-
renderStructTreeNode(child, parents);
|
|
40074
|
-
}
|
|
40075
|
-
break;
|
|
40076
|
-
case "part":
|
|
40077
|
-
case "sect": {
|
|
40078
|
-
const section = {
|
|
40079
|
-
role: "section",
|
|
40080
|
-
children: [],
|
|
40081
|
-
attrs: node.attrs
|
|
40082
|
-
};
|
|
40083
|
-
while (parents.find(
|
|
40084
|
-
(p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
|
|
40085
|
-
)) {
|
|
40086
|
-
parents.pop();
|
|
40087
|
-
parent = parents.at(-1) || rootContainer;
|
|
40088
|
-
}
|
|
40089
|
-
parent.children.push(section);
|
|
40090
|
-
for (const child of [...node.children]) {
|
|
40091
|
-
renderStructTreeNode(child, [...parents, section]);
|
|
40092
|
-
}
|
|
40093
|
-
break;
|
|
40094
|
-
}
|
|
40095
|
-
case "lbl":
|
|
40096
|
-
case "lbody":
|
|
40097
|
-
case "span": {
|
|
40098
|
-
for (const child of [...node.children]) {
|
|
40099
|
-
renderStructTreeNode(child, parents);
|
|
40100
|
-
}
|
|
40101
|
-
break;
|
|
40102
|
-
}
|
|
40103
|
-
case "link": {
|
|
40104
|
-
const [ref, ...children] = node.children;
|
|
40105
|
-
if (ref.type !== "object") {
|
|
40106
|
-
console.warn(
|
|
40107
|
-
`Unsupported link type: ${ref.type}`
|
|
40108
|
-
);
|
|
40109
|
-
for (const child of children) {
|
|
40110
|
-
renderStructTreeNode(child, parents);
|
|
40111
|
-
}
|
|
40112
|
-
return;
|
|
40113
|
-
}
|
|
40114
|
-
const annotation = annotations?.find(
|
|
40115
|
-
(note) => note.id === ref.id
|
|
40116
|
-
);
|
|
40117
|
-
if (!annotation) {
|
|
40118
|
-
console.warn("Link not found", ref);
|
|
40119
|
-
for (const child of children) {
|
|
40120
|
-
renderStructTreeNode(child, parents);
|
|
40121
|
-
}
|
|
40122
|
-
return;
|
|
40123
|
-
}
|
|
40124
|
-
if (isLinkAnnotation(annotation)) {
|
|
40125
|
-
const anchor = {
|
|
40126
|
-
role: "a",
|
|
40127
|
-
href: annotation.url,
|
|
40128
|
-
children: []
|
|
40129
|
-
};
|
|
40130
|
-
parent.children.push(anchor);
|
|
40131
|
-
for (const child of children) {
|
|
40132
|
-
renderStructTreeNode(child, [...parents, anchor]);
|
|
40133
|
-
}
|
|
40134
|
-
} else {
|
|
40135
|
-
console.warn(
|
|
40136
|
-
`Unsupported annotation subtype: ${annotation.subtype}`
|
|
40137
|
-
);
|
|
40138
|
-
for (const child of children) {
|
|
40139
|
-
renderStructTreeNode(child, parents);
|
|
40140
|
-
}
|
|
40141
|
-
}
|
|
40142
|
-
break;
|
|
40143
|
-
}
|
|
40144
|
-
case "p": {
|
|
40145
|
-
if (node.children.length === 0) {
|
|
40146
|
-
break;
|
|
40147
|
-
}
|
|
40148
|
-
if (node.children.length === 1 && node.children[0].role === "Table") {
|
|
40149
|
-
renderStructTreeNode(node.children[0], parents);
|
|
40150
|
-
break;
|
|
40151
|
-
}
|
|
40152
|
-
const paragraph = {
|
|
40153
|
-
role: "p",
|
|
40154
|
-
children: [],
|
|
40155
|
-
attrs: node.attrs
|
|
40156
|
-
};
|
|
40157
|
-
parent.children.push(paragraph);
|
|
40158
|
-
for (const child of [...node.children]) {
|
|
40159
|
-
renderStructTreeNode(child, [...parents, paragraph]);
|
|
40160
|
-
}
|
|
40161
|
-
break;
|
|
40162
|
-
}
|
|
40163
|
-
case "l": {
|
|
40164
|
-
const list = {
|
|
40165
|
-
role: "ul",
|
|
40166
|
-
children: [],
|
|
40167
|
-
attrs: node.attrs
|
|
40168
|
-
};
|
|
40169
|
-
parent.children.push(list);
|
|
40170
|
-
for (const child of [...node.children]) {
|
|
40171
|
-
renderStructTreeNode(child, [...parents, list]);
|
|
40172
|
-
}
|
|
40173
|
-
break;
|
|
40174
|
-
}
|
|
40175
|
-
case "table":
|
|
40176
|
-
case "thead":
|
|
40177
|
-
case "tbody":
|
|
40178
|
-
case "tfoot":
|
|
40179
|
-
case "tr":
|
|
40180
|
-
case "h1":
|
|
40181
|
-
case "h2":
|
|
40182
|
-
case "h3":
|
|
40183
|
-
case "h4":
|
|
40184
|
-
case "h5":
|
|
40185
|
-
case "h6":
|
|
40186
|
-
case "li":
|
|
40187
|
-
case "td":
|
|
40188
|
-
case "th": {
|
|
40189
|
-
const block = {
|
|
40190
|
-
role,
|
|
40191
|
-
children: [],
|
|
40192
|
-
attrs: node.attrs
|
|
40193
|
-
};
|
|
40194
|
-
parent.children.push(block);
|
|
40195
|
-
for (const child of [...node.children]) {
|
|
40196
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
40197
|
-
}
|
|
40198
|
-
break;
|
|
40199
|
-
}
|
|
40200
|
-
case "figure": {
|
|
40201
|
-
if (graphics) {
|
|
40202
|
-
for (const child of [...node.children]) {
|
|
40203
|
-
if (!("id" in child)) {
|
|
40204
|
-
continue;
|
|
40205
|
-
}
|
|
40206
|
-
const contentId = normalizeMarkedContentId(child.id);
|
|
40207
|
-
const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
|
|
40208
|
-
const graphic = findSvgNode(
|
|
40209
|
-
graphics,
|
|
40210
|
-
(node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
|
|
40211
|
-
);
|
|
40212
|
-
if (!graphic) {
|
|
40213
|
-
continue;
|
|
40214
|
-
}
|
|
40215
|
-
parent.children.push({
|
|
40216
|
-
role: "figure",
|
|
40217
|
-
href: graphic.attrs.href,
|
|
40218
|
-
x: graphic.attrs.x || 0,
|
|
40219
|
-
y: graphic.attrs.y || 0,
|
|
40220
|
-
width: graphic.attrs.width || 0,
|
|
40221
|
-
height: graphic.attrs.height || 0,
|
|
40222
|
-
alt: node.alt
|
|
40223
|
-
});
|
|
40224
|
-
}
|
|
40225
|
-
}
|
|
40226
|
-
break;
|
|
40227
|
-
}
|
|
40228
|
-
default: {
|
|
40229
|
-
const block = {
|
|
40230
|
-
role: "div",
|
|
40231
|
-
children: [],
|
|
40232
|
-
attrs: node.attrs
|
|
40233
|
-
};
|
|
40234
|
-
parent.children.push(block);
|
|
40235
|
-
for (const child of [...node.children]) {
|
|
40236
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
40237
|
-
}
|
|
40238
|
-
break;
|
|
40239
|
-
}
|
|
40240
|
-
}
|
|
40241
|
-
} else {
|
|
40242
|
-
const contentId = normalizeMarkedContentId(node.id);
|
|
40243
|
-
findNode(rootContainer, (child, previousParent) => {
|
|
40244
|
-
if (child.id !== contentId) {
|
|
40245
|
-
return;
|
|
40246
|
-
}
|
|
40247
|
-
previousParent.children = previousParent.children.filter(
|
|
40248
|
-
(c) => c !== child
|
|
40249
|
-
);
|
|
40250
|
-
if (isElementNode(child)) {
|
|
40251
|
-
const children = child.children;
|
|
40252
|
-
const lastTextItem = parent.children.at(-1);
|
|
40253
|
-
if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && computeText(lastTextItem).trim() === "") {
|
|
40254
|
-
while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && computeText(children[0]).trim() === "") {
|
|
40255
|
-
children.shift();
|
|
40256
|
-
}
|
|
40257
|
-
}
|
|
40258
|
-
parent.children.push(...children);
|
|
40259
|
-
} else {
|
|
40260
|
-
parent.children.push(child);
|
|
40261
|
-
}
|
|
40262
|
-
return true;
|
|
40263
|
-
});
|
|
40264
|
-
}
|
|
40265
|
-
};
|
|
40266
40140
|
for (let i = 0; i < operatorsList.fnArray.length; i++) {
|
|
40267
40141
|
const fnId = operatorsList.fnArray[i];
|
|
40268
40142
|
const args = operatorsList.argsArray[i];
|
|
@@ -40460,9 +40334,22 @@ async function createTextLayerV2(page, {
|
|
|
40460
40334
|
}
|
|
40461
40335
|
}
|
|
40462
40336
|
closeTextItem();
|
|
40463
|
-
|
|
40337
|
+
decorateStructTree(
|
|
40338
|
+
structTree,
|
|
40339
|
+
rootContainer,
|
|
40340
|
+
graphics,
|
|
40341
|
+
annotations
|
|
40342
|
+
);
|
|
40464
40343
|
return rootContainer;
|
|
40465
40344
|
}
|
|
40345
|
+
|
|
40346
|
+
// src/index.ts
|
|
40347
|
+
var textLayerUtils = {
|
|
40348
|
+
findNode,
|
|
40349
|
+
findNodes,
|
|
40350
|
+
getNodeContents,
|
|
40351
|
+
flattenNodes
|
|
40352
|
+
};
|
|
40466
40353
|
export {
|
|
40467
40354
|
AbortException,
|
|
40468
40355
|
AnnotationEditorLayer,
|
|
@@ -40512,7 +40399,6 @@ export {
|
|
|
40512
40399
|
createValidAbsoluteUrl,
|
|
40513
40400
|
destroySvgContext,
|
|
40514
40401
|
fetchData,
|
|
40515
|
-
fontLoader,
|
|
40516
40402
|
getDocument,
|
|
40517
40403
|
getFilenameFromUrl,
|
|
40518
40404
|
getPdfFilenameFromUrl,
|
|
@@ -40560,11 +40446,10 @@ export {
|
|
|
40560
40446
|
isValidExplicitDest,
|
|
40561
40447
|
isWatermarkAnnotation,
|
|
40562
40448
|
isWidgetAnnotation,
|
|
40563
|
-
loadDefaultFonts,
|
|
40449
|
+
loadDefaultFonts2 as loadDefaultFonts,
|
|
40564
40450
|
loadTextLayerFonts,
|
|
40565
40451
|
makeSerializable,
|
|
40566
40452
|
noContextMenu,
|
|
40567
|
-
normalizeMarkedContentId,
|
|
40568
40453
|
normalizeUnicode,
|
|
40569
40454
|
parseRgbaColor,
|
|
40570
40455
|
renderSvgNode,
|
|
@@ -40572,6 +40457,7 @@ export {
|
|
|
40572
40457
|
setLayerDimensions,
|
|
40573
40458
|
shadow,
|
|
40574
40459
|
stopEvent,
|
|
40460
|
+
textLayerUtils,
|
|
40575
40461
|
toDataUrl,
|
|
40576
40462
|
toSvgNode,
|
|
40577
40463
|
toSvgString,
|