@chialab/pdfjs-lib 1.0.0-alpha.28 → 1.0.0-alpha.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/index.js +454 -582
- package/dist/index.d.ts +7 -1
- package/dist/lib/Canvas.d.ts +0 -5
- package/dist/lib/TextLayer.d.ts +12 -2
- package/dist/lib/TextLayer_v2.d.ts +2 -2
- package/dist/node/index.js +453 -567
- package/package.json +1 -1
package/dist/browser/index.js
CHANGED
|
@@ -5885,25 +5885,11 @@ _serializable = new WeakMap();
|
|
|
5885
5885
|
// src/lib/Canvas.ts
|
|
5886
5886
|
var Path2DConstructor = Path2D;
|
|
5887
5887
|
async function createCanvas(width, height) {
|
|
5888
|
-
await loadDefaultFonts();
|
|
5889
5888
|
const canvas = document.createElement("canvas");
|
|
5890
5889
|
canvas.width = width;
|
|
5891
5890
|
canvas.height = height;
|
|
5892
5891
|
return canvas;
|
|
5893
5892
|
}
|
|
5894
|
-
var loadingFontPromise = null;
|
|
5895
|
-
async function loadDefaultFonts() {
|
|
5896
|
-
if (loadingFontPromise === null) {
|
|
5897
|
-
loadingFontPromise = import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => new Blob([module.default], { type: "font/ttf" })).then(async (sansBlob) => {
|
|
5898
|
-
const fontSans = new FontFace(
|
|
5899
|
-
"Liberation Sans",
|
|
5900
|
-
`url(${URL.createObjectURL(sansBlob)})`
|
|
5901
|
-
);
|
|
5902
|
-
document.fonts.add(await fontSans.load());
|
|
5903
|
-
});
|
|
5904
|
-
}
|
|
5905
|
-
return loadingFontPromise;
|
|
5906
|
-
}
|
|
5907
5893
|
|
|
5908
5894
|
// src/lib/Path2D.ts
|
|
5909
5895
|
function toFixed(value, digits = 6) {
|
|
@@ -27871,79 +27857,6 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
|
|
|
27871
27857
|
return makeSerializable(annotations);
|
|
27872
27858
|
};
|
|
27873
27859
|
|
|
27874
|
-
// src/lib/WasmFactory.ts
|
|
27875
|
-
var WasmFactory = class {
|
|
27876
|
-
async fetch({
|
|
27877
|
-
filename
|
|
27878
|
-
}) {
|
|
27879
|
-
switch (filename) {
|
|
27880
|
-
case "openjpeg.wasm":
|
|
27881
|
-
return import("./openjpeg-QLA762TL.js").then(
|
|
27882
|
-
(module) => module.default
|
|
27883
|
-
);
|
|
27884
|
-
case "qcms_bg":
|
|
27885
|
-
return import("./qcms_bg-BCJEADMU.js").then(
|
|
27886
|
-
(module) => module.default
|
|
27887
|
-
);
|
|
27888
|
-
}
|
|
27889
|
-
return Uint8Array.from([]);
|
|
27890
|
-
}
|
|
27891
|
-
};
|
|
27892
|
-
|
|
27893
|
-
// src/lib/StandardFontDataFactory.ts
|
|
27894
|
-
var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
|
|
27895
|
-
constructor() {
|
|
27896
|
-
super({
|
|
27897
|
-
baseUrl: null
|
|
27898
|
-
});
|
|
27899
|
-
}
|
|
27900
|
-
/**
|
|
27901
|
-
* Fetch the corresponding standard font data.
|
|
27902
|
-
* We need to use specific dynamic imports for each font file for the bundler to include them.
|
|
27903
|
-
*/
|
|
27904
|
-
async fetch({
|
|
27905
|
-
filename
|
|
27906
|
-
}) {
|
|
27907
|
-
switch (filename) {
|
|
27908
|
-
case "FoxitDingbats.pfb":
|
|
27909
|
-
return import("./FoxitDingbats-XZTZYAP6.js").then((module) => module.default);
|
|
27910
|
-
case "FoxitFixed.pfb":
|
|
27911
|
-
return import("./FoxitFixed-DRWD6QNM.js").then(
|
|
27912
|
-
(module) => module.default
|
|
27913
|
-
);
|
|
27914
|
-
case "FoxitFixedBold.pfb":
|
|
27915
|
-
return import("./FoxitFixedBold-A3IBPIFC.js").then((module) => module.default);
|
|
27916
|
-
case "FoxitFixedBoldItalic.pfb":
|
|
27917
|
-
return import("./FoxitFixedBoldItalic-V4ORMFGL.js").then((module) => module.default);
|
|
27918
|
-
case "FoxitFixedItalic.pfb":
|
|
27919
|
-
return import("./FoxitFixedItalic-Z7BSNTJA.js").then((module) => module.default);
|
|
27920
|
-
case "FoxitSerif.pfb":
|
|
27921
|
-
return import("./FoxitSerif-Y34FHWHO.js").then(
|
|
27922
|
-
(module) => module.default
|
|
27923
|
-
);
|
|
27924
|
-
case "FoxitSerifBold.pfb":
|
|
27925
|
-
return import("./FoxitSerifBold-NCWBT4GX.js").then((module) => module.default);
|
|
27926
|
-
case "FoxitSerifBoldItalic.pfb":
|
|
27927
|
-
return import("./FoxitSerifBoldItalic-YTEOG5ZU.js").then((module) => module.default);
|
|
27928
|
-
case "FoxitSerifItalic.pfb":
|
|
27929
|
-
return import("./FoxitSerifItalic-3H547RIJ.js").then((module) => module.default);
|
|
27930
|
-
case "FoxitSymbol.pfb":
|
|
27931
|
-
return import("./FoxitSymbol-EMTQEYPB.js").then(
|
|
27932
|
-
(module) => module.default
|
|
27933
|
-
);
|
|
27934
|
-
case "LiberationSans-Bold.ttf":
|
|
27935
|
-
return import("./LiberationSans-Bold-MGX34QV4.js").then((module) => module.default);
|
|
27936
|
-
case "LiberationSans-BoldItalic.ttf":
|
|
27937
|
-
return import("./LiberationSans-BoldItalic-WSEQ5LH5.js").then((module) => module.default);
|
|
27938
|
-
case "LiberationSans-Italic.ttf":
|
|
27939
|
-
return import("./LiberationSans-Italic-E4SLDR4M.js").then((module) => module.default);
|
|
27940
|
-
case "LiberationSans-Regular.ttf":
|
|
27941
|
-
return import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => module.default);
|
|
27942
|
-
}
|
|
27943
|
-
return Uint8Array.from([]);
|
|
27944
|
-
}
|
|
27945
|
-
};
|
|
27946
|
-
|
|
27947
27860
|
// src/lib/AnnotationData.ts
|
|
27948
27861
|
function isTextAnnotation(annotation) {
|
|
27949
27862
|
return annotation.subtype === "Text";
|
|
@@ -28033,59 +27946,6 @@ function isRedactAnnotation(annotation) {
|
|
|
28033
27946
|
return annotation.subtype === "Redact";
|
|
28034
27947
|
}
|
|
28035
27948
|
|
|
28036
|
-
// src/lib/CanvasGraphics.ts
|
|
28037
|
-
var {
|
|
28038
|
-
beginDrawing,
|
|
28039
|
-
beginText,
|
|
28040
|
-
endText,
|
|
28041
|
-
beginMarkedContent,
|
|
28042
|
-
beginMarkedContentProps,
|
|
28043
|
-
endMarkedContent
|
|
28044
|
-
} = CanvasGraphics.prototype;
|
|
28045
|
-
CanvasGraphics.prototype.beginDrawing = function(options) {
|
|
28046
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28047
|
-
options.transparency = false;
|
|
28048
|
-
}
|
|
28049
|
-
return beginDrawing.call(this, options);
|
|
28050
|
-
};
|
|
28051
|
-
CanvasGraphics.prototype.beginText = function(opIdx) {
|
|
28052
|
-
beginText.call(this, opIdx);
|
|
28053
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28054
|
-
this.ctx.beginText();
|
|
28055
|
-
}
|
|
28056
|
-
};
|
|
28057
|
-
CanvasGraphics.prototype.endText = function(opIdx) {
|
|
28058
|
-
endText.call(this, opIdx);
|
|
28059
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28060
|
-
this.ctx.endText();
|
|
28061
|
-
}
|
|
28062
|
-
};
|
|
28063
|
-
CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
|
|
28064
|
-
beginMarkedContent.call(this, opIdx, this.ctx);
|
|
28065
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28066
|
-
this.ctx.beginMarkedContent(type);
|
|
28067
|
-
}
|
|
28068
|
-
};
|
|
28069
|
-
CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
|
|
28070
|
-
beginMarkedContentProps.call(this, opIdx, type, props);
|
|
28071
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28072
|
-
this.ctx.beginMarkedContent(type, props);
|
|
28073
|
-
}
|
|
28074
|
-
};
|
|
28075
|
-
CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
|
|
28076
|
-
if (this.ctx instanceof SvgCanvasContext) {
|
|
28077
|
-
this.ctx.endMarkedContent();
|
|
28078
|
-
}
|
|
28079
|
-
endMarkedContent.call(this, opIdx);
|
|
28080
|
-
};
|
|
28081
|
-
Object.assign(CanvasGraphics.prototype, {
|
|
28082
|
-
[OPS.beginText]: CanvasGraphics.prototype.beginText,
|
|
28083
|
-
[OPS.endText]: CanvasGraphics.prototype.endText,
|
|
28084
|
-
[OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
|
|
28085
|
-
[OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
|
|
28086
|
-
[OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
|
|
28087
|
-
});
|
|
28088
|
-
|
|
28089
27949
|
// src/lib/TextLayer.ts
|
|
28090
27950
|
function isTextNode(node) {
|
|
28091
27951
|
return node.role === "text";
|
|
@@ -28144,6 +28004,7 @@ async function loadTextLayerFonts(document2) {
|
|
|
28144
28004
|
});
|
|
28145
28005
|
return loadedFontsPromise;
|
|
28146
28006
|
}
|
|
28007
|
+
var loadDefaultFonts = loadTextLayerFonts;
|
|
28147
28008
|
var normalizeMarkedContentId = (idRef) => {
|
|
28148
28009
|
if (idRef == null) {
|
|
28149
28010
|
return null;
|
|
@@ -28156,10 +28017,321 @@ var normalizeMarkedContentId = (idRef) => {
|
|
|
28156
28017
|
}
|
|
28157
28018
|
return `${idRef}`;
|
|
28158
28019
|
};
|
|
28020
|
+
var getNodeContents = (node) => {
|
|
28021
|
+
if (isTextNode(node)) {
|
|
28022
|
+
return typeof node.text === "string" ? node.text : node.text.map((t) => t.text).join("");
|
|
28023
|
+
}
|
|
28024
|
+
if (isElementNode(node)) {
|
|
28025
|
+
return node.children.map((n) => {
|
|
28026
|
+
if (typeof n === "string") {
|
|
28027
|
+
return n;
|
|
28028
|
+
}
|
|
28029
|
+
return getNodeContents(n);
|
|
28030
|
+
}).join("");
|
|
28031
|
+
}
|
|
28032
|
+
return "";
|
|
28033
|
+
};
|
|
28034
|
+
var findNode = (root, callback) => {
|
|
28035
|
+
for (let i = root.children.length - 1; i >= 0; i--) {
|
|
28036
|
+
const child = root.children[i];
|
|
28037
|
+
if (typeof child === "string") {
|
|
28038
|
+
continue;
|
|
28039
|
+
}
|
|
28040
|
+
if (callback(child, root)) {
|
|
28041
|
+
return child;
|
|
28042
|
+
}
|
|
28043
|
+
if (isElementNode(child)) {
|
|
28044
|
+
const found = findNode(child, callback);
|
|
28045
|
+
if (found) {
|
|
28046
|
+
return found;
|
|
28047
|
+
}
|
|
28048
|
+
}
|
|
28049
|
+
}
|
|
28050
|
+
return null;
|
|
28051
|
+
};
|
|
28052
|
+
var findNodes = (node, callback) => {
|
|
28053
|
+
return node.children.reduce((nodes, child) => {
|
|
28054
|
+
if (typeof child === "string") {
|
|
28055
|
+
return nodes;
|
|
28056
|
+
}
|
|
28057
|
+
if (callback(child, node)) {
|
|
28058
|
+
nodes.push(child);
|
|
28059
|
+
}
|
|
28060
|
+
if (isElementNode(child)) {
|
|
28061
|
+
nodes.push(...findNodes(child, callback));
|
|
28062
|
+
}
|
|
28063
|
+
return nodes;
|
|
28064
|
+
}, []);
|
|
28065
|
+
};
|
|
28066
|
+
var flattenNodes = (node) => {
|
|
28067
|
+
if (typeof node === "string") {
|
|
28068
|
+
return [];
|
|
28069
|
+
}
|
|
28070
|
+
if (isElementNode(node)) {
|
|
28071
|
+
return [node, ...node.children.flatMap(flattenNodes)];
|
|
28072
|
+
}
|
|
28073
|
+
return [node];
|
|
28074
|
+
};
|
|
28159
28075
|
var MAX_TEXT_DIVS_TO_RENDER2 = 1e5;
|
|
28160
28076
|
var DEFAULT_FONT_SIZE3 = 30;
|
|
28161
28077
|
var DEFAULT_FONT_ASCENT = 0.8;
|
|
28162
28078
|
var HYPHEN_REGEX = /-\n+$/;
|
|
28079
|
+
var decorateStructTree = (node, rootContainer, graphics, annotations, parents = []) => {
|
|
28080
|
+
let parent = parents.at(-1) || rootContainer;
|
|
28081
|
+
if ("role" in node) {
|
|
28082
|
+
const role = node.role.toLowerCase();
|
|
28083
|
+
switch (role) {
|
|
28084
|
+
case "root":
|
|
28085
|
+
case "document":
|
|
28086
|
+
case "art":
|
|
28087
|
+
parent.attrs ?? (parent.attrs = {});
|
|
28088
|
+
Object.assign(parent.attrs, node.attrs);
|
|
28089
|
+
for (const child of [...node.children]) {
|
|
28090
|
+
decorateStructTree(
|
|
28091
|
+
child,
|
|
28092
|
+
rootContainer,
|
|
28093
|
+
graphics,
|
|
28094
|
+
annotations,
|
|
28095
|
+
parents
|
|
28096
|
+
);
|
|
28097
|
+
}
|
|
28098
|
+
break;
|
|
28099
|
+
case "part":
|
|
28100
|
+
case "sect": {
|
|
28101
|
+
const section = {
|
|
28102
|
+
role: "section",
|
|
28103
|
+
children: [],
|
|
28104
|
+
attrs: node.attrs
|
|
28105
|
+
};
|
|
28106
|
+
while (parents.find(
|
|
28107
|
+
(p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
|
|
28108
|
+
)) {
|
|
28109
|
+
parents.pop();
|
|
28110
|
+
parent = parents.at(-1) || rootContainer;
|
|
28111
|
+
}
|
|
28112
|
+
parent.children.push(section);
|
|
28113
|
+
for (const child of [...node.children]) {
|
|
28114
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28115
|
+
...parents,
|
|
28116
|
+
section
|
|
28117
|
+
]);
|
|
28118
|
+
}
|
|
28119
|
+
break;
|
|
28120
|
+
}
|
|
28121
|
+
case "lbl":
|
|
28122
|
+
case "lbody":
|
|
28123
|
+
case "span": {
|
|
28124
|
+
for (const child of [...node.children]) {
|
|
28125
|
+
decorateStructTree(
|
|
28126
|
+
child,
|
|
28127
|
+
rootContainer,
|
|
28128
|
+
graphics,
|
|
28129
|
+
annotations,
|
|
28130
|
+
parents
|
|
28131
|
+
);
|
|
28132
|
+
}
|
|
28133
|
+
break;
|
|
28134
|
+
}
|
|
28135
|
+
case "link": {
|
|
28136
|
+
const [ref, ...children] = node.children;
|
|
28137
|
+
if (ref.type !== "object") {
|
|
28138
|
+
console.warn(
|
|
28139
|
+
`Unsupported link type: ${ref.type}`
|
|
28140
|
+
);
|
|
28141
|
+
for (const child of children) {
|
|
28142
|
+
decorateStructTree(
|
|
28143
|
+
child,
|
|
28144
|
+
rootContainer,
|
|
28145
|
+
graphics,
|
|
28146
|
+
annotations,
|
|
28147
|
+
parents
|
|
28148
|
+
);
|
|
28149
|
+
}
|
|
28150
|
+
return;
|
|
28151
|
+
}
|
|
28152
|
+
const annotation = annotations?.find(
|
|
28153
|
+
(note) => note.id === ref.id
|
|
28154
|
+
);
|
|
28155
|
+
if (!annotation) {
|
|
28156
|
+
console.warn("Link not found", ref);
|
|
28157
|
+
for (const child of children) {
|
|
28158
|
+
decorateStructTree(
|
|
28159
|
+
child,
|
|
28160
|
+
rootContainer,
|
|
28161
|
+
graphics,
|
|
28162
|
+
annotations,
|
|
28163
|
+
parents
|
|
28164
|
+
);
|
|
28165
|
+
}
|
|
28166
|
+
return;
|
|
28167
|
+
}
|
|
28168
|
+
if (isLinkAnnotation(annotation)) {
|
|
28169
|
+
const anchor = {
|
|
28170
|
+
role: "a",
|
|
28171
|
+
href: annotation.url,
|
|
28172
|
+
children: []
|
|
28173
|
+
};
|
|
28174
|
+
parent.children.push(anchor);
|
|
28175
|
+
for (const child of children) {
|
|
28176
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28177
|
+
...parents,
|
|
28178
|
+
anchor
|
|
28179
|
+
]);
|
|
28180
|
+
}
|
|
28181
|
+
} else {
|
|
28182
|
+
console.warn(`Unsupported annotation subtype: ${annotation.subtype}`);
|
|
28183
|
+
for (const child of children) {
|
|
28184
|
+
decorateStructTree(
|
|
28185
|
+
child,
|
|
28186
|
+
rootContainer,
|
|
28187
|
+
graphics,
|
|
28188
|
+
annotations,
|
|
28189
|
+
parents
|
|
28190
|
+
);
|
|
28191
|
+
}
|
|
28192
|
+
}
|
|
28193
|
+
break;
|
|
28194
|
+
}
|
|
28195
|
+
case "p": {
|
|
28196
|
+
if (node.children.length === 0) {
|
|
28197
|
+
break;
|
|
28198
|
+
}
|
|
28199
|
+
if (node.children.length === 1 && node.children[0].role === "Table") {
|
|
28200
|
+
decorateStructTree(
|
|
28201
|
+
node.children[0],
|
|
28202
|
+
rootContainer,
|
|
28203
|
+
graphics,
|
|
28204
|
+
annotations,
|
|
28205
|
+
parents
|
|
28206
|
+
);
|
|
28207
|
+
break;
|
|
28208
|
+
}
|
|
28209
|
+
const paragraph = {
|
|
28210
|
+
role: "p",
|
|
28211
|
+
children: [],
|
|
28212
|
+
attrs: node.attrs
|
|
28213
|
+
};
|
|
28214
|
+
parent.children.push(paragraph);
|
|
28215
|
+
for (const child of [...node.children]) {
|
|
28216
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28217
|
+
...parents,
|
|
28218
|
+
paragraph
|
|
28219
|
+
]);
|
|
28220
|
+
}
|
|
28221
|
+
break;
|
|
28222
|
+
}
|
|
28223
|
+
case "l": {
|
|
28224
|
+
const list = {
|
|
28225
|
+
role: "ul",
|
|
28226
|
+
children: [],
|
|
28227
|
+
attrs: node.attrs
|
|
28228
|
+
};
|
|
28229
|
+
parent.children.push(list);
|
|
28230
|
+
for (const child of [...node.children]) {
|
|
28231
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28232
|
+
...parents,
|
|
28233
|
+
list
|
|
28234
|
+
]);
|
|
28235
|
+
}
|
|
28236
|
+
break;
|
|
28237
|
+
}
|
|
28238
|
+
case "table":
|
|
28239
|
+
case "thead":
|
|
28240
|
+
case "tbody":
|
|
28241
|
+
case "tfoot":
|
|
28242
|
+
case "tr":
|
|
28243
|
+
case "h1":
|
|
28244
|
+
case "h2":
|
|
28245
|
+
case "h3":
|
|
28246
|
+
case "h4":
|
|
28247
|
+
case "h5":
|
|
28248
|
+
case "h6":
|
|
28249
|
+
case "li":
|
|
28250
|
+
case "td":
|
|
28251
|
+
case "th": {
|
|
28252
|
+
const block = {
|
|
28253
|
+
role,
|
|
28254
|
+
children: [],
|
|
28255
|
+
attrs: node.attrs
|
|
28256
|
+
};
|
|
28257
|
+
parent.children.push(block);
|
|
28258
|
+
for (const child of [...node.children]) {
|
|
28259
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28260
|
+
...parents,
|
|
28261
|
+
block
|
|
28262
|
+
]);
|
|
28263
|
+
}
|
|
28264
|
+
break;
|
|
28265
|
+
}
|
|
28266
|
+
case "figure": {
|
|
28267
|
+
if (graphics) {
|
|
28268
|
+
for (const child of [...node.children]) {
|
|
28269
|
+
if (!("id" in child)) {
|
|
28270
|
+
continue;
|
|
28271
|
+
}
|
|
28272
|
+
const contentId = normalizeMarkedContentId(child.id);
|
|
28273
|
+
const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
|
|
28274
|
+
const graphic = findSvgNode(
|
|
28275
|
+
graphics,
|
|
28276
|
+
(node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
|
|
28277
|
+
);
|
|
28278
|
+
if (!graphic) {
|
|
28279
|
+
continue;
|
|
28280
|
+
}
|
|
28281
|
+
parent.children.push({
|
|
28282
|
+
role: "figure",
|
|
28283
|
+
href: graphic.attrs.href,
|
|
28284
|
+
x: graphic.attrs.x || 0,
|
|
28285
|
+
y: graphic.attrs.y || 0,
|
|
28286
|
+
width: graphic.attrs.width || 0,
|
|
28287
|
+
height: graphic.attrs.height || 0,
|
|
28288
|
+
alt: node.alt
|
|
28289
|
+
});
|
|
28290
|
+
}
|
|
28291
|
+
}
|
|
28292
|
+
break;
|
|
28293
|
+
}
|
|
28294
|
+
default: {
|
|
28295
|
+
const block = {
|
|
28296
|
+
role: "div",
|
|
28297
|
+
children: [],
|
|
28298
|
+
attrs: node.attrs
|
|
28299
|
+
};
|
|
28300
|
+
parent.children.push(block);
|
|
28301
|
+
for (const child of [...node.children]) {
|
|
28302
|
+
decorateStructTree(child, rootContainer, graphics, annotations, [
|
|
28303
|
+
...parents,
|
|
28304
|
+
block
|
|
28305
|
+
]);
|
|
28306
|
+
}
|
|
28307
|
+
break;
|
|
28308
|
+
}
|
|
28309
|
+
}
|
|
28310
|
+
} else {
|
|
28311
|
+
const contentId = normalizeMarkedContentId(node.id);
|
|
28312
|
+
findNode(rootContainer, (child, previousParent) => {
|
|
28313
|
+
if (child.id !== contentId) {
|
|
28314
|
+
return;
|
|
28315
|
+
}
|
|
28316
|
+
previousParent.children = previousParent.children.filter(
|
|
28317
|
+
(c) => c !== child
|
|
28318
|
+
);
|
|
28319
|
+
if (isElementNode(child)) {
|
|
28320
|
+
const children = child.children;
|
|
28321
|
+
const lastTextItem = parent.children.at(-1);
|
|
28322
|
+
if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && getNodeContents(lastTextItem).trim() === "") {
|
|
28323
|
+
while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && getNodeContents(children[0]).trim() === "") {
|
|
28324
|
+
children.shift();
|
|
28325
|
+
}
|
|
28326
|
+
}
|
|
28327
|
+
parent.children.push(...children);
|
|
28328
|
+
} else {
|
|
28329
|
+
parent.children.push(child);
|
|
28330
|
+
}
|
|
28331
|
+
return true;
|
|
28332
|
+
});
|
|
28333
|
+
}
|
|
28334
|
+
};
|
|
28163
28335
|
async function createTextLayer(page, {
|
|
28164
28336
|
canvasFactory,
|
|
28165
28337
|
viewport = page.getViewport({ scale: 1 }),
|
|
@@ -28171,7 +28343,6 @@ async function createTextLayer(page, {
|
|
|
28171
28343
|
const ascentCache = /* @__PURE__ */ new Map();
|
|
28172
28344
|
const canvasCache = /* @__PURE__ */ new Map();
|
|
28173
28345
|
const textDivs = [];
|
|
28174
|
-
const markedContent = /* @__PURE__ */ new Map();
|
|
28175
28346
|
const [tree, contentSource] = await Promise.all([
|
|
28176
28347
|
page.getStructTree(),
|
|
28177
28348
|
page.getTextContent({ includeMarkedContent: true })
|
|
@@ -28217,10 +28388,6 @@ async function createTextLayer(page, {
|
|
|
28217
28388
|
const id2 = normalizeMarkedContentId(item);
|
|
28218
28389
|
if (id2 != null) {
|
|
28219
28390
|
container.id = id2;
|
|
28220
|
-
markedContent.set(id2, {
|
|
28221
|
-
node: container,
|
|
28222
|
-
parent
|
|
28223
|
-
});
|
|
28224
28391
|
}
|
|
28225
28392
|
} else if (item.type === "endMarkedContent") {
|
|
28226
28393
|
container = parents.pop();
|
|
@@ -28383,215 +28550,6 @@ async function createTextLayer(page, {
|
|
|
28383
28550
|
ascentCache.set(fontFamily, ratio);
|
|
28384
28551
|
return ratio;
|
|
28385
28552
|
};
|
|
28386
|
-
const renderStructTreeNode = (node, parents) => {
|
|
28387
|
-
let parent = parents[parents.length - 1] || rootContainer;
|
|
28388
|
-
if ("role" in node) {
|
|
28389
|
-
const role = node.role.toLowerCase();
|
|
28390
|
-
switch (role) {
|
|
28391
|
-
case "root":
|
|
28392
|
-
case "document":
|
|
28393
|
-
case "art":
|
|
28394
|
-
parent.attrs ?? (parent.attrs = {});
|
|
28395
|
-
Object.assign(parent.attrs, node.attrs);
|
|
28396
|
-
for (const child of [...node.children]) {
|
|
28397
|
-
renderStructTreeNode(child, parents);
|
|
28398
|
-
}
|
|
28399
|
-
break;
|
|
28400
|
-
case "sect": {
|
|
28401
|
-
const section = {
|
|
28402
|
-
role: "section",
|
|
28403
|
-
children: [],
|
|
28404
|
-
attrs: node.attrs
|
|
28405
|
-
};
|
|
28406
|
-
while (parents.find(
|
|
28407
|
-
(p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
|
|
28408
|
-
)) {
|
|
28409
|
-
parents.pop();
|
|
28410
|
-
parent = parents.at(-1) || rootContainer;
|
|
28411
|
-
}
|
|
28412
|
-
parent.children.push(section);
|
|
28413
|
-
for (const child of [...node.children]) {
|
|
28414
|
-
renderStructTreeNode(child, [...parents, section]);
|
|
28415
|
-
}
|
|
28416
|
-
break;
|
|
28417
|
-
}
|
|
28418
|
-
case "lbl":
|
|
28419
|
-
case "lbody":
|
|
28420
|
-
case "span": {
|
|
28421
|
-
for (const child of [...node.children]) {
|
|
28422
|
-
renderStructTreeNode(child, parents);
|
|
28423
|
-
}
|
|
28424
|
-
break;
|
|
28425
|
-
}
|
|
28426
|
-
case "link": {
|
|
28427
|
-
const [ref, ...children] = node.children;
|
|
28428
|
-
if (ref.type !== "object") {
|
|
28429
|
-
console.warn(
|
|
28430
|
-
`Unsupported link type: ${ref.type}`
|
|
28431
|
-
);
|
|
28432
|
-
for (const child of children) {
|
|
28433
|
-
renderStructTreeNode(child, parents);
|
|
28434
|
-
}
|
|
28435
|
-
return;
|
|
28436
|
-
}
|
|
28437
|
-
const annotation = annotations?.find(
|
|
28438
|
-
(note) => note.id === ref.id
|
|
28439
|
-
);
|
|
28440
|
-
if (!annotation) {
|
|
28441
|
-
console.warn("Link not found", ref);
|
|
28442
|
-
for (const child of children) {
|
|
28443
|
-
renderStructTreeNode(child, parents);
|
|
28444
|
-
}
|
|
28445
|
-
return;
|
|
28446
|
-
}
|
|
28447
|
-
if (isLinkAnnotation(annotation)) {
|
|
28448
|
-
const anchor = {
|
|
28449
|
-
role: "a",
|
|
28450
|
-
href: annotation.url,
|
|
28451
|
-
children: []
|
|
28452
|
-
};
|
|
28453
|
-
parent.children.push(anchor);
|
|
28454
|
-
for (const child of children) {
|
|
28455
|
-
renderStructTreeNode(child, [...parents, anchor]);
|
|
28456
|
-
}
|
|
28457
|
-
} else {
|
|
28458
|
-
console.warn(
|
|
28459
|
-
`Unsupported annotation subtype: ${annotation.subtype}`
|
|
28460
|
-
);
|
|
28461
|
-
for (const child of children) {
|
|
28462
|
-
renderStructTreeNode(child, parents);
|
|
28463
|
-
}
|
|
28464
|
-
}
|
|
28465
|
-
break;
|
|
28466
|
-
}
|
|
28467
|
-
case "p": {
|
|
28468
|
-
if (node.children.length === 0) {
|
|
28469
|
-
break;
|
|
28470
|
-
}
|
|
28471
|
-
if (node.children.length === 1 && node.children[0].role === "Table") {
|
|
28472
|
-
renderStructTreeNode(node.children[0], parents);
|
|
28473
|
-
break;
|
|
28474
|
-
}
|
|
28475
|
-
const paragraph = {
|
|
28476
|
-
role: "p",
|
|
28477
|
-
children: [],
|
|
28478
|
-
attrs: node.attrs
|
|
28479
|
-
};
|
|
28480
|
-
parent.children.push(paragraph);
|
|
28481
|
-
for (const child of [...node.children]) {
|
|
28482
|
-
renderStructTreeNode(child, [...parents, paragraph]);
|
|
28483
|
-
}
|
|
28484
|
-
break;
|
|
28485
|
-
}
|
|
28486
|
-
case "l": {
|
|
28487
|
-
const list = {
|
|
28488
|
-
role: "ul",
|
|
28489
|
-
children: [],
|
|
28490
|
-
attrs: node.attrs
|
|
28491
|
-
};
|
|
28492
|
-
parent.children.push(list);
|
|
28493
|
-
for (const child of [...node.children]) {
|
|
28494
|
-
renderStructTreeNode(child, [...parents, list]);
|
|
28495
|
-
}
|
|
28496
|
-
break;
|
|
28497
|
-
}
|
|
28498
|
-
case "table":
|
|
28499
|
-
case "thead":
|
|
28500
|
-
case "tbody":
|
|
28501
|
-
case "tfoot":
|
|
28502
|
-
case "tr":
|
|
28503
|
-
case "h1":
|
|
28504
|
-
case "h2":
|
|
28505
|
-
case "h3":
|
|
28506
|
-
case "h4":
|
|
28507
|
-
case "h5":
|
|
28508
|
-
case "h6":
|
|
28509
|
-
case "li":
|
|
28510
|
-
case "td":
|
|
28511
|
-
case "th": {
|
|
28512
|
-
const block = {
|
|
28513
|
-
role,
|
|
28514
|
-
children: [],
|
|
28515
|
-
attrs: node.attrs
|
|
28516
|
-
};
|
|
28517
|
-
parent.children.push(block);
|
|
28518
|
-
for (const child of [...node.children]) {
|
|
28519
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
28520
|
-
}
|
|
28521
|
-
break;
|
|
28522
|
-
}
|
|
28523
|
-
case "figure": {
|
|
28524
|
-
const ids = [...node.children].map((child) => {
|
|
28525
|
-
const id2 = normalizeMarkedContentId(child);
|
|
28526
|
-
if (!id2) {
|
|
28527
|
-
return null;
|
|
28528
|
-
}
|
|
28529
|
-
const span = markedContent.get(id2);
|
|
28530
|
-
if (!span) {
|
|
28531
|
-
return id2;
|
|
28532
|
-
}
|
|
28533
|
-
if (span.parent) {
|
|
28534
|
-
span.parent.children = span.parent.children.filter(
|
|
28535
|
-
(child2) => child2 !== span.node
|
|
28536
|
-
);
|
|
28537
|
-
}
|
|
28538
|
-
return id2;
|
|
28539
|
-
}).filter((id2) => id2 !== null);
|
|
28540
|
-
if (graphics) {
|
|
28541
|
-
for (const markedId of ids) {
|
|
28542
|
-
const figureId = normalizeMarkedContentId(markedId);
|
|
28543
|
-
if (!figureId) {
|
|
28544
|
-
continue;
|
|
28545
|
-
}
|
|
28546
|
-
const useId = `#${id("marked_content" /* MarkedContent */, figureId, graphics.attrs.id)}`;
|
|
28547
|
-
const graphic = findSvgNode(
|
|
28548
|
-
graphics,
|
|
28549
|
-
(node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
|
|
28550
|
-
);
|
|
28551
|
-
if (!graphic) {
|
|
28552
|
-
continue;
|
|
28553
|
-
}
|
|
28554
|
-
const figure = {
|
|
28555
|
-
role: "figure",
|
|
28556
|
-
href: graphic.attrs.href,
|
|
28557
|
-
x: graphic.attrs.x || 0,
|
|
28558
|
-
y: graphic.attrs.y || 0,
|
|
28559
|
-
width: graphic.attrs.width || 0,
|
|
28560
|
-
height: graphic.attrs.height || 0,
|
|
28561
|
-
alt: node.alt
|
|
28562
|
-
};
|
|
28563
|
-
parent.children.push(figure);
|
|
28564
|
-
}
|
|
28565
|
-
}
|
|
28566
|
-
break;
|
|
28567
|
-
}
|
|
28568
|
-
default: {
|
|
28569
|
-
const block = {
|
|
28570
|
-
role: "div",
|
|
28571
|
-
children: [],
|
|
28572
|
-
attrs: node.attrs
|
|
28573
|
-
};
|
|
28574
|
-
parent.children.push(block);
|
|
28575
|
-
for (const child of [...node.children]) {
|
|
28576
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
28577
|
-
}
|
|
28578
|
-
break;
|
|
28579
|
-
}
|
|
28580
|
-
}
|
|
28581
|
-
} else {
|
|
28582
|
-
const span = markedContent.get(`${node.id}`);
|
|
28583
|
-
if (!span) {
|
|
28584
|
-
return;
|
|
28585
|
-
}
|
|
28586
|
-
if (span.parent) {
|
|
28587
|
-
span.parent.children = span.parent.children.filter(
|
|
28588
|
-
(child) => child !== span.node
|
|
28589
|
-
);
|
|
28590
|
-
}
|
|
28591
|
-
parent.children.push(span.node);
|
|
28592
|
-
}
|
|
28593
|
-
};
|
|
28594
|
-
await loadDefaultFonts();
|
|
28595
28553
|
const reader = textContentSource.getReader();
|
|
28596
28554
|
while (true) {
|
|
28597
28555
|
const { value, done } = await reader.read();
|
|
@@ -28604,7 +28562,7 @@ async function createTextLayer(page, {
|
|
|
28604
28562
|
const root = tree?.children[0]?.children[0];
|
|
28605
28563
|
if (root?.children) {
|
|
28606
28564
|
for (const child of root.children) {
|
|
28607
|
-
|
|
28565
|
+
decorateStructTree(child, rootContainer, graphics, annotations);
|
|
28608
28566
|
}
|
|
28609
28567
|
}
|
|
28610
28568
|
ascentCache.clear();
|
|
@@ -28615,6 +28573,132 @@ async function createTextLayer(page, {
|
|
|
28615
28573
|
return rootContainer;
|
|
28616
28574
|
}
|
|
28617
28575
|
|
|
28576
|
+
// src/lib/WasmFactory.ts
|
|
28577
|
+
var WasmFactory = class {
|
|
28578
|
+
async fetch({
|
|
28579
|
+
filename
|
|
28580
|
+
}) {
|
|
28581
|
+
switch (filename) {
|
|
28582
|
+
case "openjpeg.wasm":
|
|
28583
|
+
return import("./openjpeg-QLA762TL.js").then(
|
|
28584
|
+
(module) => module.default
|
|
28585
|
+
);
|
|
28586
|
+
case "qcms_bg":
|
|
28587
|
+
return import("./qcms_bg-BCJEADMU.js").then(
|
|
28588
|
+
(module) => module.default
|
|
28589
|
+
);
|
|
28590
|
+
}
|
|
28591
|
+
return Uint8Array.from([]);
|
|
28592
|
+
}
|
|
28593
|
+
};
|
|
28594
|
+
|
|
28595
|
+
// src/lib/StandardFontDataFactory.ts
|
|
28596
|
+
var StandardFontDataFactory = class extends BaseStandardFontDataFactory {
|
|
28597
|
+
constructor() {
|
|
28598
|
+
super({
|
|
28599
|
+
baseUrl: null
|
|
28600
|
+
});
|
|
28601
|
+
}
|
|
28602
|
+
/**
|
|
28603
|
+
* Fetch the corresponding standard font data.
|
|
28604
|
+
* We need to use specific dynamic imports for each font file for the bundler to include them.
|
|
28605
|
+
*/
|
|
28606
|
+
async fetch({
|
|
28607
|
+
filename
|
|
28608
|
+
}) {
|
|
28609
|
+
switch (filename) {
|
|
28610
|
+
case "FoxitDingbats.pfb":
|
|
28611
|
+
return import("./FoxitDingbats-XZTZYAP6.js").then((module) => module.default);
|
|
28612
|
+
case "FoxitFixed.pfb":
|
|
28613
|
+
return import("./FoxitFixed-DRWD6QNM.js").then(
|
|
28614
|
+
(module) => module.default
|
|
28615
|
+
);
|
|
28616
|
+
case "FoxitFixedBold.pfb":
|
|
28617
|
+
return import("./FoxitFixedBold-A3IBPIFC.js").then((module) => module.default);
|
|
28618
|
+
case "FoxitFixedBoldItalic.pfb":
|
|
28619
|
+
return import("./FoxitFixedBoldItalic-V4ORMFGL.js").then((module) => module.default);
|
|
28620
|
+
case "FoxitFixedItalic.pfb":
|
|
28621
|
+
return import("./FoxitFixedItalic-Z7BSNTJA.js").then((module) => module.default);
|
|
28622
|
+
case "FoxitSerif.pfb":
|
|
28623
|
+
return import("./FoxitSerif-Y34FHWHO.js").then(
|
|
28624
|
+
(module) => module.default
|
|
28625
|
+
);
|
|
28626
|
+
case "FoxitSerifBold.pfb":
|
|
28627
|
+
return import("./FoxitSerifBold-NCWBT4GX.js").then((module) => module.default);
|
|
28628
|
+
case "FoxitSerifBoldItalic.pfb":
|
|
28629
|
+
return import("./FoxitSerifBoldItalic-YTEOG5ZU.js").then((module) => module.default);
|
|
28630
|
+
case "FoxitSerifItalic.pfb":
|
|
28631
|
+
return import("./FoxitSerifItalic-3H547RIJ.js").then((module) => module.default);
|
|
28632
|
+
case "FoxitSymbol.pfb":
|
|
28633
|
+
return import("./FoxitSymbol-EMTQEYPB.js").then(
|
|
28634
|
+
(module) => module.default
|
|
28635
|
+
);
|
|
28636
|
+
case "LiberationSans-Bold.ttf":
|
|
28637
|
+
return import("./LiberationSans-Bold-MGX34QV4.js").then((module) => module.default);
|
|
28638
|
+
case "LiberationSans-BoldItalic.ttf":
|
|
28639
|
+
return import("./LiberationSans-BoldItalic-WSEQ5LH5.js").then((module) => module.default);
|
|
28640
|
+
case "LiberationSans-Italic.ttf":
|
|
28641
|
+
return import("./LiberationSans-Italic-E4SLDR4M.js").then((module) => module.default);
|
|
28642
|
+
case "LiberationSans-Regular.ttf":
|
|
28643
|
+
return import("./LiberationSans-Regular-3SH5NGZO.js").then((module) => module.default);
|
|
28644
|
+
}
|
|
28645
|
+
return Uint8Array.from([]);
|
|
28646
|
+
}
|
|
28647
|
+
};
|
|
28648
|
+
|
|
28649
|
+
// src/lib/CanvasGraphics.ts
|
|
28650
|
+
var {
|
|
28651
|
+
beginDrawing,
|
|
28652
|
+
beginText,
|
|
28653
|
+
endText,
|
|
28654
|
+
beginMarkedContent,
|
|
28655
|
+
beginMarkedContentProps,
|
|
28656
|
+
endMarkedContent
|
|
28657
|
+
} = CanvasGraphics.prototype;
|
|
28658
|
+
CanvasGraphics.prototype.beginDrawing = function(options) {
|
|
28659
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28660
|
+
options.transparency = false;
|
|
28661
|
+
}
|
|
28662
|
+
return beginDrawing.call(this, options);
|
|
28663
|
+
};
|
|
28664
|
+
CanvasGraphics.prototype.beginText = function(opIdx) {
|
|
28665
|
+
beginText.call(this, opIdx);
|
|
28666
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28667
|
+
this.ctx.beginText();
|
|
28668
|
+
}
|
|
28669
|
+
};
|
|
28670
|
+
CanvasGraphics.prototype.endText = function(opIdx) {
|
|
28671
|
+
endText.call(this, opIdx);
|
|
28672
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28673
|
+
this.ctx.endText();
|
|
28674
|
+
}
|
|
28675
|
+
};
|
|
28676
|
+
CanvasGraphics.prototype.beginMarkedContent = function(opIdx, type) {
|
|
28677
|
+
beginMarkedContent.call(this, opIdx, this.ctx);
|
|
28678
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28679
|
+
this.ctx.beginMarkedContent(type);
|
|
28680
|
+
}
|
|
28681
|
+
};
|
|
28682
|
+
CanvasGraphics.prototype.beginMarkedContentProps = function(opIdx, type, props) {
|
|
28683
|
+
beginMarkedContentProps.call(this, opIdx, type, props);
|
|
28684
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28685
|
+
this.ctx.beginMarkedContent(type, props);
|
|
28686
|
+
}
|
|
28687
|
+
};
|
|
28688
|
+
CanvasGraphics.prototype.endMarkedContent = function(opIdx) {
|
|
28689
|
+
if (this.ctx instanceof SvgCanvasContext) {
|
|
28690
|
+
this.ctx.endMarkedContent();
|
|
28691
|
+
}
|
|
28692
|
+
endMarkedContent.call(this, opIdx);
|
|
28693
|
+
};
|
|
28694
|
+
Object.assign(CanvasGraphics.prototype, {
|
|
28695
|
+
[OPS.beginText]: CanvasGraphics.prototype.beginText,
|
|
28696
|
+
[OPS.endText]: CanvasGraphics.prototype.endText,
|
|
28697
|
+
[OPS.beginMarkedContent]: CanvasGraphics.prototype.beginMarkedContent,
|
|
28698
|
+
[OPS.beginMarkedContentProps]: CanvasGraphics.prototype.beginMarkedContentProps,
|
|
28699
|
+
[OPS.endMarkedContent]: CanvasGraphics.prototype.endMarkedContent
|
|
28700
|
+
});
|
|
28701
|
+
|
|
28618
28702
|
// node_modules/opentype.js/dist/opentype.module.js
|
|
28619
28703
|
if (!String.prototype.codePointAt) {
|
|
28620
28704
|
(function() {
|
|
@@ -40863,20 +40947,6 @@ var opentype = /* @__PURE__ */ Object.freeze({
|
|
|
40863
40947
|
var opentype_module_default = opentype;
|
|
40864
40948
|
|
|
40865
40949
|
// src/lib/TextLayer_v2.ts
|
|
40866
|
-
var findNode = (node, callback) => {
|
|
40867
|
-
for (let i = node.children.length - 1; i >= 0; i--) {
|
|
40868
|
-
const child = node.children[i];
|
|
40869
|
-
if (typeof child === "string") {
|
|
40870
|
-
continue;
|
|
40871
|
-
}
|
|
40872
|
-
if (callback(child, node)) {
|
|
40873
|
-
return;
|
|
40874
|
-
}
|
|
40875
|
-
if (isElementNode(child)) {
|
|
40876
|
-
findNode(child, callback);
|
|
40877
|
-
}
|
|
40878
|
-
}
|
|
40879
|
-
};
|
|
40880
40950
|
var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
|
|
40881
40951
|
let promise = null;
|
|
40882
40952
|
return () => {
|
|
@@ -40892,12 +40962,6 @@ var loadTextLayerFontsMap = /* @__PURE__ */ (() => {
|
|
|
40892
40962
|
return promise;
|
|
40893
40963
|
};
|
|
40894
40964
|
})();
|
|
40895
|
-
var computeText = (node) => {
|
|
40896
|
-
if (typeof node.text === "string") {
|
|
40897
|
-
return node.text;
|
|
40898
|
-
}
|
|
40899
|
-
return node.text.map((t) => t.text).join("");
|
|
40900
|
-
};
|
|
40901
40965
|
async function createTextLayerV2(page, {
|
|
40902
40966
|
graphics,
|
|
40903
40967
|
annotations: _annotations
|
|
@@ -41003,210 +41067,6 @@ async function createTextLayerV2(page, {
|
|
|
41003
41067
|
});
|
|
41004
41068
|
resetTextItem();
|
|
41005
41069
|
};
|
|
41006
|
-
const renderStructTreeNode = (node, parents) => {
|
|
41007
|
-
let parent = parents.at(-1) || rootContainer;
|
|
41008
|
-
if ("role" in node) {
|
|
41009
|
-
const role = node.role.toLowerCase();
|
|
41010
|
-
switch (role) {
|
|
41011
|
-
case "root":
|
|
41012
|
-
case "document":
|
|
41013
|
-
case "art":
|
|
41014
|
-
parent.attrs ?? (parent.attrs = {});
|
|
41015
|
-
Object.assign(parent.attrs, node.attrs);
|
|
41016
|
-
for (const child of [...node.children]) {
|
|
41017
|
-
renderStructTreeNode(child, parents);
|
|
41018
|
-
}
|
|
41019
|
-
break;
|
|
41020
|
-
case "part":
|
|
41021
|
-
case "sect": {
|
|
41022
|
-
const section = {
|
|
41023
|
-
role: "section",
|
|
41024
|
-
children: [],
|
|
41025
|
-
attrs: node.attrs
|
|
41026
|
-
};
|
|
41027
|
-
while (parents.find(
|
|
41028
|
-
(p) => ["h1", "h2", "h3", "h4", "h5", "h6"].includes(p.role)
|
|
41029
|
-
)) {
|
|
41030
|
-
parents.pop();
|
|
41031
|
-
parent = parents.at(-1) || rootContainer;
|
|
41032
|
-
}
|
|
41033
|
-
parent.children.push(section);
|
|
41034
|
-
for (const child of [...node.children]) {
|
|
41035
|
-
renderStructTreeNode(child, [...parents, section]);
|
|
41036
|
-
}
|
|
41037
|
-
break;
|
|
41038
|
-
}
|
|
41039
|
-
case "lbl":
|
|
41040
|
-
case "lbody":
|
|
41041
|
-
case "span": {
|
|
41042
|
-
for (const child of [...node.children]) {
|
|
41043
|
-
renderStructTreeNode(child, parents);
|
|
41044
|
-
}
|
|
41045
|
-
break;
|
|
41046
|
-
}
|
|
41047
|
-
case "link": {
|
|
41048
|
-
const [ref, ...children] = node.children;
|
|
41049
|
-
if (ref.type !== "object") {
|
|
41050
|
-
console.warn(
|
|
41051
|
-
`Unsupported link type: ${ref.type}`
|
|
41052
|
-
);
|
|
41053
|
-
for (const child of children) {
|
|
41054
|
-
renderStructTreeNode(child, parents);
|
|
41055
|
-
}
|
|
41056
|
-
return;
|
|
41057
|
-
}
|
|
41058
|
-
const annotation = annotations?.find(
|
|
41059
|
-
(note) => note.id === ref.id
|
|
41060
|
-
);
|
|
41061
|
-
if (!annotation) {
|
|
41062
|
-
console.warn("Link not found", ref);
|
|
41063
|
-
for (const child of children) {
|
|
41064
|
-
renderStructTreeNode(child, parents);
|
|
41065
|
-
}
|
|
41066
|
-
return;
|
|
41067
|
-
}
|
|
41068
|
-
if (isLinkAnnotation(annotation)) {
|
|
41069
|
-
const anchor = {
|
|
41070
|
-
role: "a",
|
|
41071
|
-
href: annotation.url,
|
|
41072
|
-
children: []
|
|
41073
|
-
};
|
|
41074
|
-
parent.children.push(anchor);
|
|
41075
|
-
for (const child of children) {
|
|
41076
|
-
renderStructTreeNode(child, [...parents, anchor]);
|
|
41077
|
-
}
|
|
41078
|
-
} else {
|
|
41079
|
-
console.warn(
|
|
41080
|
-
`Unsupported annotation subtype: ${annotation.subtype}`
|
|
41081
|
-
);
|
|
41082
|
-
for (const child of children) {
|
|
41083
|
-
renderStructTreeNode(child, parents);
|
|
41084
|
-
}
|
|
41085
|
-
}
|
|
41086
|
-
break;
|
|
41087
|
-
}
|
|
41088
|
-
case "p": {
|
|
41089
|
-
if (node.children.length === 0) {
|
|
41090
|
-
break;
|
|
41091
|
-
}
|
|
41092
|
-
if (node.children.length === 1 && node.children[0].role === "Table") {
|
|
41093
|
-
renderStructTreeNode(node.children[0], parents);
|
|
41094
|
-
break;
|
|
41095
|
-
}
|
|
41096
|
-
const paragraph = {
|
|
41097
|
-
role: "p",
|
|
41098
|
-
children: [],
|
|
41099
|
-
attrs: node.attrs
|
|
41100
|
-
};
|
|
41101
|
-
parent.children.push(paragraph);
|
|
41102
|
-
for (const child of [...node.children]) {
|
|
41103
|
-
renderStructTreeNode(child, [...parents, paragraph]);
|
|
41104
|
-
}
|
|
41105
|
-
break;
|
|
41106
|
-
}
|
|
41107
|
-
case "l": {
|
|
41108
|
-
const list = {
|
|
41109
|
-
role: "ul",
|
|
41110
|
-
children: [],
|
|
41111
|
-
attrs: node.attrs
|
|
41112
|
-
};
|
|
41113
|
-
parent.children.push(list);
|
|
41114
|
-
for (const child of [...node.children]) {
|
|
41115
|
-
renderStructTreeNode(child, [...parents, list]);
|
|
41116
|
-
}
|
|
41117
|
-
break;
|
|
41118
|
-
}
|
|
41119
|
-
case "table":
|
|
41120
|
-
case "thead":
|
|
41121
|
-
case "tbody":
|
|
41122
|
-
case "tfoot":
|
|
41123
|
-
case "tr":
|
|
41124
|
-
case "h1":
|
|
41125
|
-
case "h2":
|
|
41126
|
-
case "h3":
|
|
41127
|
-
case "h4":
|
|
41128
|
-
case "h5":
|
|
41129
|
-
case "h6":
|
|
41130
|
-
case "li":
|
|
41131
|
-
case "td":
|
|
41132
|
-
case "th": {
|
|
41133
|
-
const block = {
|
|
41134
|
-
role,
|
|
41135
|
-
children: [],
|
|
41136
|
-
attrs: node.attrs
|
|
41137
|
-
};
|
|
41138
|
-
parent.children.push(block);
|
|
41139
|
-
for (const child of [...node.children]) {
|
|
41140
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
41141
|
-
}
|
|
41142
|
-
break;
|
|
41143
|
-
}
|
|
41144
|
-
case "figure": {
|
|
41145
|
-
if (graphics) {
|
|
41146
|
-
for (const child of [...node.children]) {
|
|
41147
|
-
if (!("id" in child)) {
|
|
41148
|
-
continue;
|
|
41149
|
-
}
|
|
41150
|
-
const contentId = normalizeMarkedContentId(child.id);
|
|
41151
|
-
const useId = `#${id("marked_content" /* MarkedContent */, contentId, graphics.attrs.id)}`;
|
|
41152
|
-
const graphic = findSvgNode(
|
|
41153
|
-
graphics,
|
|
41154
|
-
(node2) => isSvgMarkedContent(node2) && node2.attrs.href === useId
|
|
41155
|
-
);
|
|
41156
|
-
if (!graphic) {
|
|
41157
|
-
continue;
|
|
41158
|
-
}
|
|
41159
|
-
parent.children.push({
|
|
41160
|
-
role: "figure",
|
|
41161
|
-
href: graphic.attrs.href,
|
|
41162
|
-
x: graphic.attrs.x || 0,
|
|
41163
|
-
y: graphic.attrs.y || 0,
|
|
41164
|
-
width: graphic.attrs.width || 0,
|
|
41165
|
-
height: graphic.attrs.height || 0,
|
|
41166
|
-
alt: node.alt
|
|
41167
|
-
});
|
|
41168
|
-
}
|
|
41169
|
-
}
|
|
41170
|
-
break;
|
|
41171
|
-
}
|
|
41172
|
-
default: {
|
|
41173
|
-
const block = {
|
|
41174
|
-
role: "div",
|
|
41175
|
-
children: [],
|
|
41176
|
-
attrs: node.attrs
|
|
41177
|
-
};
|
|
41178
|
-
parent.children.push(block);
|
|
41179
|
-
for (const child of [...node.children]) {
|
|
41180
|
-
renderStructTreeNode(child, [...parents, block]);
|
|
41181
|
-
}
|
|
41182
|
-
break;
|
|
41183
|
-
}
|
|
41184
|
-
}
|
|
41185
|
-
} else {
|
|
41186
|
-
const contentId = normalizeMarkedContentId(node.id);
|
|
41187
|
-
findNode(rootContainer, (child, previousParent) => {
|
|
41188
|
-
if (child.id !== contentId) {
|
|
41189
|
-
return;
|
|
41190
|
-
}
|
|
41191
|
-
previousParent.children = previousParent.children.filter(
|
|
41192
|
-
(c) => c !== child
|
|
41193
|
-
);
|
|
41194
|
-
if (isElementNode(child)) {
|
|
41195
|
-
const children = child.children;
|
|
41196
|
-
const lastTextItem = parent.children.at(-1);
|
|
41197
|
-
if (lastTextItem && typeof lastTextItem !== "string" && isTextNode(lastTextItem) && computeText(lastTextItem).trim() === "") {
|
|
41198
|
-
while (children[0] && typeof children[0] !== "string" && isTextNode(children[0]) && computeText(children[0]).trim() === "") {
|
|
41199
|
-
children.shift();
|
|
41200
|
-
}
|
|
41201
|
-
}
|
|
41202
|
-
parent.children.push(...children);
|
|
41203
|
-
} else {
|
|
41204
|
-
parent.children.push(child);
|
|
41205
|
-
}
|
|
41206
|
-
return true;
|
|
41207
|
-
});
|
|
41208
|
-
}
|
|
41209
|
-
};
|
|
41210
41070
|
for (let i = 0; i < operatorsList.fnArray.length; i++) {
|
|
41211
41071
|
const fnId = operatorsList.fnArray[i];
|
|
41212
41072
|
const args = operatorsList.argsArray[i];
|
|
@@ -41404,9 +41264,22 @@ async function createTextLayerV2(page, {
|
|
|
41404
41264
|
}
|
|
41405
41265
|
}
|
|
41406
41266
|
closeTextItem();
|
|
41407
|
-
|
|
41267
|
+
decorateStructTree(
|
|
41268
|
+
structTree,
|
|
41269
|
+
rootContainer,
|
|
41270
|
+
graphics,
|
|
41271
|
+
annotations
|
|
41272
|
+
);
|
|
41408
41273
|
return rootContainer;
|
|
41409
41274
|
}
|
|
41275
|
+
|
|
41276
|
+
// src/index.ts
|
|
41277
|
+
var textLayerUtils = {
|
|
41278
|
+
findNode,
|
|
41279
|
+
findNodes,
|
|
41280
|
+
getNodeContents,
|
|
41281
|
+
flattenNodes
|
|
41282
|
+
};
|
|
41410
41283
|
export {
|
|
41411
41284
|
AbortException,
|
|
41412
41285
|
AnnotationEditorLayer,
|
|
@@ -41456,7 +41329,6 @@ export {
|
|
|
41456
41329
|
createValidAbsoluteUrl,
|
|
41457
41330
|
destroySvgContext,
|
|
41458
41331
|
fetchData,
|
|
41459
|
-
fontLoader,
|
|
41460
41332
|
getDocument,
|
|
41461
41333
|
getFilenameFromUrl,
|
|
41462
41334
|
getPdfFilenameFromUrl,
|
|
@@ -41508,7 +41380,6 @@ export {
|
|
|
41508
41380
|
loadTextLayerFonts,
|
|
41509
41381
|
makeSerializable,
|
|
41510
41382
|
noContextMenu,
|
|
41511
|
-
normalizeMarkedContentId,
|
|
41512
41383
|
normalizeUnicode,
|
|
41513
41384
|
parseRgbaColor,
|
|
41514
41385
|
renderSvgNode,
|
|
@@ -41516,6 +41387,7 @@ export {
|
|
|
41516
41387
|
setLayerDimensions,
|
|
41517
41388
|
shadow,
|
|
41518
41389
|
stopEvent,
|
|
41390
|
+
textLayerUtils,
|
|
41519
41391
|
toDataUrl,
|
|
41520
41392
|
toSvgNode,
|
|
41521
41393
|
toSvgString,
|