@chialab/pdfjs-lib 1.0.0-alpha.30 → 1.0.0-alpha.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27858,26 +27858,6 @@ async function toDataUrl(data, type = "image/png") {
27858
27858
  }
27859
27859
  return `data:${type};base64,${Buffer.from(data).toString("base64")}`;
27860
27860
  }
27861
- function makeSerializable(object) {
27862
- if (typeof object !== "object" || object === null) {
27863
- return object;
27864
- }
27865
- if (object instanceof Int8Array || object instanceof Uint8Array || object instanceof Uint8ClampedArray || object instanceof Int16Array || object instanceof Uint16Array || object instanceof Int32Array || object instanceof Uint32Array || object instanceof Float32Array || object instanceof Float64Array) {
27866
- return makeSerializable(Array.from(object));
27867
- }
27868
- if (object instanceof BigInt64Array || object instanceof BigUint64Array) {
27869
- return makeSerializable(Array.from(object));
27870
- }
27871
- if (Array.isArray(object)) {
27872
- return object.map(makeSerializable);
27873
- }
27874
- return Object.fromEntries(
27875
- Object.entries(object).map(([key, value]) => [
27876
- key,
27877
- makeSerializable(value)
27878
- ])
27879
- );
27880
- }
27881
27861
  function colorToRgb(color) {
27882
27862
  if (color.startsWith("#")) {
27883
27863
  const hex = color.slice(1);
@@ -29121,7 +29101,7 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
29121
29101
  }
29122
29102
  }
29123
29103
  }
29124
- return makeSerializable(annotations);
29104
+ return annotations;
29125
29105
  };
29126
29106
 
29127
29107
  // src/lib/AnnotationData.ts
@@ -29357,6 +29337,30 @@ var renderTextLayer = (root, options = {}) => {
29357
29337
  }
29358
29338
  if (isTextNode(node)) {
29359
29339
  const isHeading = parents.find((p) => p.role.match(/^h[1-6]$/));
29340
+ let contents;
29341
+ if (typeof node.text === "string") {
29342
+ contents = node.text;
29343
+ } else {
29344
+ contents = node.text.map((chunk) => {
29345
+ if (!classes && !styles) {
29346
+ return chunk.text;
29347
+ }
29348
+ const tag3 = !isHeading && node.fontWeight >= 700 && node.fontStyle === "italic" ? "em" : "span";
29349
+ const serializedAttrs3 = serializeAttributes(
29350
+ classes ? {
29351
+ class: "tl-span"
29352
+ } : {}
29353
+ );
29354
+ const serializedStyle2 = styles ? serializeStyles({
29355
+ "--tl-margin": `${chunk.margin}px`,
29356
+ "--tl-transform": typeof chunk.scale === "number" && chunk.scale !== 1 ? `scaleX(${chunk.scale ?? 1})` : null
29357
+ }) : "";
29358
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
29359
+ }).join("");
29360
+ }
29361
+ if (!classes && !styles) {
29362
+ return contents;
29363
+ }
29360
29364
  const tag2 = isHeading ? "span" : node.fontWeight >= 700 ? "strong" : node.fontStyle === "italic" ? "em" : "span";
29361
29365
  const serializedAttrs2 = serializeAttributes(
29362
29366
  classes ? {
@@ -29374,25 +29378,6 @@ var renderTextLayer = (root, options = {}) => {
29374
29378
  typeof node.angle === "number" && node.angle !== 0 ? `rotate(${node.angle}deg)` : null
29375
29379
  ].filter((v) => v !== null).join(" ")
29376
29380
  }) : "";
29377
- let contents;
29378
- if (typeof node.text === "string") {
29379
- contents = node.text;
29380
- } else {
29381
- contents = node.text.map((chunk) => {
29382
- const tag3 = !isHeading && node.fontWeight >= 700 && node.fontStyle === "italic";
29383
- const serializedAttrs3 = serializeAttributes(
29384
- classes ? {
29385
- class: "tl-span"
29386
- } : {}
29387
- );
29388
- const serializedStyle2 = styles ? serializeStyles({
29389
- "--tl-left": `${chunk.left}px`,
29390
- "--tl-top": `${chunk.top}px`,
29391
- "--tl-transform": typeof chunk.scale === "number" && chunk.scale !== 1 ? `scaleX(${chunk.scale ?? 1})` : null
29392
- }) : "";
29393
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
29394
- }).join("");
29395
- }
29396
29381
  return `<${tag2}${serializedAttrs2 ? ` ${serializedAttrs2}` : ""}${serializedStyle ? ` style="${serializedStyle}"` : ""}>${contents}</${tag2}>`;
29397
29382
  }
29398
29383
  if (isAnchorNode(node)) {
@@ -29668,6 +29653,10 @@ var decorateStructTree = (node, rootContainer, graphics, annotations, parents =
29668
29653
  if (child.id !== contentId) {
29669
29654
  return;
29670
29655
  }
29656
+ if (child.attrs) {
29657
+ parent.attrs ?? (parent.attrs = {});
29658
+ Object.assign(parent.attrs, child.attrs);
29659
+ }
29671
29660
  previousParent.children = previousParent.children.filter(
29672
29661
  (c) => c !== child
29673
29662
  );
@@ -42378,7 +42367,7 @@ async function createTextLayerV2(page, {
42378
42367
  if (!glyph[0]) {
42379
42368
  return acc;
42380
42369
  }
42381
- if (!glyph[0].isSpace) {
42370
+ if (glyph[0].unicode.trim()) {
42382
42371
  acc[acc.length - 1].push(glyph);
42383
42372
  } else {
42384
42373
  acc.push([glyph]);
@@ -42389,21 +42378,50 @@ async function createTextLayerV2(page, {
42389
42378
  [[]]
42390
42379
  );
42391
42380
  const chunks = [];
42392
- for (const glyphs of glyphsBlocks) {
42381
+ let currentLeft = 0;
42382
+ for (let i = 0; i < glyphsBlocks.length; i++) {
42383
+ const glyphs = glyphsBlocks[i];
42393
42384
  if (glyphs.length === 0) {
42394
42385
  continue;
42395
42386
  }
42396
42387
  const text = glyphs.map((g) => g[0].unicode).join("");
42388
+ const textWidth = textFont.getAdvanceWidth(text, fontSize);
42389
+ if (!textWidth) {
42390
+ continue;
42391
+ }
42392
+ const isSpaceOnly = text.trim() === "";
42393
+ const previousBlock = glyphsBlocks.at(i - 1);
42394
+ const nextBlock = glyphsBlocks.at(i + 1);
42395
+ const previousGlyphs = previousBlock?.at(-1);
42396
+ const nextGlyphs = nextBlock?.[0];
42397
42397
  const firstGlyph = glyphs[0];
42398
42398
  const lastGlyph = glyphs[glyphs.length - 1];
42399
- const graphicWidth = lastGlyph[1] - firstGlyph[1] + lastGlyph[2];
42400
- const textWidth = textFont.getAdvanceWidth(text, fontSize);
42399
+ let margin = firstGlyph[1] / fontSize - currentLeft;
42400
+ let graphicWidth = lastGlyph[1] - firstGlyph[1] + lastGlyph[2];
42401
+ if (isSpaceOnly) {
42402
+ if (previousGlyphs) {
42403
+ margin = (previousGlyphs[1] + previousGlyphs[2]) / fontSize - currentLeft;
42404
+ }
42405
+ if (nextGlyphs) {
42406
+ if (previousGlyphs) {
42407
+ graphicWidth = nextGlyphs[1] - (previousGlyphs[1] + previousGlyphs[2]);
42408
+ } else {
42409
+ graphicWidth = nextGlyphs[1] - firstGlyph[1];
42410
+ }
42411
+ }
42412
+ } else if (previousGlyphs) {
42413
+ const space = (previousGlyphs[1] + previousGlyphs[2]) / fontSize - currentLeft;
42414
+ if (margin > 0 && margin - space <= 0.1) {
42415
+ graphicWidth += (margin - space) * fontSize;
42416
+ margin = space;
42417
+ }
42418
+ }
42401
42419
  chunks.push({
42402
42420
  text,
42403
- top: 0,
42404
- left: firstGlyph[1] * finalFontSize,
42405
- scale: textWidth > 0 ? graphicWidth / textWidth : 1
42421
+ margin: margin * finalFontSize,
42422
+ scale: graphicWidth / textWidth
42406
42423
  });
42424
+ currentLeft += margin + textWidth / fontSize;
42407
42425
  }
42408
42426
  markedContent.children.push({
42409
42427
  role: "text",
@@ -42567,11 +42585,12 @@ async function createTextLayerV2(page, {
42567
42585
  break;
42568
42586
  case OPS.beginMarkedContent:
42569
42587
  case OPS.beginMarkedContentProps: {
42570
- const [role, idRef] = args;
42588
+ const [role, idRef, props] = args;
42571
42589
  const id2 = normalizeMarkedContentId(idRef);
42572
42590
  markedContent.children.push({
42573
42591
  id: id2,
42574
42592
  role: role?.toString().toLowerCase() || "span",
42593
+ attrs: props ?? void 0,
42575
42594
  children: []
42576
42595
  });
42577
42596
  markedContent = markedContent.children.at(-1) || rootContainer;
@@ -42736,7 +42755,6 @@ export {
42736
42755
  isWidgetAnnotation,
42737
42756
  loadDefaultFonts,
42738
42757
  loadTextLayerFonts,
42739
- makeSerializable,
42740
42758
  noContextMenu,
42741
42759
  normalizeUnicode,
42742
42760
  parseRgbaColor,
@@ -78280,7 +78280,67 @@ AnnotationFactory.saveNewAnnotations = async (evaluator, task, annotations, imag
78280
78280
  return data;
78281
78281
  };
78282
78282
 
78283
- // src/lib/Font.ts
78283
+ // src/lib/Evaluator.ts
78284
+ var collectMarkedContentLanguage = (stream, xref) => {
78285
+ const map = /* @__PURE__ */ new Map();
78286
+ const pos = stream.pos;
78287
+ const preprocessor = new EvaluatorPreprocessor(stream, xref);
78288
+ const operation = {
78289
+ fn: null,
78290
+ args: null
78291
+ };
78292
+ while (preprocessor.read(operation)) {
78293
+ if (operation.fn === null) {
78294
+ operation.args = null;
78295
+ continue;
78296
+ }
78297
+ if (operation.fn !== OPS.beginMarkedContentProps) {
78298
+ operation.args = null;
78299
+ continue;
78300
+ }
78301
+ if (!operation.args?.length) {
78302
+ operation.args = null;
78303
+ continue;
78304
+ }
78305
+ const [, dict] = operation.args.slice();
78306
+ operation.args = null;
78307
+ if (!(dict instanceof Dict)) {
78308
+ continue;
78309
+ }
78310
+ const mcid = dict.get("MCID");
78311
+ if (typeof mcid !== "number") {
78312
+ continue;
78313
+ }
78314
+ const lang = dict.get("Lang");
78315
+ if (!lang || typeof lang !== "string") {
78316
+ continue;
78317
+ }
78318
+ map.set(mcid, lang);
78319
+ }
78320
+ stream.pos = pos;
78321
+ return map;
78322
+ };
78323
+ var getOperatorList = PartialEvaluator.prototype.getOperatorList;
78324
+ PartialEvaluator.prototype.getOperatorList = async function(options, ...args) {
78325
+ const { stream, operatorList } = options;
78326
+ const languages = collectMarkedContentLanguage(stream, this.xref);
78327
+ const addOp = operatorList.addOp;
78328
+ operatorList.addOp = function(fn, args2) {
78329
+ if (fn === OPS.beginMarkedContentProps) {
78330
+ const mcid = args2[1];
78331
+ if (typeof mcid === "number" && languages.has(mcid)) {
78332
+ const lang = languages.get(mcid);
78333
+ if (lang) {
78334
+ args2[2] = {
78335
+ lang: stringToPDFString(lang)
78336
+ };
78337
+ }
78338
+ }
78339
+ }
78340
+ return addOp.call(this, fn, args2);
78341
+ };
78342
+ await getOperatorList.call(this, options, ...args);
78343
+ };
78284
78344
  var translateFont = PartialEvaluator.prototype.translateFont;
78285
78345
  PartialEvaluator.prototype.translateFont = async function(options, ...args) {
78286
78346
  const { descriptor } = options;
@@ -20,14 +20,14 @@ export interface TextLayerAnchor extends TextLayerElement {
20
20
  role: 'a';
21
21
  href: string;
22
22
  }
23
+ export interface TextLayerChunk {
24
+ text: string;
25
+ margin: number;
26
+ scale?: number;
27
+ }
23
28
  export interface TextLayerText extends TextLayerNode {
24
29
  role: 'text';
25
- text: string | {
26
- text: string;
27
- top: number;
28
- left: number;
29
- scale?: number;
30
- }[];
30
+ text: /* v1 */ string | /* v2 */ TextLayerChunk[];
31
31
  top: number;
32
32
  left: number;
33
33
  fontSize: number;
@@ -16,13 +16,6 @@ export declare function canvasToData(canvas: HTMLCanvasElement | Canvas): Promis
16
16
  * @returns A promise that resolves to the data url.
17
17
  */
18
18
  export declare function toDataUrl(data: Uint8Array<ArrayBuffer>, type?: string): Promise<string>;
19
- /**
20
- * Ensure the object can be serialized and unserialized as JSON.
21
- * Internally it converts typed arrays to plain arrays.
22
- * @param object The object to serialize.
23
- * @returns The serialized object.
24
- */
25
- export declare function makeSerializable<T>(object: T): T;
26
19
  /**
27
20
  * Convert hash color to RGB array.
28
21
  * @param color The color in hex format (e.g., '#ff0000' or '#f00').
@@ -6,7 +6,7 @@ import {
6
6
  NodeWasmFactory,
7
7
  fetchData2 as fetchData,
8
8
  filtersRegistry
9
- } from "./chunk-O74KGUUC.js";
9
+ } from "./chunk-R66TN6BM.js";
10
10
  import "./chunk-T2JWSGAF.js";
11
11
  import "./chunk-ZFIGV5OT.js";
12
12
  export {
@@ -1635,26 +1635,6 @@ async function toDataUrl(data, type = "image/png") {
1635
1635
  }
1636
1636
  return `data:${type};base64,${Buffer.from(data).toString("base64")}`;
1637
1637
  }
1638
- function makeSerializable(object) {
1639
- if (typeof object !== "object" || object === null) {
1640
- return object;
1641
- }
1642
- if (object instanceof Int8Array || object instanceof Uint8Array || object instanceof Uint8ClampedArray || object instanceof Int16Array || object instanceof Uint16Array || object instanceof Int32Array || object instanceof Uint32Array || object instanceof Float32Array || object instanceof Float64Array) {
1643
- return makeSerializable(Array.from(object));
1644
- }
1645
- if (object instanceof BigInt64Array || object instanceof BigUint64Array) {
1646
- return makeSerializable(Array.from(object));
1647
- }
1648
- if (Array.isArray(object)) {
1649
- return object.map(makeSerializable);
1650
- }
1651
- return Object.fromEntries(
1652
- Object.entries(object).map(([key, value]) => [
1653
- key,
1654
- makeSerializable(value)
1655
- ])
1656
- );
1657
- }
1658
1638
  function colorToRgb(color) {
1659
1639
  if (color.startsWith("#")) {
1660
1640
  const hex = color.slice(1);
@@ -2079,7 +2059,6 @@ export {
2079
2059
  DOMFilterFactory,
2080
2060
  canvasToData,
2081
2061
  toDataUrl,
2082
- makeSerializable,
2083
2062
  colorToRgb,
2084
2063
  rgbToHex,
2085
2064
  parseRgbaColor,
@@ -46,7 +46,6 @@ import {
46
46
  isDataScheme,
47
47
  isPdfFile,
48
48
  isValidFetchUrl,
49
- makeSerializable,
50
49
  noContextMenu,
51
50
  parseRgbaColor,
52
51
  renderRichText,
@@ -54,7 +53,7 @@ import {
54
53
  setLayerDimensions,
55
54
  stopEvent,
56
55
  toDataUrl
57
- } from "./chunk-O74KGUUC.js";
56
+ } from "./chunk-R66TN6BM.js";
58
57
  import {
59
58
  AbortException,
60
59
  AnnotationBorderStyleType,
@@ -39834,7 +39833,7 @@ function destroySvgContext(ctx) {
39834
39833
 
39835
39834
  // src/lib/PDFPageProxy.ts
39836
39835
  async function loadNodeCanvasFactory() {
39837
- const { NodeCanvasFactory: NodeCanvasFactory2 } = await import("./NodeUtils-PVXQMWFJ.js");
39836
+ const { NodeCanvasFactory: NodeCanvasFactory2 } = await import("./NodeUtils-SRP3N4DX.js");
39838
39837
  return new NodeCanvasFactory2({});
39839
39838
  }
39840
39839
  var getAnnotations = PDFPageProxy.prototype.getAnnotations;
@@ -40001,7 +40000,7 @@ PDFPageProxy.prototype.getAnnotations = async function(params) {
40001
40000
  }
40002
40001
  }
40003
40002
  }
40004
- return makeSerializable(annotations);
40003
+ return annotations;
40005
40004
  };
40006
40005
 
40007
40006
  // src/lib/AnnotationData.ts
@@ -40237,6 +40236,30 @@ var renderTextLayer = (root, options = {}) => {
40237
40236
  }
40238
40237
  if (isTextNode(node)) {
40239
40238
  const isHeading = parents.find((p) => p.role.match(/^h[1-6]$/));
40239
+ let contents;
40240
+ if (typeof node.text === "string") {
40241
+ contents = node.text;
40242
+ } else {
40243
+ contents = node.text.map((chunk) => {
40244
+ if (!classes && !styles) {
40245
+ return chunk.text;
40246
+ }
40247
+ const tag3 = !isHeading && node.fontWeight >= 700 && node.fontStyle === "italic" ? "em" : "span";
40248
+ const serializedAttrs3 = serializeAttributes(
40249
+ classes ? {
40250
+ class: "tl-span"
40251
+ } : {}
40252
+ );
40253
+ const serializedStyle2 = styles ? serializeStyles({
40254
+ "--tl-margin": `${chunk.margin}px`,
40255
+ "--tl-transform": typeof chunk.scale === "number" && chunk.scale !== 1 ? `scaleX(${chunk.scale ?? 1})` : null
40256
+ }) : "";
40257
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
40258
+ }).join("");
40259
+ }
40260
+ if (!classes && !styles) {
40261
+ return contents;
40262
+ }
40240
40263
  const tag2 = isHeading ? "span" : node.fontWeight >= 700 ? "strong" : node.fontStyle === "italic" ? "em" : "span";
40241
40264
  const serializedAttrs2 = serializeAttributes(
40242
40265
  classes ? {
@@ -40254,25 +40277,6 @@ var renderTextLayer = (root, options = {}) => {
40254
40277
  typeof node.angle === "number" && node.angle !== 0 ? `rotate(${node.angle}deg)` : null
40255
40278
  ].filter((v) => v !== null).join(" ")
40256
40279
  }) : "";
40257
- let contents;
40258
- if (typeof node.text === "string") {
40259
- contents = node.text;
40260
- } else {
40261
- contents = node.text.map((chunk) => {
40262
- const tag3 = !isHeading && node.fontWeight >= 700 && node.fontStyle === "italic";
40263
- const serializedAttrs3 = serializeAttributes(
40264
- classes ? {
40265
- class: "tl-span"
40266
- } : {}
40267
- );
40268
- const serializedStyle2 = styles ? serializeStyles({
40269
- "--tl-left": `${chunk.left}px`,
40270
- "--tl-top": `${chunk.top}px`,
40271
- "--tl-transform": typeof chunk.scale === "number" && chunk.scale !== 1 ? `scaleX(${chunk.scale ?? 1})` : null
40272
- }) : "";
40273
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
40274
- }).join("");
40275
- }
40276
40280
  return `<${tag2}${serializedAttrs2 ? ` ${serializedAttrs2}` : ""}${serializedStyle ? ` style="${serializedStyle}"` : ""}>${contents}</${tag2}>`;
40277
40281
  }
40278
40282
  if (isAnchorNode(node)) {
@@ -40548,6 +40552,10 @@ var decorateStructTree = (node, rootContainer, graphics, annotations, parents =
40548
40552
  if (child.id !== contentId) {
40549
40553
  return;
40550
40554
  }
40555
+ if (child.attrs) {
40556
+ parent.attrs ?? (parent.attrs = {});
40557
+ Object.assign(parent.attrs, child.attrs);
40558
+ }
40551
40559
  previousParent.children = previousParent.children.filter(
40552
40560
  (c) => c !== child
40553
40561
  );
@@ -41012,7 +41020,7 @@ async function createTextLayerV2(page, {
41012
41020
  if (!glyph[0]) {
41013
41021
  return acc;
41014
41022
  }
41015
- if (!glyph[0].isSpace) {
41023
+ if (glyph[0].unicode.trim()) {
41016
41024
  acc[acc.length - 1].push(glyph);
41017
41025
  } else {
41018
41026
  acc.push([glyph]);
@@ -41023,21 +41031,50 @@ async function createTextLayerV2(page, {
41023
41031
  [[]]
41024
41032
  );
41025
41033
  const chunks = [];
41026
- for (const glyphs of glyphsBlocks) {
41034
+ let currentLeft = 0;
41035
+ for (let i = 0; i < glyphsBlocks.length; i++) {
41036
+ const glyphs = glyphsBlocks[i];
41027
41037
  if (glyphs.length === 0) {
41028
41038
  continue;
41029
41039
  }
41030
41040
  const text = glyphs.map((g) => g[0].unicode).join("");
41041
+ const textWidth = textFont.getAdvanceWidth(text, fontSize);
41042
+ if (!textWidth) {
41043
+ continue;
41044
+ }
41045
+ const isSpaceOnly = text.trim() === "";
41046
+ const previousBlock = glyphsBlocks.at(i - 1);
41047
+ const nextBlock = glyphsBlocks.at(i + 1);
41048
+ const previousGlyphs = previousBlock?.at(-1);
41049
+ const nextGlyphs = nextBlock?.[0];
41031
41050
  const firstGlyph = glyphs[0];
41032
41051
  const lastGlyph = glyphs[glyphs.length - 1];
41033
- const graphicWidth = lastGlyph[1] - firstGlyph[1] + lastGlyph[2];
41034
- const textWidth = textFont.getAdvanceWidth(text, fontSize);
41052
+ let margin = firstGlyph[1] / fontSize - currentLeft;
41053
+ let graphicWidth = lastGlyph[1] - firstGlyph[1] + lastGlyph[2];
41054
+ if (isSpaceOnly) {
41055
+ if (previousGlyphs) {
41056
+ margin = (previousGlyphs[1] + previousGlyphs[2]) / fontSize - currentLeft;
41057
+ }
41058
+ if (nextGlyphs) {
41059
+ if (previousGlyphs) {
41060
+ graphicWidth = nextGlyphs[1] - (previousGlyphs[1] + previousGlyphs[2]);
41061
+ } else {
41062
+ graphicWidth = nextGlyphs[1] - firstGlyph[1];
41063
+ }
41064
+ }
41065
+ } else if (previousGlyphs) {
41066
+ const space = (previousGlyphs[1] + previousGlyphs[2]) / fontSize - currentLeft;
41067
+ if (margin > 0 && margin - space <= 0.1) {
41068
+ graphicWidth += (margin - space) * fontSize;
41069
+ margin = space;
41070
+ }
41071
+ }
41035
41072
  chunks.push({
41036
41073
  text,
41037
- top: 0,
41038
- left: firstGlyph[1] * finalFontSize,
41039
- scale: textWidth > 0 ? graphicWidth / textWidth : 1
41074
+ margin: margin * finalFontSize,
41075
+ scale: graphicWidth / textWidth
41040
41076
  });
41077
+ currentLeft += margin + textWidth / fontSize;
41041
41078
  }
41042
41079
  markedContent.children.push({
41043
41080
  role: "text",
@@ -41201,11 +41238,12 @@ async function createTextLayerV2(page, {
41201
41238
  break;
41202
41239
  case OPS.beginMarkedContent:
41203
41240
  case OPS.beginMarkedContentProps: {
41204
- const [role, idRef] = args;
41241
+ const [role, idRef, props] = args;
41205
41242
  const id2 = normalizeMarkedContentId(idRef);
41206
41243
  markedContent.children.push({
41207
41244
  id: id2,
41208
41245
  role: role?.toString().toLowerCase() || "span",
41246
+ attrs: props ?? void 0,
41209
41247
  children: []
41210
41248
  });
41211
41249
  markedContent = markedContent.children.at(-1) || rootContainer;
@@ -41370,7 +41408,6 @@ export {
41370
41408
  isWidgetAnnotation,
41371
41409
  loadDefaultFonts2 as loadDefaultFonts,
41372
41410
  loadTextLayerFonts,
41373
- makeSerializable,
41374
41411
  noContextMenu,
41375
41412
  normalizeUnicode,
41376
41413
  parseRgbaColor,
@@ -78282,7 +78282,67 @@ AnnotationFactory.saveNewAnnotations = async (evaluator, task, annotations, imag
78282
78282
  return data;
78283
78283
  };
78284
78284
 
78285
- // src/lib/Font.ts
78285
+ // src/lib/Evaluator.ts
78286
+ var collectMarkedContentLanguage = (stream, xref) => {
78287
+ const map = /* @__PURE__ */ new Map();
78288
+ const pos = stream.pos;
78289
+ const preprocessor = new EvaluatorPreprocessor(stream, xref);
78290
+ const operation = {
78291
+ fn: null,
78292
+ args: null
78293
+ };
78294
+ while (preprocessor.read(operation)) {
78295
+ if (operation.fn === null) {
78296
+ operation.args = null;
78297
+ continue;
78298
+ }
78299
+ if (operation.fn !== OPS.beginMarkedContentProps) {
78300
+ operation.args = null;
78301
+ continue;
78302
+ }
78303
+ if (!operation.args?.length) {
78304
+ operation.args = null;
78305
+ continue;
78306
+ }
78307
+ const [, dict] = operation.args.slice();
78308
+ operation.args = null;
78309
+ if (!(dict instanceof Dict)) {
78310
+ continue;
78311
+ }
78312
+ const mcid = dict.get("MCID");
78313
+ if (typeof mcid !== "number") {
78314
+ continue;
78315
+ }
78316
+ const lang = dict.get("Lang");
78317
+ if (!lang || typeof lang !== "string") {
78318
+ continue;
78319
+ }
78320
+ map.set(mcid, lang);
78321
+ }
78322
+ stream.pos = pos;
78323
+ return map;
78324
+ };
78325
+ var getOperatorList = PartialEvaluator.prototype.getOperatorList;
78326
+ PartialEvaluator.prototype.getOperatorList = async function(options, ...args) {
78327
+ const { stream, operatorList } = options;
78328
+ const languages = collectMarkedContentLanguage(stream, this.xref);
78329
+ const addOp = operatorList.addOp;
78330
+ operatorList.addOp = function(fn, args2) {
78331
+ if (fn === OPS.beginMarkedContentProps) {
78332
+ const mcid = args2[1];
78333
+ if (typeof mcid === "number" && languages.has(mcid)) {
78334
+ const lang = languages.get(mcid);
78335
+ if (lang) {
78336
+ args2[2] = {
78337
+ lang: stringToPDFString(lang)
78338
+ };
78339
+ }
78340
+ }
78341
+ }
78342
+ return addOp.call(this, fn, args2);
78343
+ };
78344
+ await getOperatorList.call(this, options, ...args);
78345
+ };
78286
78346
  var translateFont = PartialEvaluator.prototype.translateFont;
78287
78347
  PartialEvaluator.prototype.translateFont = async function(options, ...args) {
78288
78348
  const { descriptor } = options;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@chialab/pdfjs-lib",
3
3
  "description": "A custom Mozilla's PDF.js build with better Node support and extras.",
4
- "version": "1.0.0-alpha.30",
4
+ "version": "1.0.0-alpha.32",
5
5
  "type": "module",
6
6
  "author": "Chialab <dev@chialab.it>",
7
7
  "license": "MIT",