@chialab/pdfjs-lib 1.0.0-alpha.43 → 1.0.0-alpha.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -27916,6 +27916,9 @@ function isCanvas(img) {
27916
27916
  function isImage(img) {
27917
27917
  return "src" in img;
27918
27918
  }
27919
+ function isVideoFrame(img) { // Duck-type check: any object exposing a `timestamp` property is treated as a video frame.
27920
+ return "timestamp" in img; // the `in` operator throws a TypeError when img is not an object
27921
+ }
27919
27922
  var SvgPattern = class {
27920
27923
  constructor(src, width, height) {
27921
27924
  __publicField(this, "src");
@@ -28449,9 +28452,9 @@ var SvgCanvasContext = class {
28449
28452
  if (args.length === 2) {
28450
28453
  dx = args[0];
28451
28454
  dy = args[1];
28452
- if (isCanvas(image) || isImage(image)) {
28453
- sw = image.width;
28454
- sh = image.height;
28455
+ if (isCanvas(image) || isImage(image) || isVideoFrame(image)) {
28456
+ sw = image.width ?? image.displayWidth;
28457
+ sh = image.height ?? image.displayHeight;
28455
28458
  dw = sw;
28456
28459
  dh = sh;
28457
28460
  } else {
@@ -28462,9 +28465,9 @@ var SvgCanvasContext = class {
28462
28465
  dy = args[1];
28463
28466
  dw = args[2];
28464
28467
  dh = args[3];
28465
- if (isCanvas(image) || isImage(image)) {
28466
- sw = image.width;
28467
- sh = image.height;
28468
+ if (isCanvas(image) || isImage(image) || isVideoFrame(image)) {
28469
+ sw = image.width ?? image.displayWidth;
28470
+ sh = image.height ?? image.displayHeight;
28468
28471
  }
28469
28472
  } else if (args.length === 8) {
28470
28473
  sw = args[2];
@@ -28477,7 +28480,14 @@ var SvgCanvasContext = class {
28477
28480
  throw new Error("Invalid number of arguments");
28478
28481
  }
28479
28482
  const matrix = this._transformMatrix.translate(dx, dy);
28480
- const href = isCanvas(image) ? image.toDataURL() : isImage(image) ? image.src : "";
28483
+ const canvas = createCanvas(dw, dh);
28484
+ const ctx = canvas.getContext("2d");
28485
+ if (sw != null && sh != null) {
28486
+ ctx.drawImage(image, 0, 0, sw, sh, 0, 0, dw, dh);
28487
+ } else {
28488
+ ctx.drawImage(image, 0, 0, dw, dh);
28489
+ }
28490
+ const href = canvas.toDataURL();
28481
28491
  const svgImage = {
28482
28492
  tag: "image",
28483
28493
  attrs: {
@@ -29277,6 +29287,25 @@ var flattenNodes = (node) => {
29277
29287
  var isNameObject = (obj) => {
29278
29288
  return obj !== null && typeof obj === "object" && "name" in obj;
29279
29289
  };
29290
+ var isLigature = (char) => { // True when `char` is exactly one code point inside one of the hard-coded ranges/values below.
29291
+ if (!char || [...char].length !== 1) { // spread iterates by code point, so a surrogate pair still counts as one character
29292
+ return false;
29293
+ }
29294
+ const code = char.codePointAt(0);
29295
+ if (code == null) {
29296
+ return false;
29297
+ }
29298
+ return (
29299
+ // U+FB00-U+FB4F: Alphabetic Presentation Forms
29300
+ code >= 64256 && code <= 64335 ||
29301
+ code >= 64336 && code <= 65023 || // U+FB50-U+FDFF: Arabic Presentation Forms-A
29302
+ code >= 65136 && code <= 65279 || // U+FE70-U+FEFF: Arabic Presentation Forms-B
29303
+ code === 338 || // U+0152 Œ (Latin ligature outside the FB00 block)
29304
+ code === 339 || // U+0153 œ
29305
+ code === 306 || // U+0132 (capital IJ digraph)
29306
+ code === 307 // U+0133 (small ij digraph)
29307
+ );
29308
+ };
29280
29309
  var renderTextLayer = (root, options = {}) => {
29281
29310
  const { classes = false, styles = false, font } = options;
29282
29311
  const serializeAttributes = (attrs) => {
@@ -29311,7 +29340,7 @@ var renderTextLayer = (root, options = {}) => {
29311
29340
  "--tl-margin": `${margin}px`,
29312
29341
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
29313
29342
  }) : "";
29314
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
29343
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
29315
29344
  }).join("");
29316
29345
  if (!classes && !styles) {
29317
29346
  return contents;
@@ -29896,6 +29925,44 @@ async function createTextLayer(page, {
29896
29925
  });
29897
29926
  }
29898
29927
  };
29928
+ const addGlyph = (textItem, char) => { // Appends `char` to textItem.glyphs, advances the enclosing-scope textPosition, and returns how many glyph entries were pushed.
29929
+ const width = (char.width ?? 0) * fontSize / 1e3 || 0; // width scaled by fontSize / 1000; the trailing `|| 0` turns NaN into 0
29930
+ const charUnicode = char.unicode ?? "";
29931
+ if (isLigature(charUnicode)) {
29932
+ const normalized = charUnicode.normalize("NFKD"); // compatibility decomposition; a code point without one comes back unchanged
29933
+ const expansion = Array.from(normalized); // split by code point
29934
+ const widthPerChar = width / expansion.length; // the glyph's advance is shared evenly between the expanded characters
29935
+ for (const expandedChar of expansion) {
29936
+ const syntheticGlyph = { // replacement entry: carries only `unicode` and `isSpace`, no other field of the source glyph
29937
+ unicode: expandedChar,
29938
+ isSpace: false
29939
+ };
29940
+ textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
29941
+ textPosition += widthPerChar + charSpacing * hScale; // NOTE(review): charSpacing is added once per expanded character, not once per source glyph - confirm intended
29942
+ }
29943
+ return expansion.length;
29944
+ }
29945
+ textItem.glyphs.push([char, textPosition, width]); // ordinary glyph: stored as-is, including glyphs whose `unicode` is missing
29946
+ textPosition += width + charSpacing * hScale;
29947
+ if (char.isSpace) {
29948
+ textPosition += wordSpacing * hScale; // word spacing is only added for glyphs flagged as spaces
29949
+ }
29950
+ return 1;
29951
+ };
29952
+ const processChars = (chars, textItem) => { // Walks a list that mixes glyph objects and numeric position adjustments, passing glyphs to addGlyph.
29953
+ let skip = 0; // how many upcoming glyph entries to drop after a ligature expansion
29954
+ for (const char of chars) {
29955
+ if (typeof char === "number") {
29956
+ if (!skip) { // numeric adjustments are ignored while entries are being skipped
29957
+ textPosition += -char * fontSize * hScale / 1e3;
29958
+ }
29959
+ } else if (skip) {
29960
+ skip--; // NOTE(review): this discards the glyph(s) following an expanded ligature - confirm the input really carries N-1 placeholder entries
29961
+ } else {
29962
+ skip = addGlyph(textItem, char) - 1; // addGlyph returns 1 for an ordinary glyph, so skip stays 0
29963
+ }
29964
+ }
29965
+ };
29899
29966
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
29900
29967
  const fnId = operatorsList.fnArray[i];
29901
29968
  const args = operatorsList.argsArray[i];
@@ -29917,19 +29984,7 @@ async function createTextLayer(page, {
29917
29984
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
29918
29985
  createParagraphIfNeeded(lastTextItem, currentTextItem);
29919
29986
  const [chars] = args;
29920
- for (const char of chars) {
29921
- if (typeof char === "number") {
29922
- const tx = -char * fontSize * hScale / 1e3;
29923
- textPosition += tx;
29924
- } else if (char.unicode) {
29925
- const width = char.width * fontSize / 1e3 || 0;
29926
- currentTextItem.glyphs.push([char, textPosition, width]);
29927
- textPosition += width + charSpacing * hScale;
29928
- if (char.isSpace) {
29929
- textPosition += wordSpacing * hScale;
29930
- }
29931
- }
29932
- }
29987
+ processChars(chars, currentTextItem);
29933
29988
  break;
29934
29989
  }
29935
29990
  case OPS.setTextRise:
@@ -29977,23 +30032,9 @@ async function createTextLayer(page, {
29977
30032
  const [text] = args;
29978
30033
  if (typeof text === "string") {
29979
30034
  const glyphs = currentTextItem.font.charsToGlyphs(text);
29980
- for (const char of glyphs) {
29981
- const width = char.width * fontSize / 1e3 || 0;
29982
- currentTextItem.glyphs.push([char, textPosition, width]);
29983
- textPosition += width + charSpacing * hScale;
29984
- if (char.isSpace) {
29985
- textPosition += wordSpacing * hScale;
29986
- }
29987
- }
30035
+ processChars(glyphs, currentTextItem);
29988
30036
  } else if (Array.isArray(text)) {
29989
- for (const char of text) {
29990
- const width = char.width * fontSize / 1e3 || 0;
29991
- currentTextItem.glyphs.push([char, textPosition, width]);
29992
- textPosition += width + charSpacing * hScale;
29993
- if (char.isSpace) {
29994
- textPosition += wordSpacing * hScale;
29995
- }
29996
- }
30037
+ processChars(text, currentTextItem);
29997
30038
  }
29998
30039
  break;
29999
30040
  }
@@ -30009,23 +30050,9 @@ async function createTextLayer(page, {
30009
30050
  lineMatrix = [...textMatrix];
30010
30051
  if (typeof text === "string") {
30011
30052
  const glyphs = currentTextItem.font.charsToGlyphs(text);
30012
- for (const char of glyphs) {
30013
- const width = char.width * fontSize / 1e3 || 0;
30014
- currentTextItem.glyphs.push([char, textPosition, width]);
30015
- textPosition += width + charSpacing * hScale;
30016
- if (char.isSpace) {
30017
- textPosition += wordSpacing * hScale;
30018
- }
30019
- }
30053
+ processChars(glyphs, currentTextItem);
30020
30054
  } else if (Array.isArray(text)) {
30021
- for (const char of text) {
30022
- const width = char.width * fontSize / 1e3 || 0;
30023
- currentTextItem.glyphs.push([char, textPosition, width]);
30024
- textPosition += width + charSpacing * hScale;
30025
- if (char.isSpace) {
30026
- textPosition += wordSpacing * hScale;
30027
- }
30028
- }
30055
+ processChars(text, currentTextItem);
30029
30056
  }
30030
30057
  break;
30031
30058
  }
package/dist/index.d.ts CHANGED
@@ -10,7 +10,7 @@ export * from './lib/StandardFontDataFactory';
10
10
  export * from './lib/AnnotationData';
11
11
  export * from './lib/CanvasGraphics';
12
12
  export * from './lib/SvgCanvasContext';
13
- export { type TextLayerRoot, type TextLayerNode, type TextLayerAnchor, type TextLayerFigure, type TextLayerElement, isTextNode, isAnchorNode, isFigureNode, isElementNode, measure, createTextLayer, renderTextLayer, } from './lib/TextLayer';
13
+ export { type TextLayerRoot, type TextLayerNode, type TextLayerAnchor, type TextLayerFigure, type TextLayerElement, type TextLayerText, isTextNode, isAnchorNode, isFigureNode, isElementNode, measure, createTextLayer, renderTextLayer, } from './lib/TextLayer';
14
14
  export declare const textLayerUtils: {
15
15
  findNode: (root: import(".").TextLayerElement, callback: (node: import(".").TextLayerNode, parent: import(".").TextLayerElement) => boolean | null | undefined) => import(".").TextLayerNode | null;
16
16
  findNodes: (node: import(".").TextLayerElement, callback: (node: import(".").TextLayerNode, parent: import(".").TextLayerElement) => boolean | null | undefined) => import(".").TextLayerNode[];
@@ -52,6 +52,8 @@ export interface TextLayerFigure extends TextLayerNode {
52
52
  export interface Glyph { // One glyph entry as stored in TextItem.glyphs tuples.
53
53
  unicode: string; // text the glyph stands for
54
54
  isSpace: boolean; // true when the glyph is a space
55
+ fontChar?: string; // NOTE(review): presumably the character in the font's own encoding - not used in the visible code, confirm
56
+ width?: number; // glyph advance; the text-layer code multiplies it by fontSize / 1000
55
57
  }
56
58
  export type TextItem = {
57
59
  glyphs: [Glyph, number, number][];
@@ -78,6 +80,11 @@ export declare const renderTextLayer: (root: TextLayerRoot, options?: {
78
80
  }) => string;
79
81
  export declare const decorateStructTree: (node: StructTreeNodeWithAttrs | StructTreeContent, rootContainer: TextLayerRoot, graphics?: SvgRoot, annotations?: AnnotationData[], parents?: TextLayerElement[]) => void;
80
82
  export declare const measure: (text: string, font: string, fontSize: number, lang?: string) => TextMetrics;
83
+ /**
84
+ * Expands ligatures using Unicode normalization (NFKD - Compatibility Decomposition).
85
+ * This automatically handles Latin ligatures (ff fi fl ffi ffl ſt st), IJ ligatures (IJ ij),
86
+ * by decomposing them into their component characters; AE/OE letters (Æ æ Œ œ) have no compatibility decomposition and are left unchanged.
87
+ */
81
88
  export declare function createTextLayer(page: PDFPageProxy, { graphics, annotations: _annotations, }?: {
82
89
  graphics?: SvgRoot;
83
90
  annotations?: AnnotationData[];
@@ -26517,6 +26517,9 @@ function isCanvas(img) {
26517
26517
  function isImage(img) {
26518
26518
  return "src" in img;
26519
26519
  }
26520
+ function isVideoFrame(img) { // Duck-type check: any object exposing a `timestamp` property is treated as a video frame.
26521
+ return "timestamp" in img; // the `in` operator throws a TypeError when img is not an object
26522
+ }
26520
26523
  var SvgPattern = class {
26521
26524
  constructor(src, width, height) {
26522
26525
  __publicField(this, "src");
@@ -27050,9 +27053,9 @@ var SvgCanvasContext = class {
27050
27053
  if (args.length === 2) {
27051
27054
  dx = args[0];
27052
27055
  dy = args[1];
27053
- if (isCanvas(image) || isImage(image)) {
27054
- sw = image.width;
27055
- sh = image.height;
27056
+ if (isCanvas(image) || isImage(image) || isVideoFrame(image)) {
27057
+ sw = image.width ?? image.displayWidth;
27058
+ sh = image.height ?? image.displayHeight;
27056
27059
  dw = sw;
27057
27060
  dh = sh;
27058
27061
  } else {
@@ -27063,9 +27066,9 @@ var SvgCanvasContext = class {
27063
27066
  dy = args[1];
27064
27067
  dw = args[2];
27065
27068
  dh = args[3];
27066
- if (isCanvas(image) || isImage(image)) {
27067
- sw = image.width;
27068
- sh = image.height;
27069
+ if (isCanvas(image) || isImage(image) || isVideoFrame(image)) {
27070
+ sw = image.width ?? image.displayWidth;
27071
+ sh = image.height ?? image.displayHeight;
27069
27072
  }
27070
27073
  } else if (args.length === 8) {
27071
27074
  sw = args[2];
@@ -27078,7 +27081,14 @@ var SvgCanvasContext = class {
27078
27081
  throw new Error("Invalid number of arguments");
27079
27082
  }
27080
27083
  const matrix = this._transformMatrix.translate(dx, dy);
27081
- const href = isCanvas(image) ? image.toDataURL() : isImage(image) ? image.src : "";
27084
+ const canvas = createCanvas(dw, dh);
27085
+ const ctx = canvas.getContext("2d");
27086
+ if (sw != null && sh != null) {
27087
+ ctx.drawImage(image, 0, 0, sw, sh, 0, 0, dw, dh);
27088
+ } else {
27089
+ ctx.drawImage(image, 0, 0, dw, dh);
27090
+ }
27091
+ const href = canvas.toDataURL();
27082
27092
  const svgImage = {
27083
27093
  tag: "image",
27084
27094
  attrs: {
@@ -27878,6 +27888,25 @@ var flattenNodes = (node) => {
27878
27888
  var isNameObject = (obj) => {
27879
27889
  return obj !== null && typeof obj === "object" && "name" in obj;
27880
27890
  };
27891
+ var isLigature = (char) => { // True when `char` is exactly one code point inside one of the hard-coded ranges/values below.
27892
+ if (!char || [...char].length !== 1) { // spread iterates by code point, so a surrogate pair still counts as one character
27893
+ return false;
27894
+ }
27895
+ const code = char.codePointAt(0);
27896
+ if (code == null) {
27897
+ return false;
27898
+ }
27899
+ return (
27900
+ // U+FB00-U+FB4F: Alphabetic Presentation Forms
27901
+ code >= 64256 && code <= 64335 ||
27902
+ code >= 64336 && code <= 65023 || // U+FB50-U+FDFF: Arabic Presentation Forms-A
27903
+ code >= 65136 && code <= 65279 || // U+FE70-U+FEFF: Arabic Presentation Forms-B
27904
+ code === 338 || // U+0152 Œ (Latin ligature outside the FB00 block)
27905
+ code === 339 || // U+0153 œ
27906
+ code === 306 || // U+0132 (capital IJ digraph)
27907
+ code === 307 // U+0133 (small ij digraph)
27908
+ );
27909
+ };
27881
27910
  var renderTextLayer = (root, options = {}) => {
27882
27911
  const { classes = false, styles = false, font } = options;
27883
27912
  const serializeAttributes = (attrs) => {
@@ -27912,7 +27941,7 @@ var renderTextLayer = (root, options = {}) => {
27912
27941
  "--tl-margin": `${margin}px`,
27913
27942
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
27914
27943
  }) : "";
27915
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
27944
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
27916
27945
  }).join("");
27917
27946
  if (!classes && !styles) {
27918
27947
  return contents;
@@ -28497,6 +28526,44 @@ async function createTextLayer(page, {
28497
28526
  });
28498
28527
  }
28499
28528
  };
28529
+ const addGlyph = (textItem, char) => { // Appends `char` to textItem.glyphs, advances the enclosing-scope textPosition, and returns how many glyph entries were pushed.
28530
+ const width = (char.width ?? 0) * fontSize / 1e3 || 0; // width scaled by fontSize / 1000; the trailing `|| 0` turns NaN into 0
28531
+ const charUnicode = char.unicode ?? "";
28532
+ if (isLigature(charUnicode)) {
28533
+ const normalized = charUnicode.normalize("NFKD"); // compatibility decomposition; a code point without one comes back unchanged
28534
+ const expansion = Array.from(normalized); // split by code point
28535
+ const widthPerChar = width / expansion.length; // the glyph's advance is shared evenly between the expanded characters
28536
+ for (const expandedChar of expansion) {
28537
+ const syntheticGlyph = { // replacement entry: carries only `unicode` and `isSpace`, no other field of the source glyph
28538
+ unicode: expandedChar,
28539
+ isSpace: false
28540
+ };
28541
+ textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
28542
+ textPosition += widthPerChar + charSpacing * hScale; // NOTE(review): charSpacing is added once per expanded character, not once per source glyph - confirm intended
28543
+ }
28544
+ return expansion.length;
28545
+ }
28546
+ textItem.glyphs.push([char, textPosition, width]); // ordinary glyph: stored as-is, including glyphs whose `unicode` is missing
28547
+ textPosition += width + charSpacing * hScale;
28548
+ if (char.isSpace) {
28549
+ textPosition += wordSpacing * hScale; // word spacing is only added for glyphs flagged as spaces
28550
+ }
28551
+ return 1;
28552
+ };
28553
+ const processChars = (chars, textItem) => { // Walks a list that mixes glyph objects and numeric position adjustments, passing glyphs to addGlyph.
28554
+ let skip = 0; // how many upcoming glyph entries to drop after a ligature expansion
28555
+ for (const char of chars) {
28556
+ if (typeof char === "number") {
28557
+ if (!skip) { // numeric adjustments are ignored while entries are being skipped
28558
+ textPosition += -char * fontSize * hScale / 1e3;
28559
+ }
28560
+ } else if (skip) {
28561
+ skip--; // NOTE(review): this discards the glyph(s) following an expanded ligature - confirm the input really carries N-1 placeholder entries
28562
+ } else {
28563
+ skip = addGlyph(textItem, char) - 1; // addGlyph returns 1 for an ordinary glyph, so skip stays 0
28564
+ }
28565
+ }
28566
+ };
28500
28567
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
28501
28568
  const fnId = operatorsList.fnArray[i];
28502
28569
  const args = operatorsList.argsArray[i];
@@ -28518,19 +28585,7 @@ async function createTextLayer(page, {
28518
28585
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
28519
28586
  createParagraphIfNeeded(lastTextItem, currentTextItem);
28520
28587
  const [chars] = args;
28521
- for (const char of chars) {
28522
- if (typeof char === "number") {
28523
- const tx = -char * fontSize * hScale / 1e3;
28524
- textPosition += tx;
28525
- } else if (char.unicode) {
28526
- const width = char.width * fontSize / 1e3 || 0;
28527
- currentTextItem.glyphs.push([char, textPosition, width]);
28528
- textPosition += width + charSpacing * hScale;
28529
- if (char.isSpace) {
28530
- textPosition += wordSpacing * hScale;
28531
- }
28532
- }
28533
- }
28588
+ processChars(chars, currentTextItem);
28534
28589
  break;
28535
28590
  }
28536
28591
  case OPS.setTextRise:
@@ -28578,23 +28633,9 @@ async function createTextLayer(page, {
28578
28633
  const [text] = args;
28579
28634
  if (typeof text === "string") {
28580
28635
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28581
- for (const char of glyphs) {
28582
- const width = char.width * fontSize / 1e3 || 0;
28583
- currentTextItem.glyphs.push([char, textPosition, width]);
28584
- textPosition += width + charSpacing * hScale;
28585
- if (char.isSpace) {
28586
- textPosition += wordSpacing * hScale;
28587
- }
28588
- }
28636
+ processChars(glyphs, currentTextItem);
28589
28637
  } else if (Array.isArray(text)) {
28590
- for (const char of text) {
28591
- const width = char.width * fontSize / 1e3 || 0;
28592
- currentTextItem.glyphs.push([char, textPosition, width]);
28593
- textPosition += width + charSpacing * hScale;
28594
- if (char.isSpace) {
28595
- textPosition += wordSpacing * hScale;
28596
- }
28597
- }
28638
+ processChars(text, currentTextItem);
28598
28639
  }
28599
28640
  break;
28600
28641
  }
@@ -28610,23 +28651,9 @@ async function createTextLayer(page, {
28610
28651
  lineMatrix = [...textMatrix];
28611
28652
  if (typeof text === "string") {
28612
28653
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28613
- for (const char of glyphs) {
28614
- const width = char.width * fontSize / 1e3 || 0;
28615
- currentTextItem.glyphs.push([char, textPosition, width]);
28616
- textPosition += width + charSpacing * hScale;
28617
- if (char.isSpace) {
28618
- textPosition += wordSpacing * hScale;
28619
- }
28620
- }
28654
+ processChars(glyphs, currentTextItem);
28621
28655
  } else if (Array.isArray(text)) {
28622
- for (const char of text) {
28623
- const width = char.width * fontSize / 1e3 || 0;
28624
- currentTextItem.glyphs.push([char, textPosition, width]);
28625
- textPosition += width + charSpacing * hScale;
28626
- if (char.isSpace) {
28627
- textPosition += wordSpacing * hScale;
28628
- }
28629
- }
28656
+ processChars(text, currentTextItem);
28630
28657
  }
28631
28658
  break;
28632
28659
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@chialab/pdfjs-lib",
3
3
  "description": "A custom Mozilla's PDF.js build with better Node support and extras.",
4
- "version": "1.0.0-alpha.43",
4
+ "version": "1.0.0-alpha.45",
5
5
  "type": "module",
6
6
  "author": "Chialab <dev@chialab.it>",
7
7
  "license": "MIT",