@chialab/pdfjs-lib 1.0.0-alpha.44 → 1.0.0-alpha.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29287,6 +29287,25 @@ var flattenNodes = (node) => {
29287
29287
  var isNameObject = (obj) => {
29288
29288
  return obj !== null && typeof obj === "object" && "name" in obj;
29289
29289
  };
29290
/**
 * Tells whether a string consists of exactly one code point that is treated
 * as a ligature or presentation form.
 *
 * Membership is decided purely by code point range; it does not guarantee
 * that Unicode normalization will split the character into several ones.
 *
 * @param {string} char String to test.
 * @returns {boolean} `true` when `char` is a single code point in one of the
 *   ranges below; `false` for empty, multi-character or non-string input.
 */
var isLigature = (char) => {
  // Spreading iterates by code point, so an astral character counts as one.
  if (typeof char !== "string" || [...char].length !== 1) {
    return false;
  }
  const code = char.codePointAt(0);
  // U+FEFF (zero width no-break space / byte order mark) sits at the very end
  // of the Arabic Presentation Forms-B block but is a format character.
  if (code === 0xfeff) {
    return false;
  }
  return (
    // Alphabetic Presentation Forms (U+FB00-U+FB4F)
    (code >= 0xfb00 && code <= 0xfb4f) ||
    // Arabic Presentation Forms-A (U+FB50-U+FDFF)
    (code >= 0xfb50 && code <= 0xfdff) ||
    // Arabic Presentation Forms-B (U+FE70-U+FEFE, U+FEFF excluded above)
    (code >= 0xfe70 && code <= 0xfefe) ||
    // Latin ligatures outside the FB00 block
    code === 0x0152 || // LATIN CAPITAL LIGATURE OE
    code === 0x0153 || // LATIN SMALL LIGATURE OE
    code === 0x0132 || // LATIN CAPITAL LIGATURE IJ
    code === 0x0133 // LATIN SMALL LIGATURE IJ
  );
};
29290
29309
  var renderTextLayer = (root, options = {}) => {
29291
29310
  const { classes = false, styles = false, font } = options;
29292
29311
  const serializeAttributes = (attrs) => {
@@ -29321,7 +29340,7 @@ var renderTextLayer = (root, options = {}) => {
29321
29340
  "--tl-margin": `${margin}px`,
29322
29341
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
29323
29342
  }) : "";
29324
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
29343
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
29325
29344
  }).join("");
29326
29345
  if (!classes && !styles) {
29327
29346
  return contents;
@@ -29830,42 +29849,16 @@ async function createTextLayer(page, {
29830
29849
  }
29831
29850
  if (oldTextItem?.glyphs.length) {
29832
29851
  if (newTextItem) {
29833
- const text = oldTextItem.glyphs.map((g) => g[0].unicode).join("") || "";
29834
- const dir = bidi(text).dir;
29835
- const firstGlyph = oldTextItem.glyphs[0];
29836
- const lastGlyph = oldTextItem.glyphs.at(-1);
29837
- const scale = oldTextItem.scaleX * oldTextItem.fontSize;
29838
29852
  const top = oldTextItem.top;
29839
29853
  const bottom = oldTextItem.top + oldTextItem.fontSize;
29840
- const left = oldTextItem.left;
29841
- const right = oldTextItem.left + (lastGlyph[1] + lastGlyph[2]) * scale;
29842
29854
  const nextTop = newTextItem.top;
29843
29855
  const nextBottom = newTextItem.top + newTextItem.fontSize;
29844
- switch (dir) {
29845
- case "ltr": {
29846
- if (newTextItem.left - right < lastGlyph[2] * scale) {
29847
- if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom) {
29848
- return;
29849
- }
29850
- if (checkNewLine(oldTextItem, newTextItem, true)) {
29851
- appendNewLineAsNeeded();
29852
- return;
29853
- }
29854
- }
29855
- break;
29856
- }
29857
- case "rtl": {
29858
- if (left - newTextItem.left < firstGlyph[2] * scale) {
29859
- if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom) {
29860
- return;
29861
- }
29862
- if (checkNewLine(oldTextItem, newTextItem, true)) {
29863
- appendNewLineAsNeeded();
29864
- return;
29865
- }
29866
- }
29867
- break;
29868
- }
29856
+ if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom || nextTop <= top && nextBottom >= bottom) {
29857
+ return;
29858
+ }
29859
+ if (checkNewLine(oldTextItem, newTextItem, true)) {
29860
+ appendNewLineAsNeeded();
29861
+ return;
29869
29862
  }
29870
29863
  }
29871
29864
  closeMarkedContent();
@@ -29906,6 +29899,44 @@ async function createTextLayer(page, {
29906
29899
  });
29907
29900
  }
29908
29901
  };
29902
/**
 * Appends one glyph to `textItem.glyphs` as a `[glyph, position, width]` tuple
 * and advances the running `textPosition` of the enclosing scope.
 *
 * When the glyph's `unicode` is a ligature that NFKD normalization splits
 * into several characters, one synthetic glyph is pushed per character, each
 * taking an equal share of the ligature width.
 *
 * Reads `fontSize`, `charSpacing`, `wordSpacing` and `hScale` from the
 * enclosing scope.
 *
 * @param textItem Text item whose `glyphs` array receives the tuples.
 * @param char Glyph to add; `width` and `unicode` may be missing.
 * @returns Number of glyph tuples pushed (the expansion length for a split
 *   ligature, otherwise 1).
 */
const addGlyph = (textItem, char) => {
  // `|| 0` also neutralises a NaN width.
  const width = (char.width ?? 0) * fontSize / 1e3 || 0;
  const charUnicode = char.unicode ?? "";
  const expansion = isLigature(charUnicode)
    ? Array.from(charUnicode.normalize("NFKD"))
    : [];
  // Only replace the glyph when normalization really yields several
  // characters; otherwise the original glyph object (with its `isSpace`
  // flag and any extra fields) is kept as is.
  if (expansion.length > 1) {
    const widthPerChar = width / expansion.length;
    expansion.forEach((expandedChar, index) => {
      const syntheticGlyph = {
        unicode: expandedChar,
        isSpace: false
      };
      textItem.glyphs.push([
        syntheticGlyph,
        textPosition + index * widthPerChar,
        widthPerChar
      ]);
    });
    // Character spacing is added once for the source glyph, so the running
    // position advances exactly as it does for a non-ligature glyph.
    textPosition += width + charSpacing * hScale;
    return expansion.length;
  }
  textItem.glyphs.push([char, textPosition, width]);
  textPosition += width + charSpacing * hScale;
  if (char.isSpace) {
    textPosition += wordSpacing * hScale;
  }
  return 1;
};
29926
/**
 * Walks a glyph run and feeds it to `addGlyph`, updating the running
 * `textPosition` of the enclosing scope.
 *
 * Numeric entries are positioning adjustments scaled by `fontSize` and
 * `hScale`; every other entry is handed to `addGlyph`.
 *
 * NOTE(review): after `addGlyph` reports that it pushed N glyphs, the next
 * N - 1 glyph entries are dropped and numeric adjustments are ignored until
 * they have been consumed. This presumably expects the input to repeat the
 * components of an expanded ligature - confirm against the glyph source.
 *
 * @param chars Iterable of glyph objects and numeric adjustments.
 * @param textItem Text item that receives the glyphs.
 */
const processChars = (chars, textItem) => {
  let pendingSkips = 0;
  for (const entry of chars) {
    const isAdjustment = typeof entry === "number";
    if (pendingSkips) {
      // Only glyph entries consume a pending skip; adjustments are ignored.
      if (!isAdjustment) {
        pendingSkips--;
      }
      continue;
    }
    if (isAdjustment) {
      textPosition += -entry * fontSize * hScale / 1e3;
      continue;
    }
    pendingSkips = addGlyph(textItem, entry) - 1;
  }
};
29909
29940
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
29910
29941
  const fnId = operatorsList.fnArray[i];
29911
29942
  const args = operatorsList.argsArray[i];
@@ -29927,19 +29958,7 @@ async function createTextLayer(page, {
29927
29958
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
29928
29959
  createParagraphIfNeeded(lastTextItem, currentTextItem);
29929
29960
  const [chars] = args;
29930
- for (const char of chars) {
29931
- if (typeof char === "number") {
29932
- const tx = -char * fontSize * hScale / 1e3;
29933
- textPosition += tx;
29934
- } else if (char.unicode) {
29935
- const width = char.width * fontSize / 1e3 || 0;
29936
- currentTextItem.glyphs.push([char, textPosition, width]);
29937
- textPosition += width + charSpacing * hScale;
29938
- if (char.isSpace) {
29939
- textPosition += wordSpacing * hScale;
29940
- }
29941
- }
29942
- }
29961
+ processChars(chars, currentTextItem);
29943
29962
  break;
29944
29963
  }
29945
29964
  case OPS.setTextRise:
@@ -29987,23 +30006,9 @@ async function createTextLayer(page, {
29987
30006
  const [text] = args;
29988
30007
  if (typeof text === "string") {
29989
30008
  const glyphs = currentTextItem.font.charsToGlyphs(text);
29990
- for (const char of glyphs) {
29991
- const width = char.width * fontSize / 1e3 || 0;
29992
- currentTextItem.glyphs.push([char, textPosition, width]);
29993
- textPosition += width + charSpacing * hScale;
29994
- if (char.isSpace) {
29995
- textPosition += wordSpacing * hScale;
29996
- }
29997
- }
30009
+ processChars(glyphs, currentTextItem);
29998
30010
  } else if (Array.isArray(text)) {
29999
- for (const char of text) {
30000
- const width = char.width * fontSize / 1e3 || 0;
30001
- currentTextItem.glyphs.push([char, textPosition, width]);
30002
- textPosition += width + charSpacing * hScale;
30003
- if (char.isSpace) {
30004
- textPosition += wordSpacing * hScale;
30005
- }
30006
- }
30011
+ processChars(text, currentTextItem);
30007
30012
  }
30008
30013
  break;
30009
30014
  }
@@ -30019,23 +30024,9 @@ async function createTextLayer(page, {
30019
30024
  lineMatrix = [...textMatrix];
30020
30025
  if (typeof text === "string") {
30021
30026
  const glyphs = currentTextItem.font.charsToGlyphs(text);
30022
- for (const char of glyphs) {
30023
- const width = char.width * fontSize / 1e3 || 0;
30024
- currentTextItem.glyphs.push([char, textPosition, width]);
30025
- textPosition += width + charSpacing * hScale;
30026
- if (char.isSpace) {
30027
- textPosition += wordSpacing * hScale;
30028
- }
30029
- }
30027
+ processChars(glyphs, currentTextItem);
30030
30028
  } else if (Array.isArray(text)) {
30031
- for (const char of text) {
30032
- const width = char.width * fontSize / 1e3 || 0;
30033
- currentTextItem.glyphs.push([char, textPosition, width]);
30034
- textPosition += width + charSpacing * hScale;
30035
- if (char.isSpace) {
30036
- textPosition += wordSpacing * hScale;
30037
- }
30038
- }
30029
+ processChars(text, currentTextItem);
30039
30030
  }
30040
30031
  break;
30041
30032
  }
@@ -52,6 +52,8 @@ export interface TextLayerFigure extends TextLayerNode {
52
52
  /**
   * Glyph record stored as the first element of each `TextItem.glyphs` tuple.
   *
   * Declares two required fields (`unicode`, `isSpace`) and two optional
   * ones (`fontChar`, `width`).
   * NOTE(review): `width` is presumably expressed in 1/1000 font units and
   * `fontChar` presumably is the font-specific character - confirm against
   * the glyph producer.
   */
  export interface Glyph {
53
53
  unicode: string;
54
54
  isSpace: boolean;
55
+ fontChar?: string;
56
+ width?: number;
55
57
  }
56
58
  export type TextItem = {
57
59
  glyphs: [Glyph, number, number][];
@@ -78,6 +80,11 @@ export declare const renderTextLayer: (root: TextLayerRoot, options?: {
78
80
  }) => string;
79
81
  export declare const decorateStructTree: (node: StructTreeNodeWithAttrs | StructTreeContent, rootContainer: TextLayerRoot, graphics?: SvgRoot, annotations?: AnnotationData[], parents?: TextLayerElement[]) => void;
80
82
  export declare const measure: (text: string, font: string, fontSize: number, lang?: string) => TextMetrics;
83
+ /**
84
+ * Expands ligatures using Unicode normalization (NFKD - Compatibility Decomposition).
85
+ * This decomposes the Latin ligatures U+FB00-U+FB06 (ff fi fl ffi ffl ſt st) and the IJ ligatures U+0132/U+0133 (IJ ij) into their component characters.
86
+ * Note that the AE/OE letters (Æ æ Œ œ) have no compatibility decomposition, so NFKD leaves them unchanged.
87
+ */
81
88
  export declare function createTextLayer(page: PDFPageProxy, { graphics, annotations: _annotations, }?: {
82
89
  graphics?: SvgRoot;
83
90
  annotations?: AnnotationData[];
@@ -27888,6 +27888,25 @@ var flattenNodes = (node) => {
27888
27888
  var isNameObject = (obj) => {
27889
27889
  return obj !== null && typeof obj === "object" && "name" in obj;
27890
27890
  };
27891
/**
 * Tells whether a string consists of exactly one code point that is treated
 * as a ligature or presentation form.
 *
 * Membership is decided purely by code point range; it does not guarantee
 * that Unicode normalization will split the character into several ones.
 *
 * @param {string} char String to test.
 * @returns {boolean} `true` when `char` is a single code point in one of the
 *   ranges below; `false` for empty, multi-character or non-string input.
 */
var isLigature = (char) => {
  // Spreading iterates by code point, so an astral character counts as one.
  if (typeof char !== "string" || [...char].length !== 1) {
    return false;
  }
  const code = char.codePointAt(0);
  // U+FEFF (zero width no-break space / byte order mark) sits at the very end
  // of the Arabic Presentation Forms-B block but is a format character.
  if (code === 0xfeff) {
    return false;
  }
  return (
    // Alphabetic Presentation Forms (U+FB00-U+FB4F)
    (code >= 0xfb00 && code <= 0xfb4f) ||
    // Arabic Presentation Forms-A (U+FB50-U+FDFF)
    (code >= 0xfb50 && code <= 0xfdff) ||
    // Arabic Presentation Forms-B (U+FE70-U+FEFE, U+FEFF excluded above)
    (code >= 0xfe70 && code <= 0xfefe) ||
    // Latin ligatures outside the FB00 block
    code === 0x0152 || // LATIN CAPITAL LIGATURE OE
    code === 0x0153 || // LATIN SMALL LIGATURE OE
    code === 0x0132 || // LATIN CAPITAL LIGATURE IJ
    code === 0x0133 // LATIN SMALL LIGATURE IJ
  );
};
27891
27910
  var renderTextLayer = (root, options = {}) => {
27892
27911
  const { classes = false, styles = false, font } = options;
27893
27912
  const serializeAttributes = (attrs) => {
@@ -27922,7 +27941,7 @@ var renderTextLayer = (root, options = {}) => {
27922
27941
  "--tl-margin": `${margin}px`,
27923
27942
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
27924
27943
  }) : "";
27925
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
27944
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
27926
27945
  }).join("");
27927
27946
  if (!classes && !styles) {
27928
27947
  return contents;
@@ -28431,42 +28450,16 @@ async function createTextLayer(page, {
28431
28450
  }
28432
28451
  if (oldTextItem?.glyphs.length) {
28433
28452
  if (newTextItem) {
28434
- const text = oldTextItem.glyphs.map((g) => g[0].unicode).join("") || "";
28435
- const dir = bidi(text).dir;
28436
- const firstGlyph = oldTextItem.glyphs[0];
28437
- const lastGlyph = oldTextItem.glyphs.at(-1);
28438
- const scale = oldTextItem.scaleX * oldTextItem.fontSize;
28439
28453
  const top = oldTextItem.top;
28440
28454
  const bottom = oldTextItem.top + oldTextItem.fontSize;
28441
- const left = oldTextItem.left;
28442
- const right = oldTextItem.left + (lastGlyph[1] + lastGlyph[2]) * scale;
28443
28455
  const nextTop = newTextItem.top;
28444
28456
  const nextBottom = newTextItem.top + newTextItem.fontSize;
28445
- switch (dir) {
28446
- case "ltr": {
28447
- if (newTextItem.left - right < lastGlyph[2] * scale) {
28448
- if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom) {
28449
- return;
28450
- }
28451
- if (checkNewLine(oldTextItem, newTextItem, true)) {
28452
- appendNewLineAsNeeded();
28453
- return;
28454
- }
28455
- }
28456
- break;
28457
- }
28458
- case "rtl": {
28459
- if (left - newTextItem.left < firstGlyph[2] * scale) {
28460
- if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom) {
28461
- return;
28462
- }
28463
- if (checkNewLine(oldTextItem, newTextItem, true)) {
28464
- appendNewLineAsNeeded();
28465
- return;
28466
- }
28467
- }
28468
- break;
28469
- }
28457
+ if (nextTop > top && nextTop < bottom || nextBottom > top && nextBottom < bottom || nextTop <= top && nextBottom >= bottom) {
28458
+ return;
28459
+ }
28460
+ if (checkNewLine(oldTextItem, newTextItem, true)) {
28461
+ appendNewLineAsNeeded();
28462
+ return;
28470
28463
  }
28471
28464
  }
28472
28465
  closeMarkedContent();
@@ -28507,6 +28500,44 @@ async function createTextLayer(page, {
28507
28500
  });
28508
28501
  }
28509
28502
  };
28503
/**
 * Appends one glyph to `textItem.glyphs` as a `[glyph, position, width]` tuple
 * and advances the running `textPosition` of the enclosing scope.
 *
 * When the glyph's `unicode` is a ligature that NFKD normalization splits
 * into several characters, one synthetic glyph is pushed per character, each
 * taking an equal share of the ligature width.
 *
 * Reads `fontSize`, `charSpacing`, `wordSpacing` and `hScale` from the
 * enclosing scope.
 *
 * @param textItem Text item whose `glyphs` array receives the tuples.
 * @param char Glyph to add; `width` and `unicode` may be missing.
 * @returns Number of glyph tuples pushed (the expansion length for a split
 *   ligature, otherwise 1).
 */
const addGlyph = (textItem, char) => {
  // `|| 0` also neutralises a NaN width.
  const width = (char.width ?? 0) * fontSize / 1e3 || 0;
  const charUnicode = char.unicode ?? "";
  const expansion = isLigature(charUnicode)
    ? Array.from(charUnicode.normalize("NFKD"))
    : [];
  // Only replace the glyph when normalization really yields several
  // characters; otherwise the original glyph object (with its `isSpace`
  // flag and any extra fields) is kept as is.
  if (expansion.length > 1) {
    const widthPerChar = width / expansion.length;
    expansion.forEach((expandedChar, index) => {
      const syntheticGlyph = {
        unicode: expandedChar,
        isSpace: false
      };
      textItem.glyphs.push([
        syntheticGlyph,
        textPosition + index * widthPerChar,
        widthPerChar
      ]);
    });
    // Character spacing is added once for the source glyph, so the running
    // position advances exactly as it does for a non-ligature glyph.
    textPosition += width + charSpacing * hScale;
    return expansion.length;
  }
  textItem.glyphs.push([char, textPosition, width]);
  textPosition += width + charSpacing * hScale;
  if (char.isSpace) {
    textPosition += wordSpacing * hScale;
  }
  return 1;
};
28527
/**
 * Walks a glyph run and feeds it to `addGlyph`, updating the running
 * `textPosition` of the enclosing scope.
 *
 * Numeric entries are positioning adjustments scaled by `fontSize` and
 * `hScale`; every other entry is handed to `addGlyph`.
 *
 * NOTE(review): after `addGlyph` reports that it pushed N glyphs, the next
 * N - 1 glyph entries are dropped and numeric adjustments are ignored until
 * they have been consumed. This presumably expects the input to repeat the
 * components of an expanded ligature - confirm against the glyph source.
 *
 * @param chars Iterable of glyph objects and numeric adjustments.
 * @param textItem Text item that receives the glyphs.
 */
const processChars = (chars, textItem) => {
  let pendingSkips = 0;
  for (const entry of chars) {
    const isAdjustment = typeof entry === "number";
    if (pendingSkips) {
      // Only glyph entries consume a pending skip; adjustments are ignored.
      if (!isAdjustment) {
        pendingSkips--;
      }
      continue;
    }
    if (isAdjustment) {
      textPosition += -entry * fontSize * hScale / 1e3;
      continue;
    }
    pendingSkips = addGlyph(textItem, entry) - 1;
  }
};
28510
28541
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
28511
28542
  const fnId = operatorsList.fnArray[i];
28512
28543
  const args = operatorsList.argsArray[i];
@@ -28528,19 +28559,7 @@ async function createTextLayer(page, {
28528
28559
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
28529
28560
  createParagraphIfNeeded(lastTextItem, currentTextItem);
28530
28561
  const [chars] = args;
28531
- for (const char of chars) {
28532
- if (typeof char === "number") {
28533
- const tx = -char * fontSize * hScale / 1e3;
28534
- textPosition += tx;
28535
- } else if (char.unicode) {
28536
- const width = char.width * fontSize / 1e3 || 0;
28537
- currentTextItem.glyphs.push([char, textPosition, width]);
28538
- textPosition += width + charSpacing * hScale;
28539
- if (char.isSpace) {
28540
- textPosition += wordSpacing * hScale;
28541
- }
28542
- }
28543
- }
28562
+ processChars(chars, currentTextItem);
28544
28563
  break;
28545
28564
  }
28546
28565
  case OPS.setTextRise:
@@ -28588,23 +28607,9 @@ async function createTextLayer(page, {
28588
28607
  const [text] = args;
28589
28608
  if (typeof text === "string") {
28590
28609
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28591
- for (const char of glyphs) {
28592
- const width = char.width * fontSize / 1e3 || 0;
28593
- currentTextItem.glyphs.push([char, textPosition, width]);
28594
- textPosition += width + charSpacing * hScale;
28595
- if (char.isSpace) {
28596
- textPosition += wordSpacing * hScale;
28597
- }
28598
- }
28610
+ processChars(glyphs, currentTextItem);
28599
28611
  } else if (Array.isArray(text)) {
28600
- for (const char of text) {
28601
- const width = char.width * fontSize / 1e3 || 0;
28602
- currentTextItem.glyphs.push([char, textPosition, width]);
28603
- textPosition += width + charSpacing * hScale;
28604
- if (char.isSpace) {
28605
- textPosition += wordSpacing * hScale;
28606
- }
28607
- }
28612
+ processChars(text, currentTextItem);
28608
28613
  }
28609
28614
  break;
28610
28615
  }
@@ -28620,23 +28625,9 @@ async function createTextLayer(page, {
28620
28625
  lineMatrix = [...textMatrix];
28621
28626
  if (typeof text === "string") {
28622
28627
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28623
- for (const char of glyphs) {
28624
- const width = char.width * fontSize / 1e3 || 0;
28625
- currentTextItem.glyphs.push([char, textPosition, width]);
28626
- textPosition += width + charSpacing * hScale;
28627
- if (char.isSpace) {
28628
- textPosition += wordSpacing * hScale;
28629
- }
28630
- }
28628
+ processChars(glyphs, currentTextItem);
28631
28629
  } else if (Array.isArray(text)) {
28632
- for (const char of text) {
28633
- const width = char.width * fontSize / 1e3 || 0;
28634
- currentTextItem.glyphs.push([char, textPosition, width]);
28635
- textPosition += width + charSpacing * hScale;
28636
- if (char.isSpace) {
28637
- textPosition += wordSpacing * hScale;
28638
- }
28639
- }
28630
+ processChars(text, currentTextItem);
28640
28631
  }
28641
28632
  break;
28642
28633
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@chialab/pdfjs-lib",
3
3
  "description": "A custom Mozilla's PDF.js build with better Node support and extras.",
4
- "version": "1.0.0-alpha.44",
4
+ "version": "1.0.0-alpha.46",
5
5
  "type": "module",
6
6
  "author": "Chialab <dev@chialab.it>",
7
7
  "license": "MIT",