@chialab/pdfjs-lib 1.0.0-alpha.44 → 1.0.0-alpha.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29287,6 +29287,25 @@ var flattenNodes = (node) => {
29287
29287
  var isNameObject = (obj) => {
29288
29288
  return obj !== null && typeof obj === "object" && "name" in obj;
29289
29289
  };
29290
+ // Reports whether `char` is exactly one Unicode code point that falls inside one of the
+ // ranges / single values listed below. Falsy input and strings made of more than one
+ // code point return false.
+ // NOTE(review): whole blocks are matched, so every member of a block returns true,
+ // including U+FEFF, the last code point of the third range.
+ var isLigature = (char) => {
29291
+ if (!char || [...char].length !== 1) { // spread iterates by code point, not by UTF-16 unit
29292
+ return false;
29293
+ }
29294
+ const code = char.codePointAt(0);
29295
+ if (code == null) { // defensive: for a string, codePointAt(0) is undefined only when it is empty
29296
+ return false;
29297
+ }
29298
+ return (
29299
+ // Bounds are decimal literals; each line's comment gives the hex range and block name.
29300
+ code >= 64256 && code <= 64335 || // U+FB00–U+FB4F, Alphabetic Presentation Forms
29301
+ code >= 64336 && code <= 65023 || // U+FB50–U+FDFF, Arabic Presentation Forms-A
29302
+ code >= 65136 && code <= 65279 || // U+FE70–U+FEFF, Arabic Presentation Forms-B
29303
+ code === 338 || // U+0152 Œ (Latin ligature outside the FB00 block)
29304
+ code === 339 || // U+0153 œ
29305
+ code === 306 || // U+0132 Ĳ
29306
+ code === 307 // U+0133 ĳ
29307
+ );
29308
+ };
29290
29309
  var renderTextLayer = (root, options = {}) => {
29291
29310
  const { classes = false, styles = false, font } = options;
29292
29311
  const serializeAttributes = (attrs) => {
@@ -29321,7 +29340,7 @@ var renderTextLayer = (root, options = {}) => {
29321
29340
  "--tl-margin": `${margin}px`,
29322
29341
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
29323
29342
  }) : "";
29324
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
29343
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
29325
29344
  }).join("");
29326
29345
  if (!classes && !styles) {
29327
29346
  return contents;
@@ -29906,6 +29925,44 @@ async function createTextLayer(page, {
29906
29925
  });
29907
29926
  }
29908
29927
  };
29928
+ // Pushes `char` onto `textItem.glyphs` as a [glyph, textPosition, width] tuple and advances
+ // the `textPosition` variable of the enclosing scope. When `char.unicode` passes
+ // isLigature(), its NFKD form is pushed instead, as one synthetic glyph per code point.
+ // Returns the number of tuples pushed (1 on the regular path).
+ const addGlyph = (textItem, char) => {
29929
+ const width = (char.width ?? 0) * fontSize / 1e3 || 0; // `|| 0` also maps a NaN product to 0
29930
+ const charUnicode = char.unicode ?? "";
29931
+ if (isLigature(charUnicode)) {
29932
+ const normalized = charUnicode.normalize("NFKD");
29933
+ const expansion = Array.from(normalized); // split by code point
29934
+ const widthPerChar = width / expansion.length; // the source glyph's width is shared equally
29935
+ for (const expandedChar of expansion) {
29936
+ const syntheticGlyph = { // only these two fields are set; nothing else is copied from `char`
29937
+ unicode: expandedChar,
29938
+ // NOTE(review): always false, even if the expansion contains a space; this branch
+ // also never adds wordSpacing — confirm that is intended.
+ isSpace: false
29939
+ };
29940
+ textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
29941
+ // NOTE(review): charSpacing * hScale is added once per expanded character, so the total
+ // advance is width + expansion.length * charSpacing * hScale — confirm that is intended.
+ textPosition += widthPerChar + charSpacing * hScale;
29942
+ }
29943
+ return expansion.length;
29944
+ }
29945
+ textItem.glyphs.push([char, textPosition, width]); // regular path: the glyph object is stored as-is
29946
+ textPosition += width + charSpacing * hScale;
29947
+ if (char.isSpace) {
29948
+ textPosition += wordSpacing * hScale; // extra advance for glyphs flagged as spaces
29949
+ }
29950
+ return 1;
29951
+ };
29952
+ // Walks `chars` in order. Numeric entries shift the enclosing `textPosition` by
+ // -char * fontSize * hScale / 1000; every other entry is handed to addGlyph().
+ // `skip` is set to (tuples pushed by addGlyph) - 1, so after a ligature expanded into N
+ // glyphs the next N - 1 non-numeric entries are discarded, and numeric entries are
+ // ignored while `skip` is non-zero.
+ // NOTE(review): this assumes the entries following an expanded ligature duplicate its
+ // expansion; if they are ordinary glyphs they are silently lost — confirm against callers.
+ const processChars = (chars, textItem) => {
29953
+ let skip = 0;
29954
+ for (const char of chars) {
29955
+ if (typeof char === "number") {
29956
+ if (!skip) {
29957
+ textPosition += -char * fontSize * hScale / 1e3;
29958
+ }
29959
+ } else if (skip) {
29960
+ skip--; // consume one entry without emitting a glyph
29961
+ } else {
29962
+ skip = addGlyph(textItem, char) - 1; // 0 for a regular glyph
29963
+ }
29964
+ }
29965
+ };
29909
29966
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
29910
29967
  const fnId = operatorsList.fnArray[i];
29911
29968
  const args = operatorsList.argsArray[i];
@@ -29927,19 +29984,7 @@ async function createTextLayer(page, {
29927
29984
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
29928
29985
  createParagraphIfNeeded(lastTextItem, currentTextItem);
29929
29986
  const [chars] = args;
29930
- for (const char of chars) {
29931
- if (typeof char === "number") {
29932
- const tx = -char * fontSize * hScale / 1e3;
29933
- textPosition += tx;
29934
- } else if (char.unicode) {
29935
- const width = char.width * fontSize / 1e3 || 0;
29936
- currentTextItem.glyphs.push([char, textPosition, width]);
29937
- textPosition += width + charSpacing * hScale;
29938
- if (char.isSpace) {
29939
- textPosition += wordSpacing * hScale;
29940
- }
29941
- }
29942
- }
29987
+ processChars(chars, currentTextItem);
29943
29988
  break;
29944
29989
  }
29945
29990
  case OPS.setTextRise:
@@ -29987,23 +30032,9 @@ async function createTextLayer(page, {
29987
30032
  const [text] = args;
29988
30033
  if (typeof text === "string") {
29989
30034
  const glyphs = currentTextItem.font.charsToGlyphs(text);
29990
- for (const char of glyphs) {
29991
- const width = char.width * fontSize / 1e3 || 0;
29992
- currentTextItem.glyphs.push([char, textPosition, width]);
29993
- textPosition += width + charSpacing * hScale;
29994
- if (char.isSpace) {
29995
- textPosition += wordSpacing * hScale;
29996
- }
29997
- }
30035
+ processChars(glyphs, currentTextItem);
29998
30036
  } else if (Array.isArray(text)) {
29999
- for (const char of text) {
30000
- const width = char.width * fontSize / 1e3 || 0;
30001
- currentTextItem.glyphs.push([char, textPosition, width]);
30002
- textPosition += width + charSpacing * hScale;
30003
- if (char.isSpace) {
30004
- textPosition += wordSpacing * hScale;
30005
- }
30006
- }
30037
+ processChars(text, currentTextItem);
30007
30038
  }
30008
30039
  break;
30009
30040
  }
@@ -30019,23 +30050,9 @@ async function createTextLayer(page, {
30019
30050
  lineMatrix = [...textMatrix];
30020
30051
  if (typeof text === "string") {
30021
30052
  const glyphs = currentTextItem.font.charsToGlyphs(text);
30022
- for (const char of glyphs) {
30023
- const width = char.width * fontSize / 1e3 || 0;
30024
- currentTextItem.glyphs.push([char, textPosition, width]);
30025
- textPosition += width + charSpacing * hScale;
30026
- if (char.isSpace) {
30027
- textPosition += wordSpacing * hScale;
30028
- }
30029
- }
30053
+ processChars(glyphs, currentTextItem);
30030
30054
  } else if (Array.isArray(text)) {
30031
- for (const char of text) {
30032
- const width = char.width * fontSize / 1e3 || 0;
30033
- currentTextItem.glyphs.push([char, textPosition, width]);
30034
- textPosition += width + charSpacing * hScale;
30035
- if (char.isSpace) {
30036
- textPosition += wordSpacing * hScale;
30037
- }
30038
- }
30055
+ processChars(text, currentTextItem);
30039
30056
  }
30040
30057
  break;
30041
30058
  }
@@ -52,6 +52,8 @@ export interface TextLayerFigure extends TextLayerNode {
52
52
  /** Glyph entry; the first element of each `TextItem.glyphs` tuple. */
  export interface Glyph {
53
53
  unicode: string;
54
54
  isSpace: boolean;
55
+ fontChar?: string; // optional; NOTE(review): meaning not visible in this declaration — confirm
56
+ width?: number; // optional; NOTE(review): units not visible in this declaration — confirm
55
57
  }
56
58
  export type TextItem = {
57
59
  glyphs: [Glyph, number, number][];
@@ -78,6 +80,11 @@ export declare const renderTextLayer: (root: TextLayerRoot, options?: {
78
80
  }) => string;
79
81
  export declare const decorateStructTree: (node: StructTreeNodeWithAttrs | StructTreeContent, rootContainer: TextLayerRoot, graphics?: SvgRoot, annotations?: AnnotationData[], parents?: TextLayerElement[]) => void;
80
82
  export declare const measure: (text: string, font: string, fontSize: number, lang?: string) => TextMetrics;
83
+ /**
84
+ * Expands ligatures using Unicode normalization (NFKD - Compatibility Decomposition).
85
+ * This decomposes Latin presentation-form ligatures (ﬀ ﬁ ﬂ ﬃ ﬄ ﬅ ﬆ, U+FB00–U+FB06) and the IJ ligatures (Ĳ ĳ, U+0132/U+0133)
86
+ * into their component characters. Œ and œ are also treated as ligatures but have no NFKD decomposition, so they are emitted unchanged; Æ and æ are not handled.
87
+ */
81
88
  export declare function createTextLayer(page: PDFPageProxy, { graphics, annotations: _annotations, }?: {
82
89
  graphics?: SvgRoot;
83
90
  annotations?: AnnotationData[];
@@ -27888,6 +27888,25 @@ var flattenNodes = (node) => {
27888
27888
  var isNameObject = (obj) => {
27889
27889
  return obj !== null && typeof obj === "object" && "name" in obj;
27890
27890
  };
27891
+ // Reports whether `char` is exactly one Unicode code point that falls inside one of the
+ // ranges / single values listed below. Falsy input and strings made of more than one
+ // code point return false.
+ // NOTE(review): whole blocks are matched, so every member of a block returns true,
+ // including U+FEFF, the last code point of the third range.
+ var isLigature = (char) => {
27892
+ if (!char || [...char].length !== 1) { // spread iterates by code point, not by UTF-16 unit
27893
+ return false;
27894
+ }
27895
+ const code = char.codePointAt(0);
27896
+ if (code == null) { // defensive: for a string, codePointAt(0) is undefined only when it is empty
27897
+ return false;
27898
+ }
27899
+ return (
27900
+ // Bounds are decimal literals; each line's comment gives the hex range and block name.
27901
+ code >= 64256 && code <= 64335 || // U+FB00–U+FB4F, Alphabetic Presentation Forms
27902
+ code >= 64336 && code <= 65023 || // U+FB50–U+FDFF, Arabic Presentation Forms-A
27903
+ code >= 65136 && code <= 65279 || // U+FE70–U+FEFF, Arabic Presentation Forms-B
27904
+ code === 338 || // U+0152 Œ (Latin ligature outside the FB00 block)
27905
+ code === 339 || // U+0153 œ
27906
+ code === 306 || // U+0132 Ĳ
27907
+ code === 307 // U+0133 ĳ
27908
+ );
27909
+ };
27891
27910
  var renderTextLayer = (root, options = {}) => {
27892
27911
  const { classes = false, styles = false, font } = options;
27893
27912
  const serializeAttributes = (attrs) => {
@@ -27922,7 +27941,7 @@ var renderTextLayer = (root, options = {}) => {
27922
27941
  "--tl-margin": `${margin}px`,
27923
27942
  "--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
27924
27943
  }) : "";
27925
- return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}">${chunk.text}</${tag3}>`;
27944
+ return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
27926
27945
  }).join("");
27927
27946
  if (!classes && !styles) {
27928
27947
  return contents;
@@ -28507,6 +28526,44 @@ async function createTextLayer(page, {
28507
28526
  });
28508
28527
  }
28509
28528
  };
28529
+ // Pushes `char` onto `textItem.glyphs` as a [glyph, textPosition, width] tuple and advances
+ // the `textPosition` variable of the enclosing scope. When `char.unicode` passes
+ // isLigature(), its NFKD form is pushed instead, as one synthetic glyph per code point.
+ // Returns the number of tuples pushed (1 on the regular path).
+ const addGlyph = (textItem, char) => {
28530
+ const width = (char.width ?? 0) * fontSize / 1e3 || 0; // `|| 0` also maps a NaN product to 0
28531
+ const charUnicode = char.unicode ?? "";
28532
+ if (isLigature(charUnicode)) {
28533
+ const normalized = charUnicode.normalize("NFKD");
28534
+ const expansion = Array.from(normalized); // split by code point
28535
+ const widthPerChar = width / expansion.length; // the source glyph's width is shared equally
28536
+ for (const expandedChar of expansion) {
28537
+ const syntheticGlyph = { // only these two fields are set; nothing else is copied from `char`
28538
+ unicode: expandedChar,
28539
+ // NOTE(review): always false, even if the expansion contains a space; this branch
+ // also never adds wordSpacing — confirm that is intended.
+ isSpace: false
28540
+ };
28541
+ textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
28542
+ // NOTE(review): charSpacing * hScale is added once per expanded character, so the total
+ // advance is width + expansion.length * charSpacing * hScale — confirm that is intended.
+ textPosition += widthPerChar + charSpacing * hScale;
28543
+ }
28544
+ return expansion.length;
28545
+ }
28546
+ textItem.glyphs.push([char, textPosition, width]); // regular path: the glyph object is stored as-is
28547
+ textPosition += width + charSpacing * hScale;
28548
+ if (char.isSpace) {
28549
+ textPosition += wordSpacing * hScale; // extra advance for glyphs flagged as spaces
28550
+ }
28551
+ return 1;
28552
+ };
28553
+ // Walks `chars` in order. Numeric entries shift the enclosing `textPosition` by
+ // -char * fontSize * hScale / 1000; every other entry is handed to addGlyph().
+ // `skip` is set to (tuples pushed by addGlyph) - 1, so after a ligature expanded into N
+ // glyphs the next N - 1 non-numeric entries are discarded, and numeric entries are
+ // ignored while `skip` is non-zero.
+ // NOTE(review): this assumes the entries following an expanded ligature duplicate its
+ // expansion; if they are ordinary glyphs they are silently lost — confirm against callers.
+ const processChars = (chars, textItem) => {
28554
+ let skip = 0;
28555
+ for (const char of chars) {
28556
+ if (typeof char === "number") {
28557
+ if (!skip) {
28558
+ textPosition += -char * fontSize * hScale / 1e3;
28559
+ }
28560
+ } else if (skip) {
28561
+ skip--; // consume one entry without emitting a glyph
28562
+ } else {
28563
+ skip = addGlyph(textItem, char) - 1; // 0 for a regular glyph
28564
+ }
28565
+ }
28566
+ };
28510
28567
  for (let i = 0; i < operatorsList.fnArray.length; i++) {
28511
28568
  const fnId = operatorsList.fnArray[i];
28512
28569
  const args = operatorsList.argsArray[i];
@@ -28528,19 +28585,7 @@ async function createTextLayer(page, {
28528
28585
  currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
28529
28586
  createParagraphIfNeeded(lastTextItem, currentTextItem);
28530
28587
  const [chars] = args;
28531
- for (const char of chars) {
28532
- if (typeof char === "number") {
28533
- const tx = -char * fontSize * hScale / 1e3;
28534
- textPosition += tx;
28535
- } else if (char.unicode) {
28536
- const width = char.width * fontSize / 1e3 || 0;
28537
- currentTextItem.glyphs.push([char, textPosition, width]);
28538
- textPosition += width + charSpacing * hScale;
28539
- if (char.isSpace) {
28540
- textPosition += wordSpacing * hScale;
28541
- }
28542
- }
28543
- }
28588
+ processChars(chars, currentTextItem);
28544
28589
  break;
28545
28590
  }
28546
28591
  case OPS.setTextRise:
@@ -28588,23 +28633,9 @@ async function createTextLayer(page, {
28588
28633
  const [text] = args;
28589
28634
  if (typeof text === "string") {
28590
28635
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28591
- for (const char of glyphs) {
28592
- const width = char.width * fontSize / 1e3 || 0;
28593
- currentTextItem.glyphs.push([char, textPosition, width]);
28594
- textPosition += width + charSpacing * hScale;
28595
- if (char.isSpace) {
28596
- textPosition += wordSpacing * hScale;
28597
- }
28598
- }
28636
+ processChars(glyphs, currentTextItem);
28599
28637
  } else if (Array.isArray(text)) {
28600
- for (const char of text) {
28601
- const width = char.width * fontSize / 1e3 || 0;
28602
- currentTextItem.glyphs.push([char, textPosition, width]);
28603
- textPosition += width + charSpacing * hScale;
28604
- if (char.isSpace) {
28605
- textPosition += wordSpacing * hScale;
28606
- }
28607
- }
28638
+ processChars(text, currentTextItem);
28608
28639
  }
28609
28640
  break;
28610
28641
  }
@@ -28620,23 +28651,9 @@ async function createTextLayer(page, {
28620
28651
  lineMatrix = [...textMatrix];
28621
28652
  if (typeof text === "string") {
28622
28653
  const glyphs = currentTextItem.font.charsToGlyphs(text);
28623
- for (const char of glyphs) {
28624
- const width = char.width * fontSize / 1e3 || 0;
28625
- currentTextItem.glyphs.push([char, textPosition, width]);
28626
- textPosition += width + charSpacing * hScale;
28627
- if (char.isSpace) {
28628
- textPosition += wordSpacing * hScale;
28629
- }
28630
- }
28654
+ processChars(glyphs, currentTextItem);
28631
28655
  } else if (Array.isArray(text)) {
28632
- for (const char of text) {
28633
- const width = char.width * fontSize / 1e3 || 0;
28634
- currentTextItem.glyphs.push([char, textPosition, width]);
28635
- textPosition += width + charSpacing * hScale;
28636
- if (char.isSpace) {
28637
- textPosition += wordSpacing * hScale;
28638
- }
28639
- }
28656
+ processChars(text, currentTextItem);
28640
28657
  }
28641
28658
  break;
28642
28659
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@chialab/pdfjs-lib",
3
3
  "description": "A custom Mozilla's PDF.js build with better Node support and extras.",
4
- "version": "1.0.0-alpha.44",
4
+ "version": "1.0.0-alpha.45",
5
5
  "type": "module",
6
6
  "author": "Chialab <dev@chialab.it>",
7
7
  "license": "MIT",