@chialab/pdfjs-lib 1.0.0-alpha.44 → 1.0.0-alpha.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/browser/index.js +63 -46
- package/dist/lib/TextLayer.d.ts +7 -0
- package/dist/node/index.js +63 -46
- package/package.json +1 -1
package/dist/browser/index.js
CHANGED
|
@@ -29287,6 +29287,25 @@ var flattenNodes = (node) => {
|
|
|
29287
29287
|
var isNameObject = (obj) => {
|
|
29288
29288
|
return obj !== null && typeof obj === "object" && "name" in obj;
|
|
29289
29289
|
};
|
|
29290
|
+
var isLigature = (char) => {
|
|
29291
|
+
if (!char || [...char].length !== 1) {
|
|
29292
|
+
return false;
|
|
29293
|
+
}
|
|
29294
|
+
const code = char.codePointAt(0);
|
|
29295
|
+
if (code == null) {
|
|
29296
|
+
return false;
|
|
29297
|
+
}
|
|
29298
|
+
return (
|
|
29299
|
+
// Alphabetic Presentation Forms
|
|
29300
|
+
code >= 64256 && code <= 64335 || // Arabic Presentation Forms-A
|
|
29301
|
+
code >= 64336 && code <= 65023 || // Arabic Presentation Forms-B
|
|
29302
|
+
code >= 65136 && code <= 65279 || // Latin ligatures outside FB00 block
|
|
29303
|
+
code === 338 || // Œ
|
|
29304
|
+
code === 339 || // œ
|
|
29305
|
+
code === 306 || // Ĳ
|
|
29306
|
+
code === 307
|
|
29307
|
+
);
|
|
29308
|
+
};
|
|
29290
29309
|
var renderTextLayer = (root, options = {}) => {
|
|
29291
29310
|
const { classes = false, styles = false, font } = options;
|
|
29292
29311
|
const serializeAttributes = (attrs) => {
|
|
@@ -29321,7 +29340,7 @@ var renderTextLayer = (root, options = {}) => {
|
|
|
29321
29340
|
"--tl-margin": `${margin}px`,
|
|
29322
29341
|
"--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
|
|
29323
29342
|
}) : "";
|
|
29324
|
-
return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}
|
|
29343
|
+
return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
|
|
29325
29344
|
}).join("");
|
|
29326
29345
|
if (!classes && !styles) {
|
|
29327
29346
|
return contents;
|
|
@@ -29906,6 +29925,44 @@ async function createTextLayer(page, {
|
|
|
29906
29925
|
});
|
|
29907
29926
|
}
|
|
29908
29927
|
};
|
|
29928
|
+
const addGlyph = (textItem, char) => {
|
|
29929
|
+
const width = (char.width ?? 0) * fontSize / 1e3 || 0;
|
|
29930
|
+
const charUnicode = char.unicode ?? "";
|
|
29931
|
+
if (isLigature(charUnicode)) {
|
|
29932
|
+
const normalized = charUnicode.normalize("NFKD");
|
|
29933
|
+
const expansion = Array.from(normalized);
|
|
29934
|
+
const widthPerChar = width / expansion.length;
|
|
29935
|
+
for (const expandedChar of expansion) {
|
|
29936
|
+
const syntheticGlyph = {
|
|
29937
|
+
unicode: expandedChar,
|
|
29938
|
+
isSpace: false
|
|
29939
|
+
};
|
|
29940
|
+
textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
|
|
29941
|
+
textPosition += widthPerChar + charSpacing * hScale;
|
|
29942
|
+
}
|
|
29943
|
+
return expansion.length;
|
|
29944
|
+
}
|
|
29945
|
+
textItem.glyphs.push([char, textPosition, width]);
|
|
29946
|
+
textPosition += width + charSpacing * hScale;
|
|
29947
|
+
if (char.isSpace) {
|
|
29948
|
+
textPosition += wordSpacing * hScale;
|
|
29949
|
+
}
|
|
29950
|
+
return 1;
|
|
29951
|
+
};
|
|
29952
|
+
const processChars = (chars, textItem) => {
|
|
29953
|
+
let skip = 0;
|
|
29954
|
+
for (const char of chars) {
|
|
29955
|
+
if (typeof char === "number") {
|
|
29956
|
+
if (!skip) {
|
|
29957
|
+
textPosition += -char * fontSize * hScale / 1e3;
|
|
29958
|
+
}
|
|
29959
|
+
} else if (skip) {
|
|
29960
|
+
skip--;
|
|
29961
|
+
} else {
|
|
29962
|
+
skip = addGlyph(textItem, char) - 1;
|
|
29963
|
+
}
|
|
29964
|
+
}
|
|
29965
|
+
};
|
|
29909
29966
|
for (let i = 0; i < operatorsList.fnArray.length; i++) {
|
|
29910
29967
|
const fnId = operatorsList.fnArray[i];
|
|
29911
29968
|
const args = operatorsList.argsArray[i];
|
|
@@ -29927,19 +29984,7 @@ async function createTextLayer(page, {
|
|
|
29927
29984
|
currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
|
|
29928
29985
|
createParagraphIfNeeded(lastTextItem, currentTextItem);
|
|
29929
29986
|
const [chars] = args;
|
|
29930
|
-
|
|
29931
|
-
if (typeof char === "number") {
|
|
29932
|
-
const tx = -char * fontSize * hScale / 1e3;
|
|
29933
|
-
textPosition += tx;
|
|
29934
|
-
} else if (char.unicode) {
|
|
29935
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
29936
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
29937
|
-
textPosition += width + charSpacing * hScale;
|
|
29938
|
-
if (char.isSpace) {
|
|
29939
|
-
textPosition += wordSpacing * hScale;
|
|
29940
|
-
}
|
|
29941
|
-
}
|
|
29942
|
-
}
|
|
29987
|
+
processChars(chars, currentTextItem);
|
|
29943
29988
|
break;
|
|
29944
29989
|
}
|
|
29945
29990
|
case OPS.setTextRise:
|
|
@@ -29987,23 +30032,9 @@ async function createTextLayer(page, {
|
|
|
29987
30032
|
const [text] = args;
|
|
29988
30033
|
if (typeof text === "string") {
|
|
29989
30034
|
const glyphs = currentTextItem.font.charsToGlyphs(text);
|
|
29990
|
-
|
|
29991
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
29992
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
29993
|
-
textPosition += width + charSpacing * hScale;
|
|
29994
|
-
if (char.isSpace) {
|
|
29995
|
-
textPosition += wordSpacing * hScale;
|
|
29996
|
-
}
|
|
29997
|
-
}
|
|
30035
|
+
processChars(glyphs, currentTextItem);
|
|
29998
30036
|
} else if (Array.isArray(text)) {
|
|
29999
|
-
|
|
30000
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
30001
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
30002
|
-
textPosition += width + charSpacing * hScale;
|
|
30003
|
-
if (char.isSpace) {
|
|
30004
|
-
textPosition += wordSpacing * hScale;
|
|
30005
|
-
}
|
|
30006
|
-
}
|
|
30037
|
+
processChars(text, currentTextItem);
|
|
30007
30038
|
}
|
|
30008
30039
|
break;
|
|
30009
30040
|
}
|
|
@@ -30019,23 +30050,9 @@ async function createTextLayer(page, {
|
|
|
30019
30050
|
lineMatrix = [...textMatrix];
|
|
30020
30051
|
if (typeof text === "string") {
|
|
30021
30052
|
const glyphs = currentTextItem.font.charsToGlyphs(text);
|
|
30022
|
-
|
|
30023
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
30024
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
30025
|
-
textPosition += width + charSpacing * hScale;
|
|
30026
|
-
if (char.isSpace) {
|
|
30027
|
-
textPosition += wordSpacing * hScale;
|
|
30028
|
-
}
|
|
30029
|
-
}
|
|
30053
|
+
processChars(glyphs, currentTextItem);
|
|
30030
30054
|
} else if (Array.isArray(text)) {
|
|
30031
|
-
|
|
30032
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
30033
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
30034
|
-
textPosition += width + charSpacing * hScale;
|
|
30035
|
-
if (char.isSpace) {
|
|
30036
|
-
textPosition += wordSpacing * hScale;
|
|
30037
|
-
}
|
|
30038
|
-
}
|
|
30055
|
+
processChars(text, currentTextItem);
|
|
30039
30056
|
}
|
|
30040
30057
|
break;
|
|
30041
30058
|
}
|
package/dist/lib/TextLayer.d.ts
CHANGED
|
@@ -52,6 +52,8 @@ export interface TextLayerFigure extends TextLayerNode {
|
|
|
52
52
|
export interface Glyph {
|
|
53
53
|
unicode: string;
|
|
54
54
|
isSpace: boolean;
|
|
55
|
+
fontChar?: string;
|
|
56
|
+
width?: number;
|
|
55
57
|
}
|
|
56
58
|
export type TextItem = {
|
|
57
59
|
glyphs: [Glyph, number, number][];
|
|
@@ -78,6 +80,11 @@ export declare const renderTextLayer: (root: TextLayerRoot, options?: {
|
|
|
78
80
|
}) => string;
|
|
79
81
|
export declare const decorateStructTree: (node: StructTreeNodeWithAttrs | StructTreeContent, rootContainer: TextLayerRoot, graphics?: SvgRoot, annotations?: AnnotationData[], parents?: TextLayerElement[]) => void;
|
|
80
82
|
export declare const measure: (text: string, font: string, fontSize: number, lang?: string) => TextMetrics;
|
|
83
|
+
/**
|
|
84
|
+
* Expands ligatures using Unicode normalization (NFKD - Compatibility Decomposition).
|
|
85
|
+
* This automatically handles Latin ligatures (ﬀ ﬁ ﬂ ﬃ ﬄ ﬅ ﬆ), IJ ligatures (Ĳ ĳ),
|
|
86
|
+
* and AE/OE letters (Æ æ Œ œ) by decomposing them into their component characters.
|
|
87
|
+
*/
|
|
81
88
|
export declare function createTextLayer(page: PDFPageProxy, { graphics, annotations: _annotations, }?: {
|
|
82
89
|
graphics?: SvgRoot;
|
|
83
90
|
annotations?: AnnotationData[];
|
package/dist/node/index.js
CHANGED
|
@@ -27888,6 +27888,25 @@ var flattenNodes = (node) => {
|
|
|
27888
27888
|
var isNameObject = (obj) => {
|
|
27889
27889
|
return obj !== null && typeof obj === "object" && "name" in obj;
|
|
27890
27890
|
};
|
|
27891
|
+
var isLigature = (char) => {
|
|
27892
|
+
if (!char || [...char].length !== 1) {
|
|
27893
|
+
return false;
|
|
27894
|
+
}
|
|
27895
|
+
const code = char.codePointAt(0);
|
|
27896
|
+
if (code == null) {
|
|
27897
|
+
return false;
|
|
27898
|
+
}
|
|
27899
|
+
return (
|
|
27900
|
+
// Alphabetic Presentation Forms
|
|
27901
|
+
code >= 64256 && code <= 64335 || // Arabic Presentation Forms-A
|
|
27902
|
+
code >= 64336 && code <= 65023 || // Arabic Presentation Forms-B
|
|
27903
|
+
code >= 65136 && code <= 65279 || // Latin ligatures outside FB00 block
|
|
27904
|
+
code === 338 || // Œ
|
|
27905
|
+
code === 339 || // œ
|
|
27906
|
+
code === 306 || // Ĳ
|
|
27907
|
+
code === 307
|
|
27908
|
+
);
|
|
27909
|
+
};
|
|
27891
27910
|
var renderTextLayer = (root, options = {}) => {
|
|
27892
27911
|
const { classes = false, styles = false, font } = options;
|
|
27893
27912
|
const serializeAttributes = (attrs) => {
|
|
@@ -27922,7 +27941,7 @@ var renderTextLayer = (root, options = {}) => {
|
|
|
27922
27941
|
"--tl-margin": `${margin}px`,
|
|
27923
27942
|
"--tl-transform": scale !== 1 ? `scaleX(${scale})` : null
|
|
27924
27943
|
}) : "";
|
|
27925
|
-
return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}` : ""}
|
|
27944
|
+
return `<${tag3}${serializedAttrs3 ? ` ${serializedAttrs3}` : ""}${serializedStyle2 ? ` style="${serializedStyle2}"` : ""}>${chunk.text}</${tag3}>`;
|
|
27926
27945
|
}).join("");
|
|
27927
27946
|
if (!classes && !styles) {
|
|
27928
27947
|
return contents;
|
|
@@ -28507,6 +28526,44 @@ async function createTextLayer(page, {
|
|
|
28507
28526
|
});
|
|
28508
28527
|
}
|
|
28509
28528
|
};
|
|
28529
|
+
const addGlyph = (textItem, char) => {
|
|
28530
|
+
const width = (char.width ?? 0) * fontSize / 1e3 || 0;
|
|
28531
|
+
const charUnicode = char.unicode ?? "";
|
|
28532
|
+
if (isLigature(charUnicode)) {
|
|
28533
|
+
const normalized = charUnicode.normalize("NFKD");
|
|
28534
|
+
const expansion = Array.from(normalized);
|
|
28535
|
+
const widthPerChar = width / expansion.length;
|
|
28536
|
+
for (const expandedChar of expansion) {
|
|
28537
|
+
const syntheticGlyph = {
|
|
28538
|
+
unicode: expandedChar,
|
|
28539
|
+
isSpace: false
|
|
28540
|
+
};
|
|
28541
|
+
textItem.glyphs.push([syntheticGlyph, textPosition, widthPerChar]);
|
|
28542
|
+
textPosition += widthPerChar + charSpacing * hScale;
|
|
28543
|
+
}
|
|
28544
|
+
return expansion.length;
|
|
28545
|
+
}
|
|
28546
|
+
textItem.glyphs.push([char, textPosition, width]);
|
|
28547
|
+
textPosition += width + charSpacing * hScale;
|
|
28548
|
+
if (char.isSpace) {
|
|
28549
|
+
textPosition += wordSpacing * hScale;
|
|
28550
|
+
}
|
|
28551
|
+
return 1;
|
|
28552
|
+
};
|
|
28553
|
+
const processChars = (chars, textItem) => {
|
|
28554
|
+
let skip = 0;
|
|
28555
|
+
for (const char of chars) {
|
|
28556
|
+
if (typeof char === "number") {
|
|
28557
|
+
if (!skip) {
|
|
28558
|
+
textPosition += -char * fontSize * hScale / 1e3;
|
|
28559
|
+
}
|
|
28560
|
+
} else if (skip) {
|
|
28561
|
+
skip--;
|
|
28562
|
+
} else {
|
|
28563
|
+
skip = addGlyph(textItem, char) - 1;
|
|
28564
|
+
}
|
|
28565
|
+
}
|
|
28566
|
+
};
|
|
28510
28567
|
for (let i = 0; i < operatorsList.fnArray.length; i++) {
|
|
28511
28568
|
const fnId = operatorsList.fnArray[i];
|
|
28512
28569
|
const args = operatorsList.argsArray[i];
|
|
@@ -28528,19 +28585,7 @@ async function createTextLayer(page, {
|
|
|
28528
28585
|
currentTextItem ?? (currentTextItem = createTextItem(fontFamily));
|
|
28529
28586
|
createParagraphIfNeeded(lastTextItem, currentTextItem);
|
|
28530
28587
|
const [chars] = args;
|
|
28531
|
-
|
|
28532
|
-
if (typeof char === "number") {
|
|
28533
|
-
const tx = -char * fontSize * hScale / 1e3;
|
|
28534
|
-
textPosition += tx;
|
|
28535
|
-
} else if (char.unicode) {
|
|
28536
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
28537
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
28538
|
-
textPosition += width + charSpacing * hScale;
|
|
28539
|
-
if (char.isSpace) {
|
|
28540
|
-
textPosition += wordSpacing * hScale;
|
|
28541
|
-
}
|
|
28542
|
-
}
|
|
28543
|
-
}
|
|
28588
|
+
processChars(chars, currentTextItem);
|
|
28544
28589
|
break;
|
|
28545
28590
|
}
|
|
28546
28591
|
case OPS.setTextRise:
|
|
@@ -28588,23 +28633,9 @@ async function createTextLayer(page, {
|
|
|
28588
28633
|
const [text] = args;
|
|
28589
28634
|
if (typeof text === "string") {
|
|
28590
28635
|
const glyphs = currentTextItem.font.charsToGlyphs(text);
|
|
28591
|
-
|
|
28592
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
28593
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
28594
|
-
textPosition += width + charSpacing * hScale;
|
|
28595
|
-
if (char.isSpace) {
|
|
28596
|
-
textPosition += wordSpacing * hScale;
|
|
28597
|
-
}
|
|
28598
|
-
}
|
|
28636
|
+
processChars(glyphs, currentTextItem);
|
|
28599
28637
|
} else if (Array.isArray(text)) {
|
|
28600
|
-
|
|
28601
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
28602
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
28603
|
-
textPosition += width + charSpacing * hScale;
|
|
28604
|
-
if (char.isSpace) {
|
|
28605
|
-
textPosition += wordSpacing * hScale;
|
|
28606
|
-
}
|
|
28607
|
-
}
|
|
28638
|
+
processChars(text, currentTextItem);
|
|
28608
28639
|
}
|
|
28609
28640
|
break;
|
|
28610
28641
|
}
|
|
@@ -28620,23 +28651,9 @@ async function createTextLayer(page, {
|
|
|
28620
28651
|
lineMatrix = [...textMatrix];
|
|
28621
28652
|
if (typeof text === "string") {
|
|
28622
28653
|
const glyphs = currentTextItem.font.charsToGlyphs(text);
|
|
28623
|
-
|
|
28624
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
28625
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
28626
|
-
textPosition += width + charSpacing * hScale;
|
|
28627
|
-
if (char.isSpace) {
|
|
28628
|
-
textPosition += wordSpacing * hScale;
|
|
28629
|
-
}
|
|
28630
|
-
}
|
|
28654
|
+
processChars(glyphs, currentTextItem);
|
|
28631
28655
|
} else if (Array.isArray(text)) {
|
|
28632
|
-
|
|
28633
|
-
const width = char.width * fontSize / 1e3 || 0;
|
|
28634
|
-
currentTextItem.glyphs.push([char, textPosition, width]);
|
|
28635
|
-
textPosition += width + charSpacing * hScale;
|
|
28636
|
-
if (char.isSpace) {
|
|
28637
|
-
textPosition += wordSpacing * hScale;
|
|
28638
|
-
}
|
|
28639
|
-
}
|
|
28656
|
+
processChars(text, currentTextItem);
|
|
28640
28657
|
}
|
|
28641
28658
|
break;
|
|
28642
28659
|
}
|
package/package.json
CHANGED