@polotno/pdf-import 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/lib/color-utils.d.ts +2 -0
- package/lib/color-utils.js +10 -0
- package/lib/constants.d.ts +13 -0
- package/lib/constants.js +111 -0
- package/lib/font-mapper.d.ts +7 -0
- package/lib/font-mapper.js +111 -0
- package/lib/font-matcher.d.ts +10 -0
- package/lib/font-matcher.js +89 -0
- package/lib/font-merger.d.ts +7 -0
- package/lib/font-merger.js +114 -0
- package/lib/font-registry.d.ts +15 -0
- package/lib/font-registry.js +110 -0
- package/lib/image-encoder.d.ts +3 -0
- package/lib/image-encoder.js +181 -0
- package/lib/index.d.ts +97 -0
- package/lib/index.js +1 -0
- package/lib/operator-list-helpers.d.ts +6 -0
- package/lib/operator-list-helpers.js +26 -0
- package/lib/operator-list.d.ts +99 -0
- package/lib/operator-list.js +528 -0
- package/lib/page-parser.d.ts +18 -0
- package/lib/page-parser.js +674 -0
- package/lib/pdf-image-extractor.d.ts +14 -0
- package/lib/pdf-image-extractor.js +91 -0
- package/lib/svg-builder.d.ts +23 -0
- package/lib/svg-builder.js +213 -0
- package/lib/text-blocks.d.ts +6 -0
- package/lib/text-blocks.js +294 -0
- package/lib/text-grouper.d.ts +11 -0
- package/lib/text-grouper.js +11 -0
- package/lib/text-layout.d.ts +3 -0
- package/lib/text-layout.js +318 -0
- package/lib/text-span-extractor.d.ts +5 -0
- package/lib/text-span-extractor.js +271 -0
- package/lib/text-types.d.ts +25 -0
- package/lib/text-types.js +2 -0
- package/package.json +46 -0
package/README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Polotno PDF Import
|
|
2
|
+
|
|
3
|
+
Convert PDF files into [Polotno](https://polotno.com/) JSON format.
|
|
4
|
+
|
|
5
|
+
For full documentation and demo, see [PDF Import Guide](https://polotno.com/docs/pdf-import).
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @polotno/pdf-import
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
### Node.js
|
|
16
|
+
|
|
17
|
+
```ts
|
|
18
|
+
import fs from 'fs';
|
|
19
|
+
import { pdfToJson } from '@polotno/pdf-import';
|
|
20
|
+
|
|
21
|
+
const pdfBuffer = fs.readFileSync('document.pdf');
|
|
22
|
+
const json = await pdfToJson({ pdf: pdfBuffer });
|
|
23
|
+
console.log(json);
|
|
24
|
+
// { width: 612, height: 792, pages: [{ id: '...', children: [] }] }
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Browser
|
|
28
|
+
|
|
29
|
+
```ts
|
|
30
|
+
import { pdfToJson } from '@polotno/pdf-import';
|
|
31
|
+
|
|
32
|
+
const input = document.querySelector('input[type="file"]');
|
|
33
|
+
input.addEventListener('change', async (e) => {
|
|
34
|
+
const file = e.target.files[0];
|
|
35
|
+
const pdfBuffer = await file.arrayBuffer();
|
|
36
|
+
const json = await pdfToJson({ pdf: pdfBuffer });
|
|
37
|
+
console.log(json);
|
|
38
|
+
});
|
|
39
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export function rgbTupleToHex(r, g, b) {
|
|
2
|
+
const ri = Math.max(0, Math.min(255, Math.round(r * 255)));
|
|
3
|
+
const gi = Math.max(0, Math.min(255, Math.round(g * 255)));
|
|
4
|
+
const bi = Math.max(0, Math.min(255, Math.round(b * 255)));
|
|
5
|
+
return ('#' +
|
|
6
|
+
ri.toString(16).toUpperCase().padStart(2, '0') +
|
|
7
|
+
gi.toString(16).toUpperCase().padStart(2, '0') +
|
|
8
|
+
bi.toString(16).toUpperCase().padStart(2, '0'));
|
|
9
|
+
}
|
|
10
|
+
//# sourceMappingURL=color-utils.js.map
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/** Maps lower-cased PDF font names (e.g. "arialmt") to web font family names (e.g. "Arial"). */
export declare const PDF_TO_WEB_FONT_MAP: Record<string, string>;
|
|
2
|
+
/** Default font family name. */
export declare const DEFAULT_FONT_FAMILY = "Roboto";
|
|
3
|
+
/** Default font size. NOTE(review): units are not visible in this file — confirm against usage. */
export declare const DEFAULT_FONT_SIZE = 14;
|
|
4
|
+
/** Default fill colour as a `#RRGGBB` string. */
export declare const DEFAULT_FILL = "#000000";
|
|
5
|
+
/** Default background colour as a `#RRGGBB` string. */
export declare const DEFAULT_BACKGROUND = "#FFFFFF";
|
|
6
|
+
/** Default line height. */
export declare const DEFAULT_LINE_HEIGHT = 1.2;
|
|
7
|
+
/** Default letter spacing. */
export declare const DEFAULT_LETTER_SPACING = 0;
|
|
8
|
+
/**
 * Minimum sizes for text, images and fonts.
 * NOTE(review): presumably thresholds below which elements are skipped or clamped;
 * the consuming code is not visible here — confirm.
 */
export declare const MIN_TEXT_WIDTH = 2;
|
|
9
|
+
export declare const MIN_TEXT_HEIGHT = 2;
|
|
10
|
+
export declare const MIN_IMAGE_WIDTH = 5;
|
|
11
|
+
export declare const MIN_IMAGE_HEIGHT = 5;
|
|
12
|
+
export declare const MIN_FONT_SIZE = 4;
|
|
13
|
+
//# sourceMappingURL=constants.d.ts.map
|
package/lib/constants.js
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
/**
 * Web font families paired with the lower-cased PDF font names that resolve
 * to them. Families and aliases are listed in the order in which the keys
 * must appear in the exported map.
 */
const WEB_FONT_ALIASES = {
  Arial: ['arial', 'arialmt', 'arial-boldmt', 'arial-italicmt', 'arial-bolditalicmt', 'arialmtbold'],
  Helvetica: ['helvetica', 'helvetica-bold', 'helvetica-oblique', 'helvetica-boldoblique'],
  'Helvetica Neue': ['helveticaneue', 'helveticaneue-bold', 'helveticaneue-light'],
  'Times New Roman': [
    'times',
    'timesnewroman',
    'timesnewromanpsmt',
    'timesnewromanps-boldmt',
    'timesnewromanps-italicmt',
    'times-roman',
    'times-bold',
    'times-italic',
  ],
  'Courier New': ['courier', 'couriernew', 'couriernewpsmt', 'courier-bold'],
  Georgia: ['georgia', 'georgia-bold'],
  Verdana: ['verdana', 'verdana-bold'],
  Calibri: ['calibri', 'calibri-bold', 'calibri-italic', 'calibri-light'],
  Cambria: ['cambria', 'cambriamath'],
  'Open Sans': ['opensans', 'opensans-regular', 'opensans-bold', 'opensans-light', 'opensans-semibold'],
  Roboto: ['roboto', 'roboto-regular', 'roboto-bold', 'roboto-light', 'roboto-medium'],
  'Roboto Condensed': [
    'robotocondensed',
    'robotocondensed-regular',
    'robotocondensed-bold',
    'robotocondensed-light',
    'robotocondensed-italic',
  ],
  Lato: ['lato', 'lato-regular', 'lato-bold'],
  Montserrat: ['montserrat', 'montserrat-bold'],
  'PT Serif': ['ptserif', 'ptserif-regular', 'ptserif-bold', 'ptserif-italic', 'ptserif-bolditalic'],
  'PT Sans': ['ptsans', 'ptsans-regular', 'ptsans-bold'],
  'Noto Sans': ['notosans', 'notosans-regular', 'notosans-bold', 'notosans-italic'],
  Poppins: [
    'poppins',
    'poppins-regular',
    'poppins-bold',
    'poppins-italic',
    'poppins-light',
    'poppins-medium',
    'poppins-semibold',
  ],
  'League Spartan': ['leaguespartan', 'leaguespartan-bold'],
  // Symbol / special
  Symbol: ['symbol'],
  ZapfDingbats: ['zapfdingbats'],
  Wingdings: ['wingdings'],
};
/**
 * Lookup from a lower-cased PDF font name to the web font family it stands for.
 * Built by flattening the per-family alias lists into `alias -> family` entries.
 */
export const PDF_TO_WEB_FONT_MAP = Object.fromEntries(
  Object.entries(WEB_FONT_ALIASES).flatMap(([family, aliases]) => aliases.map((alias) => [alias, family])),
);
|
|
100
|
+
// Default text styling values.
export const DEFAULT_FONT_FAMILY = 'Roboto';
export const DEFAULT_FONT_SIZE = 14;
export const DEFAULT_LINE_HEIGHT = 1.2;
export const DEFAULT_LETTER_SPACING = 0;

// Default colours, as #RRGGBB strings.
export const DEFAULT_FILL = '#000000';
export const DEFAULT_BACKGROUND = '#FFFFFF';

// Minimum sizes. NOTE(review): how these thresholds are applied is not visible here — confirm.
export const MIN_TEXT_WIDTH = 2;
export const MIN_TEXT_HEIGHT = 2;
export const MIN_IMAGE_WIDTH = 5;
export const MIN_IMAGE_HEIGHT = 5;
export const MIN_FONT_SIZE = 4;
|
|
111
|
+
//# sourceMappingURL=constants.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
 * Normalize a raw PDF font name for lookup: drops a leading six-uppercase-letter
 * tag followed by "+", drops a trailing `_<digits>wght` suffix, lower-cases and trims.
 */
export declare function cleanPdfFontName(rawName: string): string;
|
|
2
|
+
/**
 * Derive a weight from a font name: returns "bold", "light" or "normal",
 * based on a `_<digits>wght` marker or on weight keywords in the name.
 */
export declare function extractWeightFromName(rawName: string): string;
|
|
3
|
+
/** Returns "italic" when the name contains "italic", "oblique" or "inclined" (case-insensitive), else "normal". */
export declare function extractStyleFromName(rawName: string): string;
|
|
4
|
+
/**
 * Resolve a raw PDF font name to a font family: the mapped web font when the name is
 * known, otherwise a display name derived from the raw name itself.
 */
export declare function mapPdfFont(rawName: string): string;
|
|
5
|
+
/** Check if a PDF font name maps to a known web/Google font in our lookup table. */
|
|
6
|
+
export declare function isKnownWebFont(rawName: string): boolean;
|
|
7
|
+
//# sourceMappingURL=font-mapper.d.ts.map
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import { PDF_TO_WEB_FONT_MAP } from './constants.js';
|
|
2
|
+
/**
 * Normalize a raw PDF font name into the form used for table lookups.
 *
 * Steps, in order: drop a leading six-uppercase-letter tag followed by "+",
 * drop a trailing Google Fonts variable-font marker (Arimo_700wght → Arimo),
 * lower-case the result, and finally trim surrounding whitespace.
 *
 * @param rawName Font name as found in the PDF.
 * @returns The cleaned, lower-cased name.
 */
export function cleanPdfFontName(rawName) {
  const withoutPrefixTag = rawName.replace(/^[A-Z]{6}\+/, '');
  const withoutWeightMarker = withoutPrefixTag.replace(/_\d+wght$/i, '');
  const lowered = withoutWeightMarker.toLowerCase();
  return lowered.trim();
}
|
|
9
|
+
/**
 * Derive a coarse font weight from a font name.
 *
 * A Google Fonts variable-font marker (e.g. Arimo_700wght) takes precedence:
 * 600 and above is "bold", 300 and below is "light", anything else "normal".
 * Without a marker the name is scanned for weight keywords, bold ones first.
 *
 * @param rawName Font name as found in the PDF.
 * @returns "bold", "light" or "normal".
 */
export function extractWeightFromName(rawName) {
  const name = rawName.toLowerCase();
  const marker = /_(\d+)wght/.exec(name);
  if (marker !== null) {
    const numericWeight = Number.parseInt(marker[1], 10);
    if (numericWeight >= 600) {
      return 'bold';
    }
    return numericWeight <= 300 ? 'light' : 'normal';
  }
  const nameMentions = (keyword) => name.includes(keyword);
  const boldKeywords = ['bold', 'heavy', 'black', 'semibold', 'demibold'];
  if (boldKeywords.some(nameMentions)) {
    return 'bold';
  }
  const lightKeywords = ['light', 'thin', 'extralight', 'ultralight'];
  return lightKeywords.some(nameMentions) ? 'light' : 'normal';
}
|
|
29
|
+
/**
 * Derive the font style from a font name.
 *
 * @param rawName Font name as found in the PDF.
 * @returns "italic" when the lower-cased name contains "italic", "oblique"
 *          or "inclined"; otherwise "normal".
 */
export function extractStyleFromName(rawName) {
  const name = rawName.toLowerCase();
  for (const keyword of ['italic', 'oblique', 'inclined']) {
    if (name.includes(keyword)) {
      return 'italic';
    }
  }
  return 'normal';
}
|
|
36
|
+
/**
 * Return `str` without a trailing `suffix`, or `str` unchanged when it does
 * not end with that suffix.
 *
 * @param str String to shorten.
 * @param suffix Suffix to strip; an empty suffix leaves `str` untouched.
 * @returns The shortened (or original) string.
 */
function removeSuffix(str, suffix) {
  if (suffix.length === 0 || !str.endsWith(suffix)) {
    return str;
  }
  // Compute the end index explicitly: `slice(0, -0)` would return an empty string.
  return str.slice(0, str.length - suffix.length);
}
|
|
42
|
+
function lookupKnownWebFont(rawName) {
|
|
43
|
+
const cleaned = cleanPdfFontName(rawName);
|
|
44
|
+
// 1. Exact match
|
|
45
|
+
if (cleaned in PDF_TO_WEB_FONT_MAP) {
|
|
46
|
+
return PDF_TO_WEB_FONT_MAP[cleaned];
|
|
47
|
+
}
|
|
48
|
+
// 2. Try removing common suffixes
|
|
49
|
+
for (const suffix of [
|
|
50
|
+
'-roman',
|
|
51
|
+
'psmt',
|
|
52
|
+
'ps-boldmt',
|
|
53
|
+
'ps-italicmt',
|
|
54
|
+
'ps-bolditalicmt',
|
|
55
|
+
'mt',
|
|
56
|
+
'ps',
|
|
57
|
+
]) {
|
|
58
|
+
const base = removeSuffix(cleaned, suffix);
|
|
59
|
+
if (base !== cleaned && base in PDF_TO_WEB_FONT_MAP) {
|
|
60
|
+
return PDF_TO_WEB_FONT_MAP[base];
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
// 3. Try removing weight/style suffixes
|
|
64
|
+
for (const suffix of [
|
|
65
|
+
'-bolditalic',
|
|
66
|
+
'-boldoblique',
|
|
67
|
+
'-bold',
|
|
68
|
+
'-italic',
|
|
69
|
+
'-oblique',
|
|
70
|
+
'-light',
|
|
71
|
+
'-regular',
|
|
72
|
+
'-medium',
|
|
73
|
+
'-semibold',
|
|
74
|
+
'-thin',
|
|
75
|
+
'-heavy',
|
|
76
|
+
]) {
|
|
77
|
+
const base = removeSuffix(cleaned, suffix);
|
|
78
|
+
if (base !== cleaned && base in PDF_TO_WEB_FONT_MAP) {
|
|
79
|
+
return PDF_TO_WEB_FONT_MAP[base];
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
// 4. Check if any known family name is a substring (longest key first)
|
|
83
|
+
const sortedEntries = Object.entries(PDF_TO_WEB_FONT_MAP).sort((a, b) => b[0].length - a[0].length);
|
|
84
|
+
for (const [pdfKey, webFont] of sortedEntries) {
|
|
85
|
+
if (cleaned.includes(pdfKey) || pdfKey.includes(cleaned)) {
|
|
86
|
+
return webFont;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
return null;
|
|
90
|
+
}
|
|
91
|
+
export function mapPdfFont(rawName) {
|
|
92
|
+
const matched = lookupKnownWebFont(rawName);
|
|
93
|
+
if (matched) {
|
|
94
|
+
return matched;
|
|
95
|
+
}
|
|
96
|
+
// 5. Return cleaned name as-is (might be valid custom font)
|
|
97
|
+
let displayName = rawName.replace(/^[A-Z]{6}\+/, '');
|
|
98
|
+
displayName = displayName
|
|
99
|
+
.replace(/_\d+wght$/i, '') // Google Fonts variable font naming
|
|
100
|
+
.replace(/[-](Bold|Italic|Regular|Light|Medium|Thin|Heavy|BoldItalic|Oblique|BoldOblique)(MT|PSMT|PS)?$/i, '');
|
|
101
|
+
if (displayName && displayName.length > 2) {
|
|
102
|
+
const spaced = displayName.replace(/(?<=[a-z])(?=[A-Z])/g, ' ');
|
|
103
|
+
return spaced;
|
|
104
|
+
}
|
|
105
|
+
return rawName;
|
|
106
|
+
}
|
|
107
|
+
/** Check if a PDF font name maps to a known web/Google font in our lookup table. */
|
|
108
|
+
export function isKnownWebFont(rawName) {
|
|
109
|
+
return lookupKnownWebFont(rawName) !== null;
|
|
110
|
+
}
|
|
111
|
+
//# sourceMappingURL=font-mapper.js.map
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/** Font description consumed by findClosestGoogleFont to pick a replacement font. */
export interface FontMetrics {
|
|
2
|
+
/** Font name; matched case-insensitively against category keywords. */
fontName: string;
|
|
3
|
+
/** When true the font is treated as serif, unless script or monospace detection matches first. */
isSerifFont: boolean;
|
|
4
|
+
/** When true the font is treated as monospace, unless script detection matches first. */
isMonospace: boolean;
|
|
5
|
+
/** Average glyph width, compared against the reference table's avgWidth values. */
avgWidth: number;
|
|
6
|
+
/** Ascent, compared against the reference table's ascent values. */
ascent: number;
|
|
7
|
+
/** Descent, compared against the reference table's (negative) descent values. */
descent: number;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Returns the name of the reference Google Font, within the category detected for
 * `metrics`, whose width/ascent/descent are closest to the given metrics.
 */
export declare function findClosestGoogleFont(metrics: FontMetrics): string;
|
|
10
|
+
//# sourceMappingURL=font-matcher.d.ts.map
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Google Font matching for non-embeddable PDF fonts.
|
|
2
|
+
// Uses font metrics (average glyph width, ascent/descent) and name hints
|
|
3
|
+
// to find the closest matching Google Font.
|
|
4
|
+
// Reference metrics for popular Google Fonts.
|
|
5
|
+
// avgWidth = average glyph width in 1/1000 em units (measured from font files).
|
|
6
|
+
// These are approximate but good enough for nearest-neighbor matching.
|
|
7
|
+
const GOOGLE_FONT_TABLE = [
|
|
8
|
+
// Sans-serif — narrow/condensed
|
|
9
|
+
{ name: 'Roboto Condensed', category: 'sans-serif', avgWidth: 460, ascent: 0.928, descent: -0.244 },
|
|
10
|
+
{ name: 'Barlow Condensed', category: 'sans-serif', avgWidth: 420, ascent: 0.950, descent: -0.250 },
|
|
11
|
+
// Sans-serif — normal width
|
|
12
|
+
{ name: 'Roboto', category: 'sans-serif', avgWidth: 538, ascent: 0.928, descent: -0.244 },
|
|
13
|
+
{ name: 'Open Sans', category: 'sans-serif', avgWidth: 570, ascent: 1.069, descent: -0.293 },
|
|
14
|
+
{ name: 'Lato', category: 'sans-serif', avgWidth: 518, ascent: 0.987, descent: -0.213 },
|
|
15
|
+
{ name: 'Inter', category: 'sans-serif', avgWidth: 540, ascent: 0.984, descent: -0.250 },
|
|
16
|
+
{ name: 'DM Sans', category: 'sans-serif', avgWidth: 530, ascent: 1.000, descent: -0.250 },
|
|
17
|
+
{ name: 'Work Sans', category: 'sans-serif', avgWidth: 535, ascent: 0.970, descent: -0.260 },
|
|
18
|
+
{ name: 'Outfit', category: 'sans-serif', avgWidth: 530, ascent: 1.000, descent: -0.260 },
|
|
19
|
+
{ name: 'Noto Sans', category: 'sans-serif', avgWidth: 545, ascent: 1.069, descent: -0.293 },
|
|
20
|
+
{ name: 'PT Sans', category: 'sans-serif', avgWidth: 510, ascent: 0.905, descent: -0.212 },
|
|
21
|
+
{ name: 'Source Sans Pro', category: 'sans-serif', avgWidth: 500, ascent: 0.984, descent: -0.273 },
|
|
22
|
+
// Sans-serif — wide/geometric
|
|
23
|
+
{ name: 'Montserrat', category: 'sans-serif', avgWidth: 640, ascent: 0.968, descent: -0.251 },
|
|
24
|
+
{ name: 'Poppins', category: 'sans-serif', avgWidth: 540, ascent: 1.050, descent: -0.350 },
|
|
25
|
+
{ name: 'Nunito', category: 'sans-serif', avgWidth: 580, ascent: 1.011, descent: -0.353 },
|
|
26
|
+
{ name: 'Raleway', category: 'sans-serif', avgWidth: 600, ascent: 0.930, descent: -0.250 },
|
|
27
|
+
{ name: 'Nunito Sans', category: 'sans-serif', avgWidth: 545, ascent: 1.011, descent: -0.353 },
|
|
28
|
+
{ name: 'Manrope', category: 'sans-serif', avgWidth: 560, ascent: 1.028, descent: -0.272 },
|
|
29
|
+
// Serif
|
|
30
|
+
{ name: 'Merriweather', category: 'serif', avgWidth: 530, ascent: 0.985, descent: -0.300 },
|
|
31
|
+
{ name: 'Lora', category: 'serif', avgWidth: 510, ascent: 0.956, descent: -0.382 },
|
|
32
|
+
{ name: 'PT Serif', category: 'serif', avgWidth: 524, ascent: 1.039, descent: -0.286 },
|
|
33
|
+
{ name: 'Noto Serif', category: 'serif', avgWidth: 530, ascent: 1.069, descent: -0.293 },
|
|
34
|
+
{ name: 'Source Serif Pro', category: 'serif', avgWidth: 505, ascent: 0.918, descent: -0.250 },
|
|
35
|
+
{ name: 'Libre Baskerville', category: 'serif', avgWidth: 540, ascent: 0.983, descent: -0.300 },
|
|
36
|
+
{ name: 'Playfair Display', category: 'serif', avgWidth: 500, ascent: 1.082, descent: -0.251 },
|
|
37
|
+
{ name: 'EB Garamond', category: 'serif', avgWidth: 440, ascent: 0.960, descent: -0.240 },
|
|
38
|
+
{ name: 'Crimson Text', category: 'serif', avgWidth: 460, ascent: 0.927, descent: -0.315 },
|
|
39
|
+
// Monospace
|
|
40
|
+
{ name: 'Roboto Mono', category: 'monospace', avgWidth: 600, ascent: 1.048, descent: -0.271 },
|
|
41
|
+
{ name: 'Source Code Pro', category: 'monospace', avgWidth: 600, ascent: 0.984, descent: -0.273 },
|
|
42
|
+
{ name: 'JetBrains Mono', category: 'monospace', avgWidth: 600, ascent: 1.020, descent: -0.300 },
|
|
43
|
+
{ name: 'Fira Code', category: 'monospace', avgWidth: 600, ascent: 0.935, descent: -0.265 },
|
|
44
|
+
// Script/handwriting
|
|
45
|
+
{ name: 'Dancing Script', category: 'script', avgWidth: 470, ascent: 0.958, descent: -0.400 },
|
|
46
|
+
{ name: 'Great Vibes', category: 'script', avgWidth: 430, ascent: 1.050, descent: -0.550 },
|
|
47
|
+
{ name: 'Pacifico', category: 'script', avgWidth: 550, ascent: 1.050, descent: -0.370 },
|
|
48
|
+
{ name: 'Sacramento', category: 'script', avgWidth: 380, ascent: 0.900, descent: -0.400 },
|
|
49
|
+
{ name: 'Satisfy', category: 'script', avgWidth: 460, ascent: 1.050, descent: -0.350 },
|
|
50
|
+
];
|
|
51
|
+
// Keywords in font names that hint at category
|
|
52
|
+
const SERIF_HINTS = ['serif', 'roman', 'garamond', 'baskerville', 'palatino', 'georgia', 'cambria', 'minion', 'caslon', 'bodoni', 'didot', 'times', 'book'];
|
|
53
|
+
const MONO_HINTS = ['mono', 'courier', 'consolas', 'code', 'terminal', 'fixed'];
|
|
54
|
+
const SCRIPT_HINTS = ['script', 'cursive', 'handwrit', 'callig', 'brush', 'sloop', 'dancing', 'pacifico', 'satisfy', 'lobster', 'sacramento', 'kaushan'];
|
|
55
|
+
function detectCategory(metrics) {
|
|
56
|
+
const lower = metrics.fontName.toLowerCase();
|
|
57
|
+
if (SCRIPT_HINTS.some(h => lower.includes(h)))
|
|
58
|
+
return 'script';
|
|
59
|
+
if (metrics.isMonospace || MONO_HINTS.some(h => lower.includes(h)))
|
|
60
|
+
return 'monospace';
|
|
61
|
+
if (metrics.isSerifFont || SERIF_HINTS.some(h => lower.includes(h)))
|
|
62
|
+
return 'serif';
|
|
63
|
+
return 'sans-serif';
|
|
64
|
+
}
|
|
65
|
+
export function findClosestGoogleFont(metrics) {
|
|
66
|
+
const category = detectCategory(metrics);
|
|
67
|
+
// Filter candidates by category
|
|
68
|
+
let candidates = GOOGLE_FONT_TABLE.filter(f => f.category === category);
|
|
69
|
+
if (candidates.length === 0) {
|
|
70
|
+
// Fallback to all sans-serif if category has no entries
|
|
71
|
+
candidates = GOOGLE_FONT_TABLE.filter(f => f.category === 'sans-serif');
|
|
72
|
+
}
|
|
73
|
+
// Score each candidate: lower is better
|
|
74
|
+
// Weight width similarity most heavily since it's the most visually impactful
|
|
75
|
+
let best = candidates[0];
|
|
76
|
+
let bestScore = Infinity;
|
|
77
|
+
for (const candidate of candidates) {
|
|
78
|
+
const widthDiff = Math.abs(candidate.avgWidth - metrics.avgWidth) / 100;
|
|
79
|
+
const ascentDiff = Math.abs(candidate.ascent - metrics.ascent) * 2;
|
|
80
|
+
const descentDiff = Math.abs(candidate.descent - metrics.descent) * 2;
|
|
81
|
+
const score = widthDiff + ascentDiff + descentDiff;
|
|
82
|
+
if (score < bestScore) {
|
|
83
|
+
bestScore = score;
|
|
84
|
+
best = candidate;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
return best.name;
|
|
88
|
+
}
|
|
89
|
+
//# sourceMappingURL=font-matcher.js.map
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge multiple font subset buffers into a single font.
|
|
3
|
+
* Deduplicates glyphs by unicode code point, keeping the first occurrence.
|
|
4
|
+
* Falls back to the largest blob if merging fails.
*
* @param buffers Font subset binaries; a single buffer is returned as-is.
* @returns The merged font bytes, or one of the input buffers (see above).
|
|
5
|
+
*/
|
|
6
|
+
export declare function mergeSubsetFonts(buffers: Uint8Array[]): Uint8Array;
|
|
7
|
+
//# sourceMappingURL=font-merger.d.ts.map
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import opentype from 'opentype.js';
|
|
2
|
+
// pdfjs adds code points from the Unicode Private Use Area for its internal
// glyph mapping. They have to be stripped before merging, otherwise the
// cmaps of the subsets conflict. Both bounds are inclusive.
const PUA_START = 0xe000;
const PUA_END = 0xf8ff;
/** Tell whether code point `u` lies in the Private Use Area range above. */
function isPUA(u) {
  return PUA_START <= u && u <= PUA_END;
}
|
|
9
|
+
/**
 * Merge multiple font subset buffers into a single font.
 *
 * The largest subset is parsed as the base font; glyphs from the remaining
 * subsets are appended when they carry at least one unicode code point the
 * merged font does not have yet (first occurrence wins). Private Use Area
 * code points are stripped from every subset first.
 *
 * @param buffers Font subset binaries.
 * @returns The merged font bytes. A single buffer is returned as-is, an
 *          empty list yields an empty array, and if parsing or merging
 *          throws, the largest input buffer is returned instead.
 */
export function mergeSubsetFonts(buffers) {
  // Nothing to merge; without this guard the fallback `reduce` below would
  // throw on an empty list.
  if (buffers.length === 0) {
    return new Uint8Array(0);
  }
  if (buffers.length === 1) {
    return buffers[0];
  }
  try {
    // Start with the largest subset as the base
    const sorted = [...buffers].sort((a, b) => b.length - a.length);
    const base = opentype.parse(toArrayBuffer(sorted[0]));
    stripPUA(base);
    // Collect the code points the base font already covers
    const existingUnicodes = new Set();
    for (let i = 0; i < base.glyphs.length; i++) {
      const g = base.glyphs.get(i);
      if (g.unicodes) {
        g.unicodes.forEach((u) => existingUnicodes.add(u));
      }
    }
    // Merge glyphs from remaining subsets
    for (let si = 1; si < sorted.length; si++) {
      const other = opentype.parse(toArrayBuffer(sorted[si]));
      stripPUA(other);
      for (let i = 0; i < other.glyphs.length; i++) {
        const g = other.glyphs.get(i);
        const unis = g.unicodes || [];
        // Skip unmapped glyphs and glyphs that add no new code point.
        if (unis.length === 0 || unis.every((u) => existingUnicodes.has(u))) {
          continue;
        }
        base.glyphs.push(base.glyphs.length, g);
        unis.forEach((u) => existingUnicodes.add(u));
      }
    }
    // Assign names to unnamed glyphs to avoid opentype.js warnings
    assignMissingGlyphNames(base);
    // Assign a non-zero createdTimestamp so opentype.js doesn't generate a fresh
    // one on each serialization (it checks `if (options.createdTimestamp)`).
    base.createdTimestamp = 1;
    // opentype.js writes non-deterministic fields (modified timestamp, checksums)
    // into the serialized font. Zero them out in place for stable output.
    const merged = new Uint8Array(base.toArrayBuffer());
    stabilizeFontBytes(merged);
    return merged;
  }
  catch {
    // Fall back to largest blob if parsing/merging fails
    return buffers.reduce((a, b) => (a.length >= b.length ? a : b));
  }
}
|
|
61
|
+
function stripPUA(font) {
|
|
62
|
+
for (let i = 0; i < font.glyphs.length; i++) {
|
|
63
|
+
const g = font.glyphs.get(i);
|
|
64
|
+
if (g.unicodes) {
|
|
65
|
+
g.unicodes = g.unicodes.filter((u) => !isPUA(u));
|
|
66
|
+
}
|
|
67
|
+
if (g.unicode && isPUA(g.unicode)) {
|
|
68
|
+
g.unicode = g.unicodes.length > 0 ? g.unicodes[0] : 0;
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
function assignMissingGlyphNames(font) {
|
|
73
|
+
for (let i = 0; i < font.glyphs.length; i++) {
|
|
74
|
+
const g = font.glyphs.get(i);
|
|
75
|
+
if (!g.name) {
|
|
76
|
+
g.name = g.unicode ? `uni${g.unicode.toString(16).toUpperCase().padStart(4, '0')}` : `glyph${i}`;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
/**
 * Zero the fields of a serialized font that vary from run to run, in place,
 * so that repeated serializations produce identical bytes.
 *
 * The table directory (16-byte entries starting at byte 12, entry count read
 * as a big-endian uint16 at byte 4) is scanned for the `head` entry. For that
 * entry the following bytes are cleared:
 *  - the directory checksum (entry + 4, 4 bytes)
 *  - checkSumAdjustment (table offset + 8, 4 bytes)
 *  - the modified timestamp (table offset + 28, 8 bytes)
 * Nothing is changed when no `head` entry is found.
 *
 * @param buf Serialized font bytes; modified in place.
 */
function stabilizeFontBytes(buf) {
  const DIRECTORY_START = 12;
  const ENTRY_SIZE = 16;
  const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
  const directoryEnd = DIRECTORY_START + view.getUint16(4) * ENTRY_SIZE;
  for (let entry = DIRECTORY_START; entry < directoryEnd; entry += ENTRY_SIZE) {
    const tag = String.fromCharCode(buf[entry], buf[entry + 1], buf[entry + 2], buf[entry + 3]);
    if (tag !== 'head') {
      continue;
    }
    // Read the table offset from this directory entry (big-endian uint32).
    const head = view.getUint32(entry + 8);
    buf.fill(0, entry + 4, entry + 8); // head directory checksum
    buf.fill(0, head + 8, head + 12); // checkSumAdjustment
    buf.fill(0, head + 28, head + 36); // modified timestamp
    return;
  }
}
|
|
111
|
+
/**
 * Copy exactly the bytes covered by a typed-array view into a new ArrayBuffer.
 *
 * @param data View whose `buffer`, `byteOffset` and `byteLength` delimit the bytes.
 * @returns A fresh buffer holding only those bytes.
 */
function toArrayBuffer(data) {
  const { buffer, byteOffset, byteLength } = data;
  const end = byteOffset + byteLength;
  return buffer.slice(byteOffset, end);
}
|
|
114
|
+
//# sourceMappingURL=font-merger.js.map
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import opentype from 'opentype.js';
|
|
2
|
+
import type { PolotnoFont, PolotnoPage } from './index.js';
|
|
3
|
+
/**
 * Collects the fonts encountered while importing a PDF (via recordFont) and
 * turns them into the output font list (via finalize).
 */
export declare class FontRegistry {
|
|
4
|
+
/** Font binaries ({ mime, data }) grouped by mapped family; filled only for fonts missing from the known web-font table. */
private fontDataMap;
|
|
5
|
+
/** Metrics of the first recorded font per mapped family, used for Google Font matching. */
private fontMetricsMap;
|
|
6
|
+
/** Parsed opentype.js fonts (or null after a failed parse), keyed by loaded font name. */
private otCache;
|
|
7
|
+
/**
|
|
8
|
+
* Parse font data with opentype.js, returning cached result if available.
|
|
9
|
+
* Key is the pdfjs loaded font name (e.g. "g_d0_f1").
|
|
10
|
+
*/
|
|
11
|
+
parseOpentype(loadedName: string, data: Uint8Array): opentype.Font | null;
|
|
12
|
+
/** Record the binary data and metrics of a font object; ignored when it has no name or maps to a known web font. */
recordFont(fontObj: any): void;
|
|
13
|
+
/**
 * Produce the font list. With 'googleFontsMatch' the fontFamily of text children in
 * `pages` is rewritten in place to the matched Google Font and an empty list is
 * returned; with 'embed' each recorded family is returned with a base64 data URI.
 */
finalize(fontStrategy: 'embed' | 'googleFontsMatch', pages: PolotnoPage[]): PolotnoFont[];
|
|
14
|
+
}
|
|
15
|
+
//# sourceMappingURL=font-registry.d.ts.map
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import opentype from 'opentype.js';
|
|
2
|
+
import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
|
|
3
|
+
import { findClosestGoogleFont } from './font-matcher.js';
|
|
4
|
+
import { mergeSubsetFonts } from './font-merger.js';
|
|
5
|
+
export class FontRegistry {
|
|
6
|
+
constructor() {
|
|
7
|
+
this.fontDataMap = new Map();
|
|
8
|
+
this.fontMetricsMap = new Map();
|
|
9
|
+
// Cache opentype.js parsed fonts across pages to avoid re-parsing
|
|
10
|
+
this.otCache = new Map();
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Parse font data with opentype.js, returning cached result if available.
|
|
14
|
+
* Key is the pdfjs loaded font name (e.g. "g_d0_f1").
|
|
15
|
+
*/
|
|
16
|
+
parseOpentype(loadedName, data) {
|
|
17
|
+
if (this.otCache.has(loadedName)) {
|
|
18
|
+
return this.otCache.get(loadedName);
|
|
19
|
+
}
|
|
20
|
+
try {
|
|
21
|
+
const buf = new Uint8Array(data).buffer;
|
|
22
|
+
const otFont = opentype.parse(buf);
|
|
23
|
+
this.otCache.set(loadedName, otFont);
|
|
24
|
+
return otFont;
|
|
25
|
+
}
|
|
26
|
+
catch {
|
|
27
|
+
this.otCache.set(loadedName, null);
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
recordFont(fontObj) {
|
|
32
|
+
if (!fontObj?.name)
|
|
33
|
+
return;
|
|
34
|
+
const mappedFamily = mapPdfFont(fontObj.name);
|
|
35
|
+
const isUnknown = !isKnownWebFont(fontObj.name);
|
|
36
|
+
// Collect font binary data for non-Google/non-standard fonts
|
|
37
|
+
if (isUnknown && fontObj.data && fontObj.data.length > 0) {
|
|
38
|
+
const mime = fontObj.mimetype || 'font/opentype';
|
|
39
|
+
const arr = this.fontDataMap.get(mappedFamily) || [];
|
|
40
|
+
arr.push({ mime, data: new Uint8Array(fontObj.data) });
|
|
41
|
+
this.fontDataMap.set(mappedFamily, arr);
|
|
42
|
+
}
|
|
43
|
+
// Collect font metrics for unknown fonts (for Google Font matching)
|
|
44
|
+
if (isUnknown && !this.fontMetricsMap.has(mappedFamily)) {
|
|
45
|
+
const widths = (fontObj.widths || []).filter((w) => w != null && w > 0);
|
|
46
|
+
const avgWidth = widths.length > 0
|
|
47
|
+
? widths.reduce((a, b) => a + b, 0) / widths.length
|
|
48
|
+
: 500;
|
|
49
|
+
this.fontMetricsMap.set(mappedFamily, {
|
|
50
|
+
fontName: fontObj.name.replace(/^[A-Z]{6}\+/, ''),
|
|
51
|
+
isSerifFont: fontObj.isSerifFont || false,
|
|
52
|
+
isMonospace: fontObj.isMonospace || false,
|
|
53
|
+
avgWidth: Math.round(avgWidth),
|
|
54
|
+
ascent: fontObj.ascent || 0.9,
|
|
55
|
+
descent: fontObj.descent || -0.25,
|
|
56
|
+
});
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
finalize(fontStrategy, pages) {
|
|
60
|
+
const fonts = [];
|
|
61
|
+
if (fontStrategy === 'googleFontsMatch') {
|
|
62
|
+
// Replace all non-Google font families with closest Google Font matches
|
|
63
|
+
const fontReplacementMap = new Map();
|
|
64
|
+
for (const [mappedFamily, metrics] of this.fontMetricsMap) {
|
|
65
|
+
const googleFont = findClosestGoogleFont(metrics);
|
|
66
|
+
fontReplacementMap.set(mappedFamily, googleFont);
|
|
67
|
+
}
|
|
68
|
+
// Apply replacements to all text elements across all pages
|
|
69
|
+
for (const page of pages) {
|
|
70
|
+
for (const child of page.children) {
|
|
71
|
+
if (child.type === 'text') {
|
|
72
|
+
const replacement = fontReplacementMap.get(child.fontFamily);
|
|
73
|
+
if (replacement) {
|
|
74
|
+
child.fontFamily = replacement;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return fonts;
|
|
80
|
+
}
|
|
81
|
+
// 'embed' strategy: embed font data as base64 data URIs.
|
|
82
|
+
// When multiple subsets exist, merge them into a single font.
|
|
83
|
+
for (const [fontFamily, blobs] of this.fontDataMap) {
|
|
84
|
+
let fontData;
|
|
85
|
+
let mime;
|
|
86
|
+
if (blobs.length === 1) {
|
|
87
|
+
fontData = blobs[0].data;
|
|
88
|
+
mime = blobs[0].mime;
|
|
89
|
+
}
|
|
90
|
+
else {
|
|
91
|
+
fontData = mergeSubsetFonts(blobs.map((b) => b.data));
|
|
92
|
+
mime = blobs[0].mime;
|
|
93
|
+
}
|
|
94
|
+
let b64;
|
|
95
|
+
if (typeof Buffer !== 'undefined') {
|
|
96
|
+
b64 = Buffer.from(fontData).toString('base64');
|
|
97
|
+
}
|
|
98
|
+
else {
|
|
99
|
+
let binary = '';
|
|
100
|
+
for (let bi = 0; bi < fontData.length; bi++) {
|
|
101
|
+
binary += String.fromCharCode(fontData[bi]);
|
|
102
|
+
}
|
|
103
|
+
b64 = btoa(binary);
|
|
104
|
+
}
|
|
105
|
+
fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
|
|
106
|
+
}
|
|
107
|
+
return fonts;
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
//# sourceMappingURL=font-registry.js.map
|