@polotno/pdf-import 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Polotno PDF Import
2
+
3
+ Convert PDF files into [Polotno](https://polotno.com/) JSON format.
4
+
5
+ For full documentation and demo, see [PDF Import Guide](https://polotno.com/docs/pdf-import).
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @polotno/pdf-import
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ### Node.js
16
+
17
+ ```ts
18
+ import fs from 'fs';
19
+ import { pdfToJson } from '@polotno/pdf-import';
20
+
21
+ const pdfBuffer = fs.readFileSync('document.pdf');
22
+ const json = await pdfToJson({ pdf: pdfBuffer });
23
+ console.log(json);
24
+ // { width: 612, height: 792, pages: [{ id: '...', children: [] }] }
25
+ ```
26
+
27
+ ### Browser
28
+
29
+ ```ts
30
+ import { pdfToJson } from '@polotno/pdf-import';
31
+
32
+ const input = document.querySelector('input[type="file"]');
33
+ input.addEventListener('change', async (e) => {
34
+ const file = e.target.files[0];
35
+ const pdfBuffer = await file.arrayBuffer();
36
+ const json = await pdfToJson({ pdf: pdfBuffer });
37
+ console.log(json);
38
+ });
39
+ ```
@@ -0,0 +1,2 @@
1
+ export declare function rgbTupleToHex(r: number, g: number, b: number): string;
2
+ //# sourceMappingURL=color-utils.d.ts.map
@@ -0,0 +1,10 @@
1
/**
 * Convert an RGB triple into an uppercase `#RRGGBB` hex string.
 *
 * Each component is multiplied by 255, rounded, and clamped to 0..255, so
 * inputs are expected as fractions where 0 is "off" and 1 is "full".
 * Out-of-range values saturate. A NaN component (for example from a missing
 * operand) is treated as 0 so the result is always a well-formed
 * seven-character colour instead of "#NANNANNAN".
 *
 * @param r Red component.
 * @param g Green component.
 * @param b Blue component.
 * @returns The colour as `#RRGGBB`.
 */
export function rgbTupleToHex(r, g, b) {
    const toHexByte = (component) => {
        const scaled = Math.round(component * 255);
        // NaN passes straight through Math.min/Math.max, so map it explicitly.
        const clamped = Number.isNaN(scaled) ? 0 : Math.max(0, Math.min(255, scaled));
        return clamped.toString(16).toUpperCase().padStart(2, '0');
    };
    return '#' + toHexByte(r) + toHexByte(g) + toHexByte(b);
}
10
+ //# sourceMappingURL=color-utils.js.map
@@ -0,0 +1,13 @@
1
+ export declare const PDF_TO_WEB_FONT_MAP: Record<string, string>;
2
+ export declare const DEFAULT_FONT_FAMILY = "Roboto";
3
+ export declare const DEFAULT_FONT_SIZE = 14;
4
+ export declare const DEFAULT_FILL = "#000000";
5
+ export declare const DEFAULT_BACKGROUND = "#FFFFFF";
6
+ export declare const DEFAULT_LINE_HEIGHT = 1.2;
7
+ export declare const DEFAULT_LETTER_SPACING = 0;
8
+ export declare const MIN_TEXT_WIDTH = 2;
9
+ export declare const MIN_TEXT_HEIGHT = 2;
10
+ export declare const MIN_IMAGE_WIDTH = 5;
11
+ export declare const MIN_IMAGE_HEIGHT = 5;
12
+ export declare const MIN_FONT_SIZE = 4;
13
+ //# sourceMappingURL=constants.d.ts.map
@@ -0,0 +1,111 @@
1
+ export const PDF_TO_WEB_FONT_MAP = {
2
+ // Arial family
3
+ arial: 'Arial',
4
+ arialmt: 'Arial',
5
+ 'arial-boldmt': 'Arial',
6
+ 'arial-italicmt': 'Arial',
7
+ 'arial-bolditalicmt': 'Arial',
8
+ arialmtbold: 'Arial',
9
+ // Helvetica family
10
+ helvetica: 'Helvetica',
11
+ 'helvetica-bold': 'Helvetica',
12
+ 'helvetica-oblique': 'Helvetica',
13
+ 'helvetica-boldoblique': 'Helvetica',
14
+ helveticaneue: 'Helvetica Neue',
15
+ 'helveticaneue-bold': 'Helvetica Neue',
16
+ 'helveticaneue-light': 'Helvetica Neue',
17
+ // Times family
18
+ times: 'Times New Roman',
19
+ timesnewroman: 'Times New Roman',
20
+ timesnewromanpsmt: 'Times New Roman',
21
+ 'timesnewromanps-boldmt': 'Times New Roman',
22
+ 'timesnewromanps-italicmt': 'Times New Roman',
23
+ 'times-roman': 'Times New Roman',
24
+ 'times-bold': 'Times New Roman',
25
+ 'times-italic': 'Times New Roman',
26
+ // Courier family
27
+ courier: 'Courier New',
28
+ couriernew: 'Courier New',
29
+ couriernewpsmt: 'Courier New',
30
+ 'courier-bold': 'Courier New',
31
+ // Georgia
32
+ georgia: 'Georgia',
33
+ 'georgia-bold': 'Georgia',
34
+ // Verdana
35
+ verdana: 'Verdana',
36
+ 'verdana-bold': 'Verdana',
37
+ // Calibri family
38
+ calibri: 'Calibri',
39
+ 'calibri-bold': 'Calibri',
40
+ 'calibri-italic': 'Calibri',
41
+ 'calibri-light': 'Calibri',
42
+ // Cambria
43
+ cambria: 'Cambria',
44
+ cambriamath: 'Cambria',
45
+ // Open Sans
46
+ opensans: 'Open Sans',
47
+ 'opensans-regular': 'Open Sans',
48
+ 'opensans-bold': 'Open Sans',
49
+ 'opensans-light': 'Open Sans',
50
+ 'opensans-semibold': 'Open Sans',
51
+ // Roboto
52
+ roboto: 'Roboto',
53
+ 'roboto-regular': 'Roboto',
54
+ 'roboto-bold': 'Roboto',
55
+ 'roboto-light': 'Roboto',
56
+ 'roboto-medium': 'Roboto',
57
+ // Roboto Condensed
58
+ robotocondensed: 'Roboto Condensed',
59
+ 'robotocondensed-regular': 'Roboto Condensed',
60
+ 'robotocondensed-bold': 'Roboto Condensed',
61
+ 'robotocondensed-light': 'Roboto Condensed',
62
+ 'robotocondensed-italic': 'Roboto Condensed',
63
+ // Lato
64
+ lato: 'Lato',
65
+ 'lato-regular': 'Lato',
66
+ 'lato-bold': 'Lato',
67
+ // Montserrat
68
+ montserrat: 'Montserrat',
69
+ 'montserrat-bold': 'Montserrat',
70
+ // PT fonts
71
+ ptserif: 'PT Serif',
72
+ 'ptserif-regular': 'PT Serif',
73
+ 'ptserif-bold': 'PT Serif',
74
+ 'ptserif-italic': 'PT Serif',
75
+ 'ptserif-bolditalic': 'PT Serif',
76
+ ptsans: 'PT Sans',
77
+ 'ptsans-regular': 'PT Sans',
78
+ 'ptsans-bold': 'PT Sans',
79
+ // Noto Sans
80
+ notosans: 'Noto Sans',
81
+ 'notosans-regular': 'Noto Sans',
82
+ 'notosans-bold': 'Noto Sans',
83
+ 'notosans-italic': 'Noto Sans',
84
+ // Poppins
85
+ poppins: 'Poppins',
86
+ 'poppins-regular': 'Poppins',
87
+ 'poppins-bold': 'Poppins',
88
+ 'poppins-italic': 'Poppins',
89
+ 'poppins-light': 'Poppins',
90
+ 'poppins-medium': 'Poppins',
91
+ 'poppins-semibold': 'Poppins',
92
+ // League Spartan
93
+ leaguespartan: 'League Spartan',
94
+ 'leaguespartan-bold': 'League Spartan',
95
+ // Symbol / special
96
+ symbol: 'Symbol',
97
+ zapfdingbats: 'ZapfDingbats',
98
+ wingdings: 'Wingdings',
99
+ };
100
+ export const DEFAULT_FONT_FAMILY = 'Roboto';
101
+ export const DEFAULT_FONT_SIZE = 14;
102
+ export const DEFAULT_FILL = '#000000';
103
+ export const DEFAULT_BACKGROUND = '#FFFFFF';
104
+ export const DEFAULT_LINE_HEIGHT = 1.2;
105
+ export const DEFAULT_LETTER_SPACING = 0;
106
+ export const MIN_TEXT_WIDTH = 2;
107
+ export const MIN_TEXT_HEIGHT = 2;
108
+ export const MIN_IMAGE_WIDTH = 5;
109
+ export const MIN_IMAGE_HEIGHT = 5;
110
+ export const MIN_FONT_SIZE = 4.0;
111
+ //# sourceMappingURL=constants.js.map
@@ -0,0 +1,7 @@
1
+ export declare function cleanPdfFontName(rawName: string): string;
2
+ export declare function extractWeightFromName(rawName: string): string;
3
+ export declare function extractStyleFromName(rawName: string): string;
4
+ export declare function mapPdfFont(rawName: string): string;
5
+ /** Check if a PDF font name maps to a known web/Google font in our lookup table. */
6
+ export declare function isKnownWebFont(rawName: string): boolean;
7
+ //# sourceMappingURL=font-mapper.d.ts.map
@@ -0,0 +1,111 @@
1
+ import { PDF_TO_WEB_FONT_MAP } from './constants.js';
2
/**
 * Normalise a raw PDF font name into a lowercase lookup key.
 *
 * Steps, in order:
 *  1. trim surrounding whitespace, so the anchored patterns below can match;
 *  2. drop a leading six-uppercase-letter subset tag ("ABCDEF+");
 *  3. drop a trailing Google Fonts variable-font weight marker
 *     (Arimo_700wght -> Arimo);
 *  4. lowercase and trim again, in case removing the tag exposed whitespace.
 *
 * @param rawName Font name as reported by the PDF.
 * @returns The cleaned, lowercase name.
 */
export function cleanPdfFontName(rawName) {
    return rawName
        .trim()
        .replace(/^[A-Z]{6}\+/, '')
        .replace(/_\d+wght$/i, '')
        .toLowerCase()
        .trim();
}
9
/**
 * Derive a coarse weight ('bold', 'light' or 'normal') from a font name.
 *
 * A Google Fonts variable-font marker such as "_700wght" takes precedence:
 * 600 and above is bold, 300 and below is light, anything else is normal.
 * Otherwise the lowercased name is scanned for weight keywords, with the
 * bold keywords checked before the light ones.
 */
export function extractWeightFromName(rawName) {
    const name = rawName.toLowerCase();
    // Google Fonts variable font naming: Arimo_700wght
    const axis = name.match(/_(\d+)wght/);
    if (axis) {
        const weight = parseInt(axis[1], 10);
        return weight >= 600 ? 'bold' : weight <= 300 ? 'light' : 'normal';
    }
    const mentionsAny = (keywords) => keywords.some((keyword) => name.includes(keyword));
    if (mentionsAny(['bold', 'heavy', 'black', 'semibold', 'demibold'])) {
        return 'bold';
    }
    if (mentionsAny(['light', 'thin', 'extralight', 'ultralight'])) {
        return 'light';
    }
    return 'normal';
}
29
/**
 * Derive the font style from a font name: 'italic' when the lowercased name
 * contains "italic", "oblique" or "inclined", otherwise 'normal'.
 */
export function extractStyleFromName(rawName) {
    const name = rawName.toLowerCase();
    for (const marker of ['italic', 'oblique', 'inclined']) {
        if (name.includes(marker)) {
            return 'italic';
        }
    }
    return 'normal';
}
36
/**
 * Return `str` without a trailing `suffix`, or `str` unchanged when it does
 * not end with that suffix.
 *
 * An empty suffix is a no-op. It needs its own guard because every string
 * "ends with" the empty string and `slice(0, -0)` is `slice(0, 0)`, which
 * would otherwise erase the whole input.
 */
function removeSuffix(str, suffix) {
    if (suffix.length > 0 && str.endsWith(suffix)) {
        return str.slice(0, str.length - suffix.length);
    }
    return str;
}
42
/**
 * Resolve a raw PDF font name to a family in PDF_TO_WEB_FONT_MAP.
 *
 * Matching is attempted from most to least specific:
 *  1. exact match on the cleaned name;
 *  2. the cleaned name with a PostScript-style suffix removed;
 *  3. the cleaned name with a weight/style suffix removed;
 *  4. substring match in either direction, trying the longest keys first.
 *
 * @param rawName Font name as reported by the PDF.
 * @returns The mapped family name, or null when nothing matches.
 */
function lookupKnownWebFont(rawName) {
    const cleaned = cleanPdfFontName(rawName);
    // An empty name carries no information. Without this guard the substring
    // step would match every key, because any string includes ''.
    if (cleaned === '') {
        return null;
    }
    // Own-property check only: the `in` operator would also accept inherited
    // names such as "constructor" and hand back a non-string value.
    const isMapped = (key) => Object.prototype.hasOwnProperty.call(PDF_TO_WEB_FONT_MAP, key);
    // 1. Exact match
    if (isMapped(cleaned)) {
        return PDF_TO_WEB_FONT_MAP[cleaned];
    }
    const suffixGroups = [
        // 2. Common PostScript suffixes
        ['-roman', 'psmt', 'ps-boldmt', 'ps-italicmt', 'ps-bolditalicmt', 'mt', 'ps'],
        // 3. Weight/style suffixes
        [
            '-bolditalic',
            '-boldoblique',
            '-bold',
            '-italic',
            '-oblique',
            '-light',
            '-regular',
            '-medium',
            '-semibold',
            '-thin',
            '-heavy',
        ],
    ];
    for (const group of suffixGroups) {
        for (const suffix of group) {
            const base = removeSuffix(cleaned, suffix);
            if (base !== cleaned && isMapped(base)) {
                return PDF_TO_WEB_FONT_MAP[base];
            }
        }
    }
    // 4. Check if any known family name is a substring (longest key first)
    const sortedEntries = Object.entries(PDF_TO_WEB_FONT_MAP).sort((a, b) => b[0].length - a[0].length);
    for (const [pdfKey, webFont] of sortedEntries) {
        if (cleaned.includes(pdfKey) || pdfKey.includes(cleaned)) {
            return webFont;
        }
    }
    return null;
}
91
/**
 * Map a raw PDF font name to a font family name.
 *
 * Known fonts resolve through the lookup table. Anything else is returned as
 * a tidied display name: the subset tag, variable-font weight marker and a
 * trailing weight/style suffix are removed, and a space is inserted at each
 * lowercase-to-uppercase boundary. If the tidied name is two characters or
 * shorter, the raw name is returned untouched.
 */
export function mapPdfFont(rawName) {
    const known = lookupKnownWebFont(rawName);
    if (known) {
        return known;
    }
    // 5. Return cleaned name as-is (might be valid custom font)
    const displayName = rawName
        .replace(/^[A-Z]{6}\+/, '')
        .replace(/_\d+wght$/i, '') // Google Fonts variable font naming
        .replace(/[-](Bold|Italic|Regular|Light|Medium|Thin|Heavy|BoldItalic|Oblique|BoldOblique)(MT|PSMT|PS)?$/i, '');
    if (!displayName || displayName.length <= 2) {
        return rawName;
    }
    return displayName.replace(/(?<=[a-z])(?=[A-Z])/g, ' ');
}
107
/**
 * Report whether a PDF font name resolves to a family in the built-in
 * web/Google font lookup table.
 */
export function isKnownWebFont(rawName) {
    const family = lookupKnownWebFont(rawName);
    return family !== null;
}
111
+ //# sourceMappingURL=font-mapper.js.map
@@ -0,0 +1,10 @@
1
+ export interface FontMetrics {
2
+ fontName: string;
3
+ isSerifFont: boolean;
4
+ isMonospace: boolean;
5
+ avgWidth: number;
6
+ ascent: number;
7
+ descent: number;
8
+ }
9
+ export declare function findClosestGoogleFont(metrics: FontMetrics): string;
10
+ //# sourceMappingURL=font-matcher.d.ts.map
@@ -0,0 +1,89 @@
1
+ // Google Font matching for non-embeddable PDF fonts.
2
+ // Uses font metrics (average glyph width, ascent/descent) and name hints
3
+ // to find the closest matching Google Font.
4
+ // Reference metrics for popular Google Fonts.
5
+ // avgWidth = average glyph width in 1/1000 em units (measured from font files).
6
+ // These are approximate but good enough for nearest-neighbor matching.
7
+ const GOOGLE_FONT_TABLE = [
8
+ // Sans-serif — narrow/condensed
9
+ { name: 'Roboto Condensed', category: 'sans-serif', avgWidth: 460, ascent: 0.928, descent: -0.244 },
10
+ { name: 'Barlow Condensed', category: 'sans-serif', avgWidth: 420, ascent: 0.950, descent: -0.250 },
11
+ // Sans-serif — normal width
12
+ { name: 'Roboto', category: 'sans-serif', avgWidth: 538, ascent: 0.928, descent: -0.244 },
13
+ { name: 'Open Sans', category: 'sans-serif', avgWidth: 570, ascent: 1.069, descent: -0.293 },
14
+ { name: 'Lato', category: 'sans-serif', avgWidth: 518, ascent: 0.987, descent: -0.213 },
15
+ { name: 'Inter', category: 'sans-serif', avgWidth: 540, ascent: 0.984, descent: -0.250 },
16
+ { name: 'DM Sans', category: 'sans-serif', avgWidth: 530, ascent: 1.000, descent: -0.250 },
17
+ { name: 'Work Sans', category: 'sans-serif', avgWidth: 535, ascent: 0.970, descent: -0.260 },
18
+ { name: 'Outfit', category: 'sans-serif', avgWidth: 530, ascent: 1.000, descent: -0.260 },
19
+ { name: 'Noto Sans', category: 'sans-serif', avgWidth: 545, ascent: 1.069, descent: -0.293 },
20
+ { name: 'PT Sans', category: 'sans-serif', avgWidth: 510, ascent: 0.905, descent: -0.212 },
21
+ { name: 'Source Sans Pro', category: 'sans-serif', avgWidth: 500, ascent: 0.984, descent: -0.273 },
22
+ // Sans-serif — wide/geometric
23
+ { name: 'Montserrat', category: 'sans-serif', avgWidth: 640, ascent: 0.968, descent: -0.251 },
24
+ { name: 'Poppins', category: 'sans-serif', avgWidth: 540, ascent: 1.050, descent: -0.350 },
25
+ { name: 'Nunito', category: 'sans-serif', avgWidth: 580, ascent: 1.011, descent: -0.353 },
26
+ { name: 'Raleway', category: 'sans-serif', avgWidth: 600, ascent: 0.930, descent: -0.250 },
27
+ { name: 'Nunito Sans', category: 'sans-serif', avgWidth: 545, ascent: 1.011, descent: -0.353 },
28
+ { name: 'Manrope', category: 'sans-serif', avgWidth: 560, ascent: 1.028, descent: -0.272 },
29
+ // Serif
30
+ { name: 'Merriweather', category: 'serif', avgWidth: 530, ascent: 0.985, descent: -0.300 },
31
+ { name: 'Lora', category: 'serif', avgWidth: 510, ascent: 0.956, descent: -0.382 },
32
+ { name: 'PT Serif', category: 'serif', avgWidth: 524, ascent: 1.039, descent: -0.286 },
33
+ { name: 'Noto Serif', category: 'serif', avgWidth: 530, ascent: 1.069, descent: -0.293 },
34
+ { name: 'Source Serif Pro', category: 'serif', avgWidth: 505, ascent: 0.918, descent: -0.250 },
35
+ { name: 'Libre Baskerville', category: 'serif', avgWidth: 540, ascent: 0.983, descent: -0.300 },
36
+ { name: 'Playfair Display', category: 'serif', avgWidth: 500, ascent: 1.082, descent: -0.251 },
37
+ { name: 'EB Garamond', category: 'serif', avgWidth: 440, ascent: 0.960, descent: -0.240 },
38
+ { name: 'Crimson Text', category: 'serif', avgWidth: 460, ascent: 0.927, descent: -0.315 },
39
+ // Monospace
40
+ { name: 'Roboto Mono', category: 'monospace', avgWidth: 600, ascent: 1.048, descent: -0.271 },
41
+ { name: 'Source Code Pro', category: 'monospace', avgWidth: 600, ascent: 0.984, descent: -0.273 },
42
+ { name: 'JetBrains Mono', category: 'monospace', avgWidth: 600, ascent: 1.020, descent: -0.300 },
43
+ { name: 'Fira Code', category: 'monospace', avgWidth: 600, ascent: 0.935, descent: -0.265 },
44
+ // Script/handwriting
45
+ { name: 'Dancing Script', category: 'script', avgWidth: 470, ascent: 0.958, descent: -0.400 },
46
+ { name: 'Great Vibes', category: 'script', avgWidth: 430, ascent: 1.050, descent: -0.550 },
47
+ { name: 'Pacifico', category: 'script', avgWidth: 550, ascent: 1.050, descent: -0.370 },
48
+ { name: 'Sacramento', category: 'script', avgWidth: 380, ascent: 0.900, descent: -0.400 },
49
+ { name: 'Satisfy', category: 'script', avgWidth: 460, ascent: 1.050, descent: -0.350 },
50
+ ];
51
+ // Keywords in font names that hint at category
52
+ const SERIF_HINTS = ['serif', 'roman', 'garamond', 'baskerville', 'palatino', 'georgia', 'cambria', 'minion', 'caslon', 'bodoni', 'didot', 'times', 'book'];
53
+ const MONO_HINTS = ['mono', 'courier', 'consolas', 'code', 'terminal', 'fixed'];
54
+ const SCRIPT_HINTS = ['script', 'cursive', 'handwrit', 'callig', 'brush', 'sloop', 'dancing', 'pacifico', 'satisfy', 'lobster', 'sacramento', 'kaushan'];
55
/**
 * Classify a font as 'script', 'monospace', 'serif' or 'sans-serif'.
 *
 * Script keywords in the name are checked first, then the monospace flag or
 * keywords, then the serif flag or keywords. Sans-serif is the default.
 */
function detectCategory(metrics) {
    const name = metrics.fontName.toLowerCase();
    const nameHints = (hints) => hints.some((hint) => name.includes(hint));
    if (nameHints(SCRIPT_HINTS)) {
        return 'script';
    }
    if (metrics.isMonospace || nameHints(MONO_HINTS)) {
        return 'monospace';
    }
    if (metrics.isSerifFont || nameHints(SERIF_HINTS)) {
        return 'serif';
    }
    return 'sans-serif';
}
65
/**
 * Pick the Google Font from GOOGLE_FONT_TABLE that is nearest to the given
 * metrics.
 *
 * Candidates are restricted to the detected category, falling back to the
 * sans-serif entries if that category has none. Each candidate is scored as
 * |avgWidth difference| / 100 + 2 * |ascent difference| + 2 * |descent difference|,
 * and the lowest score wins (the first candidate on ties).
 *
 * @returns The name of the best-scoring candidate.
 */
export function findClosestGoogleFont(metrics) {
    const category = detectCategory(metrics);
    const sameCategory = GOOGLE_FONT_TABLE.filter((font) => font.category === category);
    // Fallback to all sans-serif if category has no entries
    const pool = sameCategory.length === 0
        ? GOOGLE_FONT_TABLE.filter((font) => font.category === 'sans-serif')
        : sameCategory;
    // Lower is better; width carries the most weight in the sum.
    const distance = (font) => {
        const widthDiff = Math.abs(font.avgWidth - metrics.avgWidth) / 100;
        const ascentDiff = Math.abs(font.ascent - metrics.ascent) * 2;
        const descentDiff = Math.abs(font.descent - metrics.descent) * 2;
        return widthDiff + ascentDiff + descentDiff;
    };
    let winner = pool[0];
    let lowest = Infinity;
    pool.forEach((font) => {
        const score = distance(font);
        if (score < lowest) {
            lowest = score;
            winner = font;
        }
    });
    return winner.name;
}
89
+ //# sourceMappingURL=font-matcher.js.map
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Merge multiple font subset buffers into a single font.
3
+ * Deduplicates glyphs by unicode code point, keeping the first occurrence.
4
+ * Falls back to the largest blob if merging fails.
5
+ */
6
+ export declare function mergeSubsetFonts(buffers: Uint8Array[]): Uint8Array;
7
+ //# sourceMappingURL=font-merger.d.ts.map
@@ -0,0 +1,114 @@
1
+ import opentype from 'opentype.js';
2
// Bounds (inclusive) of the Unicode Private Use Area block. Per the original
// note, pdfjs adds code points in this range for its internal glyph mapping,
// and they have to be removed before merging to avoid cmap conflicts.
const PUA_START = 0xe000;
const PUA_END = 0xf8ff;
/** True when code point `u` lies inside the Private Use Area range. */
function isPUA(u) {
    const atOrAboveStart = u >= PUA_START;
    return atOrAboveStart && u <= PUA_END;
}
9
/**
 * Merge multiple font subset buffers into a single font.
 *
 * The largest buffer is parsed as the base font; glyphs from the remaining
 * buffers are appended when they contribute at least one unicode code point
 * the merged font does not have yet, so the first occurrence of a code point
 * wins. Private Use Area code points are stripped from every subset first.
 *
 * Edge cases:
 *  - an empty input yields an empty Uint8Array (there is nothing to merge);
 *  - a single buffer is returned as-is, without parsing;
 *  - if parsing or merging throws, the largest input buffer is returned.
 *
 * @param buffers Font subset binaries.
 * @returns The merged font bytes, or a fallback as described above.
 */
export function mergeSubsetFonts(buffers) {
    // Guard the empty case explicitly: the fallback `reduce` below has no
    // initial value and would throw a TypeError on an empty array.
    if (buffers.length === 0)
        return new Uint8Array(0);
    if (buffers.length === 1)
        return buffers[0];
    try {
        // Start with the largest subset as the base
        const sorted = [...buffers].sort((a, b) => b.length - a.length);
        const base = opentype.parse(toArrayBuffer(sorted[0]));
        // Strip PUA unicodes from base font glyphs
        stripPUA(base);
        // Collect existing unicodes from base font
        const existingUnicodes = new Set();
        for (let i = 0; i < base.glyphs.length; i++) {
            const g = base.glyphs.get(i);
            if (g.unicodes)
                g.unicodes.forEach((u) => existingUnicodes.add(u));
        }
        // Merge glyphs from remaining subsets
        for (let si = 1; si < sorted.length; si++) {
            const other = opentype.parse(toArrayBuffer(sorted[si]));
            stripPUA(other);
            for (let i = 0; i < other.glyphs.length; i++) {
                const g = other.glyphs.get(i);
                const unis = g.unicodes || [];
                // Skip glyphs with no mapping, or whose mappings are all present already.
                if (unis.length === 0)
                    continue;
                if (unis.every((u) => existingUnicodes.has(u)))
                    continue;
                base.glyphs.push(base.glyphs.length, g);
                unis.forEach((u) => existingUnicodes.add(u));
            }
        }
        // Assign names to unnamed glyphs to avoid opentype.js warnings
        assignMissingGlyphNames(base);
        // Assign a non-zero createdTimestamp so opentype.js doesn't generate a fresh
        // one on each serialization (it checks `if (options.createdTimestamp)`).
        base.createdTimestamp = 1;
        // opentype.js writes non-deterministic fields (modified timestamp, checksums)
        // into the serialized font. Zero them out in place for stable output.
        const merged = new Uint8Array(base.toArrayBuffer());
        stabilizeFontBytes(merged);
        return merged;
    }
    catch {
        // Fall back to largest blob if parsing/merging fails
        return buffers.reduce((a, b) => (a.length >= b.length ? a : b));
    }
}
61
/**
 * Remove Private Use Area code points from every glyph of `font`, in place.
 *
 * For each glyph, PUA entries are filtered out of `unicodes` when that list
 * exists. If the primary `unicode` is itself a PUA code point, it is replaced
 * by the first remaining entry of `unicodes`, or by 0 when none remain or
 * the glyph has no `unicodes` list at all.
 */
function stripPUA(font) {
    for (let i = 0; i < font.glyphs.length; i++) {
        const g = font.glyphs.get(i);
        if (g.unicodes) {
            g.unicodes = g.unicodes.filter((u) => !isPUA(u));
        }
        if (g.unicode && isPUA(g.unicode)) {
            // `unicodes` is optional (see the guard above), so never dereference it blindly.
            const remaining = g.unicodes || [];
            g.unicode = remaining.length > 0 ? remaining[0] : 0;
        }
    }
}
72
+ function assignMissingGlyphNames(font) {
73
+ for (let i = 0; i < font.glyphs.length; i++) {
74
+ const g = font.glyphs.get(i);
75
+ if (!g.name) {
76
+ g.name = g.unicode ? `uni${g.unicode.toString(16).toUpperCase().padStart(4, '0')}` : `glyph${i}`;
77
+ }
78
+ }
79
+ }
80
/**
 * Blank the fields of a serialized font that the original notes describe as
 * varying between runs, so that the output bytes are stable. Works in place.
 *
 * The table directory (16-byte entries starting at byte 12, count read from
 * byte 4) is scanned for the 'head' entry. For that entry only:
 *  - the directory checksum (4 bytes at entry offset 4) is zeroed;
 *  - checkSumAdjustment (4 bytes at table offset 8) is zeroed;
 *  - the modified timestamp (8 bytes at table offset 28) is zeroed.
 * Nothing is changed when no 'head' entry is found.
 */
function stabilizeFontBytes(buf) {
    const view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
    const tableCount = view.getUint16(4);
    for (let entry = 0, dirOffset = 12; entry < tableCount; entry++, dirOffset += 16) {
        const tag = String.fromCharCode(buf[dirOffset], buf[dirOffset + 1], buf[dirOffset + 2], buf[dirOffset + 3]);
        if (tag !== 'head') {
            continue;
        }
        const headStart = view.getUint32(dirOffset + 8);
        // Directory checksum of the head entry
        buf.fill(0, dirOffset + 4, dirOffset + 8);
        // checkSumAdjustment
        buf.fill(0, headStart + 8, headStart + 12);
        // modified timestamp
        buf.fill(0, headStart + 28, headStart + 36);
        return;
    }
}
111
/**
 * Copy exactly the bytes covered by the typed-array view `data` into a new
 * buffer, honouring the view's offset and length within its backing buffer.
 */
function toArrayBuffer(data) {
    const { buffer, byteOffset, byteLength } = data;
    return buffer.slice(byteOffset, byteOffset + byteLength);
}
114
+ //# sourceMappingURL=font-merger.js.map
@@ -0,0 +1,15 @@
1
+ import opentype from 'opentype.js';
2
+ import type { PolotnoFont, PolotnoPage } from './index.js';
3
+ export declare class FontRegistry {
4
+ private fontDataMap;
5
+ private fontMetricsMap;
6
+ private otCache;
7
+ /**
8
+ * Parse font data with opentype.js, returning cached result if available.
9
+ * Key is the pdfjs loaded font name (e.g. "g_d0_f1").
10
+ */
11
+ parseOpentype(loadedName: string, data: Uint8Array): opentype.Font | null;
12
+ recordFont(fontObj: any): void;
13
+ finalize(fontStrategy: 'embed' | 'googleFontsMatch', pages: PolotnoPage[]): PolotnoFont[];
14
+ }
15
+ //# sourceMappingURL=font-registry.d.ts.map
@@ -0,0 +1,110 @@
1
+ import opentype from 'opentype.js';
2
+ import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
3
+ import { findClosestGoogleFont } from './font-matcher.js';
4
+ import { mergeSubsetFonts } from './font-merger.js';
5
/**
 * Collects the fonts seen while importing a PDF and turns them into the font
 * list of the result.
 *
 * State, each keyed as noted:
 *  - fontDataMap: mapped family name -> list of { mime, data } font binaries;
 *  - fontMetricsMap: mapped family name -> metrics used for Google Font matching;
 *  - otCache: pdfjs loaded font name -> parsed opentype.js font, or null when
 *    parsing failed.
 */
export class FontRegistry {
    constructor() {
        this.fontDataMap = new Map();
        this.fontMetricsMap = new Map();
        // Parsed opentype.js fonts are kept across pages so each is parsed once.
        this.otCache = new Map();
    }
    /**
     * Parse font data with opentype.js, reusing the cached outcome for
     * `loadedName` (the pdfjs loaded font name, e.g. "g_d0_f1") when present.
     * A failed parse is cached as null and returned as null.
     */
    parseOpentype(loadedName, data) {
        if (this.otCache.has(loadedName)) {
            return this.otCache.get(loadedName);
        }
        let parsed;
        try {
            parsed = opentype.parse(new Uint8Array(data).buffer);
        }
        catch {
            parsed = null;
        }
        this.otCache.set(loadedName, parsed);
        return parsed;
    }
    /**
     * Record a font object. Fonts without a name, and fonts that resolve to
     * the known web-font table, are ignored. For the rest, the binary data
     * (when non-empty) is appended under the mapped family, and metrics are
     * stored the first time that family is seen.
     */
    recordFont(fontObj) {
        if (!fontObj?.name)
            return;
        const family = mapPdfFont(fontObj.name);
        if (isKnownWebFont(fontObj.name))
            return;
        // Collect font binary data for non-Google/non-standard fonts
        if (fontObj.data && fontObj.data.length > 0) {
            const blobs = this.fontDataMap.get(family) || [];
            blobs.push({
                mime: fontObj.mimetype || 'font/opentype',
                data: new Uint8Array(fontObj.data),
            });
            this.fontDataMap.set(family, blobs);
        }
        // Metrics are only captured once per family (for Google Font matching)
        if (this.fontMetricsMap.has(family))
            return;
        const widths = (fontObj.widths || []).filter((w) => w != null && w > 0);
        let avgWidth = 500;
        if (widths.length > 0) {
            let total = 0;
            for (const width of widths) {
                total += width;
            }
            avgWidth = total / widths.length;
        }
        this.fontMetricsMap.set(family, {
            fontName: fontObj.name.replace(/^[A-Z]{6}\+/, ''),
            isSerifFont: fontObj.isSerifFont || false,
            isMonospace: fontObj.isMonospace || false,
            avgWidth: Math.round(avgWidth),
            ascent: fontObj.ascent || 0.9,
            descent: fontObj.descent || -0.25,
        });
    }
    /**
     * Produce the final font list.
     *
     * 'googleFontsMatch': every family with recorded metrics is mapped to its
     * closest Google Font, the `fontFamily` of matching text children on all
     * pages is rewritten in place, and an empty list is returned.
     *
     * Any other strategy ('embed'): each family with recorded binary data is
     * returned as a base64 data URI; multiple subsets of a family are merged
     * into one font first, and the first blob's MIME type is used.
     */
    finalize(fontStrategy, pages) {
        if (fontStrategy === 'googleFontsMatch') {
            // Replace all non-Google font families with closest Google Font matches
            const replacements = new Map();
            this.fontMetricsMap.forEach((metrics, family) => {
                replacements.set(family, findClosestGoogleFont(metrics));
            });
            // Apply replacements to all text elements across all pages
            for (const page of pages) {
                for (const child of page.children) {
                    if (child.type !== 'text')
                        continue;
                    const substitute = replacements.get(child.fontFamily);
                    if (substitute) {
                        child.fontFamily = substitute;
                    }
                }
            }
            return [];
        }
        // 'embed' strategy: embed font data as base64 data URIs.
        const fonts = [];
        for (const [fontFamily, blobs] of this.fontDataMap) {
            // When multiple subsets exist, merge them into a single font.
            const fontData = blobs.length === 1
                ? blobs[0].data
                : mergeSubsetFonts(blobs.map((blob) => blob.data));
            const mime = blobs[0].mime;
            let b64;
            if (typeof Buffer !== 'undefined') {
                b64 = Buffer.from(fontData).toString('base64');
            }
            else {
                // No Buffer available: build a binary string for btoa instead.
                let binary = '';
                for (const byte of fontData) {
                    binary += String.fromCharCode(byte);
                }
                b64 = btoa(binary);
            }
            fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
        }
        return fonts;
    }
}
110
+ //# sourceMappingURL=font-registry.js.map
@@ -0,0 +1,3 @@
1
+ export declare function imageDataToDataUri(data: Uint8ClampedArray, width: number, height: number, kind: number): string;
2
+ export declare function imageBytesToDataUri(data: Uint8Array, mimeType: string): string;
3
+ //# sourceMappingURL=image-encoder.d.ts.map