@polotno/pdf-import 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/font-registry.js +58 -21
- package/lib/page-parser.js +10 -12
- package/lib/text-blocks.d.ts +4 -1
- package/lib/text-blocks.js +14 -3
- package/lib/text-layout.js +1 -1
- package/package.json +1 -1
package/lib/font-registry.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import opentype from 'opentype.js';
|
|
2
|
-
import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
|
|
2
|
+
import { mapPdfFont, isKnownWebFont, extractWeightFromName, extractStyleFromName, } from './font-mapper.js';
|
|
3
3
|
import { findClosestGoogleFont } from './font-matcher.js';
|
|
4
4
|
import { mergeSubsetFonts } from './font-merger.js';
|
|
5
5
|
export class FontRegistry {
|
|
@@ -43,11 +43,19 @@ export class FontRegistry {
|
|
|
43
43
|
if (fontFamily !== mappedFamily) {
|
|
44
44
|
this.renameMap.set(fontObj.name, fontFamily);
|
|
45
45
|
}
|
|
46
|
-
//
|
|
46
|
+
// Detect weight/style from the PDF font name
|
|
47
|
+
const fontWeight = extractWeightFromName(fontObj.name);
|
|
48
|
+
const fontStyle = extractStyleFromName(fontObj.name);
|
|
49
|
+
// Collect font binary data with weight/style info
|
|
47
50
|
if (shouldEmbed && fontObj.data && fontObj.data.length > 0) {
|
|
48
51
|
const mime = fontObj.mimetype || 'font/opentype';
|
|
49
52
|
const arr = this.fontDataMap.get(fontFamily) || [];
|
|
50
|
-
arr.push({
|
|
53
|
+
arr.push({
|
|
54
|
+
mime,
|
|
55
|
+
data: new Uint8Array(fontObj.data),
|
|
56
|
+
fontWeight,
|
|
57
|
+
fontStyle,
|
|
58
|
+
});
|
|
51
59
|
this.fontDataMap.set(fontFamily, arr);
|
|
52
60
|
}
|
|
53
61
|
// Collect font metrics for unknown fonts (for Google Font matching)
|
|
@@ -93,30 +101,59 @@ export class FontRegistry {
|
|
|
93
101
|
return fonts;
|
|
94
102
|
}
|
|
95
103
|
// 'embed' strategy: embed font data as base64 data URIs.
|
|
96
|
-
// When multiple subsets exist, merge them into a single font.
|
|
97
104
|
for (const [fontFamily, blobs] of this.fontDataMap) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
fontData = mergeSubsetFonts(blobs.map((b) => b.data));
|
|
106
|
-
mime = blobs[0].mime;
|
|
105
|
+
// Group blobs by weight+style variant
|
|
106
|
+
const variantMap = new Map();
|
|
107
|
+
for (const blob of blobs) {
|
|
108
|
+
const key = `${blob.fontWeight}|${blob.fontStyle}`;
|
|
109
|
+
const arr = variantMap.get(key) || [];
|
|
110
|
+
arr.push(blob);
|
|
111
|
+
variantMap.set(key, arr);
|
|
107
112
|
}
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
113
|
+
// When multiple subsets exist for the same variant, merge them.
|
|
114
|
+
const variants = [];
|
|
115
|
+
for (const [, variantBlobs] of variantMap) {
|
|
116
|
+
let fontData;
|
|
117
|
+
if (variantBlobs.length === 1) {
|
|
118
|
+
fontData = variantBlobs[0].data;
|
|
119
|
+
}
|
|
120
|
+
else {
|
|
121
|
+
fontData = mergeSubsetFonts(variantBlobs.map((b) => b.data));
|
|
122
|
+
}
|
|
123
|
+
variants.push({
|
|
124
|
+
fontWeight: variantBlobs[0].fontWeight,
|
|
125
|
+
fontStyle: variantBlobs[0].fontStyle,
|
|
126
|
+
data: fontData,
|
|
127
|
+
mime: variantBlobs[0].mime,
|
|
128
|
+
});
|
|
111
129
|
}
|
|
112
|
-
|
|
130
|
+
function toBase64(data) {
|
|
131
|
+
if (typeof Buffer !== 'undefined') {
|
|
132
|
+
return Buffer.from(data).toString('base64');
|
|
133
|
+
}
|
|
113
134
|
let binary = '';
|
|
114
|
-
for (let bi = 0; bi <
|
|
115
|
-
binary += String.fromCharCode(
|
|
135
|
+
for (let bi = 0; bi < data.length; bi++) {
|
|
136
|
+
binary += String.fromCharCode(data[bi]);
|
|
116
137
|
}
|
|
117
|
-
|
|
138
|
+
return btoa(binary);
|
|
139
|
+
}
|
|
140
|
+
if (variants.length === 1 && variants[0].fontWeight === 'normal' && variants[0].fontStyle === 'normal') {
|
|
141
|
+
// Single normal variant — use simple url format
|
|
142
|
+
const b64 = toBase64(variants[0].data);
|
|
143
|
+
fonts.push({ fontFamily, url: `data:${variants[0].mime};base64,${b64}` });
|
|
144
|
+
}
|
|
145
|
+
else {
|
|
146
|
+
// Multiple variants — use styles array
|
|
147
|
+
const styles = variants.map((v) => {
|
|
148
|
+
const b64 = toBase64(v.data);
|
|
149
|
+
return {
|
|
150
|
+
src: `url("data:${v.mime};base64,${b64}")`,
|
|
151
|
+
fontWeight: v.fontWeight,
|
|
152
|
+
fontStyle: v.fontStyle,
|
|
153
|
+
};
|
|
154
|
+
});
|
|
155
|
+
fonts.push({ fontFamily, styles });
|
|
118
156
|
}
|
|
119
|
-
fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
|
|
120
157
|
}
|
|
121
158
|
return fonts;
|
|
122
159
|
}
|
package/lib/page-parser.js
CHANGED
|
@@ -558,7 +558,7 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
558
558
|
// Find dominant span (longest text)
|
|
559
559
|
const dominant = block.spans.reduce((a, b) => a.text.length > b.text.length ? a : b);
|
|
560
560
|
const fontFamily = fontRegistry.getFontFamily(dominant.fontName);
|
|
561
|
-
const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin);
|
|
561
|
+
const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin, blocks.map((b) => ({ x: b.x, width: b.width })));
|
|
562
562
|
const lineHeight = computeLineHeight(block.spans);
|
|
563
563
|
// Build text content with line breaks
|
|
564
564
|
const lineMap = new Map();
|
|
@@ -641,10 +641,14 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
641
641
|
else if (align === 'right') {
|
|
642
642
|
elemX -= extraWidth;
|
|
643
643
|
}
|
|
644
|
-
// Polotno rotates text around the top-left corner. For vertical text
|
|
645
|
-
//
|
|
644
|
+
// Polotno rotates text around the top-left corner. For vertical text we need
|
|
645
|
+
// to convert PDF baseline coordinates into Polotno element coordinates.
|
|
646
|
+
// The element's y-axis maps to the visual x-axis after rotation, so the
|
|
647
|
+
// half-leading and ascent offsets that normally adjust elemY must instead
|
|
648
|
+
// adjust elemX.
|
|
646
649
|
if (dominant.rotation <= -45 && dominant.rotation >= -135) {
|
|
647
|
-
|
|
650
|
+
const ascentPx = dominant.baselineY - dominant.y;
|
|
651
|
+
elemX = block.x - ascentPx - halfLeading;
|
|
648
652
|
elemY = dominant.baselineY;
|
|
649
653
|
}
|
|
650
654
|
else if (dominant.rotation >= 45 && dominant.rotation <= 135) {
|
|
@@ -671,14 +675,8 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
671
675
|
text: textContent2,
|
|
672
676
|
fontSize: dominant.fontSize,
|
|
673
677
|
fontFamily,
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
fontWeight: fontStrategy === 'embed'
|
|
677
|
-
? 'normal'
|
|
678
|
-
: dominant.fontWeight || extractWeightFromName(dominant.fontName),
|
|
679
|
-
fontStyle: fontStrategy === 'embed'
|
|
680
|
-
? 'normal'
|
|
681
|
-
: dominant.fontStyle || extractStyleFromName(dominant.fontName),
|
|
678
|
+
fontWeight: dominant.fontWeight || extractWeightFromName(dominant.fontName),
|
|
679
|
+
fontStyle: dominant.fontStyle || extractStyleFromName(dominant.fontName),
|
|
682
680
|
fill: dominant.color || '#000000',
|
|
683
681
|
align,
|
|
684
682
|
lineHeight,
|
package/lib/text-blocks.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import type { TextBlock, TextSpan } from './text-types.js';
|
|
2
2
|
export declare function groupSpansByBlock(spans: TextSpan[]): TextBlock[];
|
|
3
|
-
export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number
|
|
3
|
+
export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number, allBlocks?: {
|
|
4
|
+
x: number;
|
|
5
|
+
width: number;
|
|
6
|
+
}[]): string;
|
|
4
7
|
export declare function estimatePageMargins(spans: TextSpan[]): [number, number];
|
|
5
8
|
export declare function computeLineHeight(blockSpans: TextSpan[]): number;
|
|
6
9
|
//# sourceMappingURL=text-blocks.d.ts.map
|
package/lib/text-blocks.js
CHANGED
|
@@ -152,7 +152,7 @@ function splitSpansAtPositions(spans, splitPositions) {
|
|
|
152
152
|
return columns.filter((c) => c.length > 0);
|
|
153
153
|
}
|
|
154
154
|
// Alignment detection
|
|
155
|
-
export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin) {
|
|
155
|
+
export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin, allBlocks) {
|
|
156
156
|
const lines = new Map();
|
|
157
157
|
for (const span of blockSpans) {
|
|
158
158
|
const arr = lines.get(span.lineNo) || [];
|
|
@@ -185,10 +185,16 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
185
185
|
// Only classify as right-aligned if the line is short relative to the text
|
|
186
186
|
// area — a near-full-width line that happens to align with the right margin
|
|
187
187
|
// is more likely a paragraph line than a right-aligned label.
|
|
188
|
+
// Additional guard: if other blocks share this left edge, the text is in a
|
|
189
|
+
// left-aligned column and just happens to reach the right margin.
|
|
188
190
|
const lineWidth = x1 - x0;
|
|
191
|
+
const hasAlignedNeighbors = allBlocks &&
|
|
192
|
+
allBlocks.filter((b) => Math.abs(b.x - x0) < 3 && b.width !== lineWidth)
|
|
193
|
+
.length >= 2;
|
|
189
194
|
if (Math.abs(x1 - rightMargin) < rightTol &&
|
|
190
195
|
x0 > leftMargin + centerTol &&
|
|
191
|
-
lineWidth < textWidth * 0.6
|
|
196
|
+
lineWidth < textWidth * 0.6 &&
|
|
197
|
+
!hasAlignedNeighbors)
|
|
192
198
|
return 'right';
|
|
193
199
|
return 'left';
|
|
194
200
|
}
|
|
@@ -204,6 +210,11 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
204
210
|
const rightStd = stddev(rightEdges);
|
|
205
211
|
const midStd = stddev(midpoints);
|
|
206
212
|
const THRESHOLD = 3.0;
|
|
213
|
+
// For center detection, scale threshold with font size. A fixed 3px is fine
|
|
214
|
+
// for 12px text but too tight for large display text (e.g. 55px "BENEFIT OF"
|
|
215
|
+
// / "WAXING" has midpoint stddev 3.2 — clearly centered but misses 3.0).
|
|
216
|
+
const dominantFontSize = blockSpans.reduce((a, b) => a.text.length > b.text.length ? a : b).fontSize;
|
|
217
|
+
const centerThreshold = Math.max(THRESHOLD, dominantFontSize * 0.1);
|
|
207
218
|
if (leftStd < THRESHOLD && rightStd < THRESHOLD)
|
|
208
219
|
return 'justify';
|
|
209
220
|
// Justify detection with tolerance for short lines: last lines of paragraphs,
|
|
@@ -219,7 +230,7 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
219
230
|
return 'justify';
|
|
220
231
|
}
|
|
221
232
|
}
|
|
222
|
-
if (midStd <
|
|
233
|
+
if (midStd < centerThreshold && midStd <= leftStd && midStd <= rightStd)
|
|
223
234
|
return 'center';
|
|
224
235
|
if (rightStd < THRESHOLD && rightStd < leftStd)
|
|
225
236
|
return 'right';
|
package/lib/text-layout.js
CHANGED
|
@@ -276,7 +276,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
276
276
|
const fontSizeRatio = Math.max(prevFontSize, currFontSize) /
|
|
277
277
|
Math.min(prevFontSize, currFontSize);
|
|
278
278
|
const onSameLine = prev.spans[0].lineNo === curr.spans[0].lineNo;
|
|
279
|
-
const fontSizeChanged = !onSameLine && fontSizeRatio > 1.
|
|
279
|
+
const fontSizeChanged = !onSameLine && fontSizeRatio > 1.08;
|
|
280
280
|
// Color change between lines signals different text elements (e.g. diagram
|
|
281
281
|
// labels in different colors: gray "Force control signal" vs black "Inter-neurons").
|
|
282
282
|
// Only split on color when X overlap is weak — paragraph lines have strong overlap
|