@polotno/pdf-import 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import opentype from 'opentype.js';
2
- import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
2
+ import { mapPdfFont, isKnownWebFont, extractWeightFromName, extractStyleFromName, } from './font-mapper.js';
3
3
  import { findClosestGoogleFont } from './font-matcher.js';
4
4
  import { mergeSubsetFonts } from './font-merger.js';
5
5
  export class FontRegistry {
@@ -43,11 +43,19 @@ export class FontRegistry {
43
43
  if (fontFamily !== mappedFamily) {
44
44
  this.renameMap.set(fontObj.name, fontFamily);
45
45
  }
46
- // Collect font binary data
46
+ // Detect weight/style from the PDF font name
47
+ const fontWeight = extractWeightFromName(fontObj.name);
48
+ const fontStyle = extractStyleFromName(fontObj.name);
49
+ // Collect font binary data with weight/style info
47
50
  if (shouldEmbed && fontObj.data && fontObj.data.length > 0) {
48
51
  const mime = fontObj.mimetype || 'font/opentype';
49
52
  const arr = this.fontDataMap.get(fontFamily) || [];
50
- arr.push({ mime, data: new Uint8Array(fontObj.data) });
53
+ arr.push({
54
+ mime,
55
+ data: new Uint8Array(fontObj.data),
56
+ fontWeight,
57
+ fontStyle,
58
+ });
51
59
  this.fontDataMap.set(fontFamily, arr);
52
60
  }
53
61
  // Collect font metrics for unknown fonts (for Google Font matching)
@@ -93,30 +101,59 @@ export class FontRegistry {
93
101
  return fonts;
94
102
  }
95
103
  // 'embed' strategy: embed font data as base64 data URIs.
96
- // When multiple subsets exist, merge them into a single font.
97
104
  for (const [fontFamily, blobs] of this.fontDataMap) {
98
- let fontData;
99
- let mime;
100
- if (blobs.length === 1) {
101
- fontData = blobs[0].data;
102
- mime = blobs[0].mime;
103
- }
104
- else {
105
- fontData = mergeSubsetFonts(blobs.map((b) => b.data));
106
- mime = blobs[0].mime;
105
+ // Group blobs by weight+style variant
106
+ const variantMap = new Map();
107
+ for (const blob of blobs) {
108
+ const key = `${blob.fontWeight}|${blob.fontStyle}`;
109
+ const arr = variantMap.get(key) || [];
110
+ arr.push(blob);
111
+ variantMap.set(key, arr);
107
112
  }
108
- let b64;
109
- if (typeof Buffer !== 'undefined') {
110
- b64 = Buffer.from(fontData).toString('base64');
113
+ // When multiple subsets exist for the same variant, merge them.
114
+ const variants = [];
115
+ for (const [, variantBlobs] of variantMap) {
116
+ let fontData;
117
+ if (variantBlobs.length === 1) {
118
+ fontData = variantBlobs[0].data;
119
+ }
120
+ else {
121
+ fontData = mergeSubsetFonts(variantBlobs.map((b) => b.data));
122
+ }
123
+ variants.push({
124
+ fontWeight: variantBlobs[0].fontWeight,
125
+ fontStyle: variantBlobs[0].fontStyle,
126
+ data: fontData,
127
+ mime: variantBlobs[0].mime,
128
+ });
111
129
  }
112
- else {
130
+ function toBase64(data) {
131
+ if (typeof Buffer !== 'undefined') {
132
+ return Buffer.from(data).toString('base64');
133
+ }
113
134
  let binary = '';
114
- for (let bi = 0; bi < fontData.length; bi++) {
115
- binary += String.fromCharCode(fontData[bi]);
135
+ for (let bi = 0; bi < data.length; bi++) {
136
+ binary += String.fromCharCode(data[bi]);
116
137
  }
117
- b64 = btoa(binary);
138
+ return btoa(binary);
139
+ }
140
+ if (variants.length === 1 && variants[0].fontWeight === 'normal' && variants[0].fontStyle === 'normal') {
141
+ // Single normal variant — use simple url format
142
+ const b64 = toBase64(variants[0].data);
143
+ fonts.push({ fontFamily, url: `data:${variants[0].mime};base64,${b64}` });
144
+ }
145
+ else {
146
+ // Multiple variants, or a single non-normal variant — use styles array
147
+ const styles = variants.map((v) => {
148
+ const b64 = toBase64(v.data);
149
+ return {
150
+ src: `url("data:${v.mime};base64,${b64}")`,
151
+ fontWeight: v.fontWeight,
152
+ fontStyle: v.fontStyle,
153
+ };
154
+ });
155
+ fonts.push({ fontFamily, styles });
118
156
  }
119
- fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
120
157
  }
121
158
  return fonts;
122
159
  }
@@ -558,7 +558,7 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
558
558
  // Find dominant span (longest text)
559
559
  const dominant = block.spans.reduce((a, b) => a.text.length > b.text.length ? a : b);
560
560
  const fontFamily = fontRegistry.getFontFamily(dominant.fontName);
561
- const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin);
561
+ const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin, blocks.map((b) => ({ x: b.x, width: b.width })));
562
562
  const lineHeight = computeLineHeight(block.spans);
563
563
  // Build text content with line breaks
564
564
  const lineMap = new Map();
@@ -641,10 +641,14 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
641
641
  else if (align === 'right') {
642
642
  elemX -= extraWidth;
643
643
  }
644
- // Polotno rotates text around the top-left corner. For vertical text, anchor
645
- // against the baseline/right edge so the rotated box stays in the same place.
644
+ // Polotno rotates text around the top-left corner. For vertical text we need
645
+ // to convert PDF baseline coordinates into Polotno element coordinates.
646
+ // The element's y-axis maps to the visual x-axis after rotation, so the
647
+ // half-leading and ascent offsets that normally adjust elemY must instead
648
+ // adjust elemX.
646
649
  if (dominant.rotation <= -45 && dominant.rotation >= -135) {
647
- elemX -= elemHeight;
650
+ const ascentPx = dominant.baselineY - dominant.y;
651
+ elemX = block.x - ascentPx - halfLeading;
648
652
  elemY = dominant.baselineY;
649
653
  }
650
654
  else if (dominant.rotation >= 45 && dominant.rotation <= 135) {
@@ -671,14 +675,8 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
671
675
  text: textContent2,
672
676
  fontSize: dominant.fontSize,
673
677
  fontFamily,
674
- // When embedding fonts, the subset file IS the specific variant (bold,
675
- // italic, etc.), so use "normal" to avoid Polotno synthesizing on top.
676
- fontWeight: fontStrategy === 'embed'
677
- ? 'normal'
678
- : dominant.fontWeight || extractWeightFromName(dominant.fontName),
679
- fontStyle: fontStrategy === 'embed'
680
- ? 'normal'
681
- : dominant.fontStyle || extractStyleFromName(dominant.fontName),
678
+ fontWeight: dominant.fontWeight || extractWeightFromName(dominant.fontName),
679
+ fontStyle: dominant.fontStyle || extractStyleFromName(dominant.fontName),
682
680
  fill: dominant.color || '#000000',
683
681
  align,
684
682
  lineHeight,
@@ -1,6 +1,9 @@
1
1
  import type { TextBlock, TextSpan } from './text-types.js';
2
2
  export declare function groupSpansByBlock(spans: TextSpan[]): TextBlock[];
3
- export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number): string;
3
+ export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number, allBlocks?: {
4
+ x: number;
5
+ width: number;
6
+ }[]): string;
4
7
  export declare function estimatePageMargins(spans: TextSpan[]): [number, number];
5
8
  export declare function computeLineHeight(blockSpans: TextSpan[]): number;
6
9
  //# sourceMappingURL=text-blocks.d.ts.map
@@ -152,7 +152,7 @@ function splitSpansAtPositions(spans, splitPositions) {
152
152
  return columns.filter((c) => c.length > 0);
153
153
  }
154
154
  // Alignment detection
155
- export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin) {
155
+ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin, allBlocks) {
156
156
  const lines = new Map();
157
157
  for (const span of blockSpans) {
158
158
  const arr = lines.get(span.lineNo) || [];
@@ -185,10 +185,16 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
185
185
  // Only classify as right-aligned if the line is short relative to the text
186
186
  // area — a near-full-width line that happens to align with the right margin
187
187
  // is more likely a paragraph line than a right-aligned label.
188
+ // Additional guard: if at least two blocks of a different width start within
189
+ // 3 units of this left edge, treat it as a left-aligned column reaching the margin.
188
190
  const lineWidth = x1 - x0;
191
+ const hasAlignedNeighbors = allBlocks &&
192
+ allBlocks.filter((b) => Math.abs(b.x - x0) < 3 && b.width !== lineWidth)
193
+ .length >= 2;
189
194
  if (Math.abs(x1 - rightMargin) < rightTol &&
190
195
  x0 > leftMargin + centerTol &&
191
- lineWidth < textWidth * 0.6)
196
+ lineWidth < textWidth * 0.6 &&
197
+ !hasAlignedNeighbors)
192
198
  return 'right';
193
199
  return 'left';
194
200
  }
@@ -204,6 +210,11 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
204
210
  const rightStd = stddev(rightEdges);
205
211
  const midStd = stddev(midpoints);
206
212
  const THRESHOLD = 3.0;
213
+ // For center detection, scale threshold with font size. A fixed 3px is fine
214
+ // for 12px text but too tight for large display text (e.g. 55px "BENEFIT OF"
215
+ // / "WAXING" has midpoint stddev 3.2 — clearly centered but misses 3.0).
216
+ const dominantFontSize = blockSpans.reduce((a, b) => a.text.length > b.text.length ? a : b).fontSize;
217
+ const centerThreshold = Math.max(THRESHOLD, dominantFontSize * 0.1);
207
218
  if (leftStd < THRESHOLD && rightStd < THRESHOLD)
208
219
  return 'justify';
209
220
  // Justify detection with tolerance for short lines: last lines of paragraphs,
@@ -219,7 +230,7 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
219
230
  return 'justify';
220
231
  }
221
232
  }
222
- if (midStd < THRESHOLD && midStd <= leftStd && midStd <= rightStd)
233
+ if (midStd < centerThreshold && midStd <= leftStd && midStd <= rightStd)
223
234
  return 'center';
224
235
  if (rightStd < THRESHOLD && rightStd < leftStd)
225
236
  return 'right';
@@ -276,7 +276,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
276
276
  const fontSizeRatio = Math.max(prevFontSize, currFontSize) /
277
277
  Math.min(prevFontSize, currFontSize);
278
278
  const onSameLine = prev.spans[0].lineNo === curr.spans[0].lineNo;
279
- const fontSizeChanged = !onSameLine && fontSizeRatio > 1.15;
279
+ const fontSizeChanged = !onSameLine && fontSizeRatio > 1.08;
280
280
  // Color change between lines signals different text elements (e.g. diagram
281
281
  // labels in different colors: gray "Force control signal" vs black "Inter-neurons").
282
282
  // Only split on color when X overlap is weak — paragraph lines have strong overlap
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polotno/pdf-import",
3
- "version": "0.0.2",
3
+ "version": "0.0.3",
4
4
  "description": "Convert PDF files into Polotno JSON format",
5
5
  "type": "module",
6
6
  "main": "./lib/index.js",