@polotno/pdf-import 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import opentype from 'opentype.js';
2
- import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
2
+ import { mapPdfFont, isKnownWebFont, extractWeightFromName, extractStyleFromName, } from './font-mapper.js';
3
3
  import { findClosestGoogleFont } from './font-matcher.js';
4
4
  import { mergeSubsetFonts } from './font-merger.js';
5
5
  export class FontRegistry {
@@ -35,19 +35,35 @@ export class FontRegistry {
35
35
  return;
36
36
  const mappedFamily = mapPdfFont(fontObj.name);
37
37
  const isGoogleFont = isKnownWebFont(fontObj.name);
38
+ const hasEmbeddedData = fontObj.data && fontObj.data.length > 0;
38
39
  const shouldEmbed = embedAllFonts || !isGoogleFont;
39
40
  // When embedding a known Google Font, rename to avoid Polotno loading
40
41
  // the Google version instead of the embedded subset.
41
- const fontFamily = embedAllFonts && isGoogleFont ? `${mappedFamily} (PDF)` : mappedFamily;
42
+ // Only rename if the font actually has embedded data — standard PDF fonts
43
+ // (Helvetica, Times, Courier, etc.) have no data, so they won't appear
44
+ // in the fonts[] array (all doc fonts must be declared there). Without an
45
+ // entry in fonts[], there's no collision risk, and keeping the original
46
+ // name lets Polotno load the font normally.
47
+ const fontFamily = embedAllFonts && isGoogleFont && hasEmbeddedData
48
+ ? `${mappedFamily} (PDF)`
49
+ : mappedFamily;
42
50
  // Track the rename so text elements can use the correct fontFamily
43
51
  if (fontFamily !== mappedFamily) {
44
52
  this.renameMap.set(fontObj.name, fontFamily);
45
53
  }
46
- // Collect font binary data
54
+ // Detect weight/style from the PDF font name
55
+ const fontWeight = extractWeightFromName(fontObj.name);
56
+ const fontStyle = extractStyleFromName(fontObj.name);
57
+ // Collect font binary data with weight/style info
47
58
  if (shouldEmbed && fontObj.data && fontObj.data.length > 0) {
48
59
  const mime = fontObj.mimetype || 'font/opentype';
49
60
  const arr = this.fontDataMap.get(fontFamily) || [];
50
- arr.push({ mime, data: new Uint8Array(fontObj.data) });
61
+ arr.push({
62
+ mime,
63
+ data: new Uint8Array(fontObj.data),
64
+ fontWeight,
65
+ fontStyle,
66
+ });
51
67
  this.fontDataMap.set(fontFamily, arr);
52
68
  }
53
69
  // Collect font metrics for unknown fonts (for Google Font matching)
@@ -93,30 +109,59 @@ export class FontRegistry {
93
109
  return fonts;
94
110
  }
95
111
  // 'embed' strategy: embed font data as base64 data URIs.
96
- // When multiple subsets exist, merge them into a single font.
97
112
  for (const [fontFamily, blobs] of this.fontDataMap) {
98
- let fontData;
99
- let mime;
100
- if (blobs.length === 1) {
101
- fontData = blobs[0].data;
102
- mime = blobs[0].mime;
103
- }
104
- else {
105
- fontData = mergeSubsetFonts(blobs.map((b) => b.data));
106
- mime = blobs[0].mime;
113
+ // Group blobs by weight+style variant
114
+ const variantMap = new Map();
115
+ for (const blob of blobs) {
116
+ const key = `${blob.fontWeight}|${blob.fontStyle}`;
117
+ const arr = variantMap.get(key) || [];
118
+ arr.push(blob);
119
+ variantMap.set(key, arr);
107
120
  }
108
- let b64;
109
- if (typeof Buffer !== 'undefined') {
110
- b64 = Buffer.from(fontData).toString('base64');
121
+ // When multiple subsets exist for the same variant, merge them.
122
+ const variants = [];
123
+ for (const [, variantBlobs] of variantMap) {
124
+ let fontData;
125
+ if (variantBlobs.length === 1) {
126
+ fontData = variantBlobs[0].data;
127
+ }
128
+ else {
129
+ fontData = mergeSubsetFonts(variantBlobs.map((b) => b.data));
130
+ }
131
+ variants.push({
132
+ fontWeight: variantBlobs[0].fontWeight,
133
+ fontStyle: variantBlobs[0].fontStyle,
134
+ data: fontData,
135
+ mime: variantBlobs[0].mime,
136
+ });
111
137
  }
112
- else {
138
+ function toBase64(data) {
139
+ if (typeof Buffer !== 'undefined') {
140
+ return Buffer.from(data).toString('base64');
141
+ }
113
142
  let binary = '';
114
- for (let bi = 0; bi < fontData.length; bi++) {
115
- binary += String.fromCharCode(fontData[bi]);
143
+ for (let bi = 0; bi < data.length; bi++) {
144
+ binary += String.fromCharCode(data[bi]);
116
145
  }
117
- b64 = btoa(binary);
146
+ return btoa(binary);
147
+ }
148
+ if (variants.length === 1 && variants[0].fontWeight === 'normal' && variants[0].fontStyle === 'normal') {
149
+ // Single normal variant — use simple url format
150
+ const b64 = toBase64(variants[0].data);
151
+ fonts.push({ fontFamily, url: `data:${variants[0].mime};base64,${b64}` });
152
+ }
153
+ else {
154
+ // Multiple variants, or a single non-normal (e.g. bold/italic) variant — use styles array
155
+ const styles = variants.map((v) => {
156
+ const b64 = toBase64(v.data);
157
+ return {
158
+ src: `url("data:${v.mime};base64,${b64}")`,
159
+ fontWeight: v.fontWeight,
160
+ fontStyle: v.fontStyle,
161
+ };
162
+ });
163
+ fonts.push({ fontFamily, styles });
118
164
  }
119
- fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
120
165
  }
121
166
  return fonts;
122
167
  }
package/lib/index.js CHANGED
@@ -1 +1 @@
1
- import{getDocument as x,GlobalWorkerOptions as m}from"pdfjs-dist/legacy/build/pdf.mjs";import{parsePage as R}from"./page-parser.js";import{FontRegistry as I}from"./font-registry.js";import{buildJpegIndex as O}from"./pdf-image-extractor.js";import{workerSource as k}from"./generated/pdf-worker-source.js";let A=0;function C(){return`el_${Date.now()}_${++A}`}async function L({pdf:e,fontStrategy:s="embed"}){if(typeof window<"u"&&!m.workerSrc){const r=new Blob([k],{type:"application/javascript"});m.workerSrc=URL.createObjectURL(r)}const i=new Uint8Array(e instanceof ArrayBuffer?e:e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)),d=O(i),o=await x({data:i,useSystemFonts:!0,disableFontFace:!0,fontExtraProperties:!0}).promise,c=new I;let g=612,p=792;const f=await o.getPage(1),u=f.getViewport({scale:1});g=u.width,p=u.height;const w=3,n=new Array(o.numPages);for(let r=0;r<o.numPages;r+=w){const h=Math.min(r+w,o.numPages),l=[];for(let t=r;t<h;t++)l.push((async()=>{const a=t===0?f:await o.getPage(t+1),{parsedPage:P}=await R({page:a,pageIdx:t,fontRegistry:c,generateId:C,jpegIndex:d,fontStrategy:s});return{parsedPage:P,pageIdx:t}})());const y=await Promise.all(l);for(const{parsedPage:t,pageIdx:a}of y)n[a]=t}await o.destroy();const b=c.finalize(s,n);return{width:g,height:p,fonts:b,pages:n,unit:"px",dpi:72}}export{L as pdfToJson};
1
+ import{getDocument as I,GlobalWorkerOptions as b}from"pdfjs-dist/legacy/build/pdf.mjs";import{parsePage as O}from"./page-parser.js";import{FontRegistry as k}from"./font-registry.js";import{buildJpegIndex as A}from"./pdf-image-extractor.js";import{workerSource as C}from"./generated/pdf-worker-source.js";let E=0;function F(){return`el_${Date.now()}_${++E}`}async function _({pdf:e,fontStrategy:g="embed"}){if(typeof window<"u"&&!b.workerSrc){const r=new Blob([C],{type:"application/javascript"});b.workerSrc=URL.createObjectURL(r)}const p=new Uint8Array(e instanceof ArrayBuffer?e:e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)),y=A(p),o=await I({data:p,useSystemFonts:!0,disableFontFace:!0,fontExtraProperties:!0}).promise,f=new k;let n=612,a=792;const u=await o.getPage(1),w=u.getViewport({scale:1});n=w.width,a=w.height;const d=3,i=new Array(o.numPages);for(let r=0;r<o.numPages;r+=d){const x=Math.min(r+d,o.numPages),h=[];for(let t=r;t<x;t++)h.push((async()=>{const s=t===0?u:await o.getPage(t+1),{parsedPage:c,pageWidth:l,pageHeight:m}=await O({page:s,pageIdx:t,fontRegistry:f,generateId:F,jpegIndex:y,fontStrategy:g});return(l!==n||m!==a)&&(c.width=l,c.height=m),{parsedPage:c,pageIdx:t}})());const R=await Promise.all(h);for(const{parsedPage:t,pageIdx:s}of R)i[s]=t}await o.destroy();const P=f.finalize(g,i);return{width:n,height:a,fonts:P,pages:i,unit:"px",dpi:72}}export{_ as pdfToJson};
@@ -27,8 +27,8 @@ export async function parsePage({ page, pageIdx, fontRegistry, generateId, jpegI
27
27
  buildImageElements(page, imageRefs, pageIdx, generateId, jpegIndex),
28
28
  collectPageFonts(page, fontRefs, fontRegistry, fontStrategy === 'embed'),
29
29
  ]);
30
- const pageBackground = detectPageBackground(drawings, pageWidth, pageHeight);
31
- const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId);
30
+ const { color: pageBackground, bgDrawingIndices } = detectPageBackground(drawings, pageWidth, pageHeight);
31
+ const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices);
32
32
  const textElements = await buildTextElements({
33
33
  page,
34
34
  pageWidth,
@@ -113,26 +113,86 @@ async function resolveDrawingGradients(page, drawings) {
113
113
  }
114
114
  }
115
115
  function detectPageBackground(drawings, pageWidth, pageHeight) {
116
+ const bgDrawingIndices = new Set();
116
117
  let pageBackground = '#FFFFFF';
117
- for (const drawing of drawings) {
118
+ for (let i = 0; i < drawings.length; i++) {
119
+ const drawing = drawings[i];
118
120
  if (drawing.fill !== null) {
119
121
  const dw = drawing.rect[2] - drawing.rect[0];
120
122
  const dh = drawing.rect[3] - drawing.rect[1];
121
- if (dw >= pageWidth * 0.9 && dh >= pageHeight * 0.9) {
123
+ if (dw >= pageWidth * 0.95 && dh >= pageHeight * 0.95) {
122
124
  const [r, g, b] = drawing.fill;
123
125
  pageBackground = rgbTupleToHex(r, g, b);
126
+ bgDrawingIndices.add(i);
124
127
  }
125
128
  }
126
129
  }
127
- return pageBackground;
130
+ return { color: pageBackground, bgDrawingIndices };
131
+ }
132
/**
 * Recognise a crop/bleed-mark drawing and break it into one small SVG per
 * line segment, so a single page-sized element does not block clicks.
 *
 * A drawing qualifies only when it is stroke-only (no fill), its rect spans
 * at least 80% of the page in both dimensions, and every non-move item is a
 * short line (at most 15% of the smaller page dimension) whose start point
 * lies within that same distance of a page edge.
 *
 * @param {object} drawing - Drawing; `fill`, `stroke`, `strokeWidth`, `rect` and `items` are read.
 * @param {number} pageWidth - Page width, in the same units as the item coordinates.
 * @param {number} pageHeight - Page height, in the same units as the item coordinates.
 * @returns {Array<{svg: string, x: number, y: number, width: number, height: number}>|null}
 *   One entry per line segment, or null when the drawing is not a crop-mark set.
 */
function trySplitCropMarks(drawing, pageWidth, pageHeight) {
    const isStrokeOnly = drawing.fill === null && Boolean(drawing.stroke);
    if (!isStrokeOnly) {
        return null;
    }
    const spanX = drawing.rect[2] - drawing.rect[0];
    const spanY = drawing.rect[3] - drawing.rect[1];
    const tooSmall = spanX < pageWidth * 0.8 || spanY < pageHeight * 0.8;
    if (tooSmall) {
        return null;
    }
    // Validate every item and collect the line segments in a single pass.
    const maxReach = Math.min(pageWidth, pageHeight) * 0.15;
    const segments = [];
    for (const item of drawing.items) {
        if (item.kind === 'm') {
            continue;
        }
        if (item.kind !== 'l') {
            return null;
        }
        if (Math.hypot(item.x2 - item.x1, item.y2 - item.y1) > maxReach) {
            return null;
        }
        const hugsLeftOrRight = item.x1 < maxReach || item.x1 > pageWidth - maxReach;
        const hugsTopOrBottom = item.y1 < maxReach || item.y1 > pageHeight - maxReach;
        if (!(hugsLeftOrRight || hugsTopOrBottom)) {
            return null;
        }
        segments.push(item);
    }
    if (segments.length === 0) {
        return null;
    }
    const strokeHex = rgbTupleToHex(...drawing.stroke);
    const sw = drawing.strokeWidth || 1;
    return segments.map((seg) => {
        const minX = Math.min(seg.x1, seg.x2);
        const minY = Math.min(seg.y1, seg.y2);
        const maxX = Math.max(seg.x1, seg.x2);
        const maxY = Math.max(seg.y1, seg.y2);
        // Pad the box by half the stroke on each side so the stroke is not cut
        // off, and keep it at least stroke-width + 1 in each dimension.
        const originX = minX - sw / 2;
        const originY = minY - sw / 2;
        const boxW = Math.max(maxX - minX + sw, sw + 1);
        const boxH = Math.max(maxY - minY + sw, sw + 1);
        const svg = `<svg viewBox="0 0 ${boxW} ${boxH}" xmlns="http://www.w3.org/2000/svg"><line x1="${seg.x1 - originX}" y1="${seg.y1 - originY}" x2="${seg.x2 - originX}" y2="${seg.y2 - originY}" stroke="${strokeHex}" stroke-width="${sw}"/></svg>`;
        return { svg, x: originX, y: originY, width: boxW, height: boxH };
    });
}
129
- function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
186
+ function buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices) {
130
187
  const svgElements = [];
131
188
  for (let idx = 0; idx < drawings.length; idx++) {
132
189
  const drawing = drawings[idx];
133
190
  // Skip fully transparent drawings (e.g. accessibility marker rectangles)
134
191
  if (drawing.opacity <= 0)
135
192
  continue;
193
+ // Skip drawings used as the page background
194
+ if (bgDrawingIndices?.has(idx))
195
+ continue;
136
196
  if (isMergeableClipRunDrawing(drawing)) {
137
197
  const run = [drawing];
138
198
  while (idx + 1 < drawings.length &&
@@ -161,6 +221,27 @@ function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
161
221
  }
162
222
  }
163
223
  }
224
+ // Split full-page crop/bleed marks into per-segment SVGs so they don't
225
+ // block clicks on the entire page in the editor.
226
+ const splitResults = trySplitCropMarks(drawing, pageWidth, pageHeight);
227
+ if (splitResults) {
228
+ for (const seg of splitResults) {
229
+ svgElements.push({
230
+ type: 'svg',
231
+ id: generateId(),
232
+ x: seg.x,
233
+ y: seg.y,
234
+ width: seg.width,
235
+ height: seg.height,
236
+ rotation: 0,
237
+ opacity: drawing.opacity,
238
+ src: svgToDataUri(seg.svg),
239
+ name: '',
240
+ _order: drawing.orderIndex,
241
+ });
242
+ }
243
+ continue;
244
+ }
164
245
  const result = drawingToSvg(drawing, pageWidth, pageHeight);
165
246
  if (result) {
166
247
  svgElements.push({
@@ -558,7 +639,7 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
558
639
  // Find dominant span (longest text)
559
640
  const dominant = block.spans.reduce((a, b) => a.text.length > b.text.length ? a : b);
560
641
  const fontFamily = fontRegistry.getFontFamily(dominant.fontName);
561
- const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin);
642
+ const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin, blocks.map((b) => ({ x: b.x, width: b.width })));
562
643
  const lineHeight = computeLineHeight(block.spans);
563
644
  // Build text content with line breaks
564
645
  const lineMap = new Map();
@@ -641,10 +722,14 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
641
722
  else if (align === 'right') {
642
723
  elemX -= extraWidth;
643
724
  }
644
- // Polotno rotates text around the top-left corner. For vertical text, anchor
645
- // against the baseline/right edge so the rotated box stays in the same place.
725
+ // Polotno rotates text around the top-left corner. For vertical text we need
726
+ // to convert PDF baseline coordinates into Polotno element coordinates.
727
+ // The element's y-axis maps to the visual x-axis after rotation, so the
728
+ // half-leading and ascent offsets that normally adjust elemY must instead
729
+ // adjust elemX.
646
730
  if (dominant.rotation <= -45 && dominant.rotation >= -135) {
647
- elemX -= elemHeight;
731
+ const ascentPx = dominant.baselineY - dominant.y;
732
+ elemX = block.x - ascentPx - halfLeading;
648
733
  elemY = dominant.baselineY;
649
734
  }
650
735
  else if (dominant.rotation >= 45 && dominant.rotation <= 135) {
@@ -671,14 +756,8 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
671
756
  text: textContent2,
672
757
  fontSize: dominant.fontSize,
673
758
  fontFamily,
674
- // When embedding fonts, the subset file IS the specific variant (bold,
675
- // italic, etc.), so use "normal" to avoid Polotno synthesizing on top.
676
- fontWeight: fontStrategy === 'embed'
677
- ? 'normal'
678
- : dominant.fontWeight || extractWeightFromName(dominant.fontName),
679
- fontStyle: fontStrategy === 'embed'
680
- ? 'normal'
681
- : dominant.fontStyle || extractStyleFromName(dominant.fontName),
759
+ fontWeight: dominant.fontWeight || extractWeightFromName(dominant.fontName),
760
+ fontStyle: dominant.fontStyle || extractStyleFromName(dominant.fontName),
682
761
  fill: dominant.color || '#000000',
683
762
  align,
684
763
  lineHeight,
@@ -30,8 +30,8 @@ function computeDrawingBounds(drawing, pageWidth, pageHeight) {
30
30
  if (drawing.fill !== null &&
31
31
  pageWidth > 0 &&
32
32
  pageHeight > 0 &&
33
- width >= pageWidth * 0.9 &&
34
- height >= pageHeight * 0.9) {
33
+ width >= pageWidth * 0.95 &&
34
+ height >= pageHeight * 0.95) {
35
35
  return null;
36
36
  }
37
37
  return { x0, y0, width, height };
@@ -86,6 +86,28 @@ function buildPathData(items, originX, originY, shouldCloseFill, closePath) {
86
86
  }
87
87
  return pathParts.join(' ');
88
88
  }
89
/**
 * Check if a clip path is a simple axis-aligned rectangle,
 * regardless of whether it's represented as `re` or `m`/`l` segments.
 *
 * For the line form, every line must be horizontal or vertical AND all line
 * endpoints must fall on exactly two distinct x values and two distinct y
 * values (within 0.5 units). Axis-alignment alone is not enough: L-shaped or
 * staircase polygons, and the straight edges of a rounded rectangle, are
 * axis-aligned too but are not rectangles.
 *
 * @param {Array<object>} items - Clip path items; `kind` is read, and
 *   `x1`/`y1`/`x2`/`y2` are read on items whose kind is 'l'.
 * @returns {boolean} True when the path is a plain axis-aligned rectangle.
 */
function isRectangularClipPath(items) {
    if (items.length === 1 && items[0].kind === 're')
        return true;
    // Need at least 4 line segments (plus optional move) to form a rectangle
    const lines = items.filter((it) => it.kind === 'l');
    if (lines.length < 4)
        return false;
    const TOLERANCE = 0.5;
    const xs = [];
    const ys = [];
    // Record a coordinate unless it matches one already seen (within tolerance).
    const remember = (seen, value) => {
        if (!seen.some((known) => Math.abs(known - value) <= TOLERANCE))
            seen.push(value);
    };
    for (const line of lines) {
        const dx = Math.abs(line.x2 - line.x1);
        const dy = Math.abs(line.y2 - line.y1);
        if (dx > TOLERANCE && dy > TOLERANCE)
            return false; // diagonal line = not rect
        remember(xs, line.x1);
        remember(xs, line.x2);
        remember(ys, line.y1);
        remember(ys, line.y2);
        // A rectangle has only two vertical and two horizontal edge positions.
        if (xs.length > 2 || ys.length > 2)
            return false;
    }
    return xs.length === 2 && ys.length === 2;
}
89
111
  export function drawingToSvg(drawing, pageWidth, pageHeight) {
90
112
  const bounds = computeDrawingBounds(drawing, pageWidth, pageHeight);
91
113
  if (!bounds)
@@ -150,7 +172,39 @@ export function drawingToSvg(drawing, pageWidth, pageHeight) {
150
172
  fillValue = 'url(#g)';
151
173
  }
152
174
  }
153
- const svgStr = `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${defs}<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/></svg>`;
175
+ // Apply clip path if the drawing has a non-trivial one
176
+ let clipDef = '';
177
+ let clipAttr = '';
178
+ if (drawing.clipPath && drawing.clipRect) {
179
+ const [cx0, cy0, cx1, cy1] = drawing.clipRect;
180
+ const cw = cx1 - cx0;
181
+ const ch = cy1 - cy0;
182
+ // Only clip if it meaningfully reduces the visible area.
183
+ // A clip rect that covers 90%+ of the drawing in both dimensions is trivial.
184
+ const insetX = Math.max(0, cx0 - x0) + Math.max(0, (x0 + w) - cx1);
185
+ const insetY = Math.max(0, cy0 - y0) + Math.max(0, (y0 + h) - cy1);
186
+ const significantClip = cw > 0 && ch > 0 &&
187
+ (insetX > w * 0.1 || insetY > h * 0.1);
188
+ if (significantClip) {
189
+ // Check if clip is a simple axis-aligned rectangle (re or 4 line segments)
190
+ const isRectClip = isRectangularClipPath(drawing.clipPath);
191
+ if (!isRectClip) {
192
+ const clipD = buildPathData(drawing.clipPath, x0, y0, true, true);
193
+ clipDef = `<defs><clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
194
+ clipAttr = ' clip-path="url(#c)"';
195
+ // If both gradient defs and clip defs are needed, merge them
196
+ if (defs) {
197
+ clipDef = defs.replace('</defs>', '') + `<clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
198
+ defs = '';
199
+ }
200
+ }
201
+ }
202
+ }
203
+ const allDefs = clipDef || defs;
204
+ const pathEl = `<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/>`;
205
+ const svgStr = clipAttr
206
+ ? `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}<g${clipAttr}>${pathEl}</g></svg>`
207
+ : `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}${pathEl}</svg>`;
154
208
  return { svg: svgStr, x: x0, y: y0, width: w, height: h };
155
209
  }
156
210
  export function clippedDrawingsToSvg(drawings) {
@@ -1,6 +1,9 @@
1
1
  import type { TextBlock, TextSpan } from './text-types.js';
2
2
  export declare function groupSpansByBlock(spans: TextSpan[]): TextBlock[];
3
- export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number): string;
3
+ export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number, allBlocks?: {
4
+ x: number;
5
+ width: number;
6
+ }[]): string;
4
7
  export declare function estimatePageMargins(spans: TextSpan[]): [number, number];
5
8
  export declare function computeLineHeight(blockSpans: TextSpan[]): number;
6
9
  //# sourceMappingURL=text-blocks.d.ts.map
@@ -152,7 +152,7 @@ function splitSpansAtPositions(spans, splitPositions) {
152
152
  return columns.filter((c) => c.length > 0);
153
153
  }
154
154
  // Alignment detection
155
- export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin) {
155
+ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin, allBlocks) {
156
156
  const lines = new Map();
157
157
  for (const span of blockSpans) {
158
158
  const arr = lines.get(span.lineNo) || [];
@@ -185,10 +185,16 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
185
185
  // Only classify as right-aligned if the line is short relative to the text
186
186
  // area — a near-full-width line that happens to align with the right margin
187
187
  // is more likely a paragraph line than a right-aligned label.
188
+ // Additional guard: if other blocks share this left edge, the text is in a
189
+ // left-aligned column and just happens to reach the right margin.
188
190
  const lineWidth = x1 - x0;
191
+ const hasAlignedNeighbors = allBlocks &&
192
+ allBlocks.filter((b) => Math.abs(b.x - x0) < 3 && b.width !== lineWidth)
193
+ .length >= 2;
189
194
  if (Math.abs(x1 - rightMargin) < rightTol &&
190
195
  x0 > leftMargin + centerTol &&
191
- lineWidth < textWidth * 0.6)
196
+ lineWidth < textWidth * 0.6 &&
197
+ !hasAlignedNeighbors)
192
198
  return 'right';
193
199
  return 'left';
194
200
  }
@@ -204,6 +210,11 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
204
210
  const rightStd = stddev(rightEdges);
205
211
  const midStd = stddev(midpoints);
206
212
  const THRESHOLD = 3.0;
213
+ // For center detection, scale threshold with font size. A fixed 3px is fine
214
+ // for 12px text but too tight for large display text (e.g. 55px "BENEFIT OF"
215
+ // / "WAXING" has midpoint stddev 3.2 — clearly centered but misses 3.0).
216
+ const dominantFontSize = blockSpans.reduce((a, b) => a.text.length > b.text.length ? a : b).fontSize;
217
+ const centerThreshold = Math.max(THRESHOLD, dominantFontSize * 0.1);
207
218
  if (leftStd < THRESHOLD && rightStd < THRESHOLD)
208
219
  return 'justify';
209
220
  // Justify detection with tolerance for short lines: last lines of paragraphs,
@@ -219,7 +230,7 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
219
230
  return 'justify';
220
231
  }
221
232
  }
222
- if (midStd < THRESHOLD && midStd <= leftStd && midStd <= rightStd)
233
+ if (midStd < centerThreshold && midStd <= leftStd && midStd <= rightStd)
223
234
  return 'center';
224
235
  if (rightStd < THRESHOLD && rightStd < leftStd)
225
236
  return 'right';
@@ -197,8 +197,11 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
197
197
  // When font size clearly differs, spans are separate visual elements even if
198
198
  // close together (e.g. bold heading label next to body text list). Use a
199
199
  // smaller gap threshold for such cases.
200
+ const fontNameDiffers = prev.fontName !== curr.fontName;
200
201
  const fontSizeDiffers = fontRatio > 1.1;
201
- const effectiveSplitGap = fontSizeDiffers ? Math.max(avgFontSize * 0.5, 8) : splitGap;
202
+ const effectiveSplitGap = (fontSizeDiffers || fontNameDiffers)
203
+ ? Math.max(avgFontSize * 0.5, 8)
204
+ : splitGap;
202
205
  if (gap > effectiveSplitGap) {
203
206
  const forceSplit = prev.color !== curr.color ||
204
207
  prev.fontName !== curr.fontName ||
@@ -276,7 +279,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
276
279
  const fontSizeRatio = Math.max(prevFontSize, currFontSize) /
277
280
  Math.min(prevFontSize, currFontSize);
278
281
  const onSameLine = prev.spans[0].lineNo === curr.spans[0].lineNo;
279
- const fontSizeChanged = !onSameLine && fontSizeRatio > 1.15;
282
+ const fontSizeChanged = !onSameLine && fontSizeRatio > 1.08;
280
283
  // Color change between lines signals different text elements (e.g. diagram
281
284
  // labels in different colors: gray "Force control signal" vs black "Inter-neurons").
282
285
  // Only split on color when X overlap is weak — paragraph lines have strong overlap
@@ -303,7 +306,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
303
306
  if (!fontSizeChanged &&
304
307
  !colorChanged &&
305
308
  !largeSingleSpanDisplayShift &&
306
- yGap < avgFontSize * 0.5 &&
309
+ yGap <= avgFontSize * 0.5 &&
307
310
  (xOverlap > 0 || Math.abs(prev.x0 - curr.x0) < avgFontSize * 2)) {
308
311
  currentBlock.push(curr);
309
312
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polotno/pdf-import",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "Convert PDF files into Polotno JSON format",
5
5
  "type": "module",
6
6
  "main": "./lib/index.js",
@@ -28,19 +28,19 @@
28
28
  "pdfjs-dist": "^4.10.38"
29
29
  },
30
30
  "devDependencies": {
31
- "@types/node": "^25.3.3",
31
+ "@types/node": "^25.5.0",
32
32
  "@types/react": "^19.2.14",
33
33
  "@types/react-dom": "^19.2.3",
34
- "@vitejs/plugin-react": "^5.1.4",
35
- "esbuild": "^0.27.3",
36
- "polotno": "^2.37.1",
37
- "polotno-node": "^2.15.13",
34
+ "@vitejs/plugin-react": "^6.0.1",
35
+ "esbuild": "^0.27.4",
36
+ "polotno": "^2.38.2",
37
+ "polotno-node": "^2.15.15",
38
38
  "react": "^18.3.1",
39
39
  "react-dom": "^18",
40
40
  "sharp": "^0.34.5",
41
41
  "ssim.js": "^3.5.0",
42
42
  "typescript": "~5.9.3",
43
- "vite": "^7.3.1",
44
- "vitest": "^4.0.18"
43
+ "vite": "^8.0.0",
44
+ "vitest": "^4.1.0"
45
45
  }
46
46
  }