@polotno/pdf-import 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/font-registry.js +67 -22
- package/lib/index.js +1 -1
- package/lib/page-parser.js +97 -18
- package/lib/svg-builder.js +57 -3
- package/lib/text-blocks.d.ts +4 -1
- package/lib/text-blocks.js +14 -3
- package/lib/text-layout.js +6 -3
- package/package.json +8 -8
package/lib/font-registry.js
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import opentype from 'opentype.js';
|
|
2
|
-
import { mapPdfFont, isKnownWebFont } from './font-mapper.js';
|
|
2
|
+
import { mapPdfFont, isKnownWebFont, extractWeightFromName, extractStyleFromName, } from './font-mapper.js';
|
|
3
3
|
import { findClosestGoogleFont } from './font-matcher.js';
|
|
4
4
|
import { mergeSubsetFonts } from './font-merger.js';
|
|
5
5
|
export class FontRegistry {
|
|
@@ -35,19 +35,35 @@ export class FontRegistry {
|
|
|
35
35
|
return;
|
|
36
36
|
const mappedFamily = mapPdfFont(fontObj.name);
|
|
37
37
|
const isGoogleFont = isKnownWebFont(fontObj.name);
|
|
38
|
+
const hasEmbeddedData = fontObj.data && fontObj.data.length > 0;
|
|
38
39
|
const shouldEmbed = embedAllFonts || !isGoogleFont;
|
|
39
40
|
// When embedding a known Google Font, rename to avoid Polotno loading
|
|
40
41
|
// the Google version instead of the embedded subset.
|
|
41
|
-
|
|
42
|
+
// Only rename if the font actually has embedded data — standard PDF fonts
|
|
43
|
+
// (Helvetica, Times, Courier, etc.) have no data, so they won't appear
|
|
44
|
+
// in the fonts[] array (all doc fonts must be declared there). Without an
|
|
45
|
+
// entry in fonts[], there's no collision risk, and keeping the original
|
|
46
|
+
// name lets Polotno load the font normally.
|
|
47
|
+
const fontFamily = embedAllFonts && isGoogleFont && hasEmbeddedData
|
|
48
|
+
? `${mappedFamily} (PDF)`
|
|
49
|
+
: mappedFamily;
|
|
42
50
|
// Track the rename so text elements can use the correct fontFamily
|
|
43
51
|
if (fontFamily !== mappedFamily) {
|
|
44
52
|
this.renameMap.set(fontObj.name, fontFamily);
|
|
45
53
|
}
|
|
46
|
-
//
|
|
54
|
+
// Detect weight/style from the PDF font name
|
|
55
|
+
const fontWeight = extractWeightFromName(fontObj.name);
|
|
56
|
+
const fontStyle = extractStyleFromName(fontObj.name);
|
|
57
|
+
// Collect font binary data with weight/style info
|
|
47
58
|
if (shouldEmbed && fontObj.data && fontObj.data.length > 0) {
|
|
48
59
|
const mime = fontObj.mimetype || 'font/opentype';
|
|
49
60
|
const arr = this.fontDataMap.get(fontFamily) || [];
|
|
50
|
-
arr.push({
|
|
61
|
+
arr.push({
|
|
62
|
+
mime,
|
|
63
|
+
data: new Uint8Array(fontObj.data),
|
|
64
|
+
fontWeight,
|
|
65
|
+
fontStyle,
|
|
66
|
+
});
|
|
51
67
|
this.fontDataMap.set(fontFamily, arr);
|
|
52
68
|
}
|
|
53
69
|
// Collect font metrics for unknown fonts (for Google Font matching)
|
|
@@ -93,30 +109,59 @@ export class FontRegistry {
|
|
|
93
109
|
return fonts;
|
|
94
110
|
}
|
|
95
111
|
// 'embed' strategy: embed font data as base64 data URIs.
|
|
96
|
-
// When multiple subsets exist, merge them into a single font.
|
|
97
112
|
for (const [fontFamily, blobs] of this.fontDataMap) {
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
fontData = mergeSubsetFonts(blobs.map((b) => b.data));
|
|
106
|
-
mime = blobs[0].mime;
|
|
113
|
+
// Group blobs by weight+style variant
|
|
114
|
+
const variantMap = new Map();
|
|
115
|
+
for (const blob of blobs) {
|
|
116
|
+
const key = `${blob.fontWeight}|${blob.fontStyle}`;
|
|
117
|
+
const arr = variantMap.get(key) || [];
|
|
118
|
+
arr.push(blob);
|
|
119
|
+
variantMap.set(key, arr);
|
|
107
120
|
}
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
121
|
+
// When multiple subsets exist for the same variant, merge them.
|
|
122
|
+
const variants = [];
|
|
123
|
+
for (const [, variantBlobs] of variantMap) {
|
|
124
|
+
let fontData;
|
|
125
|
+
if (variantBlobs.length === 1) {
|
|
126
|
+
fontData = variantBlobs[0].data;
|
|
127
|
+
}
|
|
128
|
+
else {
|
|
129
|
+
fontData = mergeSubsetFonts(variantBlobs.map((b) => b.data));
|
|
130
|
+
}
|
|
131
|
+
variants.push({
|
|
132
|
+
fontWeight: variantBlobs[0].fontWeight,
|
|
133
|
+
fontStyle: variantBlobs[0].fontStyle,
|
|
134
|
+
data: fontData,
|
|
135
|
+
mime: variantBlobs[0].mime,
|
|
136
|
+
});
|
|
111
137
|
}
|
|
112
|
-
|
|
138
|
+
function toBase64(data) {
|
|
139
|
+
if (typeof Buffer !== 'undefined') {
|
|
140
|
+
return Buffer.from(data).toString('base64');
|
|
141
|
+
}
|
|
113
142
|
let binary = '';
|
|
114
|
-
for (let bi = 0; bi <
|
|
115
|
-
binary += String.fromCharCode(
|
|
143
|
+
for (let bi = 0; bi < data.length; bi++) {
|
|
144
|
+
binary += String.fromCharCode(data[bi]);
|
|
116
145
|
}
|
|
117
|
-
|
|
146
|
+
return btoa(binary);
|
|
147
|
+
}
|
|
148
|
+
if (variants.length === 1 && variants[0].fontWeight === 'normal' && variants[0].fontStyle === 'normal') {
|
|
149
|
+
// Single normal variant — use simple url format
|
|
150
|
+
const b64 = toBase64(variants[0].data);
|
|
151
|
+
fonts.push({ fontFamily, url: `data:${variants[0].mime};base64,${b64}` });
|
|
152
|
+
}
|
|
153
|
+
else {
|
|
154
|
+
// Multiple variants — use styles array
|
|
155
|
+
const styles = variants.map((v) => {
|
|
156
|
+
const b64 = toBase64(v.data);
|
|
157
|
+
return {
|
|
158
|
+
src: `url("data:${v.mime};base64,${b64}")`,
|
|
159
|
+
fontWeight: v.fontWeight,
|
|
160
|
+
fontStyle: v.fontStyle,
|
|
161
|
+
};
|
|
162
|
+
});
|
|
163
|
+
fonts.push({ fontFamily, styles });
|
|
118
164
|
}
|
|
119
|
-
fonts.push({ fontFamily, url: `data:${mime};base64,${b64}` });
|
|
120
165
|
}
|
|
121
166
|
return fonts;
|
|
122
167
|
}
|
package/lib/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{getDocument as
|
|
1
|
+
import{getDocument as I,GlobalWorkerOptions as b}from"pdfjs-dist/legacy/build/pdf.mjs";import{parsePage as O}from"./page-parser.js";import{FontRegistry as k}from"./font-registry.js";import{buildJpegIndex as A}from"./pdf-image-extractor.js";import{workerSource as C}from"./generated/pdf-worker-source.js";let E=0;function F(){return`el_${Date.now()}_${++E}`}async function _({pdf:e,fontStrategy:g="embed"}){if(typeof window<"u"&&!b.workerSrc){const r=new Blob([C],{type:"application/javascript"});b.workerSrc=URL.createObjectURL(r)}const p=new Uint8Array(e instanceof ArrayBuffer?e:e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)),y=A(p),o=await I({data:p,useSystemFonts:!0,disableFontFace:!0,fontExtraProperties:!0}).promise,f=new k;let n=612,a=792;const u=await o.getPage(1),w=u.getViewport({scale:1});n=w.width,a=w.height;const d=3,i=new Array(o.numPages);for(let r=0;r<o.numPages;r+=d){const x=Math.min(r+d,o.numPages),h=[];for(let t=r;t<x;t++)h.push((async()=>{const s=t===0?u:await o.getPage(t+1),{parsedPage:c,pageWidth:l,pageHeight:m}=await O({page:s,pageIdx:t,fontRegistry:f,generateId:F,jpegIndex:y,fontStrategy:g});return(l!==n||m!==a)&&(c.width=l,c.height=m),{parsedPage:c,pageIdx:t}})());const R=await Promise.all(h);for(const{parsedPage:t,pageIdx:s}of R)i[s]=t}await o.destroy();const P=f.finalize(g,i);return{width:n,height:a,fonts:P,pages:i,unit:"px",dpi:72}}export{_ as pdfToJson};
|
package/lib/page-parser.js
CHANGED
|
@@ -27,8 +27,8 @@ export async function parsePage({ page, pageIdx, fontRegistry, generateId, jpegI
|
|
|
27
27
|
buildImageElements(page, imageRefs, pageIdx, generateId, jpegIndex),
|
|
28
28
|
collectPageFonts(page, fontRefs, fontRegistry, fontStrategy === 'embed'),
|
|
29
29
|
]);
|
|
30
|
-
const pageBackground = detectPageBackground(drawings, pageWidth, pageHeight);
|
|
31
|
-
const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId);
|
|
30
|
+
const { color: pageBackground, bgDrawingIndices } = detectPageBackground(drawings, pageWidth, pageHeight);
|
|
31
|
+
const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices);
|
|
32
32
|
const textElements = await buildTextElements({
|
|
33
33
|
page,
|
|
34
34
|
pageWidth,
|
|
@@ -113,26 +113,86 @@ async function resolveDrawingGradients(page, drawings) {
|
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
115
|
function detectPageBackground(drawings, pageWidth, pageHeight) {
|
|
116
|
+
const bgDrawingIndices = new Set();
|
|
116
117
|
let pageBackground = '#FFFFFF';
|
|
117
|
-
for (
|
|
118
|
+
for (let i = 0; i < drawings.length; i++) {
|
|
119
|
+
const drawing = drawings[i];
|
|
118
120
|
if (drawing.fill !== null) {
|
|
119
121
|
const dw = drawing.rect[2] - drawing.rect[0];
|
|
120
122
|
const dh = drawing.rect[3] - drawing.rect[1];
|
|
121
|
-
if (dw >= pageWidth * 0.
|
|
123
|
+
if (dw >= pageWidth * 0.95 && dh >= pageHeight * 0.95) {
|
|
122
124
|
const [r, g, b] = drawing.fill;
|
|
123
125
|
pageBackground = rgbTupleToHex(r, g, b);
|
|
126
|
+
bgDrawingIndices.add(i);
|
|
124
127
|
}
|
|
125
128
|
}
|
|
126
129
|
}
|
|
127
|
-
return pageBackground;
|
|
130
|
+
return { color: pageBackground, bgDrawingIndices };
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Detect crop/bleed mark drawings (full-page, stroke-only, short edge lines)
|
|
134
|
+
* and split them into individual per-line SVGs so they don't block clicks.
|
|
135
|
+
*/
|
|
136
|
+
function trySplitCropMarks(drawing, pageWidth, pageHeight) {
|
|
137
|
+
if (drawing.fill !== null)
|
|
138
|
+
return null;
|
|
139
|
+
if (!drawing.stroke)
|
|
140
|
+
return null;
|
|
141
|
+
const dw = drawing.rect[2] - drawing.rect[0];
|
|
142
|
+
const dh = drawing.rect[3] - drawing.rect[1];
|
|
143
|
+
if (dw < pageWidth * 0.8 || dh < pageHeight * 0.8)
|
|
144
|
+
return null;
|
|
145
|
+
const lines = drawing.items.filter((it) => it.kind === 'l');
|
|
146
|
+
if (lines.length === 0)
|
|
147
|
+
return null;
|
|
148
|
+
// All line items must be short and near page edges
|
|
149
|
+
const edgeThreshold = Math.min(pageWidth, pageHeight) * 0.15;
|
|
150
|
+
for (const item of drawing.items) {
|
|
151
|
+
if (item.kind === 'm')
|
|
152
|
+
continue;
|
|
153
|
+
if (item.kind !== 'l')
|
|
154
|
+
return null;
|
|
155
|
+
const len = Math.hypot(item.x2 - item.x1, item.y2 - item.y1);
|
|
156
|
+
if (len > edgeThreshold)
|
|
157
|
+
return null;
|
|
158
|
+
const nearEdge = item.x1 < edgeThreshold || item.x1 > pageWidth - edgeThreshold ||
|
|
159
|
+
item.y1 < edgeThreshold || item.y1 > pageHeight - edgeThreshold;
|
|
160
|
+
if (!nearEdge)
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
163
|
+
// Build one small SVG per line segment
|
|
164
|
+
const strokeHex = rgbTupleToHex(...drawing.stroke);
|
|
165
|
+
const sw = drawing.strokeWidth || 1;
|
|
166
|
+
const results = [];
|
|
167
|
+
for (const line of lines) {
|
|
168
|
+
const x0 = Math.min(line.x1, line.x2);
|
|
169
|
+
const y0 = Math.min(line.y1, line.y2);
|
|
170
|
+
const x1 = Math.max(line.x1, line.x2);
|
|
171
|
+
const y1 = Math.max(line.y1, line.y2);
|
|
172
|
+
const half = sw / 2;
|
|
173
|
+
const bx = x0 - half;
|
|
174
|
+
const by = y0 - half;
|
|
175
|
+
const bw = Math.max(x1 - x0 + sw, sw + 1);
|
|
176
|
+
const bh = Math.max(y1 - y0 + sw, sw + 1);
|
|
177
|
+
const lx1 = line.x1 - bx;
|
|
178
|
+
const ly1 = line.y1 - by;
|
|
179
|
+
const lx2 = line.x2 - bx;
|
|
180
|
+
const ly2 = line.y2 - by;
|
|
181
|
+
const svg = `<svg viewBox="0 0 ${bw} ${bh}" xmlns="http://www.w3.org/2000/svg"><line x1="${lx1}" y1="${ly1}" x2="${lx2}" y2="${ly2}" stroke="${strokeHex}" stroke-width="${sw}"/></svg>`;
|
|
182
|
+
results.push({ svg, x: bx, y: by, width: bw, height: bh });
|
|
183
|
+
}
|
|
184
|
+
return results;
|
|
128
185
|
}
|
|
129
|
-
function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
|
|
186
|
+
function buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices) {
|
|
130
187
|
const svgElements = [];
|
|
131
188
|
for (let idx = 0; idx < drawings.length; idx++) {
|
|
132
189
|
const drawing = drawings[idx];
|
|
133
190
|
// Skip fully transparent drawings (e.g. accessibility marker rectangles)
|
|
134
191
|
if (drawing.opacity <= 0)
|
|
135
192
|
continue;
|
|
193
|
+
// Skip drawings used as the page background
|
|
194
|
+
if (bgDrawingIndices?.has(idx))
|
|
195
|
+
continue;
|
|
136
196
|
if (isMergeableClipRunDrawing(drawing)) {
|
|
137
197
|
const run = [drawing];
|
|
138
198
|
while (idx + 1 < drawings.length &&
|
|
@@ -161,6 +221,27 @@ function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
|
|
|
161
221
|
}
|
|
162
222
|
}
|
|
163
223
|
}
|
|
224
|
+
// Split full-page crop/bleed marks into per-segment SVGs so they don't
|
|
225
|
+
// block clicks on the entire page in the editor.
|
|
226
|
+
const splitResults = trySplitCropMarks(drawing, pageWidth, pageHeight);
|
|
227
|
+
if (splitResults) {
|
|
228
|
+
for (const seg of splitResults) {
|
|
229
|
+
svgElements.push({
|
|
230
|
+
type: 'svg',
|
|
231
|
+
id: generateId(),
|
|
232
|
+
x: seg.x,
|
|
233
|
+
y: seg.y,
|
|
234
|
+
width: seg.width,
|
|
235
|
+
height: seg.height,
|
|
236
|
+
rotation: 0,
|
|
237
|
+
opacity: drawing.opacity,
|
|
238
|
+
src: svgToDataUri(seg.svg),
|
|
239
|
+
name: '',
|
|
240
|
+
_order: drawing.orderIndex,
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
164
245
|
const result = drawingToSvg(drawing, pageWidth, pageHeight);
|
|
165
246
|
if (result) {
|
|
166
247
|
svgElements.push({
|
|
@@ -558,7 +639,7 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
558
639
|
// Find dominant span (longest text)
|
|
559
640
|
const dominant = block.spans.reduce((a, b) => a.text.length > b.text.length ? a : b);
|
|
560
641
|
const fontFamily = fontRegistry.getFontFamily(dominant.fontName);
|
|
561
|
-
const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin);
|
|
642
|
+
const align = detectAlignment(block.spans, pageWidth, leftMargin, rightMargin, blocks.map((b) => ({ x: b.x, width: b.width })));
|
|
562
643
|
const lineHeight = computeLineHeight(block.spans);
|
|
563
644
|
// Build text content with line breaks
|
|
564
645
|
const lineMap = new Map();
|
|
@@ -641,10 +722,14 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
641
722
|
else if (align === 'right') {
|
|
642
723
|
elemX -= extraWidth;
|
|
643
724
|
}
|
|
644
|
-
// Polotno rotates text around the top-left corner. For vertical text
|
|
645
|
-
//
|
|
725
|
+
// Polotno rotates text around the top-left corner. For vertical text we need
|
|
726
|
+
// to convert PDF baseline coordinates into Polotno element coordinates.
|
|
727
|
+
// The element's y-axis maps to the visual x-axis after rotation, so the
|
|
728
|
+
// half-leading and ascent offsets that normally adjust elemY must instead
|
|
729
|
+
// adjust elemX.
|
|
646
730
|
if (dominant.rotation <= -45 && dominant.rotation >= -135) {
|
|
647
|
-
|
|
731
|
+
const ascentPx = dominant.baselineY - dominant.y;
|
|
732
|
+
elemX = block.x - ascentPx - halfLeading;
|
|
648
733
|
elemY = dominant.baselineY;
|
|
649
734
|
}
|
|
650
735
|
else if (dominant.rotation >= 45 && dominant.rotation <= 135) {
|
|
@@ -671,14 +756,8 @@ async function buildTextElements({ page, pageWidth, yFlipOffset, positionColors,
|
|
|
671
756
|
text: textContent2,
|
|
672
757
|
fontSize: dominant.fontSize,
|
|
673
758
|
fontFamily,
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
fontWeight: fontStrategy === 'embed'
|
|
677
|
-
? 'normal'
|
|
678
|
-
: dominant.fontWeight || extractWeightFromName(dominant.fontName),
|
|
679
|
-
fontStyle: fontStrategy === 'embed'
|
|
680
|
-
? 'normal'
|
|
681
|
-
: dominant.fontStyle || extractStyleFromName(dominant.fontName),
|
|
759
|
+
fontWeight: dominant.fontWeight || extractWeightFromName(dominant.fontName),
|
|
760
|
+
fontStyle: dominant.fontStyle || extractStyleFromName(dominant.fontName),
|
|
682
761
|
fill: dominant.color || '#000000',
|
|
683
762
|
align,
|
|
684
763
|
lineHeight,
|
package/lib/svg-builder.js
CHANGED
|
@@ -30,8 +30,8 @@ function computeDrawingBounds(drawing, pageWidth, pageHeight) {
|
|
|
30
30
|
if (drawing.fill !== null &&
|
|
31
31
|
pageWidth > 0 &&
|
|
32
32
|
pageHeight > 0 &&
|
|
33
|
-
width >= pageWidth * 0.
|
|
34
|
-
height >= pageHeight * 0.
|
|
33
|
+
width >= pageWidth * 0.95 &&
|
|
34
|
+
height >= pageHeight * 0.95) {
|
|
35
35
|
return null;
|
|
36
36
|
}
|
|
37
37
|
return { x0, y0, width, height };
|
|
@@ -86,6 +86,28 @@ function buildPathData(items, originX, originY, shouldCloseFill, closePath) {
|
|
|
86
86
|
}
|
|
87
87
|
return pathParts.join(' ');
|
|
88
88
|
}
|
|
89
|
+
/**
|
|
90
|
+
* Check if a clip path is a simple axis-aligned rectangle,
|
|
91
|
+
* regardless of whether it's represented as `re` or `m`/`l` segments.
|
|
92
|
+
*/
|
|
93
|
+
function isRectangularClipPath(items) {
|
|
94
|
+
if (items.length === 1 && items[0].kind === 're')
|
|
95
|
+
return true;
|
|
96
|
+
// Check if it's 4 line segments (plus optional move) forming a rectangle
|
|
97
|
+
const lines = items.filter(it => it.kind === 'l');
|
|
98
|
+
if (lines.length < 4)
|
|
99
|
+
return false;
|
|
100
|
+
// All lines must be axis-aligned (horizontal or vertical)
|
|
101
|
+
for (const line of lines) {
|
|
102
|
+
if (line.kind !== 'l')
|
|
103
|
+
continue;
|
|
104
|
+
const dx = Math.abs(line.x2 - line.x1);
|
|
105
|
+
const dy = Math.abs(line.y2 - line.y1);
|
|
106
|
+
if (dx > 0.5 && dy > 0.5)
|
|
107
|
+
return false; // diagonal line = not rect
|
|
108
|
+
}
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
89
111
|
export function drawingToSvg(drawing, pageWidth, pageHeight) {
|
|
90
112
|
const bounds = computeDrawingBounds(drawing, pageWidth, pageHeight);
|
|
91
113
|
if (!bounds)
|
|
@@ -150,7 +172,39 @@ export function drawingToSvg(drawing, pageWidth, pageHeight) {
|
|
|
150
172
|
fillValue = 'url(#g)';
|
|
151
173
|
}
|
|
152
174
|
}
|
|
153
|
-
|
|
175
|
+
// Apply clip path if the drawing has a non-trivial one
|
|
176
|
+
let clipDef = '';
|
|
177
|
+
let clipAttr = '';
|
|
178
|
+
if (drawing.clipPath && drawing.clipRect) {
|
|
179
|
+
const [cx0, cy0, cx1, cy1] = drawing.clipRect;
|
|
180
|
+
const cw = cx1 - cx0;
|
|
181
|
+
const ch = cy1 - cy0;
|
|
182
|
+
// Only clip if it meaningfully reduces the visible area.
|
|
183
|
+
// A clip rect that covers 90%+ of the drawing in both dimensions is trivial.
|
|
184
|
+
const insetX = Math.max(0, cx0 - x0) + Math.max(0, (x0 + w) - cx1);
|
|
185
|
+
const insetY = Math.max(0, cy0 - y0) + Math.max(0, (y0 + h) - cy1);
|
|
186
|
+
const significantClip = cw > 0 && ch > 0 &&
|
|
187
|
+
(insetX > w * 0.1 || insetY > h * 0.1);
|
|
188
|
+
if (significantClip) {
|
|
189
|
+
// Check if clip is a simple axis-aligned rectangle (re or 4 line segments)
|
|
190
|
+
const isRectClip = isRectangularClipPath(drawing.clipPath);
|
|
191
|
+
if (!isRectClip) {
|
|
192
|
+
const clipD = buildPathData(drawing.clipPath, x0, y0, true, true);
|
|
193
|
+
clipDef = `<defs><clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
|
|
194
|
+
clipAttr = ' clip-path="url(#c)"';
|
|
195
|
+
// If both gradient defs and clip defs are needed, merge them
|
|
196
|
+
if (defs) {
|
|
197
|
+
clipDef = defs.replace('</defs>', '') + `<clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
|
|
198
|
+
defs = '';
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
const allDefs = clipDef || defs;
|
|
204
|
+
const pathEl = `<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/>`;
|
|
205
|
+
const svgStr = clipAttr
|
|
206
|
+
? `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}<g${clipAttr}>${pathEl}</g></svg>`
|
|
207
|
+
: `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}${pathEl}</svg>`;
|
|
154
208
|
return { svg: svgStr, x: x0, y: y0, width: w, height: h };
|
|
155
209
|
}
|
|
156
210
|
export function clippedDrawingsToSvg(drawings) {
|
package/lib/text-blocks.d.ts
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import type { TextBlock, TextSpan } from './text-types.js';
|
|
2
2
|
export declare function groupSpansByBlock(spans: TextSpan[]): TextBlock[];
|
|
3
|
-
export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number
|
|
3
|
+
export declare function detectAlignment(blockSpans: TextSpan[], pageWidth: number, leftMargin: number, rightMargin: number, allBlocks?: {
|
|
4
|
+
x: number;
|
|
5
|
+
width: number;
|
|
6
|
+
}[]): string;
|
|
4
7
|
export declare function estimatePageMargins(spans: TextSpan[]): [number, number];
|
|
5
8
|
export declare function computeLineHeight(blockSpans: TextSpan[]): number;
|
|
6
9
|
//# sourceMappingURL=text-blocks.d.ts.map
|
package/lib/text-blocks.js
CHANGED
|
@@ -152,7 +152,7 @@ function splitSpansAtPositions(spans, splitPositions) {
|
|
|
152
152
|
return columns.filter((c) => c.length > 0);
|
|
153
153
|
}
|
|
154
154
|
// Alignment detection
|
|
155
|
-
export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin) {
|
|
155
|
+
export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin, allBlocks) {
|
|
156
156
|
const lines = new Map();
|
|
157
157
|
for (const span of blockSpans) {
|
|
158
158
|
const arr = lines.get(span.lineNo) || [];
|
|
@@ -185,10 +185,16 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
185
185
|
// Only classify as right-aligned if the line is short relative to the text
|
|
186
186
|
// area — a near-full-width line that happens to align with the right margin
|
|
187
187
|
// is more likely a paragraph line than a right-aligned label.
|
|
188
|
+
// Additional guard: if other blocks share this left edge, the text is in a
|
|
189
|
+
// left-aligned column and just happens to reach the right margin.
|
|
188
190
|
const lineWidth = x1 - x0;
|
|
191
|
+
const hasAlignedNeighbors = allBlocks &&
|
|
192
|
+
allBlocks.filter((b) => Math.abs(b.x - x0) < 3 && b.width !== lineWidth)
|
|
193
|
+
.length >= 2;
|
|
189
194
|
if (Math.abs(x1 - rightMargin) < rightTol &&
|
|
190
195
|
x0 > leftMargin + centerTol &&
|
|
191
|
-
lineWidth < textWidth * 0.6
|
|
196
|
+
lineWidth < textWidth * 0.6 &&
|
|
197
|
+
!hasAlignedNeighbors)
|
|
192
198
|
return 'right';
|
|
193
199
|
return 'left';
|
|
194
200
|
}
|
|
@@ -204,6 +210,11 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
204
210
|
const rightStd = stddev(rightEdges);
|
|
205
211
|
const midStd = stddev(midpoints);
|
|
206
212
|
const THRESHOLD = 3.0;
|
|
213
|
+
// For center detection, scale threshold with font size. A fixed 3px is fine
|
|
214
|
+
// for 12px text but too tight for large display text (e.g. 55px "BENEFIT OF"
|
|
215
|
+
// / "WAXING" has midpoint stddev 3.2 — clearly centered but misses 3.0).
|
|
216
|
+
const dominantFontSize = blockSpans.reduce((a, b) => a.text.length > b.text.length ? a : b).fontSize;
|
|
217
|
+
const centerThreshold = Math.max(THRESHOLD, dominantFontSize * 0.1);
|
|
207
218
|
if (leftStd < THRESHOLD && rightStd < THRESHOLD)
|
|
208
219
|
return 'justify';
|
|
209
220
|
// Justify detection with tolerance for short lines: last lines of paragraphs,
|
|
@@ -219,7 +230,7 @@ export function detectAlignment(blockSpans, pageWidth, leftMargin, rightMargin)
|
|
|
219
230
|
return 'justify';
|
|
220
231
|
}
|
|
221
232
|
}
|
|
222
|
-
if (midStd <
|
|
233
|
+
if (midStd < centerThreshold && midStd <= leftStd && midStd <= rightStd)
|
|
223
234
|
return 'center';
|
|
224
235
|
if (rightStd < THRESHOLD && rightStd < leftStd)
|
|
225
236
|
return 'right';
|
package/lib/text-layout.js
CHANGED
|
@@ -197,8 +197,11 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
197
197
|
// When font size clearly differs, spans are separate visual elements even if
|
|
198
198
|
// close together (e.g. bold heading label next to body text list). Use a
|
|
199
199
|
// smaller gap threshold for such cases.
|
|
200
|
+
const fontNameDiffers = prev.fontName !== curr.fontName;
|
|
200
201
|
const fontSizeDiffers = fontRatio > 1.1;
|
|
201
|
-
const effectiveSplitGap = fontSizeDiffers
|
|
202
|
+
const effectiveSplitGap = (fontSizeDiffers || fontNameDiffers)
|
|
203
|
+
? Math.max(avgFontSize * 0.5, 8)
|
|
204
|
+
: splitGap;
|
|
202
205
|
if (gap > effectiveSplitGap) {
|
|
203
206
|
const forceSplit = prev.color !== curr.color ||
|
|
204
207
|
prev.fontName !== curr.fontName ||
|
|
@@ -276,7 +279,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
276
279
|
const fontSizeRatio = Math.max(prevFontSize, currFontSize) /
|
|
277
280
|
Math.min(prevFontSize, currFontSize);
|
|
278
281
|
const onSameLine = prev.spans[0].lineNo === curr.spans[0].lineNo;
|
|
279
|
-
const fontSizeChanged = !onSameLine && fontSizeRatio > 1.
|
|
282
|
+
const fontSizeChanged = !onSameLine && fontSizeRatio > 1.08;
|
|
280
283
|
// Color change between lines signals different text elements (e.g. diagram
|
|
281
284
|
// labels in different colors: gray "Force control signal" vs black "Inter-neurons").
|
|
282
285
|
// Only split on color when X overlap is weak — paragraph lines have strong overlap
|
|
@@ -303,7 +306,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
303
306
|
if (!fontSizeChanged &&
|
|
304
307
|
!colorChanged &&
|
|
305
308
|
!largeSingleSpanDisplayShift &&
|
|
306
|
-
yGap
|
|
309
|
+
yGap <= avgFontSize * 0.5 &&
|
|
307
310
|
(xOverlap > 0 || Math.abs(prev.x0 - curr.x0) < avgFontSize * 2)) {
|
|
308
311
|
currentBlock.push(curr);
|
|
309
312
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polotno/pdf-import",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Convert PDF files into Polotno JSON format",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./lib/index.js",
|
|
@@ -28,19 +28,19 @@
|
|
|
28
28
|
"pdfjs-dist": "^4.10.38"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
|
-
"@types/node": "^25.
|
|
31
|
+
"@types/node": "^25.5.0",
|
|
32
32
|
"@types/react": "^19.2.14",
|
|
33
33
|
"@types/react-dom": "^19.2.3",
|
|
34
|
-
"@vitejs/plugin-react": "^
|
|
35
|
-
"esbuild": "^0.27.
|
|
36
|
-
"polotno": "^2.
|
|
37
|
-
"polotno-node": "^2.15.
|
|
34
|
+
"@vitejs/plugin-react": "^6.0.1",
|
|
35
|
+
"esbuild": "^0.27.4",
|
|
36
|
+
"polotno": "^2.38.2",
|
|
37
|
+
"polotno-node": "^2.15.15",
|
|
38
38
|
"react": "^18.3.1",
|
|
39
39
|
"react-dom": "^18",
|
|
40
40
|
"sharp": "^0.34.5",
|
|
41
41
|
"ssim.js": "^3.5.0",
|
|
42
42
|
"typescript": "~5.9.3",
|
|
43
|
-
"vite": "^
|
|
44
|
-
"vitest": "^4.0
|
|
43
|
+
"vite": "^8.0.0",
|
|
44
|
+
"vitest": "^4.1.0"
|
|
45
45
|
}
|
|
46
46
|
}
|