@polotno/pdf-import 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/font-registry.js +9 -1
- package/lib/index.js +1 -1
- package/lib/page-parser.js +87 -6
- package/lib/svg-builder.js +57 -3
- package/lib/text-layout.js +5 -2
- package/package.json +8 -8
package/lib/font-registry.js
CHANGED
|
@@ -35,10 +35,18 @@ export class FontRegistry {
|
|
|
35
35
|
return;
|
|
36
36
|
const mappedFamily = mapPdfFont(fontObj.name);
|
|
37
37
|
const isGoogleFont = isKnownWebFont(fontObj.name);
|
|
38
|
+
const hasEmbeddedData = fontObj.data && fontObj.data.length > 0;
|
|
38
39
|
const shouldEmbed = embedAllFonts || !isGoogleFont;
|
|
39
40
|
// When embedding a known Google Font, rename to avoid Polotno loading
|
|
40
41
|
// the Google version instead of the embedded subset.
|
|
41
|
-
|
|
42
|
+
// Only rename if the font actually has embedded data — standard PDF fonts
|
|
43
|
+
// (Helvetica, Times, Courier, etc.) have no data, so they won't appear
|
|
44
|
+
// in the fonts[] array (all doc fonts must be declared there). Without an
|
|
45
|
+
// entry in fonts[], there's no collision risk, and keeping the original
|
|
46
|
+
// name lets Polotno load the font normally.
|
|
47
|
+
const fontFamily = embedAllFonts && isGoogleFont && hasEmbeddedData
|
|
48
|
+
? `${mappedFamily} (PDF)`
|
|
49
|
+
: mappedFamily;
|
|
42
50
|
// Track the rename so text elements can use the correct fontFamily
|
|
43
51
|
if (fontFamily !== mappedFamily) {
|
|
44
52
|
this.renameMap.set(fontObj.name, fontFamily);
|
package/lib/index.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
import{getDocument as
|
|
1
|
+
import{getDocument as I,GlobalWorkerOptions as b}from"pdfjs-dist/legacy/build/pdf.mjs";import{parsePage as O}from"./page-parser.js";import{FontRegistry as k}from"./font-registry.js";import{buildJpegIndex as A}from"./pdf-image-extractor.js";import{workerSource as C}from"./generated/pdf-worker-source.js";let E=0;function F(){return`el_${Date.now()}_${++E}`}async function _({pdf:e,fontStrategy:g="embed"}){if(typeof window<"u"&&!b.workerSrc){const r=new Blob([C],{type:"application/javascript"});b.workerSrc=URL.createObjectURL(r)}const p=new Uint8Array(e instanceof ArrayBuffer?e:e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)),y=A(p),o=await I({data:p,useSystemFonts:!0,disableFontFace:!0,fontExtraProperties:!0}).promise,f=new k;let n=612,a=792;const u=await o.getPage(1),w=u.getViewport({scale:1});n=w.width,a=w.height;const d=3,i=new Array(o.numPages);for(let r=0;r<o.numPages;r+=d){const x=Math.min(r+d,o.numPages),h=[];for(let t=r;t<x;t++)h.push((async()=>{const s=t===0?u:await o.getPage(t+1),{parsedPage:c,pageWidth:l,pageHeight:m}=await O({page:s,pageIdx:t,fontRegistry:f,generateId:F,jpegIndex:y,fontStrategy:g});return(l!==n||m!==a)&&(c.width=l,c.height=m),{parsedPage:c,pageIdx:t}})());const R=await Promise.all(h);for(const{parsedPage:t,pageIdx:s}of R)i[s]=t}await o.destroy();const P=f.finalize(g,i);return{width:n,height:a,fonts:P,pages:i,unit:"px",dpi:72}}export{_ as pdfToJson};
|
package/lib/page-parser.js
CHANGED
|
@@ -27,8 +27,8 @@ export async function parsePage({ page, pageIdx, fontRegistry, generateId, jpegI
|
|
|
27
27
|
buildImageElements(page, imageRefs, pageIdx, generateId, jpegIndex),
|
|
28
28
|
collectPageFonts(page, fontRefs, fontRegistry, fontStrategy === 'embed'),
|
|
29
29
|
]);
|
|
30
|
-
const pageBackground = detectPageBackground(drawings, pageWidth, pageHeight);
|
|
31
|
-
const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId);
|
|
30
|
+
const { color: pageBackground, bgDrawingIndices } = detectPageBackground(drawings, pageWidth, pageHeight);
|
|
31
|
+
const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices);
|
|
32
32
|
const textElements = await buildTextElements({
|
|
33
33
|
page,
|
|
34
34
|
pageWidth,
|
|
@@ -113,26 +113,86 @@ async function resolveDrawingGradients(page, drawings) {
|
|
|
113
113
|
}
|
|
114
114
|
}
|
|
115
115
|
function detectPageBackground(drawings, pageWidth, pageHeight) {
|
|
116
|
+
const bgDrawingIndices = new Set();
|
|
116
117
|
let pageBackground = '#FFFFFF';
|
|
117
|
-
for (const drawing of drawings) {
|
|
118
|
+
for (let i = 0; i < drawings.length; i++) {
|
|
119
|
+
const drawing = drawings[i];
|
|
118
120
|
if (drawing.fill !== null) {
|
|
119
121
|
const dw = drawing.rect[2] - drawing.rect[0];
|
|
120
122
|
const dh = drawing.rect[3] - drawing.rect[1];
|
|
121
|
-
if (dw >= pageWidth * 0.
|
|
123
|
+
if (dw >= pageWidth * 0.95 && dh >= pageHeight * 0.95) {
|
|
122
124
|
const [r, g, b] = drawing.fill;
|
|
123
125
|
pageBackground = rgbTupleToHex(r, g, b);
|
|
126
|
+
bgDrawingIndices.add(i);
|
|
124
127
|
}
|
|
125
128
|
}
|
|
126
129
|
}
|
|
127
|
-
return pageBackground;
|
|
130
|
+
return { color: pageBackground, bgDrawingIndices };
|
|
131
|
+
}
|
|
132
|
+
/**
|
|
133
|
+
* Detect crop/bleed mark drawings (full-page, stroke-only, short edge lines)
|
|
134
|
+
* and split them into individual per-line SVGs so they don't block clicks.
|
|
135
|
+
*/
|
|
136
|
+
function trySplitCropMarks(drawing, pageWidth, pageHeight) {
|
|
137
|
+
if (drawing.fill !== null)
|
|
138
|
+
return null;
|
|
139
|
+
if (!drawing.stroke)
|
|
140
|
+
return null;
|
|
141
|
+
const dw = drawing.rect[2] - drawing.rect[0];
|
|
142
|
+
const dh = drawing.rect[3] - drawing.rect[1];
|
|
143
|
+
if (dw < pageWidth * 0.8 || dh < pageHeight * 0.8)
|
|
144
|
+
return null;
|
|
145
|
+
const lines = drawing.items.filter((it) => it.kind === 'l');
|
|
146
|
+
if (lines.length === 0)
|
|
147
|
+
return null;
|
|
148
|
+
// All line items must be short and near page edges
|
|
149
|
+
const edgeThreshold = Math.min(pageWidth, pageHeight) * 0.15;
|
|
150
|
+
for (const item of drawing.items) {
|
|
151
|
+
if (item.kind === 'm')
|
|
152
|
+
continue;
|
|
153
|
+
if (item.kind !== 'l')
|
|
154
|
+
return null;
|
|
155
|
+
const len = Math.hypot(item.x2 - item.x1, item.y2 - item.y1);
|
|
156
|
+
if (len > edgeThreshold)
|
|
157
|
+
return null;
|
|
158
|
+
const nearEdge = item.x1 < edgeThreshold || item.x1 > pageWidth - edgeThreshold ||
|
|
159
|
+
item.y1 < edgeThreshold || item.y1 > pageHeight - edgeThreshold;
|
|
160
|
+
if (!nearEdge)
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
163
|
+
// Build one small SVG per line segment
|
|
164
|
+
const strokeHex = rgbTupleToHex(...drawing.stroke);
|
|
165
|
+
const sw = drawing.strokeWidth || 1;
|
|
166
|
+
const results = [];
|
|
167
|
+
for (const line of lines) {
|
|
168
|
+
const x0 = Math.min(line.x1, line.x2);
|
|
169
|
+
const y0 = Math.min(line.y1, line.y2);
|
|
170
|
+
const x1 = Math.max(line.x1, line.x2);
|
|
171
|
+
const y1 = Math.max(line.y1, line.y2);
|
|
172
|
+
const half = sw / 2;
|
|
173
|
+
const bx = x0 - half;
|
|
174
|
+
const by = y0 - half;
|
|
175
|
+
const bw = Math.max(x1 - x0 + sw, sw + 1);
|
|
176
|
+
const bh = Math.max(y1 - y0 + sw, sw + 1);
|
|
177
|
+
const lx1 = line.x1 - bx;
|
|
178
|
+
const ly1 = line.y1 - by;
|
|
179
|
+
const lx2 = line.x2 - bx;
|
|
180
|
+
const ly2 = line.y2 - by;
|
|
181
|
+
const svg = `<svg viewBox="0 0 ${bw} ${bh}" xmlns="http://www.w3.org/2000/svg"><line x1="${lx1}" y1="${ly1}" x2="${lx2}" y2="${ly2}" stroke="${strokeHex}" stroke-width="${sw}"/></svg>`;
|
|
182
|
+
results.push({ svg, x: bx, y: by, width: bw, height: bh });
|
|
183
|
+
}
|
|
184
|
+
return results;
|
|
128
185
|
}
|
|
129
|
-
function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
|
|
186
|
+
function buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices) {
|
|
130
187
|
const svgElements = [];
|
|
131
188
|
for (let idx = 0; idx < drawings.length; idx++) {
|
|
132
189
|
const drawing = drawings[idx];
|
|
133
190
|
// Skip fully transparent drawings (e.g. accessibility marker rectangles)
|
|
134
191
|
if (drawing.opacity <= 0)
|
|
135
192
|
continue;
|
|
193
|
+
// Skip drawings used as the page background
|
|
194
|
+
if (bgDrawingIndices?.has(idx))
|
|
195
|
+
continue;
|
|
136
196
|
if (isMergeableClipRunDrawing(drawing)) {
|
|
137
197
|
const run = [drawing];
|
|
138
198
|
while (idx + 1 < drawings.length &&
|
|
@@ -161,6 +221,27 @@ function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
|
|
|
161
221
|
}
|
|
162
222
|
}
|
|
163
223
|
}
|
|
224
|
+
// Split full-page crop/bleed marks into per-segment SVGs so they don't
|
|
225
|
+
// block clicks on the entire page in the editor.
|
|
226
|
+
const splitResults = trySplitCropMarks(drawing, pageWidth, pageHeight);
|
|
227
|
+
if (splitResults) {
|
|
228
|
+
for (const seg of splitResults) {
|
|
229
|
+
svgElements.push({
|
|
230
|
+
type: 'svg',
|
|
231
|
+
id: generateId(),
|
|
232
|
+
x: seg.x,
|
|
233
|
+
y: seg.y,
|
|
234
|
+
width: seg.width,
|
|
235
|
+
height: seg.height,
|
|
236
|
+
rotation: 0,
|
|
237
|
+
opacity: drawing.opacity,
|
|
238
|
+
src: svgToDataUri(seg.svg),
|
|
239
|
+
name: '',
|
|
240
|
+
_order: drawing.orderIndex,
|
|
241
|
+
});
|
|
242
|
+
}
|
|
243
|
+
continue;
|
|
244
|
+
}
|
|
164
245
|
const result = drawingToSvg(drawing, pageWidth, pageHeight);
|
|
165
246
|
if (result) {
|
|
166
247
|
svgElements.push({
|
package/lib/svg-builder.js
CHANGED
|
@@ -30,8 +30,8 @@ function computeDrawingBounds(drawing, pageWidth, pageHeight) {
|
|
|
30
30
|
if (drawing.fill !== null &&
|
|
31
31
|
pageWidth > 0 &&
|
|
32
32
|
pageHeight > 0 &&
|
|
33
|
-
width >= pageWidth * 0.
|
|
34
|
-
height >= pageHeight * 0.
|
|
33
|
+
width >= pageWidth * 0.95 &&
|
|
34
|
+
height >= pageHeight * 0.95) {
|
|
35
35
|
return null;
|
|
36
36
|
}
|
|
37
37
|
return { x0, y0, width, height };
|
|
@@ -86,6 +86,28 @@ function buildPathData(items, originX, originY, shouldCloseFill, closePath) {
|
|
|
86
86
|
}
|
|
87
87
|
return pathParts.join(' ');
|
|
88
88
|
}
|
|
89
|
+
/**
|
|
90
|
+
* Check if a clip path is a simple axis-aligned rectangle,
|
|
91
|
+
* regardless of whether it's represented as `re` or `m`/`l` segments.
|
|
92
|
+
*/
|
|
93
|
+
function isRectangularClipPath(items) {
|
|
94
|
+
if (items.length === 1 && items[0].kind === 're')
|
|
95
|
+
return true;
|
|
96
|
+
// Check if it's 4 line segments (plus optional move) forming a rectangle
|
|
97
|
+
const lines = items.filter(it => it.kind === 'l');
|
|
98
|
+
if (lines.length < 4)
|
|
99
|
+
return false;
|
|
100
|
+
// All lines must be axis-aligned (horizontal or vertical)
|
|
101
|
+
for (const line of lines) {
|
|
102
|
+
if (line.kind !== 'l')
|
|
103
|
+
continue;
|
|
104
|
+
const dx = Math.abs(line.x2 - line.x1);
|
|
105
|
+
const dy = Math.abs(line.y2 - line.y1);
|
|
106
|
+
if (dx > 0.5 && dy > 0.5)
|
|
107
|
+
return false; // diagonal line = not rect
|
|
108
|
+
}
|
|
109
|
+
return true;
|
|
110
|
+
}
|
|
89
111
|
export function drawingToSvg(drawing, pageWidth, pageHeight) {
|
|
90
112
|
const bounds = computeDrawingBounds(drawing, pageWidth, pageHeight);
|
|
91
113
|
if (!bounds)
|
|
@@ -150,7 +172,39 @@ export function drawingToSvg(drawing, pageWidth, pageHeight) {
|
|
|
150
172
|
fillValue = 'url(#g)';
|
|
151
173
|
}
|
|
152
174
|
}
|
|
153
|
-
|
|
175
|
+
// Apply clip path if the drawing has a non-trivial one
|
|
176
|
+
let clipDef = '';
|
|
177
|
+
let clipAttr = '';
|
|
178
|
+
if (drawing.clipPath && drawing.clipRect) {
|
|
179
|
+
const [cx0, cy0, cx1, cy1] = drawing.clipRect;
|
|
180
|
+
const cw = cx1 - cx0;
|
|
181
|
+
const ch = cy1 - cy0;
|
|
182
|
+
// Only clip if it meaningfully reduces the visible area.
|
|
183
|
+
// A clip rect that covers 90%+ of the drawing in both dimensions is trivial.
|
|
184
|
+
const insetX = Math.max(0, cx0 - x0) + Math.max(0, (x0 + w) - cx1);
|
|
185
|
+
const insetY = Math.max(0, cy0 - y0) + Math.max(0, (y0 + h) - cy1);
|
|
186
|
+
const significantClip = cw > 0 && ch > 0 &&
|
|
187
|
+
(insetX > w * 0.1 || insetY > h * 0.1);
|
|
188
|
+
if (significantClip) {
|
|
189
|
+
// Check if clip is a simple axis-aligned rectangle (re or 4 line segments)
|
|
190
|
+
const isRectClip = isRectangularClipPath(drawing.clipPath);
|
|
191
|
+
if (!isRectClip) {
|
|
192
|
+
const clipD = buildPathData(drawing.clipPath, x0, y0, true, true);
|
|
193
|
+
clipDef = `<defs><clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
|
|
194
|
+
clipAttr = ' clip-path="url(#c)"';
|
|
195
|
+
// If both gradient defs and clip defs are needed, merge them
|
|
196
|
+
if (defs) {
|
|
197
|
+
clipDef = defs.replace('</defs>', '') + `<clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
|
|
198
|
+
defs = '';
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
const allDefs = clipDef || defs;
|
|
204
|
+
const pathEl = `<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/>`;
|
|
205
|
+
const svgStr = clipAttr
|
|
206
|
+
? `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}<g${clipAttr}>${pathEl}</g></svg>`
|
|
207
|
+
: `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}${pathEl}</svg>`;
|
|
154
208
|
return { svg: svgStr, x: x0, y: y0, width: w, height: h };
|
|
155
209
|
}
|
|
156
210
|
export function clippedDrawingsToSvg(drawings) {
|
package/lib/text-layout.js
CHANGED
|
@@ -197,8 +197,11 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
197
197
|
// When font size clearly differs, spans are separate visual elements even if
|
|
198
198
|
// close together (e.g. bold heading label next to body text list). Use a
|
|
199
199
|
// smaller gap threshold for such cases.
|
|
200
|
+
const fontNameDiffers = prev.fontName !== curr.fontName;
|
|
200
201
|
const fontSizeDiffers = fontRatio > 1.1;
|
|
201
|
-
const effectiveSplitGap = fontSizeDiffers
|
|
202
|
+
const effectiveSplitGap = (fontSizeDiffers || fontNameDiffers)
|
|
203
|
+
? Math.max(avgFontSize * 0.5, 8)
|
|
204
|
+
: splitGap;
|
|
202
205
|
if (gap > effectiveSplitGap) {
|
|
203
206
|
const forceSplit = prev.color !== curr.color ||
|
|
204
207
|
prev.fontName !== curr.fontName ||
|
|
@@ -303,7 +306,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
|
|
|
303
306
|
if (!fontSizeChanged &&
|
|
304
307
|
!colorChanged &&
|
|
305
308
|
!largeSingleSpanDisplayShift &&
|
|
306
|
-
yGap
|
|
309
|
+
yGap <= avgFontSize * 0.5 &&
|
|
307
310
|
(xOverlap > 0 || Math.abs(prev.x0 - curr.x0) < avgFontSize * 2)) {
|
|
308
311
|
currentBlock.push(curr);
|
|
309
312
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@polotno/pdf-import",
|
|
3
|
-
"version": "0.0.3",
|
|
3
|
+
"version": "0.0.4",
|
|
4
4
|
"description": "Convert PDF files into Polotno JSON format",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./lib/index.js",
|
|
@@ -28,19 +28,19 @@
|
|
|
28
28
|
"pdfjs-dist": "^4.10.38"
|
|
29
29
|
},
|
|
30
30
|
"devDependencies": {
|
|
31
|
-
"@types/node": "^25.
|
|
31
|
+
"@types/node": "^25.5.0",
|
|
32
32
|
"@types/react": "^19.2.14",
|
|
33
33
|
"@types/react-dom": "^19.2.3",
|
|
34
|
-
"@vitejs/plugin-react": "^
|
|
35
|
-
"esbuild": "^0.27.
|
|
36
|
-
"polotno": "^2.
|
|
37
|
-
"polotno-node": "^2.15.
|
|
34
|
+
"@vitejs/plugin-react": "^6.0.1",
|
|
35
|
+
"esbuild": "^0.27.4",
|
|
36
|
+
"polotno": "^2.38.2",
|
|
37
|
+
"polotno-node": "^2.15.15",
|
|
38
38
|
"react": "^18.3.1",
|
|
39
39
|
"react-dom": "^18",
|
|
40
40
|
"sharp": "^0.34.5",
|
|
41
41
|
"ssim.js": "^3.5.0",
|
|
42
42
|
"typescript": "~5.9.3",
|
|
43
|
-
"vite": "^
|
|
44
|
-
"vitest": "^4.0
|
|
43
|
+
"vite": "^8.0.0",
|
|
44
|
+
"vitest": "^4.1.0"
|
|
45
45
|
}
|
|
46
46
|
}
|