@polotno/pdf-import 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -35,10 +35,18 @@ export class FontRegistry {
35
35
  return;
36
36
  const mappedFamily = mapPdfFont(fontObj.name);
37
37
  const isGoogleFont = isKnownWebFont(fontObj.name);
38
+ const hasEmbeddedData = fontObj.data && fontObj.data.length > 0;
38
39
  const shouldEmbed = embedAllFonts || !isGoogleFont;
39
40
  // When embedding a known Google Font, rename to avoid Polotno loading
40
41
  // the Google version instead of the embedded subset.
41
- const fontFamily = embedAllFonts && isGoogleFont ? `${mappedFamily} (PDF)` : mappedFamily;
42
+ // Only rename if the font actually has embedded data — standard PDF fonts
43
+ // (Helvetica, Times, Courier, etc.) have no data, so they won't appear
44
+ // in the fonts[] array (all doc fonts must be declared there). Without an
45
+ // entry in fonts[], there's no collision risk, and keeping the original
46
+ // name lets Polotno load the font normally.
47
+ const fontFamily = embedAllFonts && isGoogleFont && hasEmbeddedData
48
+ ? `${mappedFamily} (PDF)`
49
+ : mappedFamily;
42
50
  // Track the rename so text elements can use the correct fontFamily
43
51
  if (fontFamily !== mappedFamily) {
44
52
  this.renameMap.set(fontObj.name, fontFamily);
package/lib/index.js CHANGED
@@ -1 +1 @@
1
- import{getDocument as x,GlobalWorkerOptions as m}from"pdfjs-dist/legacy/build/pdf.mjs";import{parsePage as R}from"./page-parser.js";import{FontRegistry as I}from"./font-registry.js";import{buildJpegIndex as O}from"./pdf-image-extractor.js";import{workerSource as k}from"./generated/pdf-worker-source.js";let A=0;function C(){return`el_${Date.now()}_${++A}`}async function L({pdf:e,fontStrategy:s="embed"}){if(typeof window<"u"&&!m.workerSrc){const r=new Blob([k],{type:"application/javascript"});m.workerSrc=URL.createObjectURL(r)}const i=new Uint8Array(e instanceof ArrayBuffer?e:e.buffer.slice(e.byteOffset,e.byteOffset+e.byteLength)),d=O(i),o=await x({data:i,useSystemFonts:!0,disableFontFace:!0,fontExtraProperties:!0}).promise,c=new I;let g=612,p=792;const f=await o.getPage(1),u=f.getViewport({scale:1});g=u.width,p=u.height;const w=3,n=new Array(o.numPages);for(let r=0;r<o.numPages;r+=w){const h=Math.min(r+w,o.numPages),l=[];for(let t=r;t<h;t++)l.push((async()=>{const a=t===0?f:await o.getPage(t+1),{parsedPage:P}=await R({page:a,pageIdx:t,fontRegistry:c,generateId:C,jpegIndex:d,fontStrategy:s});return{parsedPage:P,pageIdx:t}})());const y=await Promise.all(l);for(const{parsedPage:t,pageIdx:a}of y)n[a]=t}await o.destroy();const b=c.finalize(s,n);return{width:g,height:p,fonts:b,pages:n,unit:"px",dpi:72}}export{L as pdfToJson};
1
import { getDocument, GlobalWorkerOptions } from "pdfjs-dist/legacy/build/pdf.mjs";
import { parsePage } from "./page-parser.js";
import { FontRegistry } from "./font-registry.js";
import { buildJpegIndex } from "./pdf-image-extractor.js";
import { workerSource } from "./generated/pdf-worker-source.js";

// Number of pages parsed concurrently per batch.
const PAGE_BATCH_SIZE = 3;

let elementCounter = 0;

/** Build an element id from the current timestamp plus a module-wide counter. */
function generateId() {
  return `el_${Date.now()}_${++elementCounter}`;
}

/**
 * Convert a PDF into a Polotno-style JSON description.
 *
 * @param {object} options
 * @param {ArrayBuffer|ArrayBufferView} options.pdf - Raw PDF bytes. A view is
 *   copied into a fresh buffer; an ArrayBuffer is wrapped without copying.
 * @param {string} [options.fontStrategy="embed"] - Forwarded to `parsePage`
 *   and `FontRegistry#finalize`.
 * @returns {Promise<{width: number, height: number, fonts: *, pages: Array, unit: string, dpi: number}>}
 *   `width`/`height` come from the first page's viewport at scale 1; a page
 *   whose size differs from them carries its own `width`/`height`.
 */
async function pdfToJson({ pdf, fontStrategy = "embed" }) {
  // In a browser, lazily point pdf.js at the bundled worker source.
  if (typeof window !== "undefined" && !GlobalWorkerOptions.workerSrc) {
    const workerBlob = new Blob([workerSource], { type: "application/javascript" });
    GlobalWorkerOptions.workerSrc = URL.createObjectURL(workerBlob);
  }

  const bytes = new Uint8Array(
    pdf instanceof ArrayBuffer
      ? pdf
      : pdf.buffer.slice(pdf.byteOffset, pdf.byteOffset + pdf.byteLength),
  );
  // NOTE(review): the JPEG index is built before getDocument() is called,
  // presumably because pdf.js may take ownership of `bytes` — confirm.
  const jpegIndex = buildJpegIndex(bytes);
  const doc = await getDocument({
    data: bytes,
    useSystemFonts: true,
    disableFontFace: true,
    fontExtraProperties: true,
  }).promise;

  const fontRegistry = new FontRegistry();
  const pages = new Array(doc.numPages);
  let width;
  let height;

  try {
    const firstPage = await doc.getPage(1);
    ({ width, height } = firstPage.getViewport({ scale: 1 }));

    for (let start = 0; start < doc.numPages; start += PAGE_BATCH_SIZE) {
      const end = Math.min(start + PAGE_BATCH_SIZE, doc.numPages);
      const batch = [];
      for (let pageIdx = start; pageIdx < end; pageIdx++) {
        batch.push((async () => {
          // Reuse the already-loaded first page instead of fetching it again.
          const page = pageIdx === 0 ? firstPage : await doc.getPage(pageIdx + 1);
          const { parsedPage, pageWidth, pageHeight } = await parsePage({
            page,
            pageIdx,
            fontRegistry,
            generateId,
            jpegIndex,
            fontStrategy,
          });
          // Only pages that deviate from the document size carry their own.
          if (pageWidth !== width || pageHeight !== height) {
            parsedPage.width = pageWidth;
            parsedPage.height = pageHeight;
          }
          return { parsedPage, pageIdx };
        })());
      }
      const parsedBatch = await Promise.all(batch);
      for (const { parsedPage, pageIdx } of parsedBatch) {
        pages[pageIdx] = parsedPage;
      }
    }
  } finally {
    // Release the pdf.js document even when loading or parsing a page throws.
    await doc.destroy();
  }

  const fonts = fontRegistry.finalize(fontStrategy, pages);
  return { width, height, fonts, pages, unit: "px", dpi: 72 };
}

export { pdfToJson };
@@ -27,8 +27,8 @@ export async function parsePage({ page, pageIdx, fontRegistry, generateId, jpegI
27
27
  buildImageElements(page, imageRefs, pageIdx, generateId, jpegIndex),
28
28
  collectPageFonts(page, fontRefs, fontRegistry, fontStrategy === 'embed'),
29
29
  ]);
30
- const pageBackground = detectPageBackground(drawings, pageWidth, pageHeight);
31
- const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId);
30
+ const { color: pageBackground, bgDrawingIndices } = detectPageBackground(drawings, pageWidth, pageHeight);
31
+ const svgElements = buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices);
32
32
  const textElements = await buildTextElements({
33
33
  page,
34
34
  pageWidth,
@@ -113,26 +113,86 @@ async function resolveDrawingGradients(page, drawings) {
113
113
  }
114
114
  }
115
115
  function detectPageBackground(drawings, pageWidth, pageHeight) {
116
+ const bgDrawingIndices = new Set();
116
117
  let pageBackground = '#FFFFFF';
117
- for (const drawing of drawings) {
118
+ for (let i = 0; i < drawings.length; i++) {
119
+ const drawing = drawings[i];
118
120
  if (drawing.fill !== null) {
119
121
  const dw = drawing.rect[2] - drawing.rect[0];
120
122
  const dh = drawing.rect[3] - drawing.rect[1];
121
- if (dw >= pageWidth * 0.9 && dh >= pageHeight * 0.9) {
123
+ if (dw >= pageWidth * 0.95 && dh >= pageHeight * 0.95) {
122
124
  const [r, g, b] = drawing.fill;
123
125
  pageBackground = rgbTupleToHex(r, g, b);
126
+ bgDrawingIndices.add(i);
124
127
  }
125
128
  }
126
129
  }
127
- return pageBackground;
130
+ return { color: pageBackground, bgDrawingIndices };
131
+ }
132
/**
 * Detect crop/bleed-mark drawings and split them into one small SVG per line
 * segment, so a page-sized element does not block clicks in the editor.
 *
 * A drawing qualifies when it is stroke-only, its rect spans at least 80% of
 * the page in both dimensions, and every path item other than a move ('m') is
 * a line ('l') that is no longer than 15% of the shorter page side and has at
 * least one endpoint within that same distance of a page edge.
 *
 * @param {object} drawing - Reads `fill`, `stroke`, `strokeWidth`, `rect`
 *   ([x0, y0, x1, y1]) and `items` (entries with `kind`, `x1`, `y1`, `x2`, `y2`).
 * @param {number} pageWidth
 * @param {number} pageHeight
 * @returns {Array<{svg: string, x: number, y: number, width: number, height: number}>|null}
 *   One entry per line segment, or null when the drawing is not a set of crop marks.
 */
function trySplitCropMarks(drawing, pageWidth, pageHeight) {
    if (drawing.fill !== null || !drawing.stroke)
        return null;
    const dw = drawing.rect[2] - drawing.rect[0];
    const dh = drawing.rect[3] - drawing.rect[1];
    if (dw < pageWidth * 0.8 || dh < pageHeight * 0.8)
        return null;

    const edgeThreshold = Math.min(pageWidth, pageHeight) * 0.15;
    const isNearEdge = (x, y) => x < edgeThreshold || x > pageWidth - edgeThreshold ||
        y < edgeThreshold || y > pageHeight - edgeThreshold;

    // Validate and collect the line segments in a single pass.
    const lines = [];
    for (const item of drawing.items) {
        if (item.kind === 'm')
            continue;
        if (item.kind !== 'l')
            return null;
        if (Math.hypot(item.x2 - item.x1, item.y2 - item.y1) > edgeThreshold)
            return null;
        // Test both endpoints so the verdict does not depend on the direction
        // in which the mark was drawn.
        if (!isNearEdge(item.x1, item.y1) && !isNearEdge(item.x2, item.y2))
            return null;
        lines.push(item);
    }
    if (lines.length === 0)
        return null;

    const strokeHex = rgbTupleToHex(...drawing.stroke);
    // A missing or zero stroke width falls back to 1.
    const sw = drawing.strokeWidth || 1;
    const half = sw / 2;
    return lines.map((line) => {
        const x0 = Math.min(line.x1, line.x2);
        const y0 = Math.min(line.y1, line.y2);
        const x1 = Math.max(line.x1, line.x2);
        const y1 = Math.max(line.y1, line.y2);
        // Pad the box by half the stroke width on each side and keep it at
        // least `sw + 1` along each axis so an axis-aligned line stays visible.
        const bx = x0 - half;
        const by = y0 - half;
        const bw = Math.max(x1 - x0 + sw, sw + 1);
        const bh = Math.max(y1 - y0 + sw, sw + 1);
        const lx1 = line.x1 - bx;
        const ly1 = line.y1 - by;
        const lx2 = line.x2 - bx;
        const ly2 = line.y2 - by;
        const svg = `<svg viewBox="0 0 ${bw} ${bh}" xmlns="http://www.w3.org/2000/svg"><line x1="${lx1}" y1="${ly1}" x2="${lx2}" y2="${ly2}" stroke="${strokeHex}" stroke-width="${sw}"/></svg>`;
        return { svg, x: bx, y: by, width: bw, height: bh };
    });
}
129
- function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
186
+ function buildSvgElements(drawings, pageWidth, pageHeight, generateId, bgDrawingIndices) {
130
187
  const svgElements = [];
131
188
  for (let idx = 0; idx < drawings.length; idx++) {
132
189
  const drawing = drawings[idx];
133
190
  // Skip fully transparent drawings (e.g. accessibility marker rectangles)
134
191
  if (drawing.opacity <= 0)
135
192
  continue;
193
+ // Skip drawings used as the page background
194
+ if (bgDrawingIndices?.has(idx))
195
+ continue;
136
196
  if (isMergeableClipRunDrawing(drawing)) {
137
197
  const run = [drawing];
138
198
  while (idx + 1 < drawings.length &&
@@ -161,6 +221,27 @@ function buildSvgElements(drawings, pageWidth, pageHeight, generateId) {
161
221
  }
162
222
  }
163
223
  }
224
+ // Split full-page crop/bleed marks into per-segment SVGs so they don't
225
+ // block clicks on the entire page in the editor.
226
+ const splitResults = trySplitCropMarks(drawing, pageWidth, pageHeight);
227
+ if (splitResults) {
228
+ for (const seg of splitResults) {
229
+ svgElements.push({
230
+ type: 'svg',
231
+ id: generateId(),
232
+ x: seg.x,
233
+ y: seg.y,
234
+ width: seg.width,
235
+ height: seg.height,
236
+ rotation: 0,
237
+ opacity: drawing.opacity,
238
+ src: svgToDataUri(seg.svg),
239
+ name: '',
240
+ _order: drawing.orderIndex,
241
+ });
242
+ }
243
+ continue;
244
+ }
164
245
  const result = drawingToSvg(drawing, pageWidth, pageHeight);
165
246
  if (result) {
166
247
  svgElements.push({
@@ -30,8 +30,8 @@ function computeDrawingBounds(drawing, pageWidth, pageHeight) {
30
30
  if (drawing.fill !== null &&
31
31
  pageWidth > 0 &&
32
32
  pageHeight > 0 &&
33
- width >= pageWidth * 0.9 &&
34
- height >= pageHeight * 0.9) {
33
+ width >= pageWidth * 0.95 &&
34
+ height >= pageHeight * 0.95) {
35
35
  return null;
36
36
  }
37
37
  return { x0, y0, width, height };
@@ -86,6 +86,28 @@ function buildPathData(items, originX, originY, shouldCloseFill, closePath) {
86
86
  }
87
87
  return pathParts.join(' ');
88
88
  }
89
/**
 * Check whether a clip path is a simple axis-aligned rectangle, whether it is
 * given as a single `re` item or as `m`/`l` segments.
 *
 * The segment form is accepted only when every item is a move ('m') or a line
 * ('l'), no line is diagonal, all line endpoints sit on exactly two x values
 * and two y values, and both horizontal and both vertical sides are present.
 * Any other item kind (a curve, for example) makes the path non-rectangular,
 * so rounded rectangles and L-shaped outlines are rejected.
 *
 * @param {Array<object>} items - Path items with `kind` and, for lines,
 *   `x1`, `y1`, `x2`, `y2`.
 * @returns {boolean} True only when the path is provably a rectangle.
 */
function isRectangularClipPath(items) {
    if (!items?.length)
        return false;
    if (items.length === 1 && items[0].kind === 're')
        return true;

    // Coordinates closer than this are treated as equal.
    const TOLERANCE = 0.5;
    const addDistinct = (values, value) => {
        if (!values.some((known) => Math.abs(known - value) <= TOLERANCE))
            values.push(value);
    };

    const cornerXs = [];
    const cornerYs = [];
    const horizontalYs = [];
    const verticalXs = [];
    for (const item of items) {
        if (item.kind === 'm')
            continue;
        if (item.kind !== 'l')
            return false; // curves or unknown items = not a plain rectangle
        const dx = Math.abs(item.x2 - item.x1);
        const dy = Math.abs(item.y2 - item.y1);
        if (dx > TOLERANCE && dy > TOLERANCE)
            return false; // diagonal line = not rect
        if (dx <= TOLERANCE && dy <= TOLERANCE)
            continue; // zero-length segment contributes no side
        addDistinct(cornerXs, item.x1);
        addDistinct(cornerXs, item.x2);
        addDistinct(cornerYs, item.y1);
        addDistinct(cornerYs, item.y2);
        if (dy <= TOLERANCE)
            addDistinct(horizontalYs, item.y1);
        else
            addDistinct(verticalXs, item.x1);
    }
    // Two x and two y values overall force every side to span the full
    // extent; two horizontal and two vertical levels mean all sides exist.
    return cornerXs.length === 2 && cornerYs.length === 2 &&
        horizontalYs.length === 2 && verticalXs.length === 2;
}
89
111
  export function drawingToSvg(drawing, pageWidth, pageHeight) {
90
112
  const bounds = computeDrawingBounds(drawing, pageWidth, pageHeight);
91
113
  if (!bounds)
@@ -150,7 +172,39 @@ export function drawingToSvg(drawing, pageWidth, pageHeight) {
150
172
  fillValue = 'url(#g)';
151
173
  }
152
174
  }
153
- const svgStr = `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${defs}<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/></svg>`;
175
+ // Apply clip path if the drawing has a non-trivial one
176
+ let clipDef = '';
177
+ let clipAttr = '';
178
+ if (drawing.clipPath && drawing.clipRect) {
179
+ const [cx0, cy0, cx1, cy1] = drawing.clipRect;
180
+ const cw = cx1 - cx0;
181
+ const ch = cy1 - cy0;
182
+ // Only clip if it meaningfully reduces the visible area.
183
+ // A clip rect that covers 90%+ of the drawing in both dimensions is trivial.
184
+ const insetX = Math.max(0, cx0 - x0) + Math.max(0, (x0 + w) - cx1);
185
+ const insetY = Math.max(0, cy0 - y0) + Math.max(0, (y0 + h) - cy1);
186
+ const significantClip = cw > 0 && ch > 0 &&
187
+ (insetX > w * 0.1 || insetY > h * 0.1);
188
+ if (significantClip) {
189
+ // Check if clip is a simple axis-aligned rectangle (re or 4 line segments)
190
+ const isRectClip = isRectangularClipPath(drawing.clipPath);
191
+ if (!isRectClip) {
192
+ const clipD = buildPathData(drawing.clipPath, x0, y0, true, true);
193
+ clipDef = `<defs><clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
194
+ clipAttr = ' clip-path="url(#c)"';
195
+ // If both gradient defs and clip defs are needed, merge them
196
+ if (defs) {
197
+ clipDef = defs.replace('</defs>', '') + `<clipPath id="c"><path d="${clipD}"/></clipPath></defs>`;
198
+ defs = '';
199
+ }
200
+ }
201
+ }
202
+ }
203
+ const allDefs = clipDef || defs;
204
+ const pathEl = `<path d="${dAttr}" fill="${fillValue}" ${strokeAttr}${fillRule}/>`;
205
+ const svgStr = clipAttr
206
+ ? `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}<g${clipAttr}>${pathEl}</g></svg>`
207
+ : `<svg viewBox="0 0 ${w} ${h}" xmlns="http://www.w3.org/2000/svg">${allDefs}${pathEl}</svg>`;
154
208
  return { svg: svgStr, x: x0, y: y0, width: w, height: h };
155
209
  }
156
210
  export function clippedDrawingsToSvg(drawings) {
@@ -197,8 +197,11 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
197
197
  // When font size clearly differs, spans are separate visual elements even if
198
198
  // close together (e.g. bold heading label next to body text list). Use a
199
199
  // smaller gap threshold for such cases.
200
+ const fontNameDiffers = prev.fontName !== curr.fontName;
200
201
  const fontSizeDiffers = fontRatio > 1.1;
201
- const effectiveSplitGap = fontSizeDiffers ? Math.max(avgFontSize * 0.5, 8) : splitGap;
202
+ const effectiveSplitGap = (fontSizeDiffers || fontNameDiffers)
203
+ ? Math.max(avgFontSize * 0.5, 8)
204
+ : splitGap;
202
205
  if (gap > effectiveSplitGap) {
203
206
  const forceSplit = prev.color !== curr.color ||
204
207
  prev.fontName !== curr.fontName ||
@@ -303,7 +306,7 @@ function assignBlockNumbers(spans, startBlockNo = 0) {
303
306
  if (!fontSizeChanged &&
304
307
  !colorChanged &&
305
308
  !largeSingleSpanDisplayShift &&
306
- yGap < avgFontSize * 0.5 &&
309
+ yGap <= avgFontSize * 0.5 &&
307
310
  (xOverlap > 0 || Math.abs(prev.x0 - curr.x0) < avgFontSize * 2)) {
308
311
  currentBlock.push(curr);
309
312
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polotno/pdf-import",
3
- "version": "0.0.3",
3
+ "version": "0.0.4",
4
4
  "description": "Convert PDF files into Polotno JSON format",
5
5
  "type": "module",
6
6
  "main": "./lib/index.js",
@@ -28,19 +28,19 @@
28
28
  "pdfjs-dist": "^4.10.38"
29
29
  },
30
30
  "devDependencies": {
31
- "@types/node": "^25.3.3",
31
+ "@types/node": "^25.5.0",
32
32
  "@types/react": "^19.2.14",
33
33
  "@types/react-dom": "^19.2.3",
34
- "@vitejs/plugin-react": "^5.1.4",
35
- "esbuild": "^0.27.3",
36
- "polotno": "^2.37.1",
37
- "polotno-node": "^2.15.13",
34
+ "@vitejs/plugin-react": "^6.0.1",
35
+ "esbuild": "^0.27.4",
36
+ "polotno": "^2.38.2",
37
+ "polotno-node": "^2.15.15",
38
38
  "react": "^18.3.1",
39
39
  "react-dom": "^18",
40
40
  "sharp": "^0.34.5",
41
41
  "ssim.js": "^3.5.0",
42
42
  "typescript": "~5.9.3",
43
- "vite": "^7.3.1",
44
- "vitest": "^4.0.18"
43
+ "vite": "^8.0.0",
44
+ "vitest": "^4.1.0"
45
45
  }
46
46
  }