@polotno/pdf-import 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,528 @@
1
+ import { OPS } from 'pdfjs-dist/legacy/build/pdf.mjs';
2
+ import { argsToMatrix, isTextShowOp, isVisualOrderOp, multiplyMatrices, } from './operator-list-helpers.js';
3
/**
 * Formats an RGB triple as an uppercase `#RRGGBB` string.
 *
 * pdfjs setFillRGBColor/setStrokeRGBColor args are Uint8ClampedArray 0-255.
 * Each of the first three entries is rounded and masked to its low 8 bits
 * before being written as two hex digits.
 *
 * @param args Array-like whose entries 0, 1 and 2 are the red, green and blue channels.
 * @returns The colour as `#RRGGBB` with uppercase hex digits.
 */
function argsToHex(args) {
    const channels = [args[0], args[1], args[2]];
    let hex = '#';
    for (const channel of channels) {
        const byte = Math.round(channel) & 0xff;
        hex += byte.toString(16).toUpperCase().padStart(2, '0');
    }
    return hex;
}
13
/**
 * Formats a single gray level as an uppercase `#RRGGBB` string in which all
 * three channels carry the same value.
 *
 * @param args Array-like whose entry 0 is the gray level; it is rounded and
 *   masked to its low 8 bits.
 * @returns The gray colour as `#RRGGBB`.
 */
function grayToHex(args) {
    const level = Math.round(args[0]) & 0xff;
    const pair = level.toString(16).toUpperCase().padStart(2, '0');
    return `#${pair.repeat(3)}`;
}
18
/**
 * Converts a CMYK quadruple to an uppercase `#RRGGBB` string using the naive
 * formula `channel = 255 * (1 - ink) * (1 - k)`.
 *
 * Every input component is divided by 255 first, i.e. the arithmetic treats
 * the inputs as 0-255 values.
 * NOTE(review): a previous comment described the args as 0-1 floats, which
 * does not match this scaling — confirm what pdfjs actually delivers.
 *
 * @param args Array-like whose entries 0..3 are cyan, magenta, yellow and key.
 * @returns The colour as `#RRGGBB`.
 */
function cmykToHex(args) {
    const black = args[3] / 255;
    const inkToHex = (raw) => {
        const ink = raw / 255;
        const value = Math.round(255 * (1 - ink) * (1 - black));
        return (value & 0xff).toString(16).toUpperCase().padStart(2, '0');
    };
    return '#' + inkToHex(args[0]) + inkToHex(args[1]) + inkToHex(args[2]);
}
29
/**
 * Scales the first three entries of `args` by 1/255.
 *
 * @param args Array-like whose entries 0, 1 and 2 are the red, green and blue channels.
 * @returns A new `[r, g, b]` array of the scaled values.
 */
function argsToRgbFloat(args) {
    const scaled = [];
    for (let channel = 0; channel < 3; channel++) {
        scaled.push(args[channel] / 255);
    }
    return scaled;
}
32
/**
 * Scales a single gray level by 1/255 and repeats it for all three channels.
 *
 * @param args Array-like whose entry 0 is the gray level.
 * @returns A new `[v, v, v]` array.
 */
function grayToRgbFloat(args) {
    const level = args[0] / 255;
    return new Array(3).fill(level);
}
36
/**
 * Converts a CMYK quadruple to an `[r, g, b]` array using
 * `channel = (1 - ink) * (1 - k)`, after dividing every input component by 255.
 *
 * @param args Array-like whose entries 0..3 are cyan, magenta, yellow and key.
 * @returns A new `[r, g, b]` array.
 */
function cmykToRgbFloat(args) {
    const keep = 1 - args[3] / 255;
    const fromInk = (raw) => (1 - raw / 255) * keep;
    return [fromInk(args[0]), fromInk(args[1]), fromInk(args[2])];
}
40
/**
 * Scans an operator list and records, per font name, the fill colour that was
 * active the last time text was shown with that font.
 *
 * The fill colour starts as `#000000` and the font name as the empty string;
 * text shown before any `setFont` is not recorded.
 *
 * @param ops Object with parallel `fnArray` / `argsArray` arrays.
 * @returns Map from font name (first `setFont` argument) to `#RRGGBB` colour.
 */
export function extractTextColors(ops) {
    const colorByFont = new Map();
    let fill = '#000000';
    let fontName = '';
    for (let idx = 0; idx < ops.fnArray.length; idx++) {
        const opArgs = ops.argsArray[idx];
        switch (ops.fnArray[idx]) {
            case OPS.setFillRGBColor:
                fill = argsToHex(opArgs);
                break;
            case OPS.setFillGray:
                fill = grayToHex(opArgs);
                break;
            case OPS.setFillCMYKColor:
                fill = cmykToHex(opArgs);
                break;
            case OPS.setFont:
                fontName = opArgs[0];
                break;
            case OPS.showText:
            case OPS.showSpacedText:
                // A later show with the same font overwrites the earlier colour.
                if (fontName) {
                    colorByFont.set(fontName, fill);
                }
                break;
            default:
                break;
        }
    }
    return colorByFont;
}
67
/**
 * Walks an operator list and emits one entry per text-show operator with the
 * page position of the current text line origin, the active fill colour and
 * the active font.
 *
 * Positions are computed by tracking the text line matrix (set by
 * `setTextMatrix`, reset by `beginText`, advanced by `moveText`,
 * `setLeadingMoveText` and `nextLine`) and mapping its translation through the
 * current transformation matrix. The y coordinate is flipped with
 * `pageHeight - y`. The horizontal advance caused by showing glyphs is not
 * tracked, so several shows on one line report the same origin.
 *
 * `save`/`restore` preserve the CTM, fill colour, font name, font size and
 * leading, because text state is part of the PDF graphics state.
 *
 * @param ops Object with parallel `fnArray` / `argsArray` arrays.
 * @param pageHeight Page height used to flip the y axis.
 * @returns Array of `{ x, y, color, fontName, fontSize, orderIndex }` entries.
 *   The array additionally carries a `fontRefs` property: the Set of every
 *   font name seen in a `setFont` operator.
 */
export function extractTextPositionColors(ops, pageHeight) {
    const result = [];
    const fontRefs = new Set();
    let currentFillColor = '#000000';
    let currentFontName = '';
    let currentFontSize = 12;
    let elementOrder = 0;
    // Graphics-state stack (q/Q): CTM plus the colour and text-state values tracked here.
    let ctm = [1, 0, 0, 1, 0, 0];
    const stateStack = [];
    // Text line matrix [a, b, c, d, e, f]; only its translation is reported,
    // but the linear part is needed to apply Td/TD/T* offsets correctly.
    let textLineMatrix = [1, 0, 0, 1, 0, 0];
    let leading = 0;
    // Td semantics: translate by (tx, ty) expressed in text space, i.e. the
    // offset is transformed by the linear part of the text line matrix.
    function moveTextLine(tx, ty) {
        const [a, b, c, d, e, f] = textLineMatrix;
        textLineMatrix = [a, b, c, d, tx * a + ty * c + e, tx * b + ty * d + f];
    }
    for (let i = 0; i < ops.fnArray.length; i++) {
        const fn = ops.fnArray[i];
        const args = ops.argsArray[i];
        if (fn === OPS.save) {
            stateStack.push({
                ctm: [...ctm],
                fill: currentFillColor,
                fontName: currentFontName,
                fontSize: currentFontSize,
                leading,
            });
        }
        else if (fn === OPS.restore) {
            const state = stateStack.pop();
            if (state) {
                ctm = state.ctm;
                currentFillColor = state.fill;
                currentFontName = state.fontName;
                currentFontSize = state.fontSize;
                leading = state.leading;
            }
        }
        else if (fn === OPS.transform) {
            ctm = multiplyMatrices(argsToMatrix(args), ctm);
        }
        else if (fn === OPS.setFillRGBColor) {
            currentFillColor = argsToHex(args);
        }
        else if (fn === OPS.setFillGray) {
            currentFillColor = grayToHex(args);
        }
        else if (fn === OPS.setFillCMYKColor) {
            currentFillColor = cmykToHex(args);
        }
        else if (fn === OPS.setFont) {
            currentFontName = args[0];
            currentFontSize = args[1];
            fontRefs.add(currentFontName);
        }
        else if (fn === OPS.setLeading) {
            leading = args[0];
        }
        else if (fn === OPS.beginText) {
            // BT resets the text matrix to identity; without this reset,
            // moveText (Td) values accumulate across BT/ET blocks.
            textLineMatrix = [1, 0, 0, 1, 0, 0];
        }
        else if (fn === OPS.setTextMatrix) {
            textLineMatrix = [args[0], args[1], args[2], args[3], args[4], args[5]];
        }
        else if (fn === OPS.moveText) {
            moveTextLine(args[0], args[1]);
        }
        else if (fn === OPS.setLeadingMoveText) {
            // TD: same as Td, and additionally sets the leading to -ty.
            leading = -args[1];
            moveTextLine(args[0], args[1]);
        }
        else if (fn === OPS.nextLine) {
            // T*: move to the start of the next line, i.e. Td(0, -leading).
            moveTextLine(0, -leading);
        }
        else if (isTextShowOp(fn)) {
            // Transform text line origin through CTM to get page coordinates
            const tmX = textLineMatrix[4];
            const tmY = textLineMatrix[5];
            const px = ctm[0] * tmX + ctm[2] * tmY + ctm[4];
            const py = ctm[1] * tmX + ctm[3] * tmY + ctm[5];
            result.push({
                x: px,
                y: pageHeight - py,
                color: currentFillColor,
                fontName: currentFontName,
                fontSize: currentFontSize,
                orderIndex: elementOrder++,
            });
        }
        else if (isVisualOrderOp(fn)) {
            // Count non-text visual ops to keep orderIndex in sync with extractDrawingsAndImages
            elementOrder++;
        }
    }
    result.fontRefs = fontRefs;
    return result;
}
145
/**
 * Walks an operator list and collects painted vector paths ("drawings") and
 * image placements, both expressed in top-left-origin page coordinates
 * (y is flipped with `pageHeight - y`).
 *
 * Tracked state: CTM, fill/stroke colour (as `[r, g, b]` arrays scaled by
 * 1/255), line width, fill opacity (`ca` from `setGState`), the current clip
 * path, and shading names referenced by a TilingPattern fill. All of it is
 * saved and restored by `save`/`restore`.
 *
 * Paths painted inside a soft-mask group are skipped (they define a mask, not
 * visible content) but still consume an order index. Every fill, stroke,
 * image and text-show operator consumes one order index so the numbering
 * stays in sync with extractTextPositionColors.
 *
 * @param ops Object with parallel `fnArray` / `argsArray` arrays.
 * @param pageHeight Page height used to flip the y axis.
 * @returns `{ drawings, imageRefs }`.
 */
export function extractDrawingsAndImages(ops, pageHeight) {
    const drawings = [];
    const imageRefs = [];
    let elementOrder = 0;
    let fillColor = [0, 0, 0];
    let strokeColor = [0, 0, 0];
    // Never set to a non-null value in this function; kept so the output
    // shape (`gradient`) is stable for later resolution steps.
    let fillGradient = null;
    // Track shading references from TilingPattern fills for later resolution
    let pendingShadingNames = null;
    let pendingShadingMatrix = null;
    let strokeWidth = 1;
    let opacity = 1.0;
    // CTM stack for tracking transforms
    let ctm = [1, 0, 0, 1, 0, 0];
    // Track current clip path for images
    let currentClipPath = null;
    let currentClipRect = null;
    const ctmStack = [];
    // Track soft mask groups — drawings inside smask groups are mask definitions, not visible.
    // `groupIsSmask` mirrors the beginGroup/endGroup nesting so that closing an
    // ordinary group nested inside a soft-mask group does not end the mask.
    let smaskDepth = 0;
    const groupIsSmask = [];
    // Accumulated path data
    let pathItems = [];
    let pathMinX = Infinity, pathMinY = Infinity, pathMaxX = -Infinity, pathMaxY = -Infinity;
    let curX = 0, curY = 0;
    function transformPoint(x, y) {
        const tx = ctm[0] * x + ctm[2] * y + ctm[4];
        const ty = ctm[1] * x + ctm[3] * y + ctm[5];
        return [tx, pageHeight - ty];
    }
    function expandBounds(x, y) {
        if (x < pathMinX)
            pathMinX = x;
        if (y < pathMinY)
            pathMinY = y;
        if (x > pathMaxX)
            pathMaxX = x;
        if (y > pathMaxY)
            pathMaxY = y;
    }
    function resetPath() {
        pathItems = [];
        pathMinX = Infinity;
        pathMinY = Infinity;
        pathMaxX = -Infinity;
        pathMaxY = -Infinity;
    }
    // Scale stroke width by CTM scale factor (path coords are already transformed)
    function scaledStrokeWidth() {
        const ctmScale = Math.sqrt(ctm[0] * ctm[0] + ctm[1] * ctm[1]);
        return strokeWidth * ctmScale;
    }
    function hasPaintablePath() {
        return pathItems.length > 0 && pathMinX < Infinity;
    }
    for (let i = 0; i < ops.fnArray.length; i++) {
        const fn = ops.fnArray[i];
        const args = ops.argsArray[i];
        if (fn === OPS.save) {
            ctmStack.push({
                ctm: [...ctm],
                fill: fillColor ? [...fillColor] : null,
                stroke: strokeColor ? [...strokeColor] : null,
                fillGradient,
                shadingNames: pendingShadingNames,
                shadingMatrix: pendingShadingMatrix,
                strokeWidth,
                opacity,
                clipPath: currentClipPath,
                clipRect: currentClipRect,
            });
        }
        else if (fn === OPS.restore) {
            const state = ctmStack.pop();
            if (state) {
                ctm = state.ctm;
                fillColor = state.fill;
                strokeColor = state.stroke;
                fillGradient = state.fillGradient;
                // The fill pattern belongs to the graphics state, so a pattern
                // selected inside q/Q must not survive the restore.
                pendingShadingNames = state.shadingNames;
                pendingShadingMatrix = state.shadingMatrix;
                strokeWidth = state.strokeWidth;
                opacity = state.opacity;
                currentClipPath = state.clipPath;
                currentClipRect = state.clipRect;
            }
        }
        else if (fn === OPS.transform) {
            ctm = multiplyMatrices(argsToMatrix(args), ctm);
        }
        else if (fn === OPS.setFillRGBColor) {
            fillColor = argsToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        }
        else if (fn === OPS.setFillGray) {
            fillColor = grayToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        }
        else if (fn === OPS.setFillCMYKColor) {
            fillColor = cmykToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        }
        else if (fn === OPS.setFillColorN) {
            // Pattern/gradient fill — extract shading info from TilingPattern
            if (args && args[0] === 'TilingPattern') {
                // A new pattern replaces whatever shading the previous one referenced.
                pendingShadingNames = null;
                pendingShadingMatrix = null;
                const patternOps = args[2]; // embedded operator list
                if (patternOps && patternOps.argsArray) {
                    // Find shading references in the pattern's operator list
                    const names = [];
                    let matrix = null;
                    for (const pArgs of patternOps.argsArray) {
                        if (Array.isArray(pArgs) && pArgs[0] === 'Shading' && typeof pArgs[1] === 'string') {
                            if (!names.includes(pArgs[1]))
                                names.push(pArgs[1]);
                            if (!matrix && Array.isArray(pArgs[2])) {
                                matrix = pArgs[2];
                            }
                        }
                    }
                    if (names.length > 0) {
                        pendingShadingNames = names;
                        pendingShadingMatrix = matrix;
                    }
                }
                // Set fill to null — gradient will be resolved later
                fillColor = null;
                fillGradient = null;
            }
        }
        else if (fn === OPS.setStrokeRGBColor) {
            strokeColor = argsToRgbFloat(args);
        }
        else if (fn === OPS.setStrokeGray) {
            strokeColor = grayToRgbFloat(args);
        }
        else if (fn === OPS.setStrokeCMYKColor) {
            strokeColor = cmykToRgbFloat(args);
        }
        else if (fn === OPS.setGState) {
            // setGState args is an array of [key, value] pairs
            const pairs = args[0];
            if (Array.isArray(pairs)) {
                for (const [key, value] of pairs) {
                    if (key === 'ca') {
                        // ca = fill opacity (0-1)
                        opacity = value;
                    }
                }
            }
        }
        else if (fn === OPS.setLineWidth) {
            strokeWidth = args[0];
        }
        else if (fn === OPS.constructPath) {
            // args[0] = array of sub-ops, args[1] = flat array of coordinates
            const subOps = args[0];
            const coords = args[1];
            let ci = 0;
            for (const subOp of subOps) {
                if (subOp === OPS.moveTo) {
                    const [tx, ty] = transformPoint(coords[ci], coords[ci + 1]);
                    ci += 2;
                    pathItems.push({ kind: 'm', x: tx, y: ty });
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                }
                else if (subOp === OPS.lineTo) {
                    const [tx, ty] = transformPoint(coords[ci], coords[ci + 1]);
                    ci += 2;
                    pathItems.push({ kind: 'l', x1: curX, y1: curY, x2: tx, y2: ty });
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                }
                else if (subOp === OPS.curveTo) {
                    const [cpx1, cpy1] = transformPoint(coords[ci], coords[ci + 1]);
                    const [cpx2, cpy2] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    const [tx, ty] = transformPoint(coords[ci + 4], coords[ci + 5]);
                    ci += 6;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1,
                        cpy1,
                        cpx2,
                        cpy2,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx1, cpy1);
                    expandBounds(cpx2, cpy2);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                }
                else if (subOp === OPS.curveTo2) {
                    // curveTo2: current point as first control point
                    const [cpx2, cpy2] = transformPoint(coords[ci], coords[ci + 1]);
                    const [tx, ty] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    ci += 4;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1: curX,
                        cpy1: curY,
                        cpx2,
                        cpy2,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx2, cpy2);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                }
                else if (subOp === OPS.curveTo3) {
                    // curveTo3: endpoint as last control point
                    const [cpx1, cpy1] = transformPoint(coords[ci], coords[ci + 1]);
                    const [tx, ty] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    ci += 4;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1,
                        cpy1,
                        cpx2: tx,
                        cpy2: ty,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx1, cpy1);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                }
                else if (subOp === OPS.rectangle) {
                    const rx = coords[ci], ry = coords[ci + 1], rw = coords[ci + 2], rh = coords[ci + 3];
                    ci += 4;
                    const [tx0, ty0] = transformPoint(rx, ry);
                    const [tx1, ty1] = transformPoint(rx + rw, ry);
                    const [tx2, ty2] = transformPoint(rx + rw, ry + rh);
                    const [tx3, ty3] = transformPoint(rx, ry + rh);
                    // Use the min/max of transformed corners; under a rotating
                    // CTM this records the axis-aligned bounding box.
                    const minX = Math.min(tx0, tx1, tx2, tx3);
                    const minY = Math.min(ty0, ty1, ty2, ty3);
                    const maxX = Math.max(tx0, tx1, tx2, tx3);
                    const maxY = Math.max(ty0, ty1, ty2, ty3);
                    pathItems.push({
                        kind: 're',
                        x: minX,
                        y: minY,
                        w: maxX - minX,
                        h: maxY - minY,
                    });
                    expandBounds(minX, minY);
                    expandBounds(maxX, maxY);
                    curX = minX;
                    curY = minY;
                }
                else if (subOp === OPS.closePath) {
                    // close subpath - no coords consumed
                }
            }
        }
        else if (fn === OPS.fill ||
            fn === OPS.eoFill ||
            fn === OPS.fillStroke ||
            fn === OPS.eoFillStroke) {
            // Always increment order to stay in sync with extractTextPositionColors
            const order = elementOrder++;
            // Inside a soft mask group the path is a mask definition, not a visible drawing.
            if (smaskDepth === 0 && hasPaintablePath()) {
                const hasStroke = fn === OPS.fillStroke || fn === OPS.eoFillStroke;
                drawings.push({
                    rect: [pathMinX, pathMinY, pathMaxX, pathMaxY],
                    fill: fillColor ? [...fillColor] : null,
                    stroke: hasStroke && strokeColor ? [...strokeColor] : null,
                    strokeWidth: scaledStrokeWidth(),
                    items: [...pathItems],
                    opacity,
                    evenOdd: fn === OPS.eoFill || fn === OPS.eoFillStroke,
                    closePath: true,
                    orderIndex: order,
                    gradient: fillGradient,
                    clipPath: currentClipPath,
                    clipRect: currentClipRect,
                    _shadingNames: pendingShadingNames,
                    _shadingMatrix: pendingShadingMatrix,
                });
            }
            resetPath();
        }
        else if (fn === OPS.stroke) {
            // Always increment order to stay in sync with extractTextPositionColors
            const order = elementOrder++;
            if (smaskDepth === 0 && hasPaintablePath()) {
                drawings.push({
                    rect: [pathMinX, pathMinY, pathMaxX, pathMaxY],
                    fill: null,
                    stroke: strokeColor ? [...strokeColor] : null,
                    strokeWidth: scaledStrokeWidth(),
                    items: [...pathItems],
                    opacity,
                    evenOdd: false,
                    closePath: false,
                    orderIndex: order,
                    gradient: null,
                    clipPath: currentClipPath,
                    clipRect: currentClipRect,
                });
            }
            resetPath();
        }
        else if (fn === OPS.clip || fn === OPS.eoClip) {
            // Capture current path as clip path for subsequent image painting
            if (hasPaintablePath()) {
                // Only set clip if we don't already have a non-rectangular one
                // (PDF clips are intersected; a rectangle inside a circle still shows the circle)
                const isSimpleRect = pathItems.length === 1 && pathItems[0].kind === 're';
                if (!currentClipPath || !isSimpleRect) {
                    currentClipPath = [...pathItems];
                    currentClipRect = [pathMinX, pathMinY, pathMaxX, pathMaxY];
                }
            }
            // Don't reset path — endPath will do that
        }
        else if (fn === OPS.endPath) {
            resetPath();
        }
        else if (fn === OPS.beginGroup) {
            // Check if this group has a soft mask (smask) — content inside is mask definition
            const groupInfo = args[0];
            const isSmask = Boolean(groupInfo && groupInfo.smask);
            groupIsSmask.push(isSmask);
            if (isSmask) {
                smaskDepth++;
            }
        }
        else if (fn === OPS.endGroup) {
            // Only leave mask mode when the group being closed is the one that entered it.
            const closedSmask = groupIsSmask.pop();
            if (closedSmask && smaskDepth > 0) {
                smaskDepth--;
            }
        }
        else if (fn === OPS.paintImageXObject) {
            // args[0] = image name, args[1] = width, args[2] = height
            const name = args[0];
            // Image is painted in a 1x1 unit square transformed by CTM.
            // Unit square corners: (0,0)=BL, (1,0)=BR, (1,1)=TR, (0,1)=TL in image space
            // (Y is flipped in image space, so (0,1) is top-left)
            const tl = transformPoint(0, 1);
            const tr = transformPoint(1, 1);
            const bl = transformPoint(0, 0);
            // Width/height from edge lengths of the transformed parallelogram
            const w = Math.sqrt((tr[0] - tl[0]) ** 2 + (tr[1] - tl[1]) ** 2);
            const h = Math.sqrt((bl[0] - tl[0]) ** 2 + (bl[1] - tl[1]) ** 2);
            // Rotation: angle of the top edge from horizontal (degrees, clockwise)
            const rotation = Math.atan2(tr[1] - tl[1], tr[0] - tl[0]) * 180 / Math.PI;
            // Position: top-left corner in screen space (polotno rotates around top-left)
            const x = tl[0];
            const y = tl[1];
            imageRefs.push({ name, x, y, width: w, height: h, rotation, clipPath: currentClipPath, clipRect: currentClipRect, orderIndex: elementOrder++ });
        }
        else if (isTextShowOp(fn)) {
            // Count text ops to keep orderIndex in sync with extractTextPositionColors
            elementOrder++;
        }
    }
    return { drawings, imageRefs };
}
528
+ //# sourceMappingURL=operator-list.js.map
@@ -0,0 +1,18 @@
1
+ import { FontRegistry } from './font-registry.js';
2
+ import { type RawImageStream } from './pdf-image-extractor.js';
3
+ import type { PolotnoPage } from './index.js';
4
/** Arguments accepted by `parsePage`. */
interface ParsePageOptions {
    /** The page to parse. NOTE(review): typed `any`; presumably a pdfjs page proxy — confirm. */
    page: any;
    /** Index of the page. NOTE(review): presumably zero-based — confirm against the caller. */
    pageIdx: number;
    /** Font registry instance handed to the parser. */
    fontRegistry: FontRegistry;
    /** Callback that returns a new id string on each call. */
    generateId: () => string;
    /** Raw image streams keyed by number. NOTE(review): key meaning is not visible here — confirm. */
    jpegIndex: Map<number, RawImageStream>;
}
11
/** Value that the promise returned by `parsePage` resolves to. */
interface ParsePageResult {
    /** The page converted to the Polotno page model. */
    parsedPage: PolotnoPage;
    /** Width of the page. NOTE(review): units are not visible here — confirm. */
    pageWidth: number;
    /** Height of the page. NOTE(review): units are not visible here — confirm. */
    pageHeight: number;
}
16
/**
 * Asynchronously turns one page into a `ParsePageResult`.
 *
 * @param options The page, its index, the font registry, an id generator and
 *   the image-stream index; see `ParsePageOptions`.
 * @returns A promise for the parsed page together with its width and height.
 */
export declare function parsePage(options: ParsePageOptions): Promise<ParsePageResult>;
17
+ export {};
18
+ //# sourceMappingURL=page-parser.d.ts.map