@polotno/pdf-import 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -0
- package/lib/color-utils.d.ts +2 -0
- package/lib/color-utils.js +10 -0
- package/lib/constants.d.ts +13 -0
- package/lib/constants.js +111 -0
- package/lib/font-mapper.d.ts +7 -0
- package/lib/font-mapper.js +111 -0
- package/lib/font-matcher.d.ts +10 -0
- package/lib/font-matcher.js +89 -0
- package/lib/font-merger.d.ts +7 -0
- package/lib/font-merger.js +114 -0
- package/lib/font-registry.d.ts +15 -0
- package/lib/font-registry.js +110 -0
- package/lib/image-encoder.d.ts +3 -0
- package/lib/image-encoder.js +181 -0
- package/lib/index.d.ts +97 -0
- package/lib/index.js +1 -0
- package/lib/operator-list-helpers.d.ts +6 -0
- package/lib/operator-list-helpers.js +26 -0
- package/lib/operator-list.d.ts +99 -0
- package/lib/operator-list.js +528 -0
- package/lib/page-parser.d.ts +18 -0
- package/lib/page-parser.js +674 -0
- package/lib/pdf-image-extractor.d.ts +14 -0
- package/lib/pdf-image-extractor.js +91 -0
- package/lib/svg-builder.d.ts +23 -0
- package/lib/svg-builder.js +213 -0
- package/lib/text-blocks.d.ts +6 -0
- package/lib/text-blocks.js +294 -0
- package/lib/text-grouper.d.ts +11 -0
- package/lib/text-grouper.js +11 -0
- package/lib/text-layout.d.ts +3 -0
- package/lib/text-layout.js +318 -0
- package/lib/text-span-extractor.d.ts +5 -0
- package/lib/text-span-extractor.js +271 -0
- package/lib/text-types.d.ts +25 -0
- package/lib/text-types.js +2 -0
- package/package.json +46 -0
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
import { OPS } from 'pdfjs-dist/legacy/build/pdf.mjs';
|
|
2
|
+
import { argsToMatrix, isTextShowOp, isVisualOrderOp, multiplyMatrices, } from './operator-list-helpers.js';
|
|
3
|
+
// pdfjs setFillRGBColor/setStrokeRGBColor args are Uint8ClampedArray 0-255
|
|
4
|
+
/**
 * Format an RGB operator argument list as an uppercase `#RRGGBB` string.
 *
 * Each of the first three entries is rounded to the nearest integer and
 * masked to its low 8 bits before being rendered as two hex digits.
 *
 * @param args Indexable list whose entries 0, 1 and 2 are red, green and blue.
 * @returns The colour as `#RRGGBB`.
 */
function argsToHex(args) {
    // Round, keep the low byte, and render it as two uppercase hex digits.
    const byteToHex = (component) => (Math.round(component) & 0xff).toString(16).toUpperCase().padStart(2, '0');
    return `#${byteToHex(args[0])}${byteToHex(args[1])}${byteToHex(args[2])}`;
}
|
|
13
|
+
/**
 * Format a single gray level as an uppercase `#RRGGBB` string in which all
 * three channels carry the same value.
 *
 * @param args Indexable list whose entry 0 is the gray level.
 * @returns The gray colour as `#RRGGBB`.
 */
function grayToHex(args) {
    // Round, keep the low byte, and render it as two uppercase hex digits.
    const channel = (Math.round(args[0]) & 0xff).toString(16).toUpperCase().padStart(2, '0');
    return `#${channel.repeat(3)}`;
}
|
|
18
|
+
/**
 * Convert a CMYK operator argument list to an uppercase `#RRGGBB` string
 * using the naive formula `channel = 255 * (1 - ink) * (1 - black)`.
 *
 * NOTE(review): every component is divided by 255 before use, i.e. the code
 * treats the inputs as 0-255 values. An earlier comment described them as
 * 0-1 floats; confirm which range pdf.js actually supplies.
 *
 * @param args Indexable list whose entries 0..3 are cyan, magenta, yellow, black.
 * @returns The colour as `#RRGGBB`.
 */
function cmykToHex(args) {
    const black = args[3] / 255;
    // Scale one ink component to 0-1, apply the formula, then format the low byte.
    const inkToHex = (ink) => (Math.round(255 * (1 - ink / 255) * (1 - black)) & 0xff).toString(16).toUpperCase().padStart(2, '0');
    return '#' + inkToHex(args[0]) + inkToHex(args[1]) + inkToHex(args[2]);
}
|
|
29
|
+
/**
 * Scale the first three entries of an operator argument list by 1/255.
 *
 * @param args Indexable list whose entries 0, 1 and 2 are red, green and blue.
 * @returns A new `[r, g, b]` array of the scaled values.
 */
function argsToRgbFloat(args) {
    return [0, 1, 2].map((channelIndex) => args[channelIndex] / 255);
}
|
|
32
|
+
/**
 * Scale a single gray level by 1/255 and replicate it across three channels.
 *
 * @param args Indexable list whose entry 0 is the gray level.
 * @returns A new `[v, v, v]` array.
 */
function grayToRgbFloat(args) {
    const level = args[0] / 255;
    return new Array(3).fill(level);
}
|
|
36
|
+
/**
 * Convert a CMYK operator argument list to `[r, g, b]` floats using the
 * naive formula `channel = (1 - ink) * (1 - black)`, after dividing every
 * input component by 255.
 *
 * @param args Indexable list whose entries 0..3 are cyan, magenta, yellow, black.
 * @returns A new `[r, g, b]` array.
 */
function cmykToRgbFloat(args) {
    // Fraction of light that survives the black component.
    const keep = 1 - args[3] / 255;
    return [args[0], args[1], args[2]].map((ink) => (1 - ink / 255) * keep);
}
|
|
40
|
+
/**
 * Scan an operator list and record, for each font name, the fill colour that
 * was active the last time text was shown with that font.
 *
 * Only `showText` and `showSpacedText` count as text-showing operators here.
 * Text shown before any `setFont` (empty font name) is not recorded.
 *
 * @param ops Operator list exposing parallel `fnArray` / `argsArray` arrays.
 * @returns Map from font name to `#RRGGBB` fill colour.
 */
export function extractTextColors(ops) {
    const colorByFont = new Map();
    let fillHex = '#000000';
    let fontName = '';
    for (let index = 0; index < ops.fnArray.length; index++) {
        const opArgs = ops.argsArray[index];
        switch (ops.fnArray[index]) {
            case OPS.setFillRGBColor:
                fillHex = argsToHex(opArgs);
                break;
            case OPS.setFillGray:
                fillHex = grayToHex(opArgs);
                break;
            case OPS.setFillCMYKColor:
                fillHex = cmykToHex(opArgs);
                break;
            case OPS.setFont:
                fontName = opArgs[0];
                break;
            case OPS.showText:
            case OPS.showSpacedText:
                // A later use of the same font overwrites the earlier colour.
                if (fontName) {
                    colorByFont.set(fontName, fillHex);
                }
                break;
            default:
                break;
        }
    }
    return colorByFont;
}
|
|
67
|
+
/**
 * Walk an operator list and emit one record per text-showing operator with
 * the page position, fill colour and font that were active at that point.
 *
 * Position is the text-matrix translation (as tracked from `beginText`,
 * `setTextMatrix` and `moveText`) mapped through the current transformation
 * matrix, with Y flipped as `pageHeight - y`.
 *
 * `orderIndex` counts text-showing operators and the operators accepted by
 * `isVisualOrderOp`, so that indices stay comparable with those produced by
 * `extractDrawingsAndImages`.
 *
 * NOTE(review): only the translation part of the text matrix is tracked;
 * `moveText` offsets are added unscaled, and glyph advances are not applied.
 *
 * @param ops Operator list exposing parallel `fnArray` / `argsArray` arrays.
 * @param pageHeight Page height used to flip the Y axis.
 * @returns Array of `{ x, y, color, fontName, fontSize, orderIndex }` records;
 *          the array also carries a `fontRefs` Set of every font name seen in
 *          a `setFont` operator.
 */
export function extractTextPositionColors(ops, pageHeight) {
    const result = [];
    const fontRefs = new Set();
    let currentFillColor = '#000000';
    let currentFontName = '';
    let currentFontSize = 12;
    let elementOrder = 0;
    // Current transformation matrix and the save/restore stack of graphics state.
    let ctm = [1, 0, 0, 1, 0, 0];
    const stateStack = [];
    // Translation part of the text matrix.
    let tmX = 0;
    let tmY = 0;
    for (let i = 0; i < ops.fnArray.length; i++) {
        const fn = ops.fnArray[i];
        const args = ops.argsArray[i];
        if (fn === OPS.save) {
            // Font and font size are part of the graphics state too, so they
            // must be snapshotted alongside the CTM and fill colour.
            stateStack.push({
                ctm: [...ctm],
                fill: currentFillColor,
                fontName: currentFontName,
                fontSize: currentFontSize,
            });
        } else if (fn === OPS.restore) {
            const state = stateStack.pop();
            // An unbalanced restore (empty stack) is ignored.
            if (state) {
                ctm = state.ctm;
                currentFillColor = state.fill;
                currentFontName = state.fontName;
                currentFontSize = state.fontSize;
            }
        } else if (fn === OPS.transform) {
            ctm = multiplyMatrices(argsToMatrix(args), ctm);
        } else if (fn === OPS.setFillRGBColor) {
            currentFillColor = argsToHex(args);
        } else if (fn === OPS.setFillGray) {
            currentFillColor = grayToHex(args);
        } else if (fn === OPS.setFillCMYKColor) {
            currentFillColor = cmykToHex(args);
        } else if (fn === OPS.setFont) {
            currentFontName = args[0];
            currentFontSize = args[1];
            fontRefs.add(currentFontName);
        } else if (fn === OPS.beginText) {
            // BT resets the text matrix to identity; without this reset,
            // moveText (Td) values accumulate across BT/ET blocks.
            tmX = 0;
            tmY = 0;
        } else if (fn === OPS.setTextMatrix) {
            tmX = args[4];
            tmY = args[5];
        } else if (fn === OPS.moveText) {
            tmX += args[0];
            tmY += args[1];
        } else if (isTextShowOp(fn)) {
            // Map the text-matrix translation through the CTM to page coordinates.
            const px = ctm[0] * tmX + ctm[2] * tmY + ctm[4];
            const py = ctm[1] * tmX + ctm[3] * tmY + ctm[5];
            result.push({
                x: px,
                y: pageHeight - py,
                color: currentFillColor,
                fontName: currentFontName,
                fontSize: currentFontSize,
                orderIndex: elementOrder++,
            });
        } else if (isVisualOrderOp(fn)) {
            // Count non-text visual ops to keep orderIndex in sync with extractDrawingsAndImages.
            elementOrder++;
        }
    }
    result.fontRefs = fontRefs;
    return result;
}
|
|
145
|
+
/**
 * Walk an operator list and collect vector drawings and image placements in
 * page coordinates (Y flipped as `pageHeight - y`).
 *
 * Tracked state: CTM, fill/stroke colour, line width, fill opacity (`ca` from
 * `setGState`), the most recent clip path, and shading names referenced by a
 * `TilingPattern` fill. `save` / `restore` snapshot and reinstate all of it.
 *
 * `orderIndex` is incremented for every fill/stroke paint operator, every
 * `paintImageXObject`, and every text-showing operator, so indices stay
 * comparable with those produced by `extractTextPositionColors`.
 *
 * Paint operators encountered inside a soft-mask group (`beginGroup` whose
 * info has `smask`) consume an order index but emit no drawing.
 *
 * @param ops Operator list exposing parallel `fnArray` / `argsArray` arrays.
 * @param pageHeight Page height used to flip the Y axis.
 * @returns `{ drawings, imageRefs }`.
 */
export function extractDrawingsAndImages(ops, pageHeight) {
    const drawings = [];
    const imageRefs = [];
    let elementOrder = 0;
    let fillColor = [0, 0, 0];
    let strokeColor = [0, 0, 0];
    let fillGradient = null;
    // Shading references from TilingPattern fills, resolved later by the caller.
    let pendingShadingNames = null;
    let pendingShadingMatrix = null;
    let strokeWidth = 1;
    let opacity = 1.0;
    // Current transformation matrix and the save/restore stack of graphics state.
    let ctm = [1, 0, 0, 1, 0, 0];
    const stateStack = [];
    // Most recent clip path, attached to drawings and images.
    let currentClipPath = null;
    let currentClipRect = null;
    // Depth of nested soft-mask groups; content inside is a mask definition, not visible.
    let smaskDepth = 0;
    // Path accumulated since the last paint / endPath, with its bounding box.
    let pathItems = [];
    let pathMinX = Infinity, pathMinY = Infinity, pathMaxX = -Infinity, pathMaxY = -Infinity;
    // Current point and start of the current subpath (both in page coordinates).
    let curX = 0, curY = 0;
    let subpathStartX = 0, subpathStartY = 0;
    // Apply the CTM to a user-space point and flip Y into page coordinates.
    function transformPoint(x, y) {
        const tx = ctm[0] * x + ctm[2] * y + ctm[4];
        const ty = ctm[1] * x + ctm[3] * y + ctm[5];
        return [tx, pageHeight - ty];
    }
    // Grow the accumulated path bounding box to include (x, y).
    function expandBounds(x, y) {
        if (x < pathMinX)
            pathMinX = x;
        if (y < pathMinY)
            pathMinY = y;
        if (x > pathMaxX)
            pathMaxX = x;
        if (y > pathMaxY)
            pathMaxY = y;
    }
    // Discard the accumulated path and its bounding box.
    function resetPath() {
        pathItems = [];
        pathMinX = Infinity;
        pathMinY = Infinity;
        pathMaxX = -Infinity;
        pathMaxY = -Infinity;
    }
    // True when there is a non-empty path with a finite bounding box.
    function hasPath() {
        return pathItems.length > 0 && pathMinX < Infinity;
    }
    // Stroke widths are given in user space; path coords are already transformed,
    // so scale the width by the CTM's X-axis scale factor.
    function scaledStrokeWidth() {
        return strokeWidth * Math.sqrt(ctm[0] * ctm[0] + ctm[1] * ctm[1]);
    }
    for (let i = 0; i < ops.fnArray.length; i++) {
        const fn = ops.fnArray[i];
        const args = ops.argsArray[i];
        if (fn === OPS.save) {
            stateStack.push({
                ctm: [...ctm],
                fill: fillColor ? [...fillColor] : null,
                stroke: strokeColor ? [...strokeColor] : null,
                fillGradient,
                // Pending shading refs belong to the fill state and must be restored with it.
                shadingNames: pendingShadingNames,
                shadingMatrix: pendingShadingMatrix,
                strokeWidth,
                opacity,
                clipPath: currentClipPath,
                clipRect: currentClipRect,
            });
        } else if (fn === OPS.restore) {
            const state = stateStack.pop();
            // An unbalanced restore (empty stack) is ignored.
            if (state) {
                ctm = state.ctm;
                fillColor = state.fill;
                strokeColor = state.stroke;
                fillGradient = state.fillGradient;
                pendingShadingNames = state.shadingNames;
                pendingShadingMatrix = state.shadingMatrix;
                strokeWidth = state.strokeWidth;
                opacity = state.opacity;
                currentClipPath = state.clipPath;
                currentClipRect = state.clipRect;
            }
        } else if (fn === OPS.transform) {
            ctm = multiplyMatrices(argsToMatrix(args), ctm);
        } else if (fn === OPS.setFillRGBColor) {
            fillColor = argsToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        } else if (fn === OPS.setFillGray) {
            fillColor = grayToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        } else if (fn === OPS.setFillCMYKColor) {
            fillColor = cmykToRgbFloat(args);
            fillGradient = null;
            pendingShadingNames = null;
        } else if (fn === OPS.setFillColorN) {
            // Pattern/gradient fill — extract shading info from TilingPattern.
            if (args && args[0] === 'TilingPattern') {
                const patternOps = args[2]; // embedded operator list
                if (patternOps && patternOps.argsArray) {
                    // Collect distinct shading names and the first matrix found
                    // in the pattern's own operator list.
                    const names = [];
                    let matrix = null;
                    for (const pArgs of patternOps.argsArray) {
                        if (Array.isArray(pArgs) && pArgs[0] === 'Shading' && typeof pArgs[1] === 'string') {
                            if (!names.includes(pArgs[1]))
                                names.push(pArgs[1]);
                            if (!matrix && Array.isArray(pArgs[2])) {
                                matrix = pArgs[2];
                            }
                        }
                    }
                    if (names.length > 0) {
                        pendingShadingNames = names;
                        pendingShadingMatrix = matrix;
                    }
                }
                // Set fill to null — gradient will be resolved later.
                fillColor = null;
                fillGradient = null;
            }
        } else if (fn === OPS.setStrokeRGBColor) {
            strokeColor = argsToRgbFloat(args);
        } else if (fn === OPS.setStrokeGray) {
            strokeColor = grayToRgbFloat(args);
        } else if (fn === OPS.setStrokeCMYKColor) {
            strokeColor = cmykToRgbFloat(args);
        } else if (fn === OPS.setGState) {
            // setGState args[0] is an array of [key, value] pairs.
            const pairs = args[0];
            if (Array.isArray(pairs)) {
                for (const [key, value] of pairs) {
                    if (key === 'ca') {
                        // ca = fill opacity (0-1)
                        opacity = value;
                    }
                }
            }
        } else if (fn === OPS.setLineWidth) {
            strokeWidth = args[0];
        } else if (fn === OPS.constructPath) {
            // args[0] = array of sub-ops, args[1] = flat array of coordinates.
            const subOps = args[0];
            const coords = args[1];
            let ci = 0;
            for (const subOp of subOps) {
                if (subOp === OPS.moveTo) {
                    const [tx, ty] = transformPoint(coords[ci], coords[ci + 1]);
                    ci += 2;
                    pathItems.push({ kind: 'm', x: tx, y: ty });
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                    subpathStartX = tx;
                    subpathStartY = ty;
                } else if (subOp === OPS.lineTo) {
                    const [tx, ty] = transformPoint(coords[ci], coords[ci + 1]);
                    ci += 2;
                    pathItems.push({ kind: 'l', x1: curX, y1: curY, x2: tx, y2: ty });
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                } else if (subOp === OPS.curveTo) {
                    const [cpx1, cpy1] = transformPoint(coords[ci], coords[ci + 1]);
                    const [cpx2, cpy2] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    const [tx, ty] = transformPoint(coords[ci + 4], coords[ci + 5]);
                    ci += 6;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1,
                        cpy1,
                        cpx2,
                        cpy2,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx1, cpy1);
                    expandBounds(cpx2, cpy2);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                } else if (subOp === OPS.curveTo2) {
                    // curveTo2: current point doubles as the first control point.
                    const [cpx2, cpy2] = transformPoint(coords[ci], coords[ci + 1]);
                    const [tx, ty] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    ci += 4;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1: curX,
                        cpy1: curY,
                        cpx2,
                        cpy2,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx2, cpy2);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                } else if (subOp === OPS.curveTo3) {
                    // curveTo3: endpoint doubles as the last control point.
                    const [cpx1, cpy1] = transformPoint(coords[ci], coords[ci + 1]);
                    const [tx, ty] = transformPoint(coords[ci + 2], coords[ci + 3]);
                    ci += 4;
                    pathItems.push({
                        kind: 'c',
                        x1: curX,
                        y1: curY,
                        cpx1,
                        cpy1,
                        cpx2: tx,
                        cpy2: ty,
                        x2: tx,
                        y2: ty,
                    });
                    expandBounds(cpx1, cpy1);
                    expandBounds(tx, ty);
                    curX = tx;
                    curY = ty;
                } else if (subOp === OPS.rectangle) {
                    const rx = coords[ci], ry = coords[ci + 1], rw = coords[ci + 2], rh = coords[ci + 3];
                    ci += 4;
                    const [tx0, ty0] = transformPoint(rx, ry);
                    const [tx1, ty1] = transformPoint(rx + rw, ry);
                    const [tx2, ty2] = transformPoint(rx + rw, ry + rh);
                    const [tx3, ty3] = transformPoint(rx, ry + rh);
                    // The rectangle is recorded as the axis-aligned box of its transformed corners.
                    const minX = Math.min(tx0, tx1, tx2, tx3);
                    const minY = Math.min(ty0, ty1, ty2, ty3);
                    const maxX = Math.max(tx0, tx1, tx2, tx3);
                    const maxY = Math.max(ty0, ty1, ty2, ty3);
                    pathItems.push({
                        kind: 're',
                        x: minX,
                        y: minY,
                        w: maxX - minX,
                        h: maxY - minY,
                    });
                    expandBounds(minX, minY);
                    expandBounds(maxX, maxY);
                    // `re` is a closed subpath starting at (x, y), so the current
                    // point ends up at that start corner — not at the box minimum,
                    // which differs once Y is flipped.
                    curX = tx0;
                    curY = ty0;
                    subpathStartX = tx0;
                    subpathStartY = ty0;
                } else if (subOp === OPS.closePath) {
                    // Closing a subpath consumes no coordinates but moves the
                    // current point back to where the subpath began.
                    curX = subpathStartX;
                    curY = subpathStartY;
                }
            }
        } else if (fn === OPS.fill ||
            fn === OPS.eoFill ||
            fn === OPS.fillStroke ||
            fn === OPS.eoFillStroke) {
            // Always increment order to stay in sync with extractTextPositionColors.
            const order = elementOrder++;
            // Inside a soft-mask group the path is a mask definition: skip it.
            if (smaskDepth === 0 && hasPath()) {
                const hasStroke = fn === OPS.fillStroke || fn === OPS.eoFillStroke;
                drawings.push({
                    rect: [pathMinX, pathMinY, pathMaxX, pathMaxY],
                    // fillColor is null while a pattern fill is pending resolution.
                    fill: fillColor ? [...fillColor] : null,
                    stroke: hasStroke && strokeColor ? [...strokeColor] : null,
                    strokeWidth: scaledStrokeWidth(),
                    items: [...pathItems],
                    opacity,
                    evenOdd: fn === OPS.eoFill || fn === OPS.eoFillStroke,
                    closePath: true,
                    orderIndex: order,
                    gradient: fillGradient,
                    clipPath: currentClipPath,
                    clipRect: currentClipRect,
                    _shadingNames: pendingShadingNames,
                    _shadingMatrix: pendingShadingMatrix,
                });
            }
            resetPath();
        } else if (fn === OPS.stroke) {
            // Always increment order to stay in sync with extractTextPositionColors.
            const order = elementOrder++;
            if (smaskDepth === 0 && hasPath()) {
                drawings.push({
                    rect: [pathMinX, pathMinY, pathMaxX, pathMaxY],
                    fill: null,
                    stroke: strokeColor ? [...strokeColor] : null,
                    strokeWidth: scaledStrokeWidth(),
                    items: [...pathItems],
                    opacity,
                    evenOdd: false,
                    closePath: false,
                    orderIndex: order,
                    gradient: null,
                    clipPath: currentClipPath,
                    clipRect: currentClipRect,
                });
            }
            resetPath();
        } else if (fn === OPS.clip || fn === OPS.eoClip) {
            // Capture the current path as the clip for subsequent painting.
            if (hasPath()) {
                // Only replace an existing clip with a non-rectangular one
                // (PDF clips are intersected; a rectangle inside a circle still shows the circle).
                const isSimpleRect = pathItems.length === 1 && pathItems[0].kind === 're';
                if (!currentClipPath || !isSimpleRect) {
                    currentClipPath = [...pathItems];
                    currentClipRect = [pathMinX, pathMinY, pathMaxX, pathMaxY];
                }
            }
            // Don't reset the path — endPath will do that.
        } else if (fn === OPS.endPath) {
            resetPath();
        } else if (fn === OPS.beginGroup) {
            // A group carrying a soft mask (smask) holds mask-definition content.
            const groupInfo = args[0];
            if (groupInfo && groupInfo.smask) {
                smaskDepth++;
            }
        } else if (fn === OPS.endGroup) {
            if (smaskDepth > 0) {
                smaskDepth--;
            }
        } else if (fn === OPS.paintImageXObject) {
            // args[0] = image name, args[1] = width, args[2] = height
            const name = args[0];
            // The image occupies the unit square transformed by the CTM.
            // Unit square corners: (0,0)=BL, (1,0)=BR, (1,1)=TR, (0,1)=TL in image space
            // (Y is flipped in image space, so (0,1) is top-left).
            const tl = transformPoint(0, 1);
            const tr = transformPoint(1, 1);
            const bl = transformPoint(0, 0);
            // Width/height from the edge lengths of the transformed parallelogram.
            const w = Math.sqrt((tr[0] - tl[0]) ** 2 + (tr[1] - tl[1]) ** 2);
            const h = Math.sqrt((bl[0] - tl[0]) ** 2 + (bl[1] - tl[1]) ** 2);
            // Rotation: angle of the top edge from horizontal (degrees, clockwise).
            const rotation = Math.atan2(tr[1] - tl[1], tr[0] - tl[0]) * 180 / Math.PI;
            // Position: top-left corner in screen space (polotno rotates around top-left).
            const x = tl[0];
            const y = tl[1];
            imageRefs.push({ name, x, y, width: w, height: h, rotation, clipPath: currentClipPath, clipRect: currentClipRect, orderIndex: elementOrder++ });
        } else if (isTextShowOp(fn)) {
            // Count text ops to keep orderIndex in sync with extractTextPositionColors.
            elementOrder++;
        }
    }
    return { drawings, imageRefs };
}
|
|
528
|
+
//# sourceMappingURL=operator-list.js.map
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { FontRegistry } from './font-registry.js';
|
|
2
|
+
import { type RawImageStream } from './pdf-image-extractor.js';
|
|
3
|
+
import type { PolotnoPage } from './index.js';
|
|
4
|
+
/** Options object accepted by `parsePage`. */
interface ParsePageOptions {
    /** The page to parse. NOTE(review): typed `any`; presumably a pdf.js page proxy — confirm. */
    page: any;
    /** Index of the page. NOTE(review): presumably zero-based — confirm against the caller. */
    pageIdx: number;
    /** Font registry instance passed into the parse. */
    fontRegistry: FontRegistry;
    /** Callback returning a string id. NOTE(review): presumably one fresh id per generated element — confirm. */
    generateId: () => string;
    /** Raw image streams keyed by number. NOTE(review): meaning of the key is not visible here — confirm. */
    jpegIndex: Map<number, RawImageStream>;
}
|
|
11
|
+
/** Value the promise returned by `parsePage` resolves to. */
interface ParsePageResult {
    /** The page converted to the `PolotnoPage` shape. */
    parsedPage: PolotnoPage;
    /** Page width. NOTE(review): units are not visible here — confirm. */
    pageWidth: number;
    /** Page height. NOTE(review): units are not visible here — confirm. */
    pageHeight: number;
}
|
|
16
|
+
/**
 * Parse a single page into a `PolotnoPage`.
 *
 * @param options The page, its index, the font registry, an id generator and the image-stream index.
 * @returns A promise resolving to the parsed page together with the page width and height.
 */
export declare function parsePage({ page, pageIdx, fontRegistry, generateId, jpegIndex, }: ParsePageOptions): Promise<ParsePageResult>;
|
|
17
|
+
export {};
|
|
18
|
+
//# sourceMappingURL=page-parser.d.ts.map
|