@u1f992/pdfdiff 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.clang-format +3 -0
- package/.github/workflows/gh-pages.yml +6 -6
- package/.vscode/extensions.json +1 -1
- package/.vscode/settings.json +1 -1
- package/LICENSE +68 -81
- package/README.md +7 -0
- package/dist/browser.js +243 -3109
- package/dist/browser.js.map +1 -1
- package/dist/cli-png-worker.d.ts.map +1 -1
- package/dist/cli-png-worker.js +0 -16
- package/dist/cli-png-worker.js.map +1 -1
- package/dist/cli.js +270 -3151
- package/dist/cli.js.map +1 -1
- package/dist/core.wasm +0 -0
- package/dist/decode.d.ts +9 -0
- package/dist/decode.d.ts.map +1 -0
- package/dist/diff.d.ts.map +1 -1
- package/dist/gs-wasm/gs.js +5821 -0
- package/dist/gs-wasm/gs.wasm +0 -0
- package/dist/gs-wasm/index.js +120 -0
- package/dist/gs-wasm/index.js.map +1 -0
- package/dist/gs-wasm/worker.js +764 -0
- package/dist/gs-wasm/worker.js.map +1 -0
- package/dist/image.d.ts.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.html +1 -1
- package/dist/index.js +242 -3109
- package/dist/index.js.map +1 -1
- package/dist/iterable.d.ts.map +1 -1
- package/dist/jimp.d.ts +23 -1
- package/dist/jimp.d.ts.map +1 -1
- package/dist/pdf.d.ts +15 -4
- package/dist/pdf.d.ts.map +1 -1
- package/dist/perf.d.ts.map +1 -1
- package/dist/rgba-color.d.ts.map +1 -1
- package/dist/transferable.d.ts +6 -2
- package/dist/transferable.d.ts.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/worker.d.ts +6 -8
- package/dist/worker.d.ts.map +1 -1
- package/dist/worker.js +70 -3311
- package/dist/worker.js.map +1 -1
- package/package.json +10 -5
- package/prettier.config.js +1 -1
- package/rollup.config.js +63 -5
- package/scripts/build-wasm.sh +32 -0
- package/src/browser.ts +9 -6
- package/src/cli-png-worker.ts +0 -17
- package/src/cli.ts +38 -23
- package/src/decode.ts +13 -0
- package/src/diff.ts +0 -17
- package/src/image.ts +1 -18
- package/src/index.html +1 -1
- package/src/index.test.ts +10 -18
- package/src/index.ts +170 -74
- package/src/iterable.test.ts +0 -17
- package/src/iterable.ts +0 -17
- package/src/jimp.ts +25 -7
- package/src/pdf.ts +100 -69
- package/src/perf.ts +0 -17
- package/src/rgba-color.test.ts +0 -17
- package/src/rgba-color.ts +0 -17
- package/src/transferable.ts +6 -21
- package/src/worker.ts +91 -87
- package/tsconfig.json +53 -50
- package/wasm/Makefile +34 -0
- package/wasm/bindings.cpp +76 -0
- package/wasm/core.c +179 -0
- package/wasm/core.h +69 -0
- package/dist/mupdf-wasm.wasm +0 -0
package/src/index.ts
CHANGED
|
@@ -1,29 +1,13 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
import * as jimp from "jimp";
|
|
19
|
-
import * as mupdf from "mupdf";
|
|
20
2
|
import Worker from "web-worker";
|
|
21
3
|
|
|
22
4
|
import { type Pallet } from "./diff.ts";
|
|
23
5
|
import { isValidAlignStrategy, type AlignStrategy } from "./image.ts";
|
|
24
6
|
import { withIndex } from "./iterable.ts";
|
|
7
|
+
import { countPages, renderPageRangePng } from "./pdf.ts";
|
|
25
8
|
import { perf } from "./perf.ts";
|
|
26
9
|
import { parseHex, formatHex } from "./rgba-color.ts";
|
|
10
|
+
import { sliceBackingBuffer } from "./transferable.ts";
|
|
27
11
|
import { VERSION } from "./version.ts";
|
|
28
12
|
import type { JimpInstance } from "./jimp.ts";
|
|
29
13
|
import type {
|
|
@@ -55,6 +39,15 @@ type Result = {
|
|
|
55
39
|
modification: [number, number][];
|
|
56
40
|
};
|
|
57
41
|
|
|
42
|
+
// Default parallelism scales with the machine: rendering and diffing run across
|
|
43
|
+
// several workers, so the out-of-the-box run uses the CPU rather than a single
|
|
44
|
+
// core. Capped at 4 to keep the default memory footprint and oversubscription
|
|
45
|
+
// modest; raise --workers explicitly for large jobs on big machines.
|
|
46
|
+
export const defaultWorkers = Math.max(
|
|
47
|
+
1,
|
|
48
|
+
Math.min(globalThis.navigator?.hardwareConcurrency ?? 1, 4),
|
|
49
|
+
);
|
|
50
|
+
|
|
58
51
|
export const defaultOptions: Options = {
|
|
59
52
|
dpi: 150,
|
|
60
53
|
alpha: true,
|
|
@@ -65,25 +58,9 @@ export const defaultOptions: Options = {
|
|
|
65
58
|
deletion: [0xff, 0x57, 0x24, 0xff],
|
|
66
59
|
modification: [0xff, 0xc1, 0x05, 0xff],
|
|
67
60
|
},
|
|
68
|
-
workers:
|
|
61
|
+
workers: defaultWorkers,
|
|
69
62
|
};
|
|
70
63
|
|
|
71
|
-
function asSharedBytes(bytes: Uint8Array): Uint8Array {
|
|
72
|
-
const isNode =
|
|
73
|
-
typeof globalThis.process !== "undefined" &&
|
|
74
|
-
!!globalThis.process.versions?.node;
|
|
75
|
-
const coiOk =
|
|
76
|
-
(globalThis as { crossOriginIsolated?: boolean }).crossOriginIsolated ===
|
|
77
|
-
true;
|
|
78
|
-
if (typeof SharedArrayBuffer !== "undefined" && (isNode || coiOk)) {
|
|
79
|
-
const sab = new SharedArrayBuffer(bytes.byteLength);
|
|
80
|
-
const view = new Uint8Array(sab);
|
|
81
|
-
view.set(bytes);
|
|
82
|
-
return view;
|
|
83
|
-
}
|
|
84
|
-
return new Uint8Array(bytes);
|
|
85
|
-
}
|
|
86
|
-
|
|
87
64
|
type WorkerResponse =
|
|
88
65
|
| LoadedMessage
|
|
89
66
|
| ReadyMessage
|
|
@@ -136,12 +113,29 @@ class WorkerHandle {
|
|
|
136
113
|
});
|
|
137
114
|
}
|
|
138
115
|
|
|
139
|
-
|
|
116
|
+
processDiff(
|
|
117
|
+
index: number,
|
|
118
|
+
a: Uint8Array<ArrayBuffer> | null,
|
|
119
|
+
b: Uint8Array<ArrayBuffer> | null,
|
|
120
|
+
mask: Uint8Array<ArrayBuffer> | null,
|
|
121
|
+
): Promise<PageResultMessage> {
|
|
140
122
|
return new Promise<PageResultMessage>((resolve, reject) => {
|
|
141
123
|
this.pendingResolve = resolve as (data: WorkerResponse) => void;
|
|
142
124
|
this.pendingReject = reject;
|
|
143
|
-
const
|
|
144
|
-
|
|
125
|
+
const aBuf = a !== null ? sliceBackingBuffer(a) : null;
|
|
126
|
+
const bBuf = b !== null ? sliceBackingBuffer(b) : null;
|
|
127
|
+
const maskBuf = mask !== null ? sliceBackingBuffer(mask) : null;
|
|
128
|
+
const msg: PageMessage = {
|
|
129
|
+
type: "page",
|
|
130
|
+
index,
|
|
131
|
+
a: aBuf,
|
|
132
|
+
b: bBuf,
|
|
133
|
+
mask: maskBuf,
|
|
134
|
+
};
|
|
135
|
+
const transfer = [aBuf, bBuf, maskBuf].filter(
|
|
136
|
+
(buf): buf is ArrayBuffer => buf !== null,
|
|
137
|
+
);
|
|
138
|
+
this.worker.postMessage(msg, transfer);
|
|
145
139
|
});
|
|
146
140
|
}
|
|
147
141
|
|
|
@@ -155,6 +149,15 @@ function workerUrl(): URL {
|
|
|
155
149
|
return new URL(`${file}?v=${encodeURIComponent(VERSION)}`, import.meta.url);
|
|
156
150
|
}
|
|
157
151
|
|
|
152
|
+
function unpackCoords(buf: ArrayBuffer): [number, number][] {
|
|
153
|
+
const arr = new Int32Array(buf);
|
|
154
|
+
const out: [number, number][] = new Array(arr.length >>> 1);
|
|
155
|
+
for (let i = 0, j = 0; j < out.length; i += 2, j++) {
|
|
156
|
+
out[j] = [arr[i]!, arr[i + 1]!];
|
|
157
|
+
}
|
|
158
|
+
return out;
|
|
159
|
+
}
|
|
160
|
+
|
|
158
161
|
function pageResultToResult(msg: PageResultMessage): Result {
|
|
159
162
|
const sP = perf.span("main.pageResultToResult_ms");
|
|
160
163
|
const r = {
|
|
@@ -173,9 +176,9 @@ function pageResultToResult(msg: PageResultMessage): Result {
|
|
|
173
176
|
height: msg.diff.height,
|
|
174
177
|
data: new Uint8Array(msg.diff.data),
|
|
175
178
|
}) as JimpInstance,
|
|
176
|
-
addition: msg.addition,
|
|
177
|
-
deletion: msg.deletion,
|
|
178
|
-
modification: msg.modification,
|
|
179
|
+
addition: unpackCoords(msg.addition),
|
|
180
|
+
deletion: unpackCoords(msg.deletion),
|
|
181
|
+
modification: unpackCoords(msg.modification),
|
|
179
182
|
};
|
|
180
183
|
sP.stop();
|
|
181
184
|
perf.incr("main.resultsReceived");
|
|
@@ -202,62 +205,151 @@ export async function* visualizeDifferences(
|
|
|
202
205
|
workers: options?.workers ?? defaultOptions.workers,
|
|
203
206
|
};
|
|
204
207
|
|
|
205
|
-
const
|
|
206
|
-
|
|
207
|
-
|
|
208
|
+
const [aPages, bPages, maskPages] = await Promise.all([
|
|
209
|
+
countPages(a),
|
|
210
|
+
countPages(b),
|
|
208
211
|
typeof merged.mask !== "undefined"
|
|
209
|
-
?
|
|
210
|
-
:
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
probeB.countPages(),
|
|
214
|
-
probeMask.countPages(),
|
|
215
|
-
);
|
|
216
|
-
probe.destroy();
|
|
217
|
-
probeB.destroy();
|
|
218
|
-
probeMask.destroy();
|
|
212
|
+
? countPages(merged.mask)
|
|
213
|
+
: Promise.resolve(0),
|
|
214
|
+
]);
|
|
215
|
+
const maxPages = Math.max(aPages, bPages, maskPages);
|
|
219
216
|
|
|
220
217
|
if (maxPages === 0) return;
|
|
221
218
|
|
|
222
|
-
const
|
|
223
|
-
const
|
|
224
|
-
const
|
|
225
|
-
typeof merged.mask !== "undefined" ? asSharedBytes(merged.mask) : null;
|
|
219
|
+
const mask = merged.mask;
|
|
220
|
+
const hasMask = typeof mask !== "undefined" && maskPages > 0;
|
|
221
|
+
const numDocs = hasMask ? 3 : 2;
|
|
226
222
|
|
|
227
223
|
const initMsg: InitMessage = {
|
|
228
224
|
type: "init",
|
|
229
|
-
aBytes,
|
|
230
|
-
bBytes,
|
|
231
|
-
maskBytes,
|
|
232
|
-
dpi: merged.dpi,
|
|
233
|
-
alpha: merged.alpha,
|
|
234
225
|
pallet: merged.pallet,
|
|
235
226
|
align: merged.align,
|
|
236
227
|
};
|
|
237
228
|
|
|
238
229
|
const N = Math.max(1, Math.min(merged.workers, maxPages));
|
|
239
230
|
const url = workerUrl();
|
|
240
|
-
const
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
const buffered = new Map<number, Result>();
|
|
244
|
-
let nextToAssign = 0;
|
|
245
|
-
|
|
246
|
-
const workers: WorkerHandle[] = [worker0];
|
|
247
|
-
for (let i = 1; i < N; i++) {
|
|
231
|
+
const workers: WorkerHandle[] = [];
|
|
232
|
+
for (let i = 0; i < N; i++) {
|
|
248
233
|
const w = new WorkerHandle(url);
|
|
249
234
|
await w.init(initMsg);
|
|
250
235
|
workers.push(w);
|
|
251
236
|
}
|
|
252
237
|
|
|
238
|
+
let aborted: unknown = null;
|
|
239
|
+
|
|
240
|
+
// One PNG slot per page per document, fulfilled as render chunks complete.
|
|
241
|
+
// Pages past a document's page count resolve to null (an empty/transparent
|
|
242
|
+
// page). The defensive catch keeps a chunk failure from surfacing as an
|
|
243
|
+
// unhandled rejection before a diff lane awaits the slot.
|
|
244
|
+
type Slot = {
|
|
245
|
+
p: Promise<Uint8Array<ArrayBuffer> | null>;
|
|
246
|
+
resolve: (v: Uint8Array<ArrayBuffer> | null) => void;
|
|
247
|
+
reject: (e: unknown) => void;
|
|
248
|
+
};
|
|
249
|
+
const makeSlots = (count: number): Slot[] =>
|
|
250
|
+
Array.from({ length: maxPages }, (_, i) => {
|
|
251
|
+
if (i >= count) {
|
|
252
|
+
return {
|
|
253
|
+
p: Promise.resolve(null),
|
|
254
|
+
resolve: () => {},
|
|
255
|
+
reject: () => {},
|
|
256
|
+
};
|
|
257
|
+
}
|
|
258
|
+
let resolve!: (v: Uint8Array<ArrayBuffer> | null) => void;
|
|
259
|
+
let reject!: (e: unknown) => void;
|
|
260
|
+
const p = new Promise<Uint8Array<ArrayBuffer> | null>((res, rej) => {
|
|
261
|
+
resolve = res;
|
|
262
|
+
reject = rej;
|
|
263
|
+
});
|
|
264
|
+
p.catch(() => {});
|
|
265
|
+
return { p, resolve, reject };
|
|
266
|
+
});
|
|
267
|
+
const slots = {
|
|
268
|
+
a: makeSlots(aPages),
|
|
269
|
+
b: makeSlots(bPages),
|
|
270
|
+
mask: makeSlots(hasMask ? maskPages : 0),
|
|
271
|
+
};
|
|
272
|
+
|
|
273
|
+
// Render chunk tasks: batch several pages per gs() call to amortize startup,
|
|
274
|
+
// interleaving A/B/mask by page range so the early pages of every document
|
|
275
|
+
// become available together (which keeps the diff stage fed).
|
|
276
|
+
// Render concurrency. Aim for ~2x as many chunks as render slots so pages
|
|
277
|
+
// arrive in waves and the diff/decode stage overlaps later renders instead of
|
|
278
|
+
// waiting for one big batch. A floor keeps each chunk large enough to amortize
|
|
279
|
+
// Ghostscript's per-call startup: when there are many slots relative to pages,
|
|
280
|
+
// the slots are already saturated, so batching beats finer streaming.
|
|
281
|
+
const MIN_CHUNK = 4;
|
|
282
|
+
const R = Math.max(merged.workers, numDocs);
|
|
283
|
+
const totalRenderPages = aPages + bPages + (hasMask ? maskPages : 0);
|
|
284
|
+
const chunkSize = Math.max(
|
|
285
|
+
1,
|
|
286
|
+
Math.min(
|
|
287
|
+
maxPages,
|
|
288
|
+
Math.max(MIN_CHUNK, Math.ceil(totalRenderPages / (2 * R))),
|
|
289
|
+
),
|
|
290
|
+
);
|
|
291
|
+
type Task = { bytes: Uint8Array; start: number; end: number; slots: Slot[] };
|
|
292
|
+
const tasks: Task[] = [];
|
|
293
|
+
const pushChunk = (
|
|
294
|
+
bytes: Uint8Array | undefined,
|
|
295
|
+
count: number,
|
|
296
|
+
target: Slot[],
|
|
297
|
+
start: number,
|
|
298
|
+
) => {
|
|
299
|
+
if (bytes === undefined || start >= count) return;
|
|
300
|
+
tasks.push({
|
|
301
|
+
bytes,
|
|
302
|
+
start,
|
|
303
|
+
end: Math.min(start + chunkSize, count) - 1,
|
|
304
|
+
slots: target,
|
|
305
|
+
});
|
|
306
|
+
};
|
|
307
|
+
for (let start = 0; start < maxPages; start += chunkSize) {
|
|
308
|
+
pushChunk(a, aPages, slots.a, start);
|
|
309
|
+
pushChunk(b, bPages, slots.b, start);
|
|
310
|
+
if (hasMask) pushChunk(mask, maskPages, slots.mask, start);
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
let taskIdx = 0;
|
|
314
|
+
const renderLoops = Array.from(
|
|
315
|
+
{ length: Math.min(R, tasks.length) },
|
|
316
|
+
async () => {
|
|
317
|
+
while (taskIdx < tasks.length && aborted === null) {
|
|
318
|
+
const t = tasks[taskIdx++]!;
|
|
319
|
+
try {
|
|
320
|
+
const pngs = await renderPageRangePng(
|
|
321
|
+
t.bytes,
|
|
322
|
+
t.start,
|
|
323
|
+
t.end,
|
|
324
|
+
merged.dpi,
|
|
325
|
+
merged.alpha,
|
|
326
|
+
);
|
|
327
|
+
for (let i = t.start; i <= t.end; i++) {
|
|
328
|
+
t.slots[i]!.resolve(pngs.get(i) ?? null);
|
|
329
|
+
}
|
|
330
|
+
} catch (e) {
|
|
331
|
+
aborted = e;
|
|
332
|
+
for (let i = t.start; i <= t.end; i++) t.slots[i]!.reject(e);
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
},
|
|
336
|
+
);
|
|
337
|
+
|
|
338
|
+
const buffered = new Map<number, Result>();
|
|
339
|
+
let nextToAssign = 0;
|
|
253
340
|
const resolvers = new Map<number, (r: Result) => void>();
|
|
254
341
|
let workerError: unknown = null;
|
|
255
342
|
|
|
256
|
-
const
|
|
343
|
+
const diffLoops = workers.map(async (w) => {
|
|
257
344
|
while (nextToAssign < maxPages && workerError === null) {
|
|
258
345
|
const idx = nextToAssign++;
|
|
259
346
|
try {
|
|
260
|
-
const
|
|
347
|
+
const [aPng, bPng, maskPng] = await Promise.all([
|
|
348
|
+
slots.a[idx]!.p,
|
|
349
|
+
slots.b[idx]!.p,
|
|
350
|
+
slots.mask[idx]!.p,
|
|
351
|
+
]);
|
|
352
|
+
const msg = await w.processDiff(idx, aPng, bPng, maskPng);
|
|
261
353
|
const result = pageResultToResult(msg);
|
|
262
354
|
const resolve = resolvers.get(idx);
|
|
263
355
|
if (resolve) {
|
|
@@ -269,6 +361,7 @@ export async function* visualizeDifferences(
|
|
|
269
361
|
}
|
|
270
362
|
} catch (e) {
|
|
271
363
|
workerError = e;
|
|
364
|
+
aborted = e;
|
|
272
365
|
for (const [, resolve] of resolvers) resolve(null as never);
|
|
273
366
|
resolvers.clear();
|
|
274
367
|
return;
|
|
@@ -294,8 +387,11 @@ export async function* visualizeDifferences(
|
|
|
294
387
|
yield r;
|
|
295
388
|
sYield.stop();
|
|
296
389
|
}
|
|
297
|
-
await Promise.all(
|
|
390
|
+
await Promise.all(diffLoops);
|
|
391
|
+
await Promise.all(renderLoops);
|
|
298
392
|
} finally {
|
|
393
|
+
aborted = aborted ?? new Error("aborted");
|
|
394
|
+
await Promise.allSettled(renderLoops);
|
|
299
395
|
for (const w of workers) w.terminate();
|
|
300
396
|
}
|
|
301
397
|
}
|
package/src/iterable.test.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
import assert from "assert";
|
|
19
2
|
import test from "node:test";
|
|
20
3
|
|
package/src/iterable.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
export async function* withIndex<T>(iter: AsyncIterable<T>, start = 0) {
|
|
19
2
|
let index = start;
|
|
20
3
|
for await (const item of iter) {
|
package/src/jimp.ts
CHANGED
|
@@ -1,15 +1,33 @@
|
|
|
1
1
|
import * as jimp from "jimp";
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* Narrowed view of `jimp.JimpInstance` used throughout the project.
|
|
5
|
+
*
|
|
6
|
+
* Two constraints are tightened relative to the upstream type:
|
|
7
|
+
*
|
|
8
|
+
* 1. `bitmap.data` is asserted to be `Uint8Array<ArrayBuffer>` (never
|
|
9
|
+
* SAB-backed). jimp allocates pixels via `Buffer`, which is always
|
|
10
|
+
* backed by a real ArrayBuffer in practice. Pinning the generic
|
|
11
|
+
* parameter here lets `sliceBackingBuffer` (and `postMessage` transfer
|
|
12
|
+
* lists) infer ArrayBuffer instead of ArrayBufferLike.
|
|
13
|
+
*
|
|
14
|
+
* 2. `resize` and `composite` return `JimpInstance` (this narrowed type)
|
|
15
|
+
* rather than upstream `jimp.JimpInstance`, so chaining preserves the
|
|
16
|
+
* bitmap-backing constraint above.
|
|
17
|
+
*/
|
|
3
18
|
export type JimpInstance = Pick<
|
|
4
19
|
jimp.JimpInstance,
|
|
5
|
-
| "
|
|
6
|
-
| "height"
|
|
7
|
-
| "bitmap"
|
|
8
|
-
| "getPixelColor"
|
|
9
|
-
| "setPixelColor"
|
|
10
|
-
| "resize"
|
|
11
|
-
| "composite"
|
|
20
|
+
"width" | "height" | "getPixelColor" | "setPixelColor"
|
|
12
21
|
> & {
|
|
22
|
+
bitmap: {
|
|
23
|
+
data: Uint8Array<ArrayBuffer>;
|
|
24
|
+
width: number;
|
|
25
|
+
height: number;
|
|
26
|
+
};
|
|
27
|
+
resize: (options: Parameters<jimp.JimpInstance["resize"]>[0]) => JimpInstance;
|
|
28
|
+
composite: (
|
|
29
|
+
...args: Parameters<jimp.JimpInstance["composite"]>
|
|
30
|
+
) => JimpInstance;
|
|
13
31
|
getBuffer: (mime: "image/png") => ReturnType<jimp.JimpInstance["getBuffer"]>;
|
|
14
32
|
getBase64: (mime: "image/png") => ReturnType<jimp.JimpInstance["getBase64"]>;
|
|
15
33
|
};
|
package/src/pdf.ts
CHANGED
|
@@ -1,82 +1,113 @@
|
|
|
1
|
+
import { gs } from "@u1f992/gs-wasm";
|
|
2
|
+
|
|
3
|
+
import { perf } from "./perf.ts";
|
|
4
|
+
|
|
1
5
|
/*
|
|
2
|
-
*
|
|
3
|
-
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
6
|
+
* Pages are rendered with Ghostscript (gs-wasm). Each gs() invocation spins up
|
|
7
|
+
* its own worker and Ghostscript instance, so we render a batch of pages per call and
|
|
8
|
+
* let the caller drive concurrency (e.g. by rendering A/B/mask together and by
|
|
9
|
+
* running multiple page workers).
|
|
13
10
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
11
|
+
* Ghostscript (via the `web-worker` package) must be invoked from a context
|
|
12
|
+
* that can itself spawn a worker. In Node's `web-worker` this is only the main
|
|
13
|
+
* thread, so rendering happens on the main thread and the resulting PNG bytes
|
|
14
|
+
* are handed off to the diff workers for decoding. The PDF is placed in
|
|
15
|
+
* Ghostscript's in-memory FS as `input.pdf` and each rendered page is read back as a PNG.
|
|
16
16
|
*/
|
|
17
|
+
const INPUT_VM_PATH = "input.pdf";
|
|
17
18
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
19
|
+
/**
|
|
20
|
+
* Count the pages of a PDF using Ghostscript's `pdfpagecount`. Runs with
|
|
21
|
+
* `-dNODISPLAY` (no rendering) so it is cheap relative to a page render.
|
|
22
|
+
*/
|
|
23
|
+
export async function countPages(pdf: Uint8Array): Promise<number> {
|
|
24
|
+
const span = perf.span("pdf.countPages_ms");
|
|
25
|
+
const out: number[] = [];
|
|
26
|
+
const { exitCode } = await gs({
|
|
27
|
+
args: [
|
|
28
|
+
"-q",
|
|
29
|
+
"-dNODISPLAY",
|
|
30
|
+
"-dNOSAFER",
|
|
31
|
+
"-c",
|
|
32
|
+
`(${INPUT_VM_PATH}) (r) file runpdfbegin pdfpagecount = quit`,
|
|
33
|
+
],
|
|
34
|
+
inputFiles: { [INPUT_VM_PATH]: pdf },
|
|
35
|
+
onStdout: (charCode) => {
|
|
36
|
+
if (charCode !== null) out.push(charCode);
|
|
37
|
+
},
|
|
38
|
+
});
|
|
39
|
+
span.stop();
|
|
40
|
+
if (exitCode !== 0) {
|
|
41
|
+
throw new Error(`gs countPages failed (exit ${exitCode})`);
|
|
42
|
+
}
|
|
43
|
+
const text = String.fromCharCode(...out).trim();
|
|
44
|
+
const n = Number.parseInt(text, 10);
|
|
45
|
+
if (!Number.isFinite(n) || n < 0) {
|
|
46
|
+
throw new Error(`gs countPages: unexpected output ${JSON.stringify(text)}`);
|
|
27
47
|
}
|
|
48
|
+
return n;
|
|
28
49
|
}
|
|
29
50
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
51
|
+
/**
|
|
52
|
+
* Render an inclusive range of (0-based) pages of a PDF to PNG bytes in a single
|
|
53
|
+
* Ghostscript invocation, returning a map keyed by 0-based page index. Batching
|
|
54
|
+
* several pages per call amortizes Ghostscript's startup and PDF parsing, which
|
|
55
|
+
* dominate a single-page render. `alpha` selects the device: `pngalpha` keeps
|
|
56
|
+
* the page background transparent (so the diff can tell "no content" from
|
|
57
|
+
* "content" via the alpha channel), while `png16m` renders opaque. Decoding to
|
|
58
|
+
* RGBA is left to the caller (the diff workers) so it can run off this thread.
|
|
59
|
+
*/
|
|
60
|
+
export async function renderPageRangePng(
|
|
61
|
+
pdf: Uint8Array,
|
|
62
|
+
firstIndex: number,
|
|
63
|
+
lastIndex: number,
|
|
64
|
+
dpi: number,
|
|
65
|
+
alpha: boolean,
|
|
66
|
+
): Promise<Map<number, Uint8Array<ArrayBuffer>>> {
|
|
67
|
+
const device = alpha ? "pngalpha" : "png16m";
|
|
68
|
+
const first = firstIndex + 1; // Ghostscript page numbers are 1-based.
|
|
69
|
+
const last = lastIndex + 1;
|
|
36
70
|
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
71
|
+
// `%d` in the output pattern is the 1-based index of the page *within this
|
|
72
|
+
// run* (it restarts at 1 regardless of -dFirstPage), so the k-th output maps
|
|
73
|
+
// back to absolute page (first + k - 1).
|
|
74
|
+
const pageCount = last - first + 1;
|
|
75
|
+
const names: string[] = [];
|
|
76
|
+
for (let k = 1; k <= pageCount; k++) names.push(`out-${k}.png`);
|
|
40
77
|
|
|
41
|
-
const
|
|
42
|
-
const
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
78
|
+
const sRender = perf.span("pdf.gsRender_ms");
|
|
79
|
+
const { exitCode, outputFiles } = await gs({
|
|
80
|
+
args: [
|
|
81
|
+
"-dNOPAUSE",
|
|
82
|
+
"-dBATCH",
|
|
83
|
+
"-dQUIET",
|
|
84
|
+
`-dFirstPage=${first}`,
|
|
85
|
+
`-dLastPage=${last}`,
|
|
86
|
+
`-sDEVICE=${device}`,
|
|
87
|
+
`-r${dpi}`,
|
|
88
|
+
"-dTextAlphaBits=4",
|
|
89
|
+
"-dGraphicsAlphaBits=4",
|
|
90
|
+
"-sOutputFile=out-%d.png",
|
|
91
|
+
INPUT_VM_PATH,
|
|
92
|
+
],
|
|
93
|
+
inputFiles: { [INPUT_VM_PATH]: pdf },
|
|
94
|
+
outputFilePaths: names,
|
|
95
|
+
});
|
|
96
|
+
sRender.stop();
|
|
97
|
+
if (exitCode !== 0) {
|
|
98
|
+
throw new Error(
|
|
99
|
+
`gs render failed (pages ${first}-${last}, exit ${exitCode})`,
|
|
100
|
+
);
|
|
101
|
+
}
|
|
102
|
+
const result = new Map<number, Uint8Array<ArrayBuffer>>();
|
|
103
|
+
for (let k = 1; k <= pageCount; k++) {
|
|
104
|
+
const png = outputFiles[`out-${k}.png`];
|
|
105
|
+
if (!png) {
|
|
106
|
+
throw new Error(`gs render produced no output (page ${first + k - 1})`);
|
|
53
107
|
}
|
|
108
|
+
result.set(firstIndex + (k - 1), png);
|
|
54
109
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
export async function pageToImage(
|
|
59
|
-
page: mupdf.Page,
|
|
60
|
-
dpi: number,
|
|
61
|
-
alpha: boolean,
|
|
62
|
-
) {
|
|
63
|
-
const zoom = dpi / 72;
|
|
64
|
-
const sToPixmap = perf.span("pdf.toPixmap_ms");
|
|
65
|
-
const pixmap = page.toPixmap(
|
|
66
|
-
[zoom, 0, 0, zoom, 0, 0],
|
|
67
|
-
mupdf.ColorSpace.DeviceRGB,
|
|
68
|
-
alpha,
|
|
69
|
-
);
|
|
70
|
-
const width = pixmap.getWidth();
|
|
71
|
-
const height = pixmap.getHeight();
|
|
72
|
-
sToPixmap.stop();
|
|
73
|
-
const sRgba = perf.span("pdf.pixmapToRGBA_ms");
|
|
74
|
-
const data = pixmapToRGBA(pixmap);
|
|
75
|
-
pixmap.destroy();
|
|
76
|
-
page.destroy();
|
|
77
|
-
sRgba.stop();
|
|
78
|
-
const sFromBitmap = perf.span("pdf.fromBitmap_ms");
|
|
79
|
-
const result = jimp.Jimp.fromBitmap({ width, height, data }) as JimpInstance;
|
|
80
|
-
sFromBitmap.stop();
|
|
110
|
+
perf.incr("pdf.gsCalls");
|
|
111
|
+
perf.incr("pdf.pagesRendered", pageCount);
|
|
81
112
|
return result;
|
|
82
113
|
}
|
package/src/perf.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
const _enabled = (() => {
|
|
19
2
|
try {
|
|
20
3
|
if (
|
package/src/rgba-color.test.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
import assert from "assert";
|
|
19
2
|
import test from "node:test";
|
|
20
3
|
|
package/src/rgba-color.ts
CHANGED
|
@@ -1,20 +1,3 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Copyright (C) 2025 Koutaro Mukai
|
|
3
|
-
*
|
|
4
|
-
* This program is free software: you can redistribute it and/or modify
|
|
5
|
-
* it under the terms of the GNU General Public License as published by
|
|
6
|
-
* the Free Software Foundation, either version 3 of the License, or
|
|
7
|
-
* (at your option) any later version.
|
|
8
|
-
*
|
|
9
|
-
* This program is distributed in the hope that it will be useful,
|
|
10
|
-
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
11
|
-
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
12
|
-
* GNU General Public License for more details.
|
|
13
|
-
*
|
|
14
|
-
* You should have received a copy of the GNU General Public License
|
|
15
|
-
* along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
16
|
-
*/
|
|
17
|
-
|
|
18
1
|
export type RGBAColor = [number, number, number, number];
|
|
19
2
|
|
|
20
3
|
export const parseHex = (hex: string) => {
|