@u1f992/pdfdiff 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/.clang-format +3 -0
  2. package/.github/workflows/gh-pages.yml +6 -6
  3. package/.vscode/extensions.json +1 -1
  4. package/.vscode/settings.json +1 -1
  5. package/LICENSE +68 -81
  6. package/README.md +7 -0
  7. package/dist/browser.js +243 -3109
  8. package/dist/browser.js.map +1 -1
  9. package/dist/cli-png-worker.d.ts.map +1 -1
  10. package/dist/cli-png-worker.js +0 -16
  11. package/dist/cli-png-worker.js.map +1 -1
  12. package/dist/cli.js +270 -3151
  13. package/dist/cli.js.map +1 -1
  14. package/dist/core.wasm +0 -0
  15. package/dist/decode.d.ts +9 -0
  16. package/dist/decode.d.ts.map +1 -0
  17. package/dist/diff.d.ts.map +1 -1
  18. package/dist/gs-wasm/gs.js +5821 -0
  19. package/dist/gs-wasm/gs.wasm +0 -0
  20. package/dist/gs-wasm/index.js +120 -0
  21. package/dist/gs-wasm/index.js.map +1 -0
  22. package/dist/gs-wasm/worker.js +764 -0
  23. package/dist/gs-wasm/worker.js.map +1 -0
  24. package/dist/image.d.ts.map +1 -1
  25. package/dist/index.d.ts +1 -0
  26. package/dist/index.d.ts.map +1 -1
  27. package/dist/index.html +1 -1
  28. package/dist/index.js +242 -3109
  29. package/dist/index.js.map +1 -1
  30. package/dist/iterable.d.ts.map +1 -1
  31. package/dist/jimp.d.ts +23 -1
  32. package/dist/jimp.d.ts.map +1 -1
  33. package/dist/pdf.d.ts +15 -4
  34. package/dist/pdf.d.ts.map +1 -1
  35. package/dist/perf.d.ts.map +1 -1
  36. package/dist/rgba-color.d.ts.map +1 -1
  37. package/dist/transferable.d.ts +6 -2
  38. package/dist/transferable.d.ts.map +1 -1
  39. package/dist/version.d.ts +1 -1
  40. package/dist/worker.d.ts +6 -8
  41. package/dist/worker.d.ts.map +1 -1
  42. package/dist/worker.js +70 -3311
  43. package/dist/worker.js.map +1 -1
  44. package/package.json +10 -5
  45. package/prettier.config.js +1 -1
  46. package/rollup.config.js +63 -5
  47. package/scripts/build-wasm.sh +32 -0
  48. package/src/browser.ts +9 -6
  49. package/src/cli-png-worker.ts +0 -17
  50. package/src/cli.ts +38 -23
  51. package/src/decode.ts +13 -0
  52. package/src/diff.ts +0 -17
  53. package/src/image.ts +1 -18
  54. package/src/index.html +1 -1
  55. package/src/index.test.ts +10 -18
  56. package/src/index.ts +170 -74
  57. package/src/iterable.test.ts +0 -17
  58. package/src/iterable.ts +0 -17
  59. package/src/jimp.ts +25 -7
  60. package/src/pdf.ts +100 -69
  61. package/src/perf.ts +0 -17
  62. package/src/rgba-color.test.ts +0 -17
  63. package/src/rgba-color.ts +0 -17
  64. package/src/transferable.ts +6 -21
  65. package/src/worker.ts +91 -87
  66. package/tsconfig.json +53 -50
  67. package/wasm/Makefile +34 -0
  68. package/wasm/bindings.cpp +76 -0
  69. package/wasm/core.c +179 -0
  70. package/wasm/core.h +69 -0
  71. package/dist/mupdf-wasm.wasm +0 -0
package/src/index.ts CHANGED
@@ -1,29 +1,13 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import * as jimp from "jimp";
19
- import * as mupdf from "mupdf";
20
2
  import Worker from "web-worker";
21
3
 
22
4
  import { type Pallet } from "./diff.ts";
23
5
  import { isValidAlignStrategy, type AlignStrategy } from "./image.ts";
24
6
  import { withIndex } from "./iterable.ts";
7
+ import { countPages, renderPageRangePng } from "./pdf.ts";
25
8
  import { perf } from "./perf.ts";
26
9
  import { parseHex, formatHex } from "./rgba-color.ts";
10
+ import { sliceBackingBuffer } from "./transferable.ts";
27
11
  import { VERSION } from "./version.ts";
28
12
  import type { JimpInstance } from "./jimp.ts";
29
13
  import type {
@@ -55,6 +39,15 @@ type Result = {
55
39
  modification: [number, number][];
56
40
  };
57
41
 
42
+ // Default parallelism scales with the machine: rendering and diffing run across
43
+ // several workers, so the out-of-the-box run uses multiple CPU cores rather than
44
+ // just one. Capped at 4 to keep the default memory footprint and oversubscription
45
+ // modest; raise --workers explicitly for large jobs on big machines.
46
+ export const defaultWorkers = Math.max(
47
+ 1,
48
+ Math.min(globalThis.navigator?.hardwareConcurrency ?? 1, 4),
49
+ );
50
+
58
51
  export const defaultOptions: Options = {
59
52
  dpi: 150,
60
53
  alpha: true,
@@ -65,25 +58,9 @@ export const defaultOptions: Options = {
65
58
  deletion: [0xff, 0x57, 0x24, 0xff],
66
59
  modification: [0xff, 0xc1, 0x05, 0xff],
67
60
  },
68
- workers: 1,
61
+ workers: defaultWorkers,
69
62
  };
70
63
 
71
- function asSharedBytes(bytes: Uint8Array): Uint8Array {
72
- const isNode =
73
- typeof globalThis.process !== "undefined" &&
74
- !!globalThis.process.versions?.node;
75
- const coiOk =
76
- (globalThis as { crossOriginIsolated?: boolean }).crossOriginIsolated ===
77
- true;
78
- if (typeof SharedArrayBuffer !== "undefined" && (isNode || coiOk)) {
79
- const sab = new SharedArrayBuffer(bytes.byteLength);
80
- const view = new Uint8Array(sab);
81
- view.set(bytes);
82
- return view;
83
- }
84
- return new Uint8Array(bytes);
85
- }
86
-
87
64
  type WorkerResponse =
88
65
  | LoadedMessage
89
66
  | ReadyMessage
@@ -136,12 +113,29 @@ class WorkerHandle {
136
113
  });
137
114
  }
138
115
 
139
- processPage(index: number): Promise<PageResultMessage> {
116
+ processDiff(
117
+ index: number,
118
+ a: Uint8Array<ArrayBuffer> | null,
119
+ b: Uint8Array<ArrayBuffer> | null,
120
+ mask: Uint8Array<ArrayBuffer> | null,
121
+ ): Promise<PageResultMessage> {
140
122
  return new Promise<PageResultMessage>((resolve, reject) => {
141
123
  this.pendingResolve = resolve as (data: WorkerResponse) => void;
142
124
  this.pendingReject = reject;
143
- const msg: PageMessage = { type: "page", index };
144
- this.worker.postMessage(msg);
125
+ const aBuf = a !== null ? sliceBackingBuffer(a) : null;
126
+ const bBuf = b !== null ? sliceBackingBuffer(b) : null;
127
+ const maskBuf = mask !== null ? sliceBackingBuffer(mask) : null;
128
+ const msg: PageMessage = {
129
+ type: "page",
130
+ index,
131
+ a: aBuf,
132
+ b: bBuf,
133
+ mask: maskBuf,
134
+ };
135
+ const transfer = [aBuf, bBuf, maskBuf].filter(
136
+ (buf): buf is ArrayBuffer => buf !== null,
137
+ );
138
+ this.worker.postMessage(msg, transfer);
145
139
  });
146
140
  }
147
141
 
@@ -155,6 +149,15 @@ function workerUrl(): URL {
155
149
  return new URL(`${file}?v=${encodeURIComponent(VERSION)}`, import.meta.url);
156
150
  }
157
151
 
152
+ function unpackCoords(buf: ArrayBuffer): [number, number][] {
153
+ const arr = new Int32Array(buf);
154
+ const out: [number, number][] = new Array(arr.length >>> 1);
155
+ for (let i = 0, j = 0; j < out.length; i += 2, j++) {
156
+ out[j] = [arr[i]!, arr[i + 1]!];
157
+ }
158
+ return out;
159
+ }
160
+
158
161
  function pageResultToResult(msg: PageResultMessage): Result {
159
162
  const sP = perf.span("main.pageResultToResult_ms");
160
163
  const r = {
@@ -173,9 +176,9 @@ function pageResultToResult(msg: PageResultMessage): Result {
173
176
  height: msg.diff.height,
174
177
  data: new Uint8Array(msg.diff.data),
175
178
  }) as JimpInstance,
176
- addition: msg.addition,
177
- deletion: msg.deletion,
178
- modification: msg.modification,
179
+ addition: unpackCoords(msg.addition),
180
+ deletion: unpackCoords(msg.deletion),
181
+ modification: unpackCoords(msg.modification),
179
182
  };
180
183
  sP.stop();
181
184
  perf.incr("main.resultsReceived");
@@ -202,62 +205,151 @@ export async function* visualizeDifferences(
202
205
  workers: options?.workers ?? defaultOptions.workers,
203
206
  };
204
207
 
205
- const probe = mupdf.PDFDocument.openDocument(a, "application/pdf");
206
- const probeB = mupdf.PDFDocument.openDocument(b, "application/pdf");
207
- const probeMask =
208
+ const [aPages, bPages, maskPages] = await Promise.all([
209
+ countPages(a),
210
+ countPages(b),
208
211
  typeof merged.mask !== "undefined"
209
- ? mupdf.PDFDocument.openDocument(merged.mask, "application/pdf")
210
- : new mupdf.PDFDocument();
211
- const maxPages = Math.max(
212
- probe.countPages(),
213
- probeB.countPages(),
214
- probeMask.countPages(),
215
- );
216
- probe.destroy();
217
- probeB.destroy();
218
- probeMask.destroy();
212
+ ? countPages(merged.mask)
213
+ : Promise.resolve(0),
214
+ ]);
215
+ const maxPages = Math.max(aPages, bPages, maskPages);
219
216
 
220
217
  if (maxPages === 0) return;
221
218
 
222
- const aBytes = asSharedBytes(a);
223
- const bBytes = asSharedBytes(b);
224
- const maskBytes =
225
- typeof merged.mask !== "undefined" ? asSharedBytes(merged.mask) : null;
219
+ const mask = merged.mask;
220
+ const hasMask = typeof mask !== "undefined" && maskPages > 0;
221
+ const numDocs = hasMask ? 3 : 2;
226
222
 
227
223
  const initMsg: InitMessage = {
228
224
  type: "init",
229
- aBytes,
230
- bBytes,
231
- maskBytes,
232
- dpi: merged.dpi,
233
- alpha: merged.alpha,
234
225
  pallet: merged.pallet,
235
226
  align: merged.align,
236
227
  };
237
228
 
238
229
  const N = Math.max(1, Math.min(merged.workers, maxPages));
239
230
  const url = workerUrl();
240
- const worker0 = new WorkerHandle(url);
241
- await worker0.init(initMsg);
242
-
243
- const buffered = new Map<number, Result>();
244
- let nextToAssign = 0;
245
-
246
- const workers: WorkerHandle[] = [worker0];
247
- for (let i = 1; i < N; i++) {
231
+ const workers: WorkerHandle[] = [];
232
+ for (let i = 0; i < N; i++) {
248
233
  const w = new WorkerHandle(url);
249
234
  await w.init(initMsg);
250
235
  workers.push(w);
251
236
  }
252
237
 
238
+ let aborted: unknown = null;
239
+
240
+ // One PNG slot per page per document, fulfilled as render chunks complete.
241
+ // Pages past a document's page count resolve to null (an empty/transparent
242
+ // page). The defensive catch keeps a chunk failure from surfacing as an
243
+ // unhandled rejection before a diff lane awaits the slot.
244
+ type Slot = {
245
+ p: Promise<Uint8Array<ArrayBuffer> | null>;
246
+ resolve: (v: Uint8Array<ArrayBuffer> | null) => void;
247
+ reject: (e: unknown) => void;
248
+ };
249
+ const makeSlots = (count: number): Slot[] =>
250
+ Array.from({ length: maxPages }, (_, i) => {
251
+ if (i >= count) {
252
+ return {
253
+ p: Promise.resolve(null),
254
+ resolve: () => {},
255
+ reject: () => {},
256
+ };
257
+ }
258
+ let resolve!: (v: Uint8Array<ArrayBuffer> | null) => void;
259
+ let reject!: (e: unknown) => void;
260
+ const p = new Promise<Uint8Array<ArrayBuffer> | null>((res, rej) => {
261
+ resolve = res;
262
+ reject = rej;
263
+ });
264
+ p.catch(() => {});
265
+ return { p, resolve, reject };
266
+ });
267
+ const slots = {
268
+ a: makeSlots(aPages),
269
+ b: makeSlots(bPages),
270
+ mask: makeSlots(hasMask ? maskPages : 0),
271
+ };
272
+
273
+ // Render chunk tasks: batch several pages per gs() call to amortize startup,
274
+ // interleaving A/B/mask by page range so the early pages of every document
275
+ // become available together (which keeps the diff stage fed).
276
+ // Render concurrency. Aim for ~2x as many chunks as render slots so pages
277
+ // arrive in waves and the diff/decode stage overlaps later renders instead of
278
+ // waiting for one big batch. A floor keeps each chunk large enough to amortize
279
+ // Ghostscript's per-call startup: when there are many slots relative to pages,
280
+ // the slots are already saturated, so batching beats finer streaming.
281
+ const MIN_CHUNK = 4;
282
+ const R = Math.max(merged.workers, numDocs);
283
+ const totalRenderPages = aPages + bPages + (hasMask ? maskPages : 0);
284
+ const chunkSize = Math.max(
285
+ 1,
286
+ Math.min(
287
+ maxPages,
288
+ Math.max(MIN_CHUNK, Math.ceil(totalRenderPages / (2 * R))),
289
+ ),
290
+ );
291
+ type Task = { bytes: Uint8Array; start: number; end: number; slots: Slot[] };
292
+ const tasks: Task[] = [];
293
+ const pushChunk = (
294
+ bytes: Uint8Array | undefined,
295
+ count: number,
296
+ target: Slot[],
297
+ start: number,
298
+ ) => {
299
+ if (bytes === undefined || start >= count) return;
300
+ tasks.push({
301
+ bytes,
302
+ start,
303
+ end: Math.min(start + chunkSize, count) - 1,
304
+ slots: target,
305
+ });
306
+ };
307
+ for (let start = 0; start < maxPages; start += chunkSize) {
308
+ pushChunk(a, aPages, slots.a, start);
309
+ pushChunk(b, bPages, slots.b, start);
310
+ if (hasMask) pushChunk(mask, maskPages, slots.mask, start);
311
+ }
312
+
313
+ let taskIdx = 0;
314
+ const renderLoops = Array.from(
315
+ { length: Math.min(R, tasks.length) },
316
+ async () => {
317
+ while (taskIdx < tasks.length && aborted === null) {
318
+ const t = tasks[taskIdx++]!;
319
+ try {
320
+ const pngs = await renderPageRangePng(
321
+ t.bytes,
322
+ t.start,
323
+ t.end,
324
+ merged.dpi,
325
+ merged.alpha,
326
+ );
327
+ for (let i = t.start; i <= t.end; i++) {
328
+ t.slots[i]!.resolve(pngs.get(i) ?? null);
329
+ }
330
+ } catch (e) {
331
+ aborted = e;
332
+ for (let i = t.start; i <= t.end; i++) t.slots[i]!.reject(e);
333
+ }
334
+ }
335
+ },
336
+ );
337
+
338
+ const buffered = new Map<number, Result>();
339
+ let nextToAssign = 0;
253
340
  const resolvers = new Map<number, (r: Result) => void>();
254
341
  let workerError: unknown = null;
255
342
 
256
- const loops = workers.map(async (w) => {
343
+ const diffLoops = workers.map(async (w) => {
257
344
  while (nextToAssign < maxPages && workerError === null) {
258
345
  const idx = nextToAssign++;
259
346
  try {
260
- const msg = await w.processPage(idx);
347
+ const [aPng, bPng, maskPng] = await Promise.all([
348
+ slots.a[idx]!.p,
349
+ slots.b[idx]!.p,
350
+ slots.mask[idx]!.p,
351
+ ]);
352
+ const msg = await w.processDiff(idx, aPng, bPng, maskPng);
261
353
  const result = pageResultToResult(msg);
262
354
  const resolve = resolvers.get(idx);
263
355
  if (resolve) {
@@ -269,6 +361,7 @@ export async function* visualizeDifferences(
269
361
  }
270
362
  } catch (e) {
271
363
  workerError = e;
364
+ aborted = e;
272
365
  for (const [, resolve] of resolvers) resolve(null as never);
273
366
  resolvers.clear();
274
367
  return;
@@ -294,8 +387,11 @@ export async function* visualizeDifferences(
294
387
  yield r;
295
388
  sYield.stop();
296
389
  }
297
- await Promise.all(loops);
390
+ await Promise.all(diffLoops);
391
+ await Promise.all(renderLoops);
298
392
  } finally {
393
+ aborted = aborted ?? new Error("aborted");
394
+ await Promise.allSettled(renderLoops);
299
395
  for (const w of workers) w.terminate();
300
396
  }
301
397
  }
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import assert from "assert";
19
2
  import test from "node:test";
20
3
 
package/src/iterable.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  export async function* withIndex<T>(iter: AsyncIterable<T>, start = 0) {
19
2
  let index = start;
20
3
  for await (const item of iter) {
package/src/jimp.ts CHANGED
@@ -1,15 +1,33 @@
1
1
  import * as jimp from "jimp";
2
2
 
3
+ /**
4
+ * Narrowed view of `jimp.JimpInstance` used throughout the project.
5
+ *
6
+ * Two constraints are tightened relative to the upstream type:
7
+ *
8
+ * 1. `bitmap.data` is asserted to be `Uint8Array<ArrayBuffer>` (never
9
+ * SAB-backed). jimp allocates pixels via `Buffer`, which is always
10
+ * backed by a real ArrayBuffer in practice. Pinning the generic
11
+ * parameter here lets `sliceBackingBuffer` (and `postMessage` transfer
12
+ * lists) infer ArrayBuffer instead of ArrayBufferLike.
13
+ *
14
+ * 2. `resize` and `composite` return `JimpInstance` (this narrowed type)
15
+ * rather than upstream `jimp.JimpInstance`, so chaining preserves the
16
+ * bitmap-backing constraint above.
17
+ */
3
18
  export type JimpInstance = Pick<
4
19
  jimp.JimpInstance,
5
- | "width"
6
- | "height"
7
- | "bitmap"
8
- | "getPixelColor"
9
- | "setPixelColor"
10
- | "resize"
11
- | "composite"
20
+ "width" | "height" | "getPixelColor" | "setPixelColor"
12
21
  > & {
22
+ bitmap: {
23
+ data: Uint8Array<ArrayBuffer>;
24
+ width: number;
25
+ height: number;
26
+ };
27
+ resize: (options: Parameters<jimp.JimpInstance["resize"]>[0]) => JimpInstance;
28
+ composite: (
29
+ ...args: Parameters<jimp.JimpInstance["composite"]>
30
+ ) => JimpInstance;
13
31
  getBuffer: (mime: "image/png") => ReturnType<jimp.JimpInstance["getBuffer"]>;
14
32
  getBase64: (mime: "image/png") => ReturnType<jimp.JimpInstance["getBase64"]>;
15
33
  };
package/src/pdf.ts CHANGED
@@ -1,82 +1,113 @@
1
+ import { gs } from "@u1f992/gs-wasm";
2
+
3
+ import { perf } from "./perf.ts";
4
+
1
5
  /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
6
+ * Pages are rendered with Ghostscript (gs-wasm). Each gs() invocation spins up
7
+ * its own worker and Ghostscript instance, so we render a range of pages per
8
+ * call and let the caller drive concurrency (e.g. by rendering A/B/mask
9
+ * together and by running multiple page workers).
13
10
  *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
11
+ * Ghostscript (via the `web-worker` package) must be invoked from a context
12
+ * that can itself spawn a worker. In Node's `web-worker` this is only the main
13
+ * thread, so rendering happens on the main thread and the resulting PNG bytes
14
+ * are handed off to the diff workers for decoding. The PDF is placed in
15
+ * Ghostscript's in-memory FS as `input.pdf` and the page is read back as a PNG.
16
16
  */
17
+ const INPUT_VM_PATH = "input.pdf";
17
18
 
18
- import * as jimp from "jimp";
19
- import * as mupdf from "mupdf";
20
-
21
- import type { JimpInstance } from "./jimp.ts";
22
- import { perf } from "./perf.ts";
23
-
24
- export function* loadPages(pdf: mupdf.Document) {
25
- for (let i = 0; i < pdf.countPages(); i++) {
26
- yield pdf.loadPage(i);
19
+ /**
20
+ * Count the pages of a PDF using Ghostscript's `pdfpagecount`. Runs with
21
+ * `-dNODISPLAY` (no rendering) so it is cheap relative to a page render.
22
+ */
23
+ export async function countPages(pdf: Uint8Array): Promise<number> {
24
+ const span = perf.span("pdf.countPages_ms");
25
+ const out: number[] = [];
26
+ const { exitCode } = await gs({
27
+ args: [
28
+ "-q",
29
+ "-dNODISPLAY",
30
+ "-dNOSAFER",
31
+ "-c",
32
+ `(${INPUT_VM_PATH}) (r) file runpdfbegin pdfpagecount = quit`,
33
+ ],
34
+ inputFiles: { [INPUT_VM_PATH]: pdf },
35
+ onStdout: (charCode) => {
36
+ if (charCode !== null) out.push(charCode);
37
+ },
38
+ });
39
+ span.stop();
40
+ if (exitCode !== 0) {
41
+ throw new Error(`gs countPages failed (exit ${exitCode})`);
42
+ }
43
+ const text = String.fromCharCode(...out).trim();
44
+ const n = Number.parseInt(text, 10);
45
+ if (!Number.isFinite(n) || n < 0) {
46
+ throw new Error(`gs countPages: unexpected output ${JSON.stringify(text)}`);
27
47
  }
48
+ return n;
28
49
  }
29
50
 
30
- function pixmapToRGBA(pixmap: mupdf.Pixmap): Uint8Array {
31
- const width = pixmap.getWidth();
32
- const height = pixmap.getHeight();
33
- const stride = pixmap.getStride();
34
- const hasAlpha = pixmap.getAlpha() !== 0;
35
- const samples = pixmap.getPixels();
51
+ /**
52
+ * Render an inclusive range of (0-based) pages of a PDF to PNG bytes in a single
53
+ * Ghostscript invocation, returning a map keyed by 0-based page index. Batching
54
+ * several pages per call amortizes Ghostscript's startup and PDF parsing, which
55
+ * dominate a single-page render. `alpha` selects the device: `pngalpha` keeps
56
+ * the page background transparent (so the diff can tell "no content" from
57
+ * "content" via the alpha channel), while `png16m` renders opaque. Decoding to
58
+ * RGBA is left to the caller (the diff workers) so it can run off this thread.
59
+ */
60
+ export async function renderPageRangePng(
61
+ pdf: Uint8Array,
62
+ firstIndex: number,
63
+ lastIndex: number,
64
+ dpi: number,
65
+ alpha: boolean,
66
+ ): Promise<Map<number, Uint8Array<ArrayBuffer>>> {
67
+ const device = alpha ? "pngalpha" : "png16m";
68
+ const first = firstIndex + 1; // Ghostscript page numbers are 1-based.
69
+ const last = lastIndex + 1;
36
70
 
37
- if (hasAlpha && stride === width * 4) {
38
- return new Uint8Array(samples);
39
- }
71
+ // `%d` in the output pattern is the 1-based index of the page *within this
72
+ // run* (it restarts at 1 regardless of -dFirstPage), so the k-th output maps
73
+ // back to absolute page (first + k - 1).
74
+ const pageCount = last - first + 1;
75
+ const names: string[] = [];
76
+ for (let k = 1; k <= pageCount; k++) names.push(`out-${k}.png`);
40
77
 
41
- const out = new Uint8Array(width * height * 4);
42
- const srcBpp = pixmap.getNumberOfComponents() + (hasAlpha ? 1 : 0);
43
- for (let y = 0; y < height; y++) {
44
- const srcRow = y * stride;
45
- const dstRow = y * width * 4;
46
- for (let x = 0; x < width; x++) {
47
- const s = srcRow + x * srcBpp;
48
- const d = dstRow + x * 4;
49
- out[d] = samples[s]!;
50
- out[d + 1] = samples[s + 1]!;
51
- out[d + 2] = samples[s + 2]!;
52
- out[d + 3] = hasAlpha ? samples[s + 3]! : 255;
78
+ const sRender = perf.span("pdf.gsRender_ms");
79
+ const { exitCode, outputFiles } = await gs({
80
+ args: [
81
+ "-dNOPAUSE",
82
+ "-dBATCH",
83
+ "-dQUIET",
84
+ `-dFirstPage=${first}`,
85
+ `-dLastPage=${last}`,
86
+ `-sDEVICE=${device}`,
87
+ `-r${dpi}`,
88
+ "-dTextAlphaBits=4",
89
+ "-dGraphicsAlphaBits=4",
90
+ "-sOutputFile=out-%d.png",
91
+ INPUT_VM_PATH,
92
+ ],
93
+ inputFiles: { [INPUT_VM_PATH]: pdf },
94
+ outputFilePaths: names,
95
+ });
96
+ sRender.stop();
97
+ if (exitCode !== 0) {
98
+ throw new Error(
99
+ `gs render failed (pages ${first}-${last}, exit ${exitCode})`,
100
+ );
101
+ }
102
+ const result = new Map<number, Uint8Array<ArrayBuffer>>();
103
+ for (let k = 1; k <= pageCount; k++) {
104
+ const png = outputFiles[`out-${k}.png`];
105
+ if (!png) {
106
+ throw new Error(`gs render produced no output (page ${first + k - 1})`);
53
107
  }
108
+ result.set(firstIndex + (k - 1), png);
54
109
  }
55
- return out;
56
- }
57
-
58
- export async function pageToImage(
59
- page: mupdf.Page,
60
- dpi: number,
61
- alpha: boolean,
62
- ) {
63
- const zoom = dpi / 72;
64
- const sToPixmap = perf.span("pdf.toPixmap_ms");
65
- const pixmap = page.toPixmap(
66
- [zoom, 0, 0, zoom, 0, 0],
67
- mupdf.ColorSpace.DeviceRGB,
68
- alpha,
69
- );
70
- const width = pixmap.getWidth();
71
- const height = pixmap.getHeight();
72
- sToPixmap.stop();
73
- const sRgba = perf.span("pdf.pixmapToRGBA_ms");
74
- const data = pixmapToRGBA(pixmap);
75
- pixmap.destroy();
76
- page.destroy();
77
- sRgba.stop();
78
- const sFromBitmap = perf.span("pdf.fromBitmap_ms");
79
- const result = jimp.Jimp.fromBitmap({ width, height, data }) as JimpInstance;
80
- sFromBitmap.stop();
110
+ perf.incr("pdf.gsCalls");
111
+ perf.incr("pdf.pagesRendered", pageCount);
81
112
  return result;
82
113
  }
package/src/perf.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  const _enabled = (() => {
19
2
  try {
20
3
  if (
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import assert from "assert";
19
2
  import test from "node:test";
20
3
 
package/src/rgba-color.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  export type RGBAColor = [number, number, number, number];
19
2
 
20
3
  export const parseHex = (hex: string) => {