@u1f992/pdfdiff 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/.clang-format +3 -0
  2. package/.github/workflows/gh-pages.yml +6 -1
  3. package/LICENSE +68 -81
  4. package/dist/browser.js +1405 -3099
  5. package/dist/browser.js.map +1 -1
  6. package/dist/cli-png-worker.d.ts +13 -0
  7. package/dist/cli-png-worker.d.ts.map +1 -0
  8. package/dist/cli-png-worker.js +287 -0
  9. package/dist/cli-png-worker.js.map +1 -0
  10. package/dist/cli.js +401 -3110
  11. package/dist/cli.js.map +1 -1
  12. package/dist/core.wasm +0 -0
  13. package/dist/decode.d.ts +9 -0
  14. package/dist/decode.d.ts.map +1 -0
  15. package/dist/diff.d.ts +2 -1
  16. package/dist/diff.d.ts.map +1 -1
  17. package/dist/gs-wasm/gs.js +5821 -0
  18. package/dist/gs-wasm/gs.wasm +0 -0
  19. package/dist/gs-wasm/index.js +120 -0
  20. package/dist/gs-wasm/index.js.map +1 -0
  21. package/dist/gs-wasm/worker.js +764 -0
  22. package/dist/gs-wasm/worker.js.map +1 -0
  23. package/dist/image.d.ts.map +1 -1
  24. package/dist/index.d.ts +3 -1
  25. package/dist/index.d.ts.map +1 -1
  26. package/dist/index.html +6 -1
  27. package/dist/index.js +310 -3094
  28. package/dist/index.js.map +1 -1
  29. package/dist/iterable.d.ts.map +1 -1
  30. package/dist/jimp.d.ts +23 -1
  31. package/dist/jimp.d.ts.map +1 -1
  32. package/dist/pdf.d.ts +15 -4
  33. package/dist/pdf.d.ts.map +1 -1
  34. package/dist/perf.d.ts +16 -0
  35. package/dist/perf.d.ts.map +1 -0
  36. package/dist/rgba-color.d.ts.map +1 -1
  37. package/dist/squoosh_png_bg.wasm +0 -0
  38. package/dist/style.css +12 -0
  39. package/dist/transferable.d.ts +11 -0
  40. package/dist/transferable.d.ts.map +1 -0
  41. package/dist/version.d.ts +1 -1
  42. package/dist/worker.d.ts +8 -8
  43. package/dist/worker.d.ts.map +1 -1
  44. package/dist/worker.js +144 -3210
  45. package/dist/worker.js.map +1 -1
  46. package/package.json +11 -4
  47. package/rollup.config.js +83 -5
  48. package/scripts/build-wasm.sh +32 -0
  49. package/src/browser.ts +122 -9
  50. package/src/cli-png-worker.ts +42 -0
  51. package/src/cli.ts +113 -34
  52. package/src/decode.ts +15 -0
  53. package/src/diff.ts +99 -51
  54. package/src/image.ts +4 -18
  55. package/src/index.html +6 -1
  56. package/src/index.test.ts +10 -18
  57. package/src/index.ts +176 -76
  58. package/src/iterable.test.ts +0 -17
  59. package/src/iterable.ts +0 -17
  60. package/src/jimp.ts +25 -7
  61. package/src/pdf.ts +99 -62
  62. package/src/perf.ts +77 -0
  63. package/src/rgba-color.test.ts +0 -17
  64. package/src/rgba-color.ts +0 -17
  65. package/src/style.css +12 -0
  66. package/src/transferable.ts +15 -0
  67. package/src/worker.ts +106 -100
  68. package/wasm/Makefile +34 -0
  69. package/wasm/bindings.cpp +76 -0
  70. package/wasm/core.c +176 -0
  71. package/wasm/core.h +69 -0
  72. package/dist/mupdf-wasm.wasm +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@u1f992/pdfdiff",
3
- "version": "0.2.1",
3
+ "version": "0.3.0",
4
4
  "description": "Visualize and quantify differences between two PDF files.",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -10,7 +10,11 @@
10
10
  "test": "node --test",
11
11
  "test:cli": "node src/cli.js test/a.pdf test/b.pdf out --mask test/mask.pdf --dpi 300 && echo \"expected: Page 1, Addition: 7500, Deletion: 7500, Modification: 7500\"",
12
12
  "build:version": "node scripts/version.ts",
13
- "build": "npm run build:version && rollup -c",
13
+ "build:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh",
14
+ "format:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh format",
15
+ "tidy:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh tidy",
16
+ "clean:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh clean",
17
+ "build": "npm run build:wasm && npm run build:version && rollup -c",
14
18
  "serve": "npm run build && http-server dist"
15
19
  },
16
20
  "repository": {
@@ -19,7 +23,7 @@
19
23
  },
20
24
  "keywords": [],
21
25
  "author": "Koutaro Mukai",
22
- "license": "GPL-3.0",
26
+ "license": "AGPL-3.0-only",
23
27
  "bugs": {
24
28
  "url": "https://github.com/u1f992/pdfdiff/issues"
25
29
  },
@@ -32,8 +36,10 @@
32
36
  "@rollup/plugin-typescript": "^12.1.4",
33
37
  "@types/node": "^22.18.7",
34
38
  "coi-serviceworker": "^0.1.7",
39
+ "fflate": "^0.8.2",
35
40
  "http-server": "^14.1.1",
36
41
  "nodehog": "^0.1.2",
42
+ "path-browserify": "^1.0.1",
37
43
  "prettier": "^3.5.3",
38
44
  "rollup": "^4.42.0",
39
45
  "rollup-plugin-copy": "^3.5.0",
@@ -41,9 +47,10 @@
41
47
  "typescript": "^5.9.2"
42
48
  },
43
49
  "dependencies": {
50
+ "@jsquash/png": "^3.1.1",
51
+ "@u1f992/gs-wasm": "0.3.0",
44
52
  "ix": "^7.0.0",
45
53
  "jimp": "^1.6.0",
46
- "mupdf": "^1.26.2",
47
54
  "web-worker": "^1.5.0"
48
55
  }
49
56
  }
package/rollup.config.js CHANGED
@@ -11,10 +11,6 @@ const plugins = [
11
11
  commonjs(),
12
12
  copy({
13
13
  targets: [
14
- {
15
- src: "node_modules/mupdf/dist/mupdf-wasm.wasm",
16
- dest: "dist",
17
- },
18
14
  {
19
15
  src: "node_modules/coi-serviceworker/coi-serviceworker.min.js",
20
16
  dest: "dist",
@@ -27,10 +23,30 @@ const plugins = [
27
23
  src: "src/style.css",
28
24
  dest: "dist",
29
25
  },
26
+ {
27
+ src: "src/wasm/core.wasm",
28
+ dest: "dist",
29
+ },
30
+ // Ghostscript (gs-wasm) Emscripten glue + binary. The `index.js`/
31
+ // `worker.js` ESM wrappers are re-bundled (below) into dist/gs-wasm/ so
32
+ // their bare imports (`web-worker`, `upath`) resolve in the browser; the
33
+ // large glue is shipped as-is and imported as a sibling by worker.js.
34
+ {
35
+ src: [
36
+ "node_modules/@u1f992/gs-wasm/dist/gs.js",
37
+ "node_modules/@u1f992/gs-wasm/dist/gs.wasm",
38
+ ],
39
+ dest: "dist/gs-wasm",
40
+ },
30
41
  ],
31
42
  }),
32
43
  ];
33
44
 
45
+ // gs-wasm is kept external (not bundled): the CLI resolves it from node_modules,
46
+ // while the browser bundles load it from the copied dist/gs-wasm/ folder.
47
+ const GS_WASM = "@u1f992/gs-wasm";
48
+ const gsWasmPaths = { [GS_WASM]: "./gs-wasm/index.js" };
49
+
34
50
  const jimpAlias = alias({
35
51
  entries: [
36
52
  {
@@ -51,12 +67,26 @@ const webWorkerAlias = alias({
51
67
  ],
52
68
  });
53
69
 
70
+ // gs-wasm's worker depends on `upath`, which imports node's `path`. Shim it for
71
+ // the browser. Scoped to the gs-wasm worker bundle only so the Node CLI bundles
72
+ // keep the real `path`.
73
+ const pathAlias = alias({
74
+ entries: [
75
+ {
76
+ find: "path",
77
+ replacement: path.resolve("node_modules/path-browserify/index.js"),
78
+ },
79
+ ],
80
+ });
81
+
54
82
  const rollupConfig = defineConfig([
55
83
  {
56
84
  input: "src/index.ts",
85
+ external: [GS_WASM],
57
86
  output: {
58
87
  file: "dist/index.js",
59
88
  sourcemap: true,
89
+ paths: gsWasmPaths,
60
90
  },
61
91
  plugins: [
62
92
  jimpAlias,
@@ -83,18 +113,40 @@ const rollupConfig = defineConfig([
83
113
  file: "dist/cli.js",
84
114
  sourcemap: true,
85
115
  },
86
- external: ["web-worker"],
116
+ external: ["web-worker", GS_WASM],
87
117
  plugins: [
88
118
  typescript({ tsconfig: "./tsconfig.json" }),
89
119
  nodeResolve(),
90
120
  commonjs(),
91
121
  ],
92
122
  },
123
+ {
124
+ input: "src/cli-png-worker.ts",
125
+ output: {
126
+ file: "dist/cli-png-worker.js",
127
+ sourcemap: true,
128
+ },
129
+ plugins: [
130
+ typescript({ tsconfig: "./tsconfig.json" }),
131
+ nodeResolve(),
132
+ commonjs(),
133
+ copy({
134
+ targets: [
135
+ {
136
+ src: "node_modules/@jsquash/png/codec/pkg/squoosh_png_bg.wasm",
137
+ dest: "dist",
138
+ },
139
+ ],
140
+ }),
141
+ ],
142
+ },
93
143
  {
94
144
  input: "src/browser.ts",
145
+ external: [GS_WASM],
95
146
  output: {
96
147
  file: "dist/browser.js",
97
148
  sourcemap: true,
149
+ paths: gsWasmPaths,
98
150
  },
99
151
  plugins: [
100
152
  jimpAlias,
@@ -103,6 +155,32 @@ const rollupConfig = defineConfig([
103
155
  ...plugins,
104
156
  ],
105
157
  },
158
+ // Re-bundle gs-wasm's ESM wrappers into dist/gs-wasm/ with their bare
159
+ // dependencies resolved, so the browser can load them as plain static files.
160
+ // The main-thread wrapper spawns ./worker.js (sibling) via new URL(...).
161
+ {
162
+ input: "node_modules/@u1f992/gs-wasm/dist/index.js",
163
+ output: {
164
+ file: "dist/gs-wasm/index.js",
165
+ format: "es",
166
+ sourcemap: true,
167
+ },
168
+ plugins: [webWorkerAlias, nodeResolve(), commonjs()],
169
+ },
170
+ // The worker wrapper imports the (large) emscripten glue as a sibling
171
+ // ./gs.js, which is copied verbatim; everything else (upath, status) is
172
+ // bundled in.
173
+ {
174
+ input: "node_modules/@u1f992/gs-wasm/dist/worker.js",
175
+ external: (id) => id === "./gs.js" || id.endsWith("/gs.js"),
176
+ output: {
177
+ file: "dist/gs-wasm/worker.js",
178
+ format: "es",
179
+ sourcemap: true,
180
+ paths: { "./gs.js": "./gs.js" },
181
+ },
182
+ plugins: [pathAlias, nodeResolve(), commonjs()],
183
+ },
106
184
  ]);
107
185
 
108
186
  export default rollupConfig;
@@ -0,0 +1,32 @@
1
#!/usr/bin/env bash
#
# Entry point executed as root inside the emscripten/emsdk container.
# Usage: build-wasm.sh [TARGET]
#   TARGET is a single make target for wasm/Makefile (defaults to "all").

set -eEuo pipefail
shopt -s inherit_errexit
trap 'echo "Error on line $LINENO: $BASH_COMMAND (exit $?)" >&2' ERR

# Align the ubuntu user in the emscripten/emsdk image with the mounted
# workdir's owner so files we create are owned by the host user, and so we
# can write back into /workdir regardless of the host user's UID. Without
# this the script fails on hosts whose user is not UID 1000 (e.g. GitHub
# Actions' runner is UID 1001).
HOST_UID=$(stat --format='%u' /workdir)
HOST_GID=$(stat --format='%g' /workdir)
if [ "$HOST_UID" -ne 0 ] && [ "$HOST_UID" -ne "$(id -u ubuntu)" ]; then
  groupmod --non-unique --gid "$HOST_GID" ubuntu
  usermod --non-unique --uid "$HOST_UID" --gid "$HOST_GID" ubuntu
  chown --recursive "$HOST_UID:$HOST_GID" /home/ubuntu
fi

TARGET=${1:-all}

# clang-format / clang-tidy are only needed for the format and tidy targets
# and are not part of the emscripten/emsdk base image; install on demand.
case "$TARGET" in
format | tidy)
  DEBIAN_FRONTEND=noninteractive apt-get update -qq >/dev/null
  DEBIAN_FRONTEND=noninteractive apt-get install -qq --yes --no-install-recommends \
    clang-format clang-tidy >/dev/null
  ;;
esac

# Hand the target to the inner shell as a positional parameter instead of
# splicing it into the command string: the value is then never re-parsed as
# shell syntax, so whitespace or metacharacters in it cannot inject commands.
# It is forwarded to make as exactly one target name.
exec runuser -u ubuntu -- bash -c 'cd /workdir && make -C wasm "$1"' bash "$TARGET"
package/src/browser.ts CHANGED
@@ -1,11 +1,81 @@
1
1
  /// <reference lib="dom" />
2
2
 
3
+ import encode from "@jsquash/png/encode";
4
+ import { zipSync } from "fflate";
5
+
3
6
  import * as pdfdiff from "./index.ts";
4
7
  import { VERSION } from "./version.ts";
5
8
 
9
/**
 * Encode a raw bitmap as PNG bytes using the `@jsquash/png` encoder.
 *
 * @param img Object exposing `width`, `height` and the pixel bytes in
 *   `bitmap.data`.
 * @returns The encoder's output wrapped in a `Uint8Array`.
 */
async function encodeBitmapToPng(img: {
  width: number;
  height: number;
  bitmap: {
    data: Uint8Array<ArrayBuffer> | Uint8ClampedArray<ArrayBuffer>;
  };
}): Promise<Uint8Array> {
  const { data: pixels } = img.bitmap;

  // ImageData wants a Uint8ClampedArray. For any other typed array, create a
  // clamped view over the same bytes instead of copying them.
  let clamped: Uint8ClampedArray<ArrayBuffer>;
  if (pixels instanceof Uint8ClampedArray) {
    clamped = pixels;
  } else {
    clamped = new Uint8ClampedArray(
      pixels.buffer,
      pixels.byteOffset,
      pixels.byteLength,
    );
  }

  const imageData = new ImageData(clamped, img.width, img.height);
  return new Uint8Array(await encode(imageData));
}
24
+
6
25
  const versionEl = document.getElementById("version");
7
26
  if (versionEl) versionEl.textContent = "v" + VERSION;
8
27
 
28
const hideNoDiffEl = document.getElementById(
  "hide-no-diff",
) as HTMLInputElement | null;

/** Mirror the checkbox state onto <body> as the "hide-no-diff" class. */
function applyHideNoDiff(): void {
  const hide = Boolean(hideNoDiffEl?.checked);
  document.body.classList.toggle("hide-no-diff", hide);
}
hideNoDiffEl?.addEventListener("change", applyHideNoDiff);
applyHideNoDiff();

const downloadButton = document.getElementById(
  "download-zip",
) as HTMLButtonElement | null;

/** PNG bytes for one compared page, plus whether any difference was found. */
type ResultPage = {
  a: Uint8Array;
  b: Uint8Array;
  diff: Uint8Array;
  hasDiff: boolean;
};

// Pages offered by the download button; null when there is nothing to offer.
let lastResultPages: Map<number, ResultPage> | null = null;

/** Keep the download button caption in sync with the checkbox. */
function updateDownloadLabel(): void {
  if (!downloadButton) return;
  let label = "Download zip";
  if (hideNoDiffEl?.checked) label = "Download zip (diff only)";
  downloadButton.textContent = label;
}
hideNoDiffEl?.addEventListener("change", updateDownloadLabel);
updateDownloadLabel();

// Bundle the stored PNGs into a zip (level 0: stored, not deflated) and
// trigger a download through a temporary <a> element.
downloadButton?.addEventListener("click", () => {
  const pages = lastResultPages;
  if (pages === null) return;

  const diffOnly = Boolean(hideNoDiffEl?.checked);
  const files: Record<string, Uint8Array> = {};
  pages.forEach((page, pageNo) => {
    // With the checkbox ticked, pages without differences are left out.
    if (diffOnly && !page.hasDiff) return;
    files[`${pageNo}/a.png`] = page.a;
    files[`${pageNo}/b.png`] = page.b;
    files[`${pageNo}/diff.png`] = page.diff;
  });
  if (Object.keys(files).length === 0) return;

  const archive = zipSync(files, { level: 0 });
  const objectUrl = URL.createObjectURL(
    new Blob([new Uint8Array(archive)], { type: "application/zip" }),
  );

  const link = document.createElement("a");
  link.href = objectUrl;
  link.download = diffOnly
    ? "pdfdiff-result-diff-only.zip"
    : "pdfdiff-result.zip";
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  URL.revokeObjectURL(objectUrl);
});
78
+
9
79
  async function readFileAsUint8Array(file: File): Promise<Uint8Array> {
10
80
  return new Promise((resolve, reject) => {
11
81
  const reader = new FileReader();
@@ -28,6 +98,19 @@ document
28
98
  const errorElement = document.getElementById("error-message");
29
99
  if (errorElement) errorElement.textContent = "";
30
100
 
101
+ const submitButton = (event.currentTarget as HTMLFormElement).querySelector(
102
+ 'button[type="submit"]',
103
+ ) as HTMLButtonElement | null;
104
+ const originalSubmitText = submitButton?.textContent ?? "";
105
+ if (submitButton) {
106
+ submitButton.disabled = true;
107
+ submitButton.textContent = "Preparing...";
108
+ }
109
+ if (downloadButton) downloadButton.disabled = true;
110
+ lastResultPages = null;
111
+ const resultPages = new Map<number, ResultPage>();
112
+ let completed = false;
113
+
31
114
  try {
32
115
  const pdfAFile = (
33
116
  document.getElementById("pdf-a") as HTMLInputElement | null
@@ -66,10 +149,11 @@ document
66
149
  throw new Error();
67
150
  }
68
151
 
69
- const workers = ((
70
- val = (document.getElementById("workers") as HTMLInputElement | null)
71
- ?.value,
72
- ) => (typeof val !== "undefined" ? parseInt(val, 10) : undefined))();
152
+ const workersRaw = (
153
+ document.getElementById("workers") as HTMLInputElement | null
154
+ )?.value;
155
+ // Empty input means "use the default" (which scales with CPU cores).
156
+ const workers = workersRaw ? parseInt(workersRaw, 10) : undefined;
73
157
  if (
74
158
  typeof workers !== "undefined" &&
75
159
  (Number.isNaN(workers) || workers < 1)
@@ -123,10 +207,13 @@ document
123
207
  pdfdiff.visualizeDifferences(pdfA, pdfB, options),
124
208
  1,
125
209
  )) {
210
+ if (submitButton) submitButton.textContent = `Page ${i}...`;
126
211
  const pageResult = document.createElement("details");
127
212
  pageResult.className = "diff-details";
128
- pageResult.open =
129
- addition.length + deletion.length + modification.length > 0;
213
+ const totalDiff =
214
+ addition.length + deletion.length + modification.length;
215
+ if (totalDiff === 0) pageResult.classList.add("no-diff");
216
+ pageResult.open = totalDiff > 0;
130
217
 
131
218
  const summary = document.createElement("summary");
132
219
 
@@ -165,23 +252,39 @@ document
165
252
 
166
253
  const imagesRow = document.createElement("tr");
167
254
 
255
+ const aPng = await encodeBitmapToPng(a);
256
+ const bPng = await encodeBitmapToPng(b);
257
+ const diffPng = await encodeBitmapToPng(diff);
258
+ resultPages.set(i, {
259
+ a: aPng,
260
+ b: bPng,
261
+ diff: diffPng,
262
+ hasDiff: totalDiff > 0,
263
+ });
264
+
168
265
  const cellA = document.createElement("td");
169
266
  const imageA = document.createElement("img");
170
- imageA.src = await a.getBase64("image/png");
267
+ imageA.src = URL.createObjectURL(
268
+ new Blob([new Uint8Array(aPng)], { type: "image/png" }),
269
+ );
171
270
  imageA.className = "checkerboard-bg";
172
271
  cellA.appendChild(imageA);
173
272
  imagesRow.appendChild(cellA);
174
273
 
175
274
  const cellB = document.createElement("td");
176
275
  const imageB = document.createElement("img");
177
- imageB.src = await b.getBase64("image/png");
276
+ imageB.src = URL.createObjectURL(
277
+ new Blob([new Uint8Array(bPng)], { type: "image/png" }),
278
+ );
178
279
  imageB.className = "checkerboard-bg";
179
280
  cellB.appendChild(imageB);
180
281
  imagesRow.appendChild(cellB);
181
282
 
182
283
  const cellDiff = document.createElement("td");
183
284
  const imageDiff = document.createElement("img");
184
- imageDiff.src = await diff.getBase64("image/png");
285
+ imageDiff.src = URL.createObjectURL(
286
+ new Blob([new Uint8Array(diffPng)], { type: "image/png" }),
287
+ );
185
288
  imageDiff.className = "checkerboard-bg";
186
289
  cellDiff.appendChild(imageDiff);
187
290
  imagesRow.appendChild(cellDiff);
@@ -191,10 +294,20 @@ document
191
294
  pageResult.appendChild(imagesTable);
192
295
  resultsContainer?.appendChild(pageResult);
193
296
  }
297
+ completed = true;
194
298
  } catch (e) {
195
299
  console.error(e);
196
300
  if (errorElement) {
197
301
  errorElement.textContent = `Error: ${(e as Error).message}`;
198
302
  }
303
+ } finally {
304
+ if (submitButton) {
305
+ submitButton.disabled = false;
306
+ submitButton.textContent = originalSubmitText;
307
+ }
308
+ if (completed && resultPages.size > 0) {
309
+ lastResultPages = resultPages;
310
+ if (downloadButton) downloadButton.disabled = false;
311
+ }
199
312
  }
200
313
  });
@@ -0,0 +1,42 @@
1
+ import fs from "node:fs";
2
+ import { fileURLToPath } from "node:url";
3
+ import { parentPort } from "node:worker_threads";
4
+
5
+ import encode, { init } from "@jsquash/png/encode";
6
+
7
/** One encode-and-write request posted to this worker. */
export type EncodeJob = {
  width: number;
  height: number;
  data: ArrayBuffer;
  path: string;
};

/** Reply posted back for each job: success, or a failure with its message. */
export type EncodeReply = { ok: true } | { ok: false; error: string };

// Initialise the PNG codec from the .wasm file located next to this module.
const wasmUrl = new URL("./squoosh_png_bg.wasm", import.meta.url);
await init(fs.readFileSync(fileURLToPath(wasmUrl)));

if (!parentPort) {
  throw new Error("cli-png-worker must be run as a worker_threads worker");
}

const port = parentPort;

/** Turn a caught value into the `error` text of a failure reply. */
function describeFailure(e: unknown): string {
  if (e instanceof Error) return `${e.message}\n${e.stack}`;
  return String(e);
}

// For each job: encode the pixel buffer as PNG, write it to `job.path`, then
// report the outcome to the parent.
port.on("message", async (job: EncodeJob) => {
  try {
    const pixels = new Uint8ClampedArray(job.data);
    const png = await encode(new ImageData(pixels, job.width, job.height));
    fs.writeFileSync(job.path, new Uint8Array(png));
    const success: EncodeReply = { ok: true };
    port.postMessage(success);
  } catch (e) {
    const failure: EncodeReply = { ok: false, error: describeFailure(e) };
    port.postMessage(failure);
  }
});
package/src/cli.ts CHANGED
@@ -1,26 +1,11 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- /*
4
- * Copyright (C) 2025 Koutaro Mukai
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU General Public License as published by
8
- * the Free Software Foundation, either version 3 of the License, or
9
- * (at your option) any later version.
10
- *
11
- * This program is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- * GNU General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU General Public License
17
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
18
- */
19
-
20
3
  import fs from "node:fs";
21
4
  import path from "node:path";
22
5
  import util from "node:util";
6
+ import { Worker as ThreadWorker } from "node:worker_threads";
23
7
 
8
+ import type { EncodeJob, EncodeReply } from "./cli-png-worker.ts";
24
9
  import {
25
10
  isValidAlignStrategy,
26
11
  defaultOptions,
@@ -28,9 +13,65 @@ import {
28
13
  parseHex,
29
14
  formatHex,
30
15
  visualizeDifferences,
16
+ perf,
31
17
  } from "./index.ts";
18
+ import { sliceBackingBuffer } from "./transferable.ts";
32
19
  import { VERSION } from "./version.ts";
33
20
 
21
+ class PngWriterPool {
22
+ private readonly workers: ThreadWorker[] = [];
23
+ private readonly idle: ThreadWorker[] = [];
24
+ private readonly waiting: Array<(w: ThreadWorker) => void> = [];
25
+
26
+ constructor(size: number, scriptUrl: URL) {
27
+ for (let i = 0; i < size; i++) {
28
+ const w = new ThreadWorker(scriptUrl);
29
+ this.workers.push(w);
30
+ this.idle.push(w);
31
+ }
32
+ }
33
+
34
+ private acquire(): Promise<ThreadWorker> {
35
+ const w = this.idle.pop();
36
+ if (w) return Promise.resolve(w);
37
+ return new Promise<ThreadWorker>((resolve) => this.waiting.push(resolve));
38
+ }
39
+
40
+ private release(w: ThreadWorker) {
41
+ const next = this.waiting.shift();
42
+ if (next) next(w);
43
+ else this.idle.push(w);
44
+ }
45
+
46
+ async submit(job: EncodeJob): Promise<void> {
47
+ const w = await this.acquire();
48
+ return new Promise<void>((resolve, reject) => {
49
+ const onMessage = (msg: EncodeReply) => {
50
+ w.off("message", onMessage);
51
+ w.off("error", onError);
52
+ this.release(w);
53
+ if (msg.ok) resolve();
54
+ else reject(new Error(msg.error));
55
+ };
56
+ const onError = (err: Error) => {
57
+ w.off("message", onMessage);
58
+ w.off("error", onError);
59
+ this.release(w);
60
+ reject(err);
61
+ };
62
+ w.on("message", onMessage);
63
+ w.once("error", onError);
64
+ w.postMessage(job, [job.data]);
65
+ });
66
+ }
67
+
68
+ async terminate(): Promise<void> {
69
+ await Promise.all(this.workers.map((w) => w.terminate()));
70
+ }
71
+ }
72
+
73
+ const _wallSpan = perf.span("cli.wallTotal_ms");
74
+
34
75
  const {
35
76
  positionals,
36
77
  values: {
@@ -80,12 +121,15 @@ OPTIONS:
80
121
  -h, --help
81
122
 
82
123
  NOTES:
83
- Approximate per-worker memory:
84
- a_size_MB + b_size_MB [+ mask_size_MB] (PDF buffers in wasm)
85
- + 300 MB (mupdf + V8 base)
86
- + (dpi / 150)^2 * 50 MB (pixmap working set)
87
- The main process adds ~500 MB - 1 GB (varies with --workers).
88
- Choose --workers so the total stays under ~80% of available memory.
124
+ Pages are rendered with Ghostscript (gs-wasm). Each page render spins up a
125
+ transient Ghostscript instance (~26 MB WASM binary plus a rasterization
126
+ working set that grows with --dpi). A and B (and the mask) render
127
+ concurrently, and --workers controls how many pages are rendered and diffed
128
+ in parallel, so peak memory scales with both --workers and --dpi. Each
129
+ in-flight page additionally holds decoded RGBA bitmaps of ~width*height*4
130
+ bytes. --workers defaults to the CPU core count (capped at 4); lower it to
131
+ reduce memory, or raise it for large jobs on big machines. Keep the total
132
+ under ~80% of available memory.
89
133
  `);
90
134
  process.exit(0);
91
135
  }
@@ -149,6 +193,13 @@ if (Number.isNaN(workers) || workers < 1) {
149
193
  }
150
194
 
151
195
  fs.mkdirSync(outDir, { recursive: true });
196
+ const writerPool = new PngWriterPool(
197
+ workers,
198
+ new URL("./cli-png-worker.js", import.meta.url),
199
+ );
200
+ const pendingWrites: Promise<void>[] = [];
201
+
202
+ const _loopSpan = perf.span("cli.loopWall_ms");
152
203
  for await (const [
153
204
  i,
154
205
  { a, b, diff, addition, deletion, modification },
@@ -172,16 +223,44 @@ for await (const [
172
223
  );
173
224
  const dir = path.join(outDir, i.toString(10));
174
225
  fs.mkdirSync(dir, { recursive: true });
175
- fs.writeFileSync(
176
- path.join(dir, "a.png"),
177
- new Uint8Array(await a.getBuffer("image/png")),
178
- );
179
- fs.writeFileSync(
180
- path.join(dir, "b.png"),
181
- new Uint8Array(await b.getBuffer("image/png")),
182
- );
183
- fs.writeFileSync(
184
- path.join(dir, "diff.png"),
185
- new Uint8Array(await diff.getBuffer("image/png")),
226
+ const sSubmit = perf.span("cli.poolSubmit_ms");
227
+ const aBuf = sliceBackingBuffer(a.bitmap.data);
228
+ const bBuf = sliceBackingBuffer(b.bitmap.data);
229
+ const dBuf = sliceBackingBuffer(diff.bitmap.data);
230
+ pendingWrites.push(
231
+ writerPool.submit({
232
+ width: a.width,
233
+ height: a.height,
234
+ data: aBuf,
235
+ path: path.join(dir, "a.png"),
236
+ }),
237
+ writerPool.submit({
238
+ width: b.width,
239
+ height: b.height,
240
+ data: bBuf,
241
+ path: path.join(dir, "b.png"),
242
+ }),
243
+ writerPool.submit({
244
+ width: diff.width,
245
+ height: diff.height,
246
+ data: dBuf,
247
+ path: path.join(dir, "diff.png"),
248
+ }),
186
249
  );
250
+ sSubmit.stop();
251
+ }
252
+ const sDrain = perf.span("cli.poolDrain_ms");
253
+ await Promise.all(pendingWrites);
254
+ sDrain.stop();
255
+ await writerPool.terminate();
256
+ _loopSpan.stop();
257
+ _wallSpan.stop();
258
+
259
+ if (perf.enabled) {
260
+ const counters = perf.dump();
261
+ process.stderr.write("\n=== PERF ===\n");
262
+ const keys = Object.keys(counters).sort();
263
+ const out: Record<string, number> = {};
264
+ for (const k of keys) out[k] = Math.round(counters[k]! * 1000) / 1000;
265
+ process.stderr.write(JSON.stringify(out, null, 2) + "\n");
187
266
  }
package/src/decode.ts ADDED
@@ -0,0 +1,15 @@
1
+ import * as jimp from "jimp";
2
+
3
+ import type { JimpInstance } from "./jimp.ts";
4
+
5
/**
 * Turn PNG bytes (as produced by {@link renderPageRangePng}) into an RGBA
 * image.
 * The bytes are handed to `jimp.fromBuffer` as an ArrayBuffer, so this works
 * in the browser without relying on Node's `Buffer`.
 */
export async function decodePng(png: ArrayBuffer): Promise<JimpInstance> {
  const image = await jimp.Jimp.fromBuffer(png);
  return image as JimpInstance;
}