@u1f992/pdfdiff 0.2.2 → 0.3.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/.clang-format +3 -0
  2. package/LICENSE +68 -81
  3. package/dist/browser.js +239 -3109
  4. package/dist/browser.js.map +1 -1
  5. package/dist/cli-png-worker.d.ts.map +1 -1
  6. package/dist/cli-png-worker.js +0 -16
  7. package/dist/cli-png-worker.js.map +1 -1
  8. package/dist/cli.js +240 -3150
  9. package/dist/cli.js.map +1 -1
  10. package/dist/core.wasm +0 -0
  11. package/dist/decode.d.ts +9 -0
  12. package/dist/decode.d.ts.map +1 -0
  13. package/dist/diff.d.ts.map +1 -1
  14. package/dist/gs-wasm/gs.js +5821 -0
  15. package/dist/gs-wasm/gs.wasm +0 -0
  16. package/dist/gs-wasm/index.js +120 -0
  17. package/dist/gs-wasm/index.js.map +1 -0
  18. package/dist/gs-wasm/worker.js +764 -0
  19. package/dist/gs-wasm/worker.js.map +1 -0
  20. package/dist/image.d.ts.map +1 -1
  21. package/dist/index.d.ts +1 -0
  22. package/dist/index.d.ts.map +1 -1
  23. package/dist/index.html +1 -1
  24. package/dist/index.js +238 -3109
  25. package/dist/index.js.map +1 -1
  26. package/dist/iterable.d.ts.map +1 -1
  27. package/dist/jimp.d.ts +23 -1
  28. package/dist/jimp.d.ts.map +1 -1
  29. package/dist/pdf.d.ts +15 -4
  30. package/dist/pdf.d.ts.map +1 -1
  31. package/dist/perf.d.ts.map +1 -1
  32. package/dist/rgba-color.d.ts.map +1 -1
  33. package/dist/transferable.d.ts +6 -2
  34. package/dist/transferable.d.ts.map +1 -1
  35. package/dist/version.d.ts +1 -1
  36. package/dist/worker.d.ts +6 -8
  37. package/dist/worker.d.ts.map +1 -1
  38. package/dist/worker.js +70 -3311
  39. package/dist/worker.js.map +1 -1
  40. package/package.json +9 -4
  41. package/rollup.config.js +63 -5
  42. package/scripts/build-wasm.sh +32 -0
  43. package/src/browser.ts +9 -6
  44. package/src/cli-png-worker.ts +0 -17
  45. package/src/cli.ts +9 -23
  46. package/src/decode.ts +15 -0
  47. package/src/diff.ts +0 -17
  48. package/src/image.ts +1 -18
  49. package/src/index.html +1 -1
  50. package/src/index.test.ts +10 -18
  51. package/src/index.ts +163 -74
  52. package/src/iterable.test.ts +0 -17
  53. package/src/iterable.ts +0 -17
  54. package/src/jimp.ts +25 -7
  55. package/src/pdf.ts +98 -69
  56. package/src/perf.ts +0 -17
  57. package/src/rgba-color.test.ts +0 -17
  58. package/src/rgba-color.ts +0 -17
  59. package/src/transferable.ts +6 -21
  60. package/src/worker.ts +91 -87
  61. package/wasm/Makefile +34 -0
  62. package/wasm/bindings.cpp +76 -0
  63. package/wasm/core.c +176 -0
  64. package/wasm/core.h +69 -0
  65. package/dist/mupdf-wasm.wasm +0 -0
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@u1f992/pdfdiff",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "description": "Visualize and quantify differences between two PDF files.",
5
5
  "main": "dist/index.js",
6
6
  "bin": {
@@ -10,7 +10,11 @@
10
10
  "test": "node --test",
11
11
  "test:cli": "node src/cli.js test/a.pdf test/b.pdf out --mask test/mask.pdf --dpi 300 && echo \"expected: Page 1, Addition: 7500, Deletion: 7500, Modification: 7500\"",
12
12
  "build:version": "node scripts/version.ts",
13
- "build": "npm run build:version && rollup -c",
13
+ "build:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh",
14
+ "format:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh format",
15
+ "tidy:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh tidy",
16
+ "clean:wasm": "docker run --rm --volume .:/workdir --workdir /workdir --user root --entrypoint bash emscripten/emsdk:5.0.7 scripts/build-wasm.sh clean",
17
+ "build": "npm run build:wasm && npm run build:version && rollup -c",
14
18
  "serve": "npm run build && http-server dist"
15
19
  },
16
20
  "repository": {
@@ -19,7 +23,7 @@
19
23
  },
20
24
  "keywords": [],
21
25
  "author": "Koutaro Mukai",
22
- "license": "GPL-3.0",
26
+ "license": "AGPL-3.0-only",
23
27
  "bugs": {
24
28
  "url": "https://github.com/u1f992/pdfdiff/issues"
25
29
  },
@@ -35,6 +39,7 @@
35
39
  "fflate": "^0.8.2",
36
40
  "http-server": "^14.1.1",
37
41
  "nodehog": "^0.1.2",
42
+ "path-browserify": "^1.0.1",
38
43
  "prettier": "^3.5.3",
39
44
  "rollup": "^4.42.0",
40
45
  "rollup-plugin-copy": "^3.5.0",
@@ -43,9 +48,9 @@
43
48
  },
44
49
  "dependencies": {
45
50
  "@jsquash/png": "^3.1.1",
51
+ "@u1f992/gs-wasm": "0.3.0",
46
52
  "ix": "^7.0.0",
47
53
  "jimp": "^1.6.0",
48
- "mupdf": "^1.26.2",
49
54
  "web-worker": "^1.5.0"
50
55
  }
51
56
  }
package/rollup.config.js CHANGED
@@ -11,10 +11,6 @@ const plugins = [
11
11
  commonjs(),
12
12
  copy({
13
13
  targets: [
14
- {
15
- src: "node_modules/mupdf/dist/mupdf-wasm.wasm",
16
- dest: "dist",
17
- },
18
14
  {
19
15
  src: "node_modules/coi-serviceworker/coi-serviceworker.min.js",
20
16
  dest: "dist",
@@ -27,10 +23,30 @@ const plugins = [
27
23
  src: "src/style.css",
28
24
  dest: "dist",
29
25
  },
26
+ {
27
+ src: "src/wasm/core.wasm",
28
+ dest: "dist",
29
+ },
30
+ // Ghostscript (gs-wasm) Emscripten glue + binary. The `index.js`/
31
+ // `worker.js` ESM wrappers are re-bundled (below) into dist/gs-wasm/ so
32
+ // their bare imports (`web-worker`, `upath`) resolve in the browser; the
33
+ // large glue is shipped as-is and imported as a sibling by worker.js.
34
+ {
35
+ src: [
36
+ "node_modules/@u1f992/gs-wasm/dist/gs.js",
37
+ "node_modules/@u1f992/gs-wasm/dist/gs.wasm",
38
+ ],
39
+ dest: "dist/gs-wasm",
40
+ },
30
41
  ],
31
42
  }),
32
43
  ];
33
44
 
45
+ // gs-wasm is kept external (not bundled): the CLI resolves it from node_modules,
46
+ // while the browser bundles load it from the copied dist/gs-wasm/ folder.
47
+ const GS_WASM = "@u1f992/gs-wasm";
48
+ const gsWasmPaths = { [GS_WASM]: "./gs-wasm/index.js" };
49
+
34
50
  const jimpAlias = alias({
35
51
  entries: [
36
52
  {
@@ -51,12 +67,26 @@ const webWorkerAlias = alias({
51
67
  ],
52
68
  });
53
69
 
70
+ // gs-wasm's worker depends on `upath`, which imports node's `path`. Shim it for
71
+ // the browser. Scoped to the gs-wasm worker bundle only so the Node CLI bundles
72
+ // keep the real `path`.
73
+ const pathAlias = alias({
74
+ entries: [
75
+ {
76
+ find: "path",
77
+ replacement: path.resolve("node_modules/path-browserify/index.js"),
78
+ },
79
+ ],
80
+ });
81
+
54
82
  const rollupConfig = defineConfig([
55
83
  {
56
84
  input: "src/index.ts",
85
+ external: [GS_WASM],
57
86
  output: {
58
87
  file: "dist/index.js",
59
88
  sourcemap: true,
89
+ paths: gsWasmPaths,
60
90
  },
61
91
  plugins: [
62
92
  jimpAlias,
@@ -83,7 +113,7 @@ const rollupConfig = defineConfig([
83
113
  file: "dist/cli.js",
84
114
  sourcemap: true,
85
115
  },
86
- external: ["web-worker"],
116
+ external: ["web-worker", GS_WASM],
87
117
  plugins: [
88
118
  typescript({ tsconfig: "./tsconfig.json" }),
89
119
  nodeResolve(),
@@ -112,9 +142,11 @@ const rollupConfig = defineConfig([
112
142
  },
113
143
  {
114
144
  input: "src/browser.ts",
145
+ external: [GS_WASM],
115
146
  output: {
116
147
  file: "dist/browser.js",
117
148
  sourcemap: true,
149
+ paths: gsWasmPaths,
118
150
  },
119
151
  plugins: [
120
152
  jimpAlias,
@@ -123,6 +155,32 @@ const rollupConfig = defineConfig([
123
155
  ...plugins,
124
156
  ],
125
157
  },
158
+ // Re-bundle gs-wasm's ESM wrappers into dist/gs-wasm/ with their bare
159
+ // dependencies resolved, so the browser can load them as plain static files.
160
+ // The main-thread wrapper spawns ./worker.js (sibling) via new URL(...).
161
+ {
162
+ input: "node_modules/@u1f992/gs-wasm/dist/index.js",
163
+ output: {
164
+ file: "dist/gs-wasm/index.js",
165
+ format: "es",
166
+ sourcemap: true,
167
+ },
168
+ plugins: [webWorkerAlias, nodeResolve(), commonjs()],
169
+ },
170
+ // The worker wrapper imports the (large) emscripten glue as a sibling
171
+ // ./gs.js, which is copied verbatim; everything else (upath, status) is
172
+ // bundled in.
173
+ {
174
+ input: "node_modules/@u1f992/gs-wasm/dist/worker.js",
175
+ external: (id) => id === "./gs.js" || id.endsWith("/gs.js"),
176
+ output: {
177
+ file: "dist/gs-wasm/worker.js",
178
+ format: "es",
179
+ sourcemap: true,
180
+ paths: { "./gs.js": "./gs.js" },
181
+ },
182
+ plugins: [pathAlias, nodeResolve(), commonjs()],
183
+ },
126
184
  ]);
127
185
 
128
186
  export default rollupConfig;
package/scripts/build-wasm.sh ADDED
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env bash
2
+
3
+ set -eEuo pipefail
4
+ shopt -s inherit_errexit
5
+ trap 'echo "Error on line $LINENO: $BASH_COMMAND (exit $?)" >&2' ERR
6
+
7
+ # Align the ubuntu user in the emscripten/emsdk image with the mounted
8
+ # workdir's owner so files we create are owned by the host user, and so we
9
+ # can write back into /workdir regardless of the host user's UID. Without
10
+ # this the script fails on hosts whose user is not UID 1000 (e.g. GitHub
11
+ # Actions' runner is UID 1001).
12
+ HOST_UID=$(stat --format='%u' /workdir)
13
+ HOST_GID=$(stat --format='%g' /workdir)
14
+ if [ "$HOST_UID" -ne 0 ] && [ "$HOST_UID" -ne "$(id -u ubuntu)" ]; then
15
+ groupmod --non-unique --gid "$HOST_GID" ubuntu
16
+ usermod --non-unique --uid "$HOST_UID" --gid "$HOST_GID" ubuntu
17
+ chown --recursive "$HOST_UID:$HOST_GID" /home/ubuntu
18
+ fi
19
+
20
+ TARGET=${1:-all}
21
+
22
+ # clang-format / clang-tidy are only needed for the format and tidy targets
23
+ # and are not part of the emscripten/emsdk base image; install on demand.
24
+ case "$TARGET" in
25
+ format | tidy)
26
+ DEBIAN_FRONTEND=noninteractive apt-get update -qq >/dev/null
27
+ DEBIAN_FRONTEND=noninteractive apt-get install -qq --yes --no-install-recommends \
28
+ clang-format clang-tidy >/dev/null
29
+ ;;
30
+ esac
31
+
32
+ exec runuser -u ubuntu -- bash -c "cd /workdir && make -C wasm $TARGET"
package/src/browser.ts CHANGED
@@ -9,10 +9,12 @@ import { VERSION } from "./version.ts";
9
9
  async function encodeBitmapToPng(img: {
10
10
  width: number;
11
11
  height: number;
12
- bitmap: { data: Uint8Array | Uint8ClampedArray };
12
+ bitmap: {
13
+ data: Uint8Array<ArrayBuffer> | Uint8ClampedArray<ArrayBuffer>;
14
+ };
13
15
  }): Promise<Uint8Array> {
14
16
  const data = img.bitmap.data;
15
- const view =
17
+ const view: Uint8ClampedArray<ArrayBuffer> =
16
18
  data instanceof Uint8ClampedArray
17
19
  ? data
18
20
  : new Uint8ClampedArray(data.buffer, data.byteOffset, data.byteLength);
@@ -147,10 +149,11 @@ document
147
149
  throw new Error();
148
150
  }
149
151
 
150
- const workers = ((
151
- val = (document.getElementById("workers") as HTMLInputElement | null)
152
- ?.value,
153
- ) => (typeof val !== "undefined" ? parseInt(val, 10) : undefined))();
152
+ const workersRaw = (
153
+ document.getElementById("workers") as HTMLInputElement | null
154
+ )?.value;
155
+ // Empty input means "use the default" (which scales with CPU cores).
156
+ const workers = workersRaw ? parseInt(workersRaw, 10) : undefined;
154
157
  if (
155
158
  typeof workers !== "undefined" &&
156
159
  (Number.isNaN(workers) || workers < 1)
package/src/cli-png-worker.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import fs from "node:fs";
19
2
  import { fileURLToPath } from "node:url";
20
3
  import { parentPort } from "node:worker_threads";
package/src/cli.ts CHANGED
@@ -1,22 +1,5 @@
1
1
  #!/usr/bin/env node
2
2
 
3
- /*
4
- * Copyright (C) 2025 Koutaro Mukai
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU General Public License as published by
8
- * the Free Software Foundation, either version 3 of the License, or
9
- * (at your option) any later version.
10
- *
11
- * This program is distributed in the hope that it will be useful,
12
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- * GNU General Public License for more details.
15
- *
16
- * You should have received a copy of the GNU General Public License
17
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
18
- */
19
-
20
3
  import fs from "node:fs";
21
4
  import path from "node:path";
22
5
  import util from "node:util";
@@ -138,12 +121,15 @@ OPTIONS:
138
121
  -h, --help
139
122
 
140
123
  NOTES:
141
- Approximate per-worker memory:
142
- a_size_MB + b_size_MB [+ mask_size_MB] (PDF buffers in wasm)
143
- + 300 MB (mupdf + V8 base)
144
- + (dpi / 150)^2 * 50 MB (pixmap working set)
145
- The main process adds ~500 MB - 1 GB (varies with --workers).
146
- Choose --workers so the total stays under ~80% of available memory.
124
+ Pages are rendered with Ghostscript (gs-wasm). Each page render spins up a
125
+ transient Ghostscript instance (~26 MB WASM binary plus a rasterization
126
+ working set that grows with --dpi). A and B (and the mask) render
127
+ concurrently, and --workers controls how many pages are rendered and diffed
128
+ in parallel, so peak memory scales with both --workers and --dpi. Each
129
+ in-flight page additionally holds decoded RGBA bitmaps of ~width*height*4
130
+ bytes. --workers defaults to the CPU core count (capped at 4); lower it to
131
+ reduce memory, or raise it for large jobs on big machines. Keep the total
132
+ under ~80% of available memory.
147
133
  `);
148
134
  process.exit(0);
149
135
  }
package/src/decode.ts ADDED
@@ -0,0 +1,15 @@
1
+ import * as jimp from "jimp";
2
+
3
+ import type { JimpInstance } from "./jimp.ts";
4
+
5
+ /**
6
+ * Decode PNG bytes (as produced by {@link renderPageRangePng}) into an RGBA
7
+ * image.
8
+ * `jimp.fromBuffer` accepts an ArrayBuffer directly, which keeps this usable in
9
+ * the browser without relying on Node's `Buffer`.
10
+ */
11
+ export async function decodePng(
12
+ png: ArrayBuffer,
13
+ ): Promise<JimpInstance> {
14
+ return (await jimp.Jimp.fromBuffer(png)) as JimpInstance;
15
+ }
package/src/diff.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import { type JimpInstance } from "./jimp.ts";
19
2
  import { alignSize, createEmptyImage, type AlignStrategy } from "./image.ts";
20
3
  import { perf } from "./perf.ts";
package/src/image.ts CHANGED
@@ -1,20 +1,3 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import * as jimp from "jimp";
19
2
  import { type JimpInstance } from "./jimp.ts";
20
3
  import { perf } from "./perf.ts";
@@ -69,7 +52,7 @@ function alignImage(
69
52
  targetWidth: number,
70
53
  targetHeight: number,
71
54
  align: AlignStrategy,
72
- ) {
55
+ ): JimpInstance {
73
56
  if (align === "resize") {
74
57
  return img.resize({ w: targetWidth, h: targetHeight });
75
58
  } else {
package/src/index.html CHANGED
@@ -49,7 +49,7 @@
49
49
  </div>
50
50
  <div>
51
51
  <label for="workers">Workers:</label>
52
- <input type="number" id="workers" value="1" min="1" />
52
+ <input type="number" id="workers" placeholder="auto" min="1" />
53
53
  </div>
54
54
  <div>
55
55
  <label for="addition-color">Addition:</label>
package/src/index.test.ts CHANGED
@@ -1,26 +1,10 @@
1
- /*
2
- * Copyright (C) 2025 Koutaro Mukai
3
- *
4
- * This program is free software: you can redistribute it and/or modify
5
- * it under the terms of the GNU General Public License as published by
6
- * the Free Software Foundation, either version 3 of the License, or
7
- * (at your option) any later version.
8
- *
9
- * This program is distributed in the hope that it will be useful,
10
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
- * GNU General Public License for more details.
13
- *
14
- * You should have received a copy of the GNU General Public License
15
- * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
- */
17
-
18
1
  import assert from "node:assert/strict";
19
2
  import fs from "node:fs";
20
3
  import test from "node:test";
21
4
 
22
5
  import {
23
6
  defaultOptions,
7
+ defaultWorkers,
24
8
  formatHex,
25
9
  isValidAlignStrategy,
26
10
  parseHex,
@@ -50,10 +34,18 @@ test("defaultOptions", () => {
50
34
  deletion: [0xff, 0x57, 0x24, 0xff],
51
35
  modification: [0xff, 0xc1, 0x05, 0xff],
52
36
  },
53
- workers: 1,
37
+ workers: defaultWorkers,
54
38
  });
55
39
  });
56
40
 
41
+ test("defaultWorkers scales with cores, capped at 4", () => {
42
+ assert.equal(
43
+ defaultWorkers,
44
+ Math.max(1, Math.min(globalThis.navigator?.hardwareConcurrency ?? 1, 4)),
45
+ );
46
+ assert.ok(defaultWorkers >= 1 && defaultWorkers <= 4);
47
+ });
48
+
57
49
  test("isValidAlignStrategy", async (ctx) => {
58
50
  for (const s of [
59
51
  "resize",