@u1f992/pdfdiff 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -19,16 +19,17 @@ import * as jimp from "jimp";
19
19
  import * as mupdf from "mupdf";
20
20
  import Worker from "web-worker";
21
21
 
22
- import {
23
- createEmptyImage,
24
- isValidAlignStrategy,
25
- type AlignStrategy,
26
- } from "./image.js";
27
- import { withIndex } from "./iterable.js";
28
- import { pageToImage } from "./pdf.js";
29
- import { parseHex, formatHex } from "./rgba-color.js";
30
- import type { Pallet } from "./diff.js";
31
- import type { JimpInstance } from "./jimp.js";
22
+ import { type Pallet } from "./diff.ts";
23
+ import { isValidAlignStrategy, type AlignStrategy } from "./image.ts";
24
+ import { withIndex } from "./iterable.ts";
25
+ import { parseHex, formatHex } from "./rgba-color.ts";
26
+ import type { JimpInstance } from "./jimp.ts";
27
+ import type {
28
+ InitMessage,
29
+ PageMessage,
30
+ PageResultMessage,
31
+ ReadyMessage,
32
+ } from "./worker.ts";
32
33
 
33
34
  export { withIndex, isValidAlignStrategy, parseHex, formatHex };
34
35
 
@@ -38,6 +39,7 @@ type Options = {
38
39
  mask: Uint8Array | undefined;
39
40
  align: AlignStrategy;
40
41
  pallet: Pallet;
42
+ workers: number;
41
43
  };
42
44
 
43
45
  type Result = {
@@ -59,14 +61,113 @@ export const defaultOptions: Options = {
59
61
  deletion: [0xff, 0x57, 0x24, 0xff],
60
62
  modification: [0xff, 0xc1, 0x05, 0xff],
61
63
  },
64
+ workers: 1,
62
65
  };
63
66
 
67
/**
 * Prepares PDF bytes for posting to worker threads.
 *
 * When `SharedArrayBuffer` exists and the runtime is Node.js or a
 * cross-origin-isolated page, the bytes are placed in shared memory.
 * Otherwise a private copy backed by a regular `ArrayBuffer` is returned.
 *
 * @param bytes - Source bytes; never modified.
 * @returns A `Uint8Array` holding the same byte sequence. Input that is
 *   already backed by a `SharedArrayBuffer` is returned as-is (when shared
 *   memory is usable) instead of being copied into a second shared buffer.
 */
function asSharedBytes(bytes: Uint8Array): Uint8Array {
  const isNode =
    typeof globalThis.process !== "undefined" &&
    !!globalThis.process.versions?.node;
  const coiOk =
    (globalThis as { crossOriginIsolated?: boolean }).crossOriginIsolated ===
    true;

  if (typeof SharedArrayBuffer !== "undefined" && (isNode || coiOk)) {
    // Already shared: a second copy would only double the memory footprint.
    if (bytes.buffer instanceof SharedArrayBuffer) {
      return bytes;
    }
    const view = new Uint8Array(new SharedArrayBuffer(bytes.byteLength));
    view.set(bytes);
    return view;
  }

  // Fallback: compact private copy (covers only the viewed range, not the
  // whole underlying buffer).
  return new Uint8Array(bytes);
}
82
+
83
/**
 * Promise-based request/response wrapper around one diff worker.
 *
 * Replies are matched to requests purely by arrival order, so only one
 * request may be in flight per handle. A second request issued while one is
 * pending is rejected instead of silently orphaning the first promise.
 */
class WorkerHandle {
  worker: InstanceType<typeof Worker>;
  private pendingResolve:
    | ((data: ReadyMessage | PageResultMessage) => void)
    | null = null;
  private pendingReject: ((reason: unknown) => void) | null = null;

  /**
   * Spawns the worker as an ES module and wires up its reply/error events.
   *
   * @param url - Location of the worker script.
   */
  constructor(url: URL) {
    this.worker = new Worker(url, { type: "module" });
    this.worker.addEventListener(
      "message",
      (e: MessageEvent<ReadyMessage | PageResultMessage>) => {
        const resolve = this.pendingResolve;
        this.pendingResolve = null;
        this.pendingReject = null;
        resolve?.(e.data);
      },
    );
    this.worker.addEventListener("error", (e: ErrorEvent) => {
      this.failPending(e.error ?? new Error(e.message));
    });
    // A reply that cannot be deserialized would otherwise leave the caller
    // waiting forever.
    this.worker.addEventListener("messageerror", () => {
      this.failPending(
        new Error("Failed to deserialize a message from the worker"),
      );
    });
  }

  /**
   * Sends the init message and resolves with the worker's reply.
   *
   * @param msg - Documents and rendering options for the worker.
   */
  init(msg: InitMessage): Promise<ReadyMessage> {
    return this.request<ReadyMessage>(msg);
  }

  /**
   * Asks the worker to process one page and resolves with its reply.
   *
   * @param index - Zero-based page index.
   */
  processPage(index: number): Promise<PageResultMessage> {
    const msg: PageMessage = { type: "page", index };
    return this.request<PageResultMessage>(msg);
  }

  /**
   * Stops the worker. A request still in flight is rejected, because a
   * terminated worker will never answer it.
   */
  terminate() {
    this.worker.terminate();
    this.failPending(new Error("Worker was terminated before it replied"));
  }

  /** Rejects the in-flight request (if any) and clears the pending slot. */
  private failPending(reason: unknown): void {
    const reject = this.pendingReject;
    this.pendingResolve = null;
    this.pendingReject = null;
    reject?.(reason);
  }

  /** Posts `msg` and returns a promise for the next reply from the worker. */
  private request<T extends ReadyMessage | PageResultMessage>(
    msg: InitMessage | PageMessage,
  ): Promise<T> {
    if (this.pendingResolve !== null) {
      return Promise.reject(
        new Error("WorkerHandle is busy: a request is already in flight"),
      );
    }
    return new Promise<T>((resolve, reject) => {
      this.pendingResolve = resolve as (
        data: ReadyMessage | PageResultMessage,
      ) => void;
      this.pendingReject = reject;
      try {
        this.worker.postMessage(msg);
      } catch (e) {
        // postMessage can throw synchronously (e.g. uncloneable payload);
        // free the slot so the handle stays usable.
        this.pendingResolve = null;
        this.pendingReject = null;
        reject(e);
      }
    });
  }
}
134
+
135
/**
 * Resolves the URL of the worker script that sits next to this module.
 *
 * The worker keeps the same extension as the running module: `worker.ts`
 * when this module is loaded from a `.ts` file, `worker.js` otherwise. The
 * extension is taken from the URL *pathname*, so a query string or fragment
 * on the module URL (e.g. `index.ts?t=123`) does not hide the `.ts` suffix.
 *
 * @param moduleUrl - URL of the referencing module; defaults to this module.
 * @returns Absolute URL of the sibling worker script.
 */
function workerUrl(moduleUrl: string = import.meta.url): URL {
  const { pathname } = new URL(moduleUrl);
  const workerFile = pathname.endsWith(".ts") ? "./worker.ts" : "./worker.js";
  return new URL(workerFile, moduleUrl);
}
141
+
142
/**
 * Converts a worker reply into the `Result` shape yielded to callers.
 *
 * Each raw RGBA bitmap in the message is wrapped in a Jimp image; the
 * `Uint8Array` is a view over the received `ArrayBuffer`, not a copy. The
 * coordinate lists are passed through untouched.
 *
 * @param msg - Page result received from a worker.
 * @returns Jimp images for `a`, `b` and `diff` plus the coordinate lists.
 */
function pageResultToResult(msg: PageResultMessage): Result {
  const toImage = (raw: PageResultMessage["diff"]): JimpInstance =>
    jimp.Jimp.fromBitmap({
      width: raw.width,
      height: raw.height,
      data: new Uint8Array(raw.data),
    }) as JimpInstance;

  const a = toImage(msg.a);
  const b = toImage(msg.b);
  const diff = toImage(msg.diff);
  return {
    a,
    b,
    diff,
    addition: msg.addition,
    deletion: msg.deletion,
    modification: msg.modification,
  };
}
164
+
64
165
  export async function* visualizeDifferences(
65
166
  a: Uint8Array,
66
167
  b: Uint8Array,
67
168
  options: Partial<Omit<Options, "pallet"> & { pallet: Partial<Pallet> }>,
68
169
  ) {
69
- const mergedOptions = {
170
+ const merged = {
70
171
  dpi: options?.dpi ?? defaultOptions.dpi,
71
172
  alpha: options?.alpha ?? defaultOptions.alpha,
72
173
  mask: options?.mask ?? defaultOptions.mask,
@@ -77,110 +178,98 @@ export async function* visualizeDifferences(
77
178
  modification:
78
179
  options?.pallet?.modification ?? defaultOptions.pallet.modification,
79
180
  },
181
+ workers: options?.workers ?? defaultOptions.workers,
80
182
  };
81
183
 
82
- const pdfA = mupdf.PDFDocument.openDocument(a, "application/pdf");
83
- const pdfB = mupdf.PDFDocument.openDocument(b, "application/pdf");
84
- const pdfMask =
85
- typeof mergedOptions.mask !== "undefined"
86
- ? mupdf.PDFDocument.openDocument(mergedOptions.mask, "application/pdf")
184
+ const probe = mupdf.PDFDocument.openDocument(a, "application/pdf");
185
+ const probeB = mupdf.PDFDocument.openDocument(b, "application/pdf");
186
+ const probeMask =
187
+ typeof merged.mask !== "undefined"
188
+ ? mupdf.PDFDocument.openDocument(merged.mask, "application/pdf")
87
189
  : new mupdf.PDFDocument();
88
-
89
190
  const maxPages = Math.max(
90
- pdfA.countPages(),
91
- pdfB.countPages(),
92
- pdfMask.countPages(),
191
+ probe.countPages(),
192
+ probeB.countPages(),
193
+ probeMask.countPages(),
93
194
  );
195
+ probe.destroy();
196
+ probeB.destroy();
197
+ probeMask.destroy();
94
198
 
95
- async function processPage(pageIndex: number) {
96
- const [pageA, pageB, pageMask] = await Promise.all([
97
- pageIndex < pdfA.countPages()
98
- ? pageToImage(
99
- pdfA.loadPage(pageIndex),
100
- mergedOptions.dpi,
101
- mergedOptions.alpha,
102
- )
103
- : createEmptyImage(1, 1),
104
- pageIndex < pdfB.countPages()
105
- ? pageToImage(
106
- pdfB.loadPage(pageIndex),
107
- mergedOptions.dpi,
108
- mergedOptions.alpha,
109
- )
110
- : createEmptyImage(1, 1),
111
- pageIndex < pdfMask.countPages()
112
- ? pageToImage(
113
- pdfMask.loadPage(pageIndex),
114
- mergedOptions.dpi,
115
- mergedOptions.alpha,
116
- )
117
- : createEmptyImage(1, 1),
118
- ]);
119
-
120
- // NOTE: getBufferはcopyなので、Workerに移譲した後もa, bを使用して問題ない
121
- // https://github.com/jimp-dev/jimp/blob/b6b0e418a5f1259211a133b20cddb4f4e5c25679/packages/core/src/index.ts#L444
122
- const [bufA, bufB, bufMask] = await Promise.all([
123
- pageA
124
- .getBuffer(jimp.JimpMime.png)
125
- .then((buf) => new Uint8Array(buf).buffer),
126
- pageB
127
- .getBuffer(jimp.JimpMime.png)
128
- .then((buf) => new Uint8Array(buf).buffer),
129
- pageMask
130
- .getBuffer(jimp.JimpMime.png)
131
- .then((buf) => new Uint8Array(buf).buffer),
132
- ]);
133
-
134
- const { bufDiff, addition, deletion, modification } = (await new Promise(
135
- (resolve, reject) => {
136
- const url = new URL("./worker.js", import.meta.url);
137
- const worker = new Worker(url, { type: "module" });
138
- worker.addEventListener("message", (e) => {
139
- resolve(e.data);
140
- worker.terminate();
141
- });
142
- worker.addEventListener("error", (e) => {
143
- reject(e);
144
- worker.terminate();
145
- });
146
- worker.postMessage(
147
- {
148
- bufA,
149
- bufB,
150
- bufMask,
151
- pallet: mergedOptions.pallet,
152
- align: mergedOptions.align,
153
- },
154
- [bufA, bufB, bufMask],
155
- );
156
- },
157
- )) as {
158
- bufDiff: ArrayBuffer;
159
- addition: [number, number][];
160
- deletion: [number, number][];
161
- modification: [number, number][];
162
- };
163
- const diff = await jimp.Jimp.fromBuffer(bufDiff);
164
- return { a: pageA, b: pageB, diff, addition, deletion, modification };
199
+ if (maxPages === 0) return;
200
+
201
+ const aBytes = asSharedBytes(a);
202
+ const bBytes = asSharedBytes(b);
203
+ const maskBytes =
204
+ typeof merged.mask !== "undefined" ? asSharedBytes(merged.mask) : null;
205
+
206
+ const initMsg: InitMessage = {
207
+ type: "init",
208
+ aBytes,
209
+ bBytes,
210
+ maskBytes,
211
+ dpi: merged.dpi,
212
+ alpha: merged.alpha,
213
+ pallet: merged.pallet,
214
+ align: merged.align,
215
+ };
216
+
217
+ const N = Math.max(1, Math.min(merged.workers, maxPages));
218
+ const url = workerUrl();
219
+ const worker0 = new WorkerHandle(url);
220
+ await worker0.init(initMsg);
221
+
222
+ const buffered = new Map<number, Result>();
223
+ let nextToAssign = 0;
224
+
225
+ const workers: WorkerHandle[] = [worker0];
226
+ for (let i = 1; i < N; i++) {
227
+ const w = new WorkerHandle(url);
228
+ await w.init(initMsg);
229
+ workers.push(w);
165
230
  }
166
231
 
167
- // ページ処理を並列発行し、順序を保証して出力
168
- const concurrency = navigator.hardwareConcurrency;
169
- const pending = /** @type {Promise<VisualizeDifferencesResult>[]} */ [];
170
- let nextPageToProcess = 0;
171
- let nextPageToYield = 0;
172
-
173
- while (nextPageToYield < maxPages) {
174
- // プールに空きがあれば新しいページ処理を追加
175
- while (nextPageToProcess < maxPages && pending.length < concurrency) {
176
- pending.push(processPage(nextPageToProcess));
177
- nextPageToProcess++;
232
+ const resolvers = new Map<number, (r: Result) => void>();
233
+ let workerError: unknown = null;
234
+
235
+ const loops = workers.map(async (w) => {
236
+ while (nextToAssign < maxPages && workerError === null) {
237
+ const idx = nextToAssign++;
238
+ try {
239
+ const msg = await w.processPage(idx);
240
+ const result = pageResultToResult(msg);
241
+ const resolve = resolvers.get(idx);
242
+ if (resolve) {
243
+ resolvers.delete(idx);
244
+ resolve(result);
245
+ } else {
246
+ buffered.set(idx, result);
247
+ }
248
+ } catch (e) {
249
+ workerError = e;
250
+ for (const [, resolve] of resolvers) resolve(null as never);
251
+ resolvers.clear();
252
+ return;
253
+ }
178
254
  }
255
+ });
179
256
 
180
- // 次に出力すべきページのPromiseを待つ
181
- const result = await pending[0];
182
- pending.shift();
183
- yield result as Result;
184
- nextPageToYield++;
257
+ try {
258
+ for (let i = 0; i < maxPages; i++) {
259
+ if (workerError !== null) throw workerError;
260
+ let r: Result;
261
+ const buf = buffered.get(i);
262
+ if (buf !== undefined) {
263
+ buffered.delete(i);
264
+ r = buf;
265
+ } else {
266
+ r = await new Promise<Result>((resolve) => resolvers.set(i, resolve));
267
+ if (workerError !== null) throw workerError;
268
+ }
269
+ yield r;
270
+ }
271
+ await Promise.all(loops);
272
+ } finally {
273
+ for (const w of workers) w.terminate();
185
274
  }
186
275
  }
package/src/jimp.ts CHANGED
@@ -4,6 +4,7 @@ export type JimpInstance = Pick<
4
4
  jimp.JimpInstance,
5
5
  | "width"
6
6
  | "height"
7
+ | "bitmap"
7
8
  | "getPixelColor"
8
9
  | "setPixelColor"
9
10
  | "resize"
package/src/pdf.ts CHANGED
@@ -18,12 +18,42 @@
18
18
  import * as jimp from "jimp";
19
19
  import * as mupdf from "mupdf";
20
20
 
21
+ import type { JimpInstance } from "./jimp.ts";
22
+
21
23
  export function* loadPages(pdf: mupdf.Document) {
22
24
  for (let i = 0; i < pdf.countPages(); i++) {
23
25
  yield pdf.loadPage(i);
24
26
  }
25
27
  }
26
28
 
29
/**
 * Copies a pixmap's samples into a tightly packed RGBA byte array
 * (4 bytes per pixel, no row padding).
 *
 * Supported layouts are RGB and grayscale, each with or without an alpha
 * channel. Grayscale values are replicated into R, G and B; a missing alpha
 * channel is filled with 255.
 *
 * NOTE(review): sample values are copied verbatim. If MuPDF stores
 * premultiplied alpha, the output is premultiplied as well — confirm that
 * consumers do not expect straight alpha.
 *
 * @param pixmap - Source pixmap; it is only read, never destroyed here.
 * @returns A new `Uint8Array` of length `width * height * 4` that does not
 *   alias the pixmap's sample memory.
 * @throws Error when the pixmap has a colour-component count other than 1 or 3.
 */
function pixmapToRGBA(pixmap: mupdf.Pixmap): Uint8Array {
  const width = pixmap.getWidth();
  const height = pixmap.getHeight();
  const stride = pixmap.getStride();
  const hasAlpha = pixmap.getAlpha() !== 0;
  const samples = pixmap.getPixels();

  // Already tightly packed with alpha: a plain copy is enough.
  if (hasAlpha && stride === width * 4) {
    return new Uint8Array(samples);
  }

  // getNumberOfComponents() already counts the alpha channel, so it is the
  // per-pixel byte step as-is; adding 1 for alpha again would skew every
  // pixel after the first in a row.
  const srcBpp = pixmap.getNumberOfComponents();
  const colorants = srcBpp - (hasAlpha ? 1 : 0);
  if (colorants !== 1 && colorants !== 3) {
    throw new Error(
      `Unsupported pixmap layout: ${colorants} color component(s)`,
    );
  }
  // For grayscale the single sample feeds all three colour channels.
  const greenOffset = colorants === 3 ? 1 : 0;
  const blueOffset = colorants === 3 ? 2 : 0;

  const out = new Uint8Array(width * height * 4);
  for (let y = 0; y < height; y++) {
    const srcRow = y * stride;
    const dstRow = y * width * 4;
    for (let x = 0; x < width; x++) {
      const s = srcRow + x * srcBpp;
      const d = dstRow + x * 4;
      out[d] = samples[s]!;
      out[d + 1] = samples[s + greenOffset]!;
      out[d + 2] = samples[s + blueOffset]!;
      out[d + 3] = hasAlpha ? samples[s + colorants]! : 255;
    }
  }
  return out;
}
56
+
27
57
  export async function pageToImage(
28
58
  page: mupdf.Page,
29
59
  dpi: number,
@@ -35,8 +65,10 @@ export async function pageToImage(
35
65
  mupdf.ColorSpace.DeviceRGB,
36
66
  alpha,
37
67
  );
38
- const ret = await jimp.Jimp.fromBuffer(new Uint8Array(pixmap.asPNG()).buffer);
68
+ const width = pixmap.getWidth();
69
+ const height = pixmap.getHeight();
70
+ const data = pixmapToRGBA(pixmap);
39
71
  pixmap.destroy();
40
72
  page.destroy();
41
- return ret;
73
+ return jimp.Jimp.fromBitmap({ width, height, data }) as JimpInstance;
42
74
  }
package/src/worker.ts CHANGED
@@ -15,48 +15,145 @@
15
15
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
16
  */
17
17
 
18
- import * as jimp from "jimp";
18
+ import * as mupdf from "mupdf";
19
19
 
20
- import { drawDifference, type Pallet } from "./diff.js";
21
- import { composeLayers, type AlignStrategy } from "./image.js";
22
- import type { JimpInstance } from "./jimp.js";
20
+ import { drawDifference, type Pallet } from "./diff.ts";
21
+ import {
22
+ composeLayers,
23
+ createEmptyImage,
24
+ type AlignStrategy,
25
+ } from "./image.ts";
26
+ import type { JimpInstance } from "./jimp.ts";
27
+ import { pageToImage } from "./pdf.ts";
28
+
29
+ export type InitMessage = {
30
+ type: "init";
31
+ aBytes: Uint8Array;
32
+ bBytes: Uint8Array;
33
+ maskBytes: Uint8Array | null;
34
+ dpi: number;
35
+ alpha: boolean;
36
+ pallet: Pallet;
37
+ align: AlignStrategy;
38
+ };
39
+
40
+ export type PageMessage = {
41
+ type: "page";
42
+ index: number;
43
+ };
44
+
45
+ export type ReadyMessage = {
46
+ type: "ready";
47
+ };
48
+
49
+ export type PageResultMessage = {
50
+ type: "pageResult";
51
+ index: number;
52
+ a: { width: number; height: number; data: ArrayBuffer };
53
+ b: { width: number; height: number; data: ArrayBuffer };
54
+ diff: { width: number; height: number; data: ArrayBuffer };
55
+ addition: [number, number][];
56
+ deletion: [number, number][];
57
+ modification: [number, number][];
58
+ };
59
+
60
+ let pdfA: mupdf.Document;
61
+ let pdfB: mupdf.Document;
62
+ let pdfMask: mupdf.Document;
63
+ let opts: {
64
+ dpi: number;
65
+ alpha: boolean;
66
+ pallet: Pallet;
67
+ align: AlignStrategy;
68
+ };
69
+
70
/**
 * Copies pixel data into a freshly allocated `ArrayBuffer` of exactly the
 * data's byte length.
 *
 * The result never aliases the source, so handing it to `postMessage` as a
 * transferable leaves the source untouched.
 *
 * @param src - Byte data as a typed array, `Buffer`, or plain number array.
 * @returns A standalone `ArrayBuffer` containing the same bytes.
 */
function toTransferable(
  src: Buffer | Uint8Array | Uint8ClampedArray | number[],
): ArrayBuffer {
  let bytes: Uint8Array | Uint8ClampedArray;
  if (src instanceof Uint8Array || src instanceof Uint8ClampedArray) {
    bytes = src;
  } else {
    bytes = Uint8Array.from(src as ArrayLike<number>);
  }

  const standalone = new ArrayBuffer(bytes.byteLength);
  new Uint8Array(standalone).set(bytes);
  return standalone;
}
81
+
82
/**
 * Renders page `index` of the two documents and the mask, computes their
 * difference and packages everything for posting to the main thread.
 *
 * A document with no page at `index` contributes a 1×1 image from
 * `createEmptyImage`. The diff image is composed at page A's dimensions from
 * page A and page B at weight 0.2 each plus the difference layer at weight 1.
 *
 * @param index - Zero-based page index.
 * @returns The page result message with freshly copied pixel buffers.
 */
async function processPage(index: number): Promise<PageResultMessage> {
  // Rasterises the page at `index`, or yields a placeholder past the end.
  const rasterize = (doc: mupdf.Document) =>
    index < doc.countPages()
      ? pageToImage(doc.loadPage(index), opts.dpi, opts.alpha)
      : createEmptyImage(1, 1);

  const rendered = await Promise.all([
    rasterize(pdfA),
    rasterize(pdfB),
    rasterize(pdfMask),
  ]);
  const [pageA, pageB, pageMask] = rendered as [
    JimpInstance,
    JimpInstance,
    JimpInstance,
  ];

  const difference = drawDifference(
    pageA,
    pageB,
    pageMask,
    opts.pallet,
    opts.align,
  );
  const diff = composeLayers(pageA.width, pageA.height, [
    [pageA, 0.2],
    [pageB, 0.2],
    [difference.diff, 1],
  ]);

  // Captures dimensions plus a standalone copy of the pixel bytes.
  const pack = (image: Pick<JimpInstance, "width" | "height" | "bitmap">) => ({
    width: image.width,
    height: image.height,
    data: toTransferable(image.bitmap.data),
  });

  return {
    type: "pageResult",
    index,
    a: pack(pageA),
    b: pack(pageB),
    diff: pack(diff),
    addition: difference.addition,
    deletion: difference.deletion,
    modification: difference.modification,
  };
}
23
130
 
24
131
  self.addEventListener(
25
132
  "message",
26
- async (
27
- e: MessageEvent<{
28
- bufA: ArrayBuffer;
29
- bufB: ArrayBuffer;
30
- bufMask: ArrayBuffer;
31
- pallet: Pallet;
32
- align: AlignStrategy;
33
- }>,
34
- ) => {
35
- const { bufA, bufB, bufMask, pallet, align } = e.data;
36
- const a = (await jimp.Jimp.fromBuffer(bufA)) as JimpInstance;
37
- const b = (await jimp.Jimp.fromBuffer(bufB)) as JimpInstance;
38
- const mask = (await jimp.Jimp.fromBuffer(bufMask)) as JimpInstance;
39
- const {
40
- diff: diffLayer,
41
- addition,
42
- deletion,
43
- modification,
44
- } = drawDifference(a, b, mask, pallet, align);
45
- const diff = composeLayers(a.width, a.height, [
46
- [a, 0.2],
47
- [b, 0.2],
48
- [diffLayer, 1],
49
- ]);
50
- const bufDiff = new Uint8Array(await diff.getBuffer(jimp.JimpMime.png))
51
- .buffer;
52
- self.postMessage(
53
- {
54
- bufDiff,
55
- addition,
56
- deletion,
57
- modification,
58
- },
59
- [bufDiff],
60
- );
133
+ async (e: MessageEvent<InitMessage | PageMessage>) => {
134
+ const msg = e.data;
135
+ if (msg.type === "init") {
136
+ pdfA = mupdf.PDFDocument.openDocument(msg.aBytes, "application/pdf");
137
+ pdfB = mupdf.PDFDocument.openDocument(msg.bBytes, "application/pdf");
138
+ pdfMask = msg.maskBytes
139
+ ? mupdf.PDFDocument.openDocument(msg.maskBytes, "application/pdf")
140
+ : new mupdf.PDFDocument();
141
+ opts = {
142
+ dpi: msg.dpi,
143
+ alpha: msg.alpha,
144
+ pallet: msg.pallet,
145
+ align: msg.align,
146
+ };
147
+ if (pdfA.countPages() > 0) pdfA.loadPage(0).destroy();
148
+ const ready: ReadyMessage = { type: "ready" };
149
+ self.postMessage(ready);
150
+ } else if (msg.type === "page") {
151
+ const result = await processPage(msg.index);
152
+ self.postMessage(result, [
153
+ result.a.data,
154
+ result.b.data,
155
+ result.diff.data,
156
+ ]);
157
+ }
61
158
  },
62
159
  );