@u1f992/pdfdiff 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.ts CHANGED
@@ -19,16 +19,20 @@ import * as jimp from "jimp";
19
19
  import * as mupdf from "mupdf";
20
20
  import Worker from "web-worker";
21
21
 
22
- import {
23
- createEmptyImage,
24
- isValidAlignStrategy,
25
- type AlignStrategy,
26
- } from "./image.js";
27
- import { withIndex } from "./iterable.js";
28
- import { pageToImage } from "./pdf.js";
29
- import { parseHex, formatHex } from "./rgba-color.js";
30
- import type { Pallet } from "./diff.js";
31
- import type { JimpInstance } from "./jimp.js";
22
+ import { type Pallet } from "./diff.ts";
23
+ import { isValidAlignStrategy, type AlignStrategy } from "./image.ts";
24
+ import { withIndex } from "./iterable.ts";
25
+ import { parseHex, formatHex } from "./rgba-color.ts";
26
+ import { VERSION } from "./version.ts";
27
+ import type { JimpInstance } from "./jimp.ts";
28
+ import type {
29
+ ErrorMessage,
30
+ InitMessage,
31
+ LoadedMessage,
32
+ PageMessage,
33
+ PageResultMessage,
34
+ ReadyMessage,
35
+ } from "./worker.ts";
32
36
 
33
37
  export { withIndex, isValidAlignStrategy, parseHex, formatHex };
34
38
 
@@ -38,6 +42,7 @@ type Options = {
38
42
  mask: Uint8Array | undefined;
39
43
  align: AlignStrategy;
40
44
  pallet: Pallet;
45
+ workers: number;
41
46
  };
42
47
 
43
48
  type Result = {
@@ -59,14 +64,125 @@ export const defaultOptions: Options = {
59
64
  deletion: [0xff, 0x57, 0x24, 0xff],
60
65
  modification: [0xff, 0xc1, 0x05, 0xff],
61
66
  },
67
+ workers: 1,
62
68
  };
63
69
 
70
/**
 * Copies `bytes` into a fresh `Uint8Array`.
 *
 * The copy is backed by a `SharedArrayBuffer` when that constructor exists
 * and the runtime is either Node.js or a cross-origin-isolated context;
 * otherwise it is a plain (non-shared) copy.
 *
 * @param bytes - Source bytes; never modified.
 * @returns A new view holding the same byte values as `bytes`.
 */
function asSharedBytes(bytes: Uint8Array): Uint8Array {
  if (typeof SharedArrayBuffer === "undefined") {
    return new Uint8Array(bytes);
  }

  const runningOnNode =
    typeof globalThis.process !== "undefined" &&
    !!globalThis.process.versions?.node;
  const isolated =
    (globalThis as { crossOriginIsolated?: boolean }).crossOriginIsolated ===
    true;
  if (!runningOnNode && !isolated) {
    return new Uint8Array(bytes);
  }

  const shared = new Uint8Array(new SharedArrayBuffer(bytes.byteLength));
  shared.set(bytes);
  return shared;
}
85
+
86
+ type WorkerResponse =
87
+ | LoadedMessage
88
+ | ReadyMessage
89
+ | PageResultMessage
90
+ | ErrorMessage;
91
+
92
/**
 * Request/response wrapper around a single module worker.
 *
 * The protocol allows at most one request in flight: each posted message is
 * answered by exactly one worker message, which settles the pending promise.
 * A `"loaded"` message from the worker marks it as ready to receive `init`.
 */
class WorkerHandle {
  worker: InstanceType<typeof Worker>;
  private loaded: Promise<void>;
  private pendingResolve: ((data: WorkerResponse) => void) | null = null;
  private pendingReject: ((reason: unknown) => void) | null = null;
  private terminated = false;

  /**
   * Spawns the worker and starts listening for its messages and errors.
   *
   * @param url - Location of the worker module.
   */
  constructor(url: URL) {
    this.worker = new Worker(url, { type: "module" });
    this.loaded = new Promise<void>((resolveLoaded, rejectLoaded) => {
      this.worker.addEventListener(
        "message",
        (e: MessageEvent<WorkerResponse>) => {
          const data = e.data;
          if (data.type === "loaded") {
            resolveLoaded();
            return;
          }
          const { resolve, reject } = this.takePending();
          if (data.type === "error") {
            reject?.(new Error(`worker: ${data.message}`));
          } else {
            resolve?.(data);
          }
        },
      );
      this.worker.addEventListener("error", (e: ErrorEvent) => {
        const err = e.error ?? new Error(e.message);
        // No-op once the worker has already reported "loaded".
        rejectLoaded(err);
        this.takePending().reject?.(err);
      });
    });
  }

  /** Detaches and returns the callbacks of the in-flight request, if any. */
  private takePending(): {
    resolve: ((data: WorkerResponse) => void) | null;
    reject: ((reason: unknown) => void) | null;
  } {
    const resolve = this.pendingResolve;
    const reject = this.pendingReject;
    this.pendingResolve = null;
    this.pendingReject = null;
    return { resolve, reject };
  }

  /**
   * Posts `msg` and resolves with the worker's next non-"loaded" reply.
   *
   * Rejects immediately (instead of orphaning the earlier promise) when a
   * request is already in flight or the worker has been terminated.
   */
  private request<T extends WorkerResponse>(
    msg: InitMessage | PageMessage,
  ): Promise<T> {
    if (this.terminated) {
      return Promise.reject(new Error("worker: terminated"));
    }
    if (this.pendingResolve !== null || this.pendingReject !== null) {
      return Promise.reject(
        new Error("worker: a request is already in flight"),
      );
    }
    return new Promise<T>((resolve, reject) => {
      this.pendingResolve = (data) => resolve(data as T);
      this.pendingReject = reject;
      try {
        this.worker.postMessage(msg);
      } catch (err) {
        // Posting failed synchronously: no reply will come, so free the slot.
        this.takePending();
        reject(err);
      }
    });
  }

  /**
   * Waits for the worker's "loaded" signal, then sends the init message.
   *
   * @param msg - Initialization payload forwarded to the worker.
   * @returns The worker's reply to the init message.
   * @throws If the worker fails to load or answers with an error message.
   */
  async init(msg: InitMessage): Promise<ReadyMessage> {
    await this.loaded;
    return this.request<ReadyMessage>(msg);
  }

  /**
   * Asks the worker to process one page.
   *
   * @param index - Zero-based page index sent in the `"page"` message.
   * @returns The worker's reply for that page; rejects on a worker error.
   */
  processPage(index: number): Promise<PageResultMessage> {
    const msg: PageMessage = { type: "page", index };
    return this.request<PageResultMessage>(msg);
  }

  /**
   * Terminates the worker and rejects the in-flight request, if any, so that
   * callers awaiting it do not hang forever.
   */
  terminate() {
    this.terminated = true;
    this.worker.terminate();
    this.takePending().reject?.(new Error("worker: terminated"));
  }
}
151
+
152
/**
 * Builds the URL of the worker module that sits next to this module.
 *
 * Picks `worker.ts` when this module itself is served as a `.ts` file and
 * `worker.js` otherwise, and appends the package version as a `v` query
 * parameter. The extension check looks at the URL *pathname* so that a query
 * string or hash on `import.meta.url` does not hide a `.ts` extension.
 *
 * @returns Absolute URL of the worker entry point.
 */
function workerUrl(): URL {
  const selfUrl = new URL(import.meta.url);
  const file = selfUrl.pathname.endsWith(".ts") ? "./worker.ts" : "./worker.js";
  return new URL(`${file}?v=${encodeURIComponent(VERSION)}`, selfUrl);
}
156
+
157
/**
 * Converts a worker page reply into a `Result`.
 *
 * Each of the `a`, `b` and `diff` bitmaps is wrapped in a `Uint8Array` view
 * over the received `ArrayBuffer` and handed to `Jimp.fromBitmap`; the
 * `addition`, `deletion` and `modification` lists are passed through as-is.
 *
 * @param msg - The `"pageResult"` message received from a worker.
 * @returns The images and change lists for that page.
 */
function pageResultToResult(msg: PageResultMessage): Result {
  const toImage = (bitmap: PageResultMessage["a"]): JimpInstance =>
    jimp.Jimp.fromBitmap({
      width: bitmap.width,
      height: bitmap.height,
      data: new Uint8Array(bitmap.data),
    }) as JimpInstance;

  const { addition, deletion, modification } = msg;
  return {
    a: toImage(msg.a),
    b: toImage(msg.b),
    diff: toImage(msg.diff),
    addition,
    deletion,
    modification,
  };
}
179
+
64
180
  export async function* visualizeDifferences(
65
181
  a: Uint8Array,
66
182
  b: Uint8Array,
67
183
  options: Partial<Omit<Options, "pallet"> & { pallet: Partial<Pallet> }>,
68
184
  ) {
69
- const mergedOptions = {
185
+ const merged = {
70
186
  dpi: options?.dpi ?? defaultOptions.dpi,
71
187
  alpha: options?.alpha ?? defaultOptions.alpha,
72
188
  mask: options?.mask ?? defaultOptions.mask,
@@ -77,110 +193,98 @@ export async function* visualizeDifferences(
77
193
  modification:
78
194
  options?.pallet?.modification ?? defaultOptions.pallet.modification,
79
195
  },
196
+ workers: options?.workers ?? defaultOptions.workers,
80
197
  };
81
198
 
82
- const pdfA = mupdf.PDFDocument.openDocument(a, "application/pdf");
83
- const pdfB = mupdf.PDFDocument.openDocument(b, "application/pdf");
84
- const pdfMask =
85
- typeof mergedOptions.mask !== "undefined"
86
- ? mupdf.PDFDocument.openDocument(mergedOptions.mask, "application/pdf")
199
+ const probe = mupdf.PDFDocument.openDocument(a, "application/pdf");
200
+ const probeB = mupdf.PDFDocument.openDocument(b, "application/pdf");
201
+ const probeMask =
202
+ typeof merged.mask !== "undefined"
203
+ ? mupdf.PDFDocument.openDocument(merged.mask, "application/pdf")
87
204
  : new mupdf.PDFDocument();
88
-
89
205
  const maxPages = Math.max(
90
- pdfA.countPages(),
91
- pdfB.countPages(),
92
- pdfMask.countPages(),
206
+ probe.countPages(),
207
+ probeB.countPages(),
208
+ probeMask.countPages(),
93
209
  );
210
+ probe.destroy();
211
+ probeB.destroy();
212
+ probeMask.destroy();
213
+
214
+ if (maxPages === 0) return;
94
215
 
95
- async function processPage(pageIndex: number) {
96
- const [pageA, pageB, pageMask] = await Promise.all([
97
- pageIndex < pdfA.countPages()
98
- ? pageToImage(
99
- pdfA.loadPage(pageIndex),
100
- mergedOptions.dpi,
101
- mergedOptions.alpha,
102
- )
103
- : createEmptyImage(1, 1),
104
- pageIndex < pdfB.countPages()
105
- ? pageToImage(
106
- pdfB.loadPage(pageIndex),
107
- mergedOptions.dpi,
108
- mergedOptions.alpha,
109
- )
110
- : createEmptyImage(1, 1),
111
- pageIndex < pdfMask.countPages()
112
- ? pageToImage(
113
- pdfMask.loadPage(pageIndex),
114
- mergedOptions.dpi,
115
- mergedOptions.alpha,
116
- )
117
- : createEmptyImage(1, 1),
118
- ]);
119
-
120
- // NOTE: getBufferはcopyなので、Workerに移譲した後もa, bを使用して問題ない
121
- // https://github.com/jimp-dev/jimp/blob/b6b0e418a5f1259211a133b20cddb4f4e5c25679/packages/core/src/index.ts#L444
122
- const [bufA, bufB, bufMask] = await Promise.all([
123
- pageA
124
- .getBuffer(jimp.JimpMime.png)
125
- .then((buf) => new Uint8Array(buf).buffer),
126
- pageB
127
- .getBuffer(jimp.JimpMime.png)
128
- .then((buf) => new Uint8Array(buf).buffer),
129
- pageMask
130
- .getBuffer(jimp.JimpMime.png)
131
- .then((buf) => new Uint8Array(buf).buffer),
132
- ]);
133
-
134
- const { bufDiff, addition, deletion, modification } = (await new Promise(
135
- (resolve, reject) => {
136
- const url = new URL("./worker.js", import.meta.url);
137
- const worker = new Worker(url, { type: "module" });
138
- worker.addEventListener("message", (e) => {
139
- resolve(e.data);
140
- worker.terminate();
141
- });
142
- worker.addEventListener("error", (e) => {
143
- reject(e);
144
- worker.terminate();
145
- });
146
- worker.postMessage(
147
- {
148
- bufA,
149
- bufB,
150
- bufMask,
151
- pallet: mergedOptions.pallet,
152
- align: mergedOptions.align,
153
- },
154
- [bufA, bufB, bufMask],
155
- );
156
- },
157
- )) as {
158
- bufDiff: ArrayBuffer;
159
- addition: [number, number][];
160
- deletion: [number, number][];
161
- modification: [number, number][];
162
- };
163
- const diff = await jimp.Jimp.fromBuffer(bufDiff);
164
- return { a: pageA, b: pageB, diff, addition, deletion, modification };
216
+ const aBytes = asSharedBytes(a);
217
+ const bBytes = asSharedBytes(b);
218
+ const maskBytes =
219
+ typeof merged.mask !== "undefined" ? asSharedBytes(merged.mask) : null;
220
+
221
+ const initMsg: InitMessage = {
222
+ type: "init",
223
+ aBytes,
224
+ bBytes,
225
+ maskBytes,
226
+ dpi: merged.dpi,
227
+ alpha: merged.alpha,
228
+ pallet: merged.pallet,
229
+ align: merged.align,
230
+ };
231
+
232
+ const N = Math.max(1, Math.min(merged.workers, maxPages));
233
+ const url = workerUrl();
234
+ const worker0 = new WorkerHandle(url);
235
+ await worker0.init(initMsg);
236
+
237
+ const buffered = new Map<number, Result>();
238
+ let nextToAssign = 0;
239
+
240
+ const workers: WorkerHandle[] = [worker0];
241
+ for (let i = 1; i < N; i++) {
242
+ const w = new WorkerHandle(url);
243
+ await w.init(initMsg);
244
+ workers.push(w);
165
245
  }
166
246
 
167
- // ページ処理を並列発行し、順序を保証して出力
168
- const concurrency = navigator.hardwareConcurrency;
169
- const pending = /** @type {Promise<VisualizeDifferencesResult>[]} */ [];
170
- let nextPageToProcess = 0;
171
- let nextPageToYield = 0;
172
-
173
- while (nextPageToYield < maxPages) {
174
- // プールに空きがあれば新しいページ処理を追加
175
- while (nextPageToProcess < maxPages && pending.length < concurrency) {
176
- pending.push(processPage(nextPageToProcess));
177
- nextPageToProcess++;
247
+ const resolvers = new Map<number, (r: Result) => void>();
248
+ let workerError: unknown = null;
249
+
250
+ const loops = workers.map(async (w) => {
251
+ while (nextToAssign < maxPages && workerError === null) {
252
+ const idx = nextToAssign++;
253
+ try {
254
+ const msg = await w.processPage(idx);
255
+ const result = pageResultToResult(msg);
256
+ const resolve = resolvers.get(idx);
257
+ if (resolve) {
258
+ resolvers.delete(idx);
259
+ resolve(result);
260
+ } else {
261
+ buffered.set(idx, result);
262
+ }
263
+ } catch (e) {
264
+ workerError = e;
265
+ for (const [, resolve] of resolvers) resolve(null as never);
266
+ resolvers.clear();
267
+ return;
268
+ }
178
269
  }
270
+ });
179
271
 
180
- // 次に出力すべきページのPromiseを待つ
181
- const result = await pending[0];
182
- pending.shift();
183
- yield result as Result;
184
- nextPageToYield++;
272
+ try {
273
+ for (let i = 0; i < maxPages; i++) {
274
+ if (workerError !== null) throw workerError;
275
+ let r: Result;
276
+ const buf = buffered.get(i);
277
+ if (buf !== undefined) {
278
+ buffered.delete(i);
279
+ r = buf;
280
+ } else {
281
+ r = await new Promise<Result>((resolve) => resolvers.set(i, resolve));
282
+ if (workerError !== null) throw workerError;
283
+ }
284
+ yield r;
285
+ }
286
+ await Promise.all(loops);
287
+ } finally {
288
+ for (const w of workers) w.terminate();
185
289
  }
186
290
  }
package/src/jimp.ts CHANGED
@@ -4,6 +4,7 @@ export type JimpInstance = Pick<
4
4
  jimp.JimpInstance,
5
5
  | "width"
6
6
  | "height"
7
+ | "bitmap"
7
8
  | "getPixelColor"
8
9
  | "setPixelColor"
9
10
  | "resize"
package/src/pdf.ts CHANGED
@@ -18,12 +18,42 @@
18
18
  import * as jimp from "jimp";
19
19
  import * as mupdf from "mupdf";
20
20
 
21
+ import type { JimpInstance } from "./jimp.ts";
22
+
21
23
  export function* loadPages(pdf: mupdf.Document) {
22
24
  for (let i = 0; i < pdf.countPages(); i++) {
23
25
  yield pdf.loadPage(i);
24
26
  }
25
27
  }
26
28
 
29
/**
 * Copies a pixmap's samples into a tightly packed RGBA byte array
 * (4 bytes per pixel, rows of exactly `width * 4` bytes).
 *
 * When the pixmap already has an alpha channel and no row padding, the
 * samples are copied as-is. Otherwise pixels are repacked row by row,
 * honouring the source stride, and a missing alpha channel is filled with 255.
 *
 * @param pixmap - Source pixmap. NOTE(review): the first three components of
 *   each pixel are read as R, G, B, so an RGB colorspace is assumed — the
 *   visible caller passes `DeviceRGB`.
 * @returns A new `Uint8Array` of length `width * height * 4`.
 */
function pixmapToRGBA(pixmap: mupdf.Pixmap): Uint8Array {
  const width = pixmap.getWidth();
  const height = pixmap.getHeight();
  const stride = pixmap.getStride();
  const hasAlpha = pixmap.getAlpha() !== 0;
  const samples = pixmap.getPixels();

  if (hasAlpha && stride === width * 4) {
    // Already tightly packed RGBA; constructing from a typed array copies it.
    return new Uint8Array(samples);
  }

  const out = new Uint8Array(width * height * 4);
  // getNumberOfComponents() already counts the alpha channel when present,
  // so it is the per-pixel byte count; adding 1 for alpha would double-count.
  const srcBpp = pixmap.getNumberOfComponents();
  for (let y = 0; y < height; y++) {
    const srcRow = y * stride;
    const dstRow = y * width * 4;
    for (let x = 0; x < width; x++) {
      const s = srcRow + x * srcBpp;
      const d = dstRow + x * 4;
      out[d] = samples[s]!;
      out[d + 1] = samples[s + 1]!;
      out[d + 2] = samples[s + 2]!;
      out[d + 3] = hasAlpha ? samples[s + 3]! : 255;
    }
  }
  return out;
}
56
+
27
57
  export async function pageToImage(
28
58
  page: mupdf.Page,
29
59
  dpi: number,
@@ -35,8 +65,10 @@ export async function pageToImage(
35
65
  mupdf.ColorSpace.DeviceRGB,
36
66
  alpha,
37
67
  );
38
- const ret = await jimp.Jimp.fromBuffer(new Uint8Array(pixmap.asPNG()).buffer);
68
+ const width = pixmap.getWidth();
69
+ const height = pixmap.getHeight();
70
+ const data = pixmapToRGBA(pixmap);
39
71
  pixmap.destroy();
40
72
  page.destroy();
41
- return ret;
73
+ return jimp.Jimp.fromBitmap({ width, height, data }) as JimpInstance;
42
74
  }
package/src/worker.ts CHANGED
@@ -15,48 +15,166 @@
15
15
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
16
16
  */
17
17
 
18
- import * as jimp from "jimp";
18
+ import * as mupdf from "mupdf";
19
19
 
20
- import { drawDifference, type Pallet } from "./diff.js";
21
- import { composeLayers, type AlignStrategy } from "./image.js";
22
- import type { JimpInstance } from "./jimp.js";
20
+ import { drawDifference, type Pallet } from "./diff.ts";
21
+ import {
22
+ composeLayers,
23
+ createEmptyImage,
24
+ type AlignStrategy,
25
+ } from "./image.ts";
26
+ import type { JimpInstance } from "./jimp.ts";
27
+ import { pageToImage } from "./pdf.ts";
28
+
29
+ export type InitMessage = {
30
+ type: "init";
31
+ aBytes: Uint8Array;
32
+ bBytes: Uint8Array;
33
+ maskBytes: Uint8Array | null;
34
+ dpi: number;
35
+ alpha: boolean;
36
+ pallet: Pallet;
37
+ align: AlignStrategy;
38
+ };
39
+
40
+ export type PageMessage = {
41
+ type: "page";
42
+ index: number;
43
+ };
44
+
45
+ export type LoadedMessage = {
46
+ type: "loaded";
47
+ };
48
+
49
+ export type ReadyMessage = {
50
+ type: "ready";
51
+ };
52
+
53
+ export type PageResultMessage = {
54
+ type: "pageResult";
55
+ index: number;
56
+ a: { width: number; height: number; data: ArrayBuffer };
57
+ b: { width: number; height: number; data: ArrayBuffer };
58
+ diff: { width: number; height: number; data: ArrayBuffer };
59
+ addition: [number, number][];
60
+ deletion: [number, number][];
61
+ modification: [number, number][];
62
+ };
63
+
64
+ export type ErrorMessage = {
65
+ type: "error";
66
+ message: string;
67
+ };
68
+
69
+ let pdfA: mupdf.Document;
70
+ let pdfB: mupdf.Document;
71
+ let pdfMask: mupdf.Document;
72
+ let opts: {
73
+ dpi: number;
74
+ alpha: boolean;
75
+ pallet: Pallet;
76
+ align: AlignStrategy;
77
+ };
78
+
79
/**
 * Copies byte data into a freshly allocated `ArrayBuffer` that contains
 * exactly those bytes, suitable for use in a `postMessage` transfer list.
 *
 * @param src - Typed-array bytes (used directly) or a plain number array
 *   (converted with `Uint8Array.from` first).
 * @returns A new `ArrayBuffer` whose length equals the number of source bytes.
 */
function toTransferable(
  src: Buffer | Uint8Array | Uint8ClampedArray | number[],
): ArrayBuffer {
  let bytes: Uint8Array | Uint8ClampedArray;
  if (src instanceof Uint8Array || src instanceof Uint8ClampedArray) {
    bytes = src;
  } else {
    bytes = Uint8Array.from(src as ArrayLike<number>);
  }

  const buffer = new ArrayBuffer(bytes.byteLength);
  new Uint8Array(buffer).set(bytes);
  return buffer;
}
90
+
91
/**
 * Renders page `index` of the module-level documents `pdfA`, `pdfB` and
 * `pdfMask`, computes their difference and packs everything into a
 * `"pageResult"` message.
 *
 * A document that has no page at `index` contributes a 1x1 image from
 * `createEmptyImage` instead. The composed `diff` image layers page A and
 * page B at weight 0.2 under the difference layer at weight 1.
 *
 * @param index - Zero-based page index.
 * @returns The reply message; its three `data` buffers are fresh copies made
 *   by `toTransferable`.
 */
async function processPage(index: number): Promise<PageResultMessage> {
  // Render this page of `doc`, or fall back to a placeholder image.
  const render = (doc: mupdf.Document) =>
    index < doc.countPages()
      ? pageToImage(doc.loadPage(index), opts.dpi, opts.alpha)
      : createEmptyImage(1, 1);

  const rendered = await Promise.all([
    render(pdfA),
    render(pdfB),
    render(pdfMask),
  ]);
  const [pageA, pageB, pageMask] = rendered as [
    JimpInstance,
    JimpInstance,
    JimpInstance,
  ];

  const difference = drawDifference(
    pageA,
    pageB,
    pageMask,
    opts.pallet,
    opts.align,
  );
  const diff = composeLayers(pageA.width, pageA.height, [
    [pageA, 0.2],
    [pageB, 0.2],
    [difference.diff, 1],
  ]);

  // Flatten an image into the plain width/height/data shape of the message.
  const pack = (image: {
    width: number;
    height: number;
    bitmap: { data: Parameters<typeof toTransferable>[0] };
  }) => ({
    width: image.width,
    height: image.height,
    data: toTransferable(image.bitmap.data),
  });

  return {
    type: "pageResult",
    index,
    a: pack(pageA),
    b: pack(pageB),
    diff: pack(diff),
    addition: difference.addition,
    deletion: difference.deletion,
    modification: difference.modification,
  };
}
23
139
 
24
140
  self.addEventListener(
25
141
  "message",
26
- async (
27
- e: MessageEvent<{
28
- bufA: ArrayBuffer;
29
- bufB: ArrayBuffer;
30
- bufMask: ArrayBuffer;
31
- pallet: Pallet;
32
- align: AlignStrategy;
33
- }>,
34
- ) => {
35
- const { bufA, bufB, bufMask, pallet, align } = e.data;
36
- const a = (await jimp.Jimp.fromBuffer(bufA)) as JimpInstance;
37
- const b = (await jimp.Jimp.fromBuffer(bufB)) as JimpInstance;
38
- const mask = (await jimp.Jimp.fromBuffer(bufMask)) as JimpInstance;
39
- const {
40
- diff: diffLayer,
41
- addition,
42
- deletion,
43
- modification,
44
- } = drawDifference(a, b, mask, pallet, align);
45
- const diff = composeLayers(a.width, a.height, [
46
- [a, 0.2],
47
- [b, 0.2],
48
- [diffLayer, 1],
49
- ]);
50
- const bufDiff = new Uint8Array(await diff.getBuffer(jimp.JimpMime.png))
51
- .buffer;
52
- self.postMessage(
53
- {
54
- bufDiff,
55
- addition,
56
- deletion,
57
- modification,
58
- },
59
- [bufDiff],
60
- );
142
+ async (e: MessageEvent<InitMessage | PageMessage>) => {
143
+ try {
144
+ const msg = e.data;
145
+ if (msg.type === "init") {
146
+ pdfA = mupdf.PDFDocument.openDocument(msg.aBytes, "application/pdf");
147
+ pdfB = mupdf.PDFDocument.openDocument(msg.bBytes, "application/pdf");
148
+ pdfMask = msg.maskBytes
149
+ ? mupdf.PDFDocument.openDocument(msg.maskBytes, "application/pdf")
150
+ : new mupdf.PDFDocument();
151
+ opts = {
152
+ dpi: msg.dpi,
153
+ alpha: msg.alpha,
154
+ pallet: msg.pallet,
155
+ align: msg.align,
156
+ };
157
+ if (pdfA.countPages() > 0) pdfA.loadPage(0).destroy();
158
+ const ready: ReadyMessage = { type: "ready" };
159
+ self.postMessage(ready);
160
+ } else if (msg.type === "page") {
161
+ const result = await processPage(msg.index);
162
+ self.postMessage(result, [
163
+ result.a.data,
164
+ result.b.data,
165
+ result.diff.data,
166
+ ]);
167
+ }
168
+ } catch (err) {
169
+ const errorMsg: ErrorMessage = {
170
+ type: "error",
171
+ message:
172
+ err instanceof Error ? `${err.message}\n${err.stack}` : String(err),
173
+ };
174
+ self.postMessage(errorMsg);
175
+ }
61
176
  },
62
177
  );
178
+
179
+ const loaded: LoadedMessage = { type: "loaded" };
180
+ self.postMessage(loaded);