pdf-diff-viewer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/LICENSE +21 -0
- package/README.md +302 -0
- package/bin/cli.js +2 -0
- package/package.json +63 -0
- package/public/app.js +588 -0
- package/public/index.html +39 -0
- package/server.js +20 -0
- package/src/PDFDiffViewer.js +638 -0
|
@@ -0,0 +1,638 @@
|
|
|
1
|
+
/**
 * PDFDiffViewer - Embeddable PDF Comparison Library
 * A framework-agnostic library for visual PDF comparison.
 *
 * Requires a global `pdfjsLib` (PDF.js) and a DOM with `<canvas>` support.
 * Each page of both documents is rasterized, the two bitmaps are aligned by a
 * small translation search, compared pixel by pixel, and the differing regions
 * are highlighted (snapped to word boxes where text is available).
 */
class PDFDiffViewer {
  /**
   * @param {string|Element} container - CSS selector or element that receives the rendered comparison.
   * @param {Object} [options]
   * @param {number} [options.scale=3.0] - Scale passed to `page.getViewport` when rasterizing.
   * @param {number} [options.maxShift=3] - Largest pixel offset (per axis, both directions) tried when aligning B to A.
   * @param {number} [options.dilationRadius=0] - Radius, in pixels, by which differing pixels are grown before grouping.
   * @param {number} [options.colorTolerance=120] - A pixel differs when the sum of absolute R, G and B differences exceeds this.
   * @param {number} [options.minHighlightArea=60] - Minimum bounding-box area (px) for a differing region to be kept.
   * @param {number} [options.minWordSize=8] - Minimum width and height (px) a word box needs to be used as a highlight.
   * @param {number} [options.highlightAlpha=0.32] - Alpha of the highlight fill colour.
   * @param {string} [options.labelA='Document A'] - Caption of the left column.
   * @param {string} [options.labelB='Document B'] - Caption of the right column.
   * @param {string} [options.workerSrc] - URL assigned to `pdfjsLib.GlobalWorkerOptions.workerSrc`.
   * @param {boolean} [options.showPageNumbers=true] - Whether the summary heading and "Page N" headings are rendered.
   * @param {Array<{page:number,x:number,y:number,width:number,height:number}>} [options.cropRegions]
   *   Per-page crop rectangle in rendered-canvas pixels; the first entry matching a page is used.
   * @param {Array<{page:number,x:number,y:number,width:number,height:number}>} [options.maskRegions]
   *   Per-page rectangles, in diff-image pixels, whose differences are ignored.
   * @throws {Error} If the container cannot be resolved or PDF.js is not loaded.
   */
  constructor(container, options = {}) {
    this.container =
      typeof container === 'string' ? document.querySelector(container) : container;

    if (!this.container) {
      throw new Error('Container element not found');
    }

    const opts = options ?? {};

    // Configuration.
    // Numeric settings use `??` so that an explicit 0 is honoured (e.g.
    // `maxShift: 0` disables alignment, `colorTolerance: 0` is an exact match).
    // `scale` keeps `||` because a zero scale cannot be rendered; strings and
    // arrays keep `||` so empty values still fall back to the defaults.
    this.options = {
      scale: opts.scale || 3.0,
      maxShift: opts.maxShift ?? 3,
      dilationRadius: opts.dilationRadius ?? 0,
      colorTolerance: opts.colorTolerance ?? 120,
      minHighlightArea: opts.minHighlightArea ?? 60,
      minWordSize: opts.minWordSize ?? 8,
      highlightAlpha: opts.highlightAlpha ?? 0.32,
      labelA: opts.labelA || 'Document A',
      labelB: opts.labelB || 'Document B',
      workerSrc: opts.workerSrc || 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js',
      showPageNumbers: opts.showPageNumbers !== false,
      cropRegions: opts.cropRegions || [],
      maskRegions: opts.maskRegions || []
    };

    // Check if PDF.js is loaded
    if (typeof pdfjsLib === 'undefined') {
      throw new Error('PDF.js library not found. Please include PDF.js before using PDFDiffViewer.');
    }

    // Note: this is a PDF.js global, so it affects every PDF.js user on the page.
    pdfjsLib.GlobalWorkerOptions.workerSrc = this.options.workerSrc;

    this.results = null;
  }

  /**
   * Compare two PDF files and render the results
   * @param {File|Blob|ArrayBuffer|Uint8Array} pdfA - First PDF
   * @param {File|Blob|ArrayBuffer|Uint8Array} pdfB - Second PDF
   * @returns {Promise<Object>} Comparison results:
   *   `{ totalPages, totalDiffPixels, pageResults }`.
   * @throws {Error} If the viewer was destroyed, an input type is unsupported,
   *   or the two documents have different page counts.
   */
  async compare(pdfA, pdfB) {
    if (!this.container) {
      throw new Error('PDFDiffViewer has been destroyed');
    }

    // Convert Files to ArrayBuffers if needed
    const [bufferA, bufferB] = await Promise.all([
      this._toArrayBuffer(pdfA),
      this._toArrayBuffer(pdfB)
    ]);

    // Load PDFs (in parallel); keep the loading tasks so they can be released.
    const taskA = pdfjsLib.getDocument({ data: bufferA });
    const taskB = pdfjsLib.getDocument({ data: bufferB });

    try {
      const [docA, docB] = await Promise.all([taskA.promise, taskB.promise]);

      if (docA.numPages !== docB.numPages) {
        throw new Error(`Page count mismatch: ${docA.numPages} vs ${docB.numPages}`);
      }

      // Clear previous results
      this.container.innerHTML = '';

      let totalDiffPixels = 0;
      const pageResults = [];

      // Create summary element first so it stays above the pages.
      const summaryDiv = document.createElement('div');
      summaryDiv.className = 'pdf-diff-summary';
      this.container.appendChild(summaryDiv);

      // Process each page sequentially to bound peak canvas memory.
      for (let pageNum = 1; pageNum <= docA.numPages; pageNum++) {
        const pageResult = await this._comparePage(docA, docB, pageNum);
        pageResults.push(pageResult);
        totalDiffPixels += pageResult.diffPixels;

        this._renderPageComparison(pageResult, pageNum);
      }

      this.results = {
        totalPages: docA.numPages,
        totalDiffPixels,
        pageResults
      };

      // Update summary
      if (this.options.showPageNumbers) {
        const heading = document.createElement('h3');
        heading.textContent = `Comparison Results: ${docA.numPages} page(s)`;
        summaryDiv.appendChild(heading);
      }

      return this.results;
    } finally {
      // The results only hold data URLs, so the documents (and their workers)
      // are no longer needed once comparison finishes or fails.
      await Promise.allSettled([taskA, taskB].map((task) => task.destroy()));
    }
  }

  /**
   * Get the comparison results
   * @returns {Object|null} Result of the last successful `compare`, or null.
   */
  getResults() {
    return this.results;
  }

  /**
   * Clear the viewer and reset
   */
  clear() {
    if (this.container) {
      this.container.innerHTML = '';
    }
    this.results = null;
  }

  /**
   * Destroy the viewer and clean up. Safe to call more than once.
   */
  destroy() {
    this.clear();
    this.container = null;
  }

  // ===== PRIVATE METHODS =====

  /**
   * Normalize a supported input into an ArrayBuffer.
   * @param {File|Blob|ArrayBuffer|Uint8Array} input
   * @returns {Promise<ArrayBuffer>}
   * @throws {Error} For any other input type.
   */
  async _toArrayBuffer(input) {
    if (input instanceof ArrayBuffer) {
      return input;
    }

    if (input instanceof Uint8Array) {
      // A view may cover only part of its buffer (subarray, pooled Node
      // Buffer); returning `input.buffer` as-is would expose unrelated bytes.
      const { buffer, byteOffset, byteLength } = input;
      const coversWholeBuffer = byteOffset === 0 && byteLength === buffer.byteLength;
      return coversWholeBuffer ? buffer : buffer.slice(byteOffset, byteOffset + byteLength);
    }

    // Guard the globals so environments without File/Blob reach the error below
    // instead of failing with a ReferenceError.
    const isFile = typeof File !== 'undefined' && input instanceof File;
    const isBlob = typeof Blob !== 'undefined' && input instanceof Blob;
    if (isFile || isBlob) {
      return input.arrayBuffer();
    }

    throw new Error('Unsupported input type. Expected File, Blob, ArrayBuffer, or Uint8Array.');
  }

  /**
   * Rasterize one page of each document, diff them and build the overlays.
   * @param {Object} docA - PDF.js document for A.
   * @param {Object} docB - PDF.js document for B.
   * @param {number} pageNum - 1-based page number.
   * @returns {Promise<{pageNum:number, diffPixels:number, overlayA:string, overlayB:string, alignment:{dx:number,dy:number}}>}
   *   `overlayA`/`overlayB` are PNG data URLs; `diffPixels` excludes masked pixels.
   */
  async _comparePage(docA, docB, pageNum) {
    const canvasA = document.createElement('canvas');
    const canvasB = document.createElement('canvas');

    const { words: wordsA } = await this._renderPageToCanvas(docA, pageNum, canvasA);
    const { words: wordsB } = await this._renderPageToCanvas(docB, pageNum, canvasB);

    // Crop both bitmaps and move the word boxes into the cropped coordinates.
    const pageCrop = this.options.cropRegions.find((region) => region.page === pageNum);
    const croppedWordsA = this._offsetWordBoxes(wordsA, pageCrop);
    const croppedWordsB = this._offsetWordBoxes(wordsB, pageCrop);
    const croppedA = this._applyCrop(canvasA, pageCrop);
    const croppedB = this._applyCrop(canvasB, pageCrop);

    // Pad to a common size so the two bitmaps can be compared index by index.
    const targetWidth = Math.max(croppedA.width, croppedB.width);
    const targetHeight = Math.max(croppedA.height, croppedB.height);
    const paddedA = this._padCanvas(croppedA, targetWidth, targetHeight);
    const paddedB = this._padCanvas(croppedB, targetWidth, targetHeight);

    const highlightCanvasB = document.createElement('canvas');
    highlightCanvasB.width = targetWidth;
    highlightCanvasB.height = targetHeight;

    const imgA = this._canvasToImageData(paddedA);

    const canvasDiff = document.createElement('canvas');
    canvasDiff.width = targetWidth;
    canvasDiff.height = targetHeight;

    const ctxDiff = canvasDiff.getContext('2d');
    const diffImage = ctxDiff.createImageData(imgA.width, imgA.height);

    // Find best alignment
    const { colorTolerance } = this.options;
    const best = this._findBestOffset(imgA, paddedB, imgA.width, imgA.height, colorTolerance);
    const shiftedB = this._getShiftedImageData(paddedB, imgA.width, imgA.height, best.dx, best.dy);

    const rawDiffPixels = this._buildDiffImage(imgA, shiftedB, diffImage, colorTolerance);

    // Apply masks; masked pixels are never highlighted, so they are not
    // reported as differences either.
    const pageMasks = this.options.maskRegions.filter((region) => region.page === pageNum);
    const maskedPixels = this._applyMasks(diffImage, pageMasks);
    const diffPixels = rawDiffPixels - maskedPixels;

    // Dilate diff mask
    this._dilateDiffMask(diffImage, imgA.width, imgA.height, this.options.dilationRadius);

    const boxes = this._extractDiffBoxes(diffImage, imgA.width, imgA.height, this.options.minHighlightArea);

    const wordHighlightsA = this._mapDiffsToWordBoxes(boxes, croppedWordsA);
    this._drawHighlightBoxes(ctxDiff, wordHighlightsA, 'red');

    // The diff was computed with B drawn at (dx, dy); undo that shift to get
    // back to B's own coordinates.
    const boxesForB = this._translateBoxes(boxes, -best.dx, -best.dy);
    const wordHighlightsB = this._mapDiffsToWordBoxes(boxesForB, croppedWordsB);
    const highlightCtxB = highlightCanvasB.getContext('2d');
    this._drawHighlightBoxes(highlightCtxB, wordHighlightsB, 'green');

    // Create overlays
    const overlayOnA = this._overlayDiff(paddedA, canvasDiff);
    const overlayOnB = this._overlayDiff(paddedB, highlightCanvasB);

    return {
      pageNum,
      diffPixels,
      overlayA: overlayOnA.toDataURL('image/png'),
      overlayB: overlayOnB.toDataURL('image/png'),
      alignment: { dx: best.dx, dy: best.dy }
    };
  }

  /**
   * Append the side-by-side view of one compared page to the container.
   * @param {{overlayA:string, overlayB:string}} pageResult - Result of `_comparePage`.
   * @param {number} pageNum - Page number shown in the heading.
   */
  _renderPageComparison(pageResult, pageNum) {
    const pageDiv = document.createElement('div');
    pageDiv.className = 'pdf-diff-page';

    if (this.options.showPageNumbers) {
      const title = document.createElement('h4');
      title.textContent = `Page ${pageNum}`;
      title.style.marginTop = '20px';
      pageDiv.appendChild(title);
    }

    const row = document.createElement('div');
    Object.assign(row.style, {
      display: 'grid',
      gridTemplateColumns: '1fr 1fr',
      gap: '15px',
      marginBottom: '25px',
      borderTop: '2px solid #ddd',
      paddingTop: '15px'
    });

    row.appendChild(this._makeColumn(this.options.labelA, pageResult.overlayA));
    row.appendChild(this._makeColumn(this.options.labelB, pageResult.overlayB));
    pageDiv.appendChild(row);

    this.container.appendChild(pageDiv);
  }

  /**
   * Build one labelled image column.
   * @param {string} labelText - Caption; inserted as text, never parsed as HTML.
   * @param {string} imageSrc - Image URL (a PNG data URL in practice).
   * @returns {Element} The column element.
   */
  _makeColumn(labelText, imageSrc) {
    const col = document.createElement('div');

    // Labels come from caller options (often file names), so they must not be
    // interpolated into innerHTML.
    const label = document.createElement('div');
    const bold = document.createElement('b');
    bold.textContent = labelText;
    label.appendChild(bold);

    const img = document.createElement('img');
    img.src = imageSrc;
    Object.assign(img.style, {
      width: '100%',
      border: '1px solid #ccc',
      imageRendering: 'crisp-edges',
      backgroundColor: '#fff'
    });

    col.appendChild(label);
    col.appendChild(img);
    return col;
  }

  /**
   * Render a page into `canvas` (resizing it) and extract its word boxes.
   * @param {Object} pdf - PDF.js document.
   * @param {number} pageNum - 1-based page number.
   * @param {HTMLCanvasElement} canvas - Target canvas; its size is overwritten.
   * @returns {Promise<{words: Array<{x:number,y:number,width:number,height:number}>}>}
   */
  async _renderPageToCanvas(pdf, pageNum, canvas) {
    const page = await pdf.getPage(pageNum);
    const viewport = page.getViewport({ scale: this.options.scale });

    canvas.width = Math.floor(viewport.width);
    canvas.height = Math.floor(viewport.height);

    const ctx = canvas.getContext('2d');
    ctx.clearRect(0, 0, canvas.width, canvas.height);
    ctx.imageSmoothingEnabled = false;

    await page.render({ canvasContext: ctx, viewport }).promise;

    const words = await this._extractWordBoxes(page, viewport);
    return { words };
  }

  /**
   * Approximate a bounding box for every word on a page.
   *
   * PDF.js reports text in runs, not words. Each run's width is divided evenly
   * by its character count and the run is split on whitespace, so boxes are
   * estimates (exact only for monospaced text).
   * @param {Object} page - PDF.js page.
   * @param {Object} viewport - Viewport used for rendering (supplies transform and scale).
   * @returns {Promise<Array<{x:number,y:number,width:number,height:number}>>} Boxes in canvas pixels.
   */
  async _extractWordBoxes(page, viewport) {
    const textContent = await page.getTextContent({ normalizeWhitespace: true });
    const boxes = [];

    for (const item of textContent.items) {
      const text = (item.str || '').trim();
      if (!text) continue;

      // Combined viewport x text matrix [a, b, c, d, e, f]; (e, f) is the
      // run's origin in canvas pixels.
      const [, , c, d, originX, originY] = pdfjsLib.Util.transform(viewport.transform, item.transform);

      const width = (item.width || 0) * viewport.scale;
      const glyphHeight = Math.hypot(c, d);
      const height = glyphHeight || ((item.height || 0) * viewport.scale);
      if (!width || !height) continue;

      const charWidth = width / text.length;
      if (!Number.isFinite(charWidth) || charWidth <= 0) continue;

      // The origin is treated as the baseline; the box extends upwards from it.
      const baseY = originY - height;
      const paddingX = charWidth * 0.18;
      const paddingY = height * 0.15;
      let cursorX = originX;

      // The capturing group keeps whitespace segments so the cursor advances
      // over them as well.
      for (const segment of text.split(/(\s+)/)) {
        if (!segment) continue;

        const segmentWidth = charWidth * segment.length;
        if (segment.trim()) {
          boxes.push(this._padBox(
            { x: cursorX, y: baseY, width: segmentWidth, height },
            paddingX,
            paddingY
          ));
        }
        cursorX += segmentWidth;
      }
    }

    return boxes;
  }

  /**
   * Return a canvas of exactly the target size containing `srcCanvas` at the
   * top-left on a white background. Returns `srcCanvas` itself if it already
   * has that size.
   * @param {HTMLCanvasElement} srcCanvas
   * @param {number} targetWidth
   * @param {number} targetHeight
   * @returns {HTMLCanvasElement}
   */
  _padCanvas(srcCanvas, targetWidth, targetHeight) {
    if (srcCanvas.width === targetWidth && srcCanvas.height === targetHeight) {
      return srcCanvas;
    }

    const padded = document.createElement('canvas');
    padded.width = targetWidth;
    padded.height = targetHeight;

    const ctx = padded.getContext('2d');
    ctx.fillStyle = 'white';
    ctx.fillRect(0, 0, targetWidth, targetHeight);
    ctx.drawImage(srcCanvas, 0, 0);
    return padded;
  }

  /**
   * Read `srcCanvas` translated by (dx, dy) on a white background.
   * @param {HTMLCanvasElement} srcCanvas
   * @param {number} width - Output width.
   * @param {number} height - Output height.
   * @param {number} dx - Horizontal shift in pixels.
   * @param {number} dy - Vertical shift in pixels.
   * @returns {ImageData}
   */
  _getShiftedImageData(srcCanvas, width, height, dx, dy) {
    const temp = document.createElement('canvas');
    temp.width = width;
    temp.height = height;

    const ctx = temp.getContext('2d');
    ctx.fillStyle = 'white';
    ctx.fillRect(0, 0, width, height);
    ctx.drawImage(srcCanvas, dx, dy);
    return ctx.getImageData(0, 0, width, height);
  }

  /**
   * Sum of absolute R, G and B differences of the pixel starting at `index`
   * (alpha is ignored). Range 0..765 for 8-bit channels.
   * @param {ArrayLike<number>} dataA
   * @param {ArrayLike<number>} dataB
   * @param {number} index - Offset of the pixel's red channel.
   * @returns {number}
   */
  _pixelDelta(dataA, dataB, index) {
    return Math.abs(dataA[index] - dataB[index]) +
      Math.abs(dataA[index + 1] - dataB[index + 1]) +
      Math.abs(dataA[index + 2] - dataB[index + 2]);
  }

  /**
   * Count pixels whose delta exceeds `tolerance`.
   * @param {{data: ArrayLike<number>}} imgA
   * @param {{data: ArrayLike<number>}} imgB - Same dimensions as `imgA`.
   * @param {number} tolerance
   * @returns {number}
   */
  _countDiffPixels(imgA, imgB, tolerance) {
    const dataA = imgA.data;
    const dataB = imgB.data;
    let count = 0;
    for (let i = 0; i < dataA.length; i += 4) {
      if (this._pixelDelta(dataA, dataB, i) > tolerance) {
        count++;
      }
    }
    return count;
  }

  /**
   * Search translations of B within +/- `options.maxShift` pixels for the one
   * that minimizes the number of differing pixels.
   *
   * The zero offset is scored first so that ties resolve to "no shift", and
   * the search stops as soon as a perfect match is found.
   * @param {ImageData} imgA - Reference pixels.
   * @param {HTMLCanvasElement} paddedB - Canvas to shift.
   * @param {number} width
   * @param {number} height
   * @param {number} tolerance - See `_countDiffPixels`.
   * @returns {{diff:number, dx:number, dy:number}}
   */
  _findBestOffset(imgA, paddedB, width, height, tolerance) {
    // Whole, non-negative pixel range; anything else degrades to "no search".
    const limit = Math.max(0, Math.floor(Number(this.options.maxShift) || 0));

    const score = (dx, dy) => this._countDiffPixels(
      imgA,
      this._getShiftedImageData(paddedB, width, height, dx, dy),
      tolerance
    );

    let best = { diff: score(0, 0), dx: 0, dy: 0 };

    for (let dy = -limit; dy <= limit && best.diff > 0; dy++) {
      for (let dx = -limit; dx <= limit && best.diff > 0; dx++) {
        if (dx === 0 && dy === 0) continue; // already scored
        const diff = score(dx, dy);
        if (diff < best.diff) {
          best = { diff, dx, dy };
        }
      }
    }

    return best;
  }

  /**
   * Fill `diffImage` with opaque red where A and B differ and transparent
   * black elsewhere.
   * @param {{data: ArrayLike<number>}} imgA
   * @param {{data: ArrayLike<number>}} imgB
   * @param {{data: Uint8ClampedArray}} diffImage - Written in place.
   * @param {number} tolerance - See `_countDiffPixels`.
   * @returns {number} Number of differing pixels.
   */
  _buildDiffImage(imgA, imgB, diffImage, tolerance) {
    const target = diffImage.data;
    const dataA = imgA.data;
    const dataB = imgB.data;
    let diffPixels = 0;

    for (let i = 0; i < dataA.length; i += 4) {
      const differs = this._pixelDelta(dataA, dataB, i) > tolerance;
      target[i] = differs ? 255 : 0;
      target[i + 1] = 0;
      target[i + 2] = 0;
      target[i + 3] = differs ? 255 : 0;
      if (differs) diffPixels++;
    }

    return diffPixels;
  }

  /**
   * Grow every marked pixel (red channel > 0) into a square of side
   * `2 * radius + 1` and rewrite all marked pixels as opaque red.
   * @param {{data: Uint8ClampedArray}} diffImage - Modified in place.
   * @param {number} width
   * @param {number} height
   * @param {number} [radius=0] - Floored to a whole number of pixels.
   */
  _dilateDiffMask(diffImage, width, height, radius = 0) {
    const pixels = diffImage.data;
    const total = width * height;
    // Fractional radii would yield fractional array indices (silent no-ops).
    const reach = Math.max(0, Math.floor(Number(radius) || 0));

    const seed = new Uint8Array(total);
    for (let i = 0; i < total; i++) {
      if (pixels[i * 4] > 0) {
        seed[i] = 1;
      }
    }

    // Dilation must read the untouched seed while writing, hence the copy.
    const grown = reach > 0 ? new Uint8Array(seed) : seed;
    if (reach > 0) {
      for (let y = 0; y < height; y++) {
        for (let x = 0; x < width; x++) {
          if (!seed[y * width + x]) continue;

          const yMin = Math.max(0, y - reach);
          const yMax = Math.min(height - 1, y + reach);
          const xMin = Math.max(0, x - reach);
          const xMax = Math.min(width - 1, x + reach);
          for (let ny = yMin; ny <= yMax; ny++) {
            grown.fill(1, ny * width + xMin, ny * width + xMax + 1);
          }
        }
      }
    }

    for (let i = 0; i < total; i++) {
      if (!grown[i]) continue;
      const base = i * 4;
      pixels[base] = 255;
      pixels[base + 1] = 0;
      pixels[base + 2] = 0;
      pixels[base + 3] = 255;
    }
  }

  /**
   * Group marked pixels (alpha != 0) into 4-connected components and return
   * the bounding box of every component whose box area is at least `minArea`.
   * @param {{data: ArrayLike<number>}} diffImage
   * @param {number} width
   * @param {number} height
   * @param {number} [minArea=25] - Minimum bounding-box area in pixels.
   * @returns {Array<{x:number,y:number,width:number,height:number}>} Boxes in scan order of their first pixel.
   */
  _extractDiffBoxes(diffImage, width, height, minArea = 25) {
    const data = diffImage.data;
    const total = width * height;
    const visited = new Uint8Array(total);
    const boxes = [];
    const stack = [];

    // Queue a neighbour if it is marked and not yet seen.
    const enqueue = (index) => {
      if (visited[index] || data[index * 4 + 3] === 0) return;
      visited[index] = 1;
      stack.push(index);
    };

    for (let start = 0; start < total; start++) {
      if (visited[start] || data[start * 4 + 3] === 0) continue;

      let minX = start % width;
      let maxX = minX;
      let minY = Math.floor(start / width);
      let maxY = minY;

      visited[start] = 1;
      stack.push(start);

      // Iterative flood fill (explicit stack avoids deep recursion).
      while (stack.length) {
        const current = stack.pop();
        const cx = current % width;
        const cy = Math.floor(current / width);

        if (cx < minX) minX = cx;
        if (cx > maxX) maxX = cx;
        if (cy < minY) minY = cy;
        if (cy > maxY) maxY = cy;

        // Explicit edge checks keep the fill from wrapping across row ends.
        if (cx + 1 < width) enqueue(current + 1);
        if (cx > 0) enqueue(current - 1);
        if (cy + 1 < height) enqueue(current + width);
        if (cy > 0) enqueue(current - width);
      }

      const boxWidth = maxX - minX + 1;
      const boxHeight = maxY - minY + 1;
      if (boxWidth * boxHeight >= minArea) {
        boxes.push({ x: minX, y: minY, width: boxWidth, height: boxHeight });
      }
    }

    return boxes;
  }

  /**
   * Clear the context's canvas and fill every box with the highlight colour at
   * `options.highlightAlpha`.
   * @param {CanvasRenderingContext2D} ctx
   * @param {Array<{x:number,y:number,width:number,height:number}>} boxes
   * @param {string} [color='red'] - 'red' or 'green'; any other value leaves
   *   the context's current fill style untouched.
   */
  _drawHighlightBoxes(ctx, boxes, color = 'red') {
    ctx.clearRect(0, 0, ctx.canvas.width, ctx.canvas.height);

    const alpha = this.options.highlightAlpha;
    if (color === 'red') {
      ctx.fillStyle = `rgba(255, 0, 0, ${alpha})`;
    } else if (color === 'green') {
      ctx.fillStyle = `rgba(0, 200, 0, ${alpha})`;
    }

    for (const { x, y, width, height } of boxes) {
      ctx.fillRect(x, y, width, height);
    }
  }

  /**
   * Whether two rectangles overlap with positive area (touching edges do not count).
   * @param {{x:number,y:number,width:number,height:number}} a
   * @param {{x:number,y:number,width:number,height:number}} b
   * @returns {boolean}
   */
  _rectsIntersect(a, b) {
    return (
      a.x < b.x + b.width &&
      a.x + a.width > b.x &&
      a.y < b.y + b.height &&
      a.y + a.height > b.y
    );
  }

  /**
   * Drop boxes that equal an earlier box after rounding to whole pixels.
   * The first occurrence is kept, copied with its original (unrounded) values.
   * @param {Array<{x:number,y:number,width:number,height:number}>} boxes
   * @returns {Array<{x:number,y:number,width:number,height:number}>}
   */
  _dedupeBoxes(boxes) {
    const seen = new Set();
    const result = [];

    for (const { x, y, width, height } of boxes) {
      const key = [x, y, width, height].map(Math.round).join(':');
      if (seen.has(key)) continue;
      seen.add(key);
      result.push({ x, y, width, height });
    }

    return result;
  }

  /**
   * Grow a box by the given padding on every side. The origin is clamped to 0
   * and each dimension is at least 1.
   * @param {{x:number,y:number,width:number,height:number}} box
   * @param {number} paddingX
   * @param {number} paddingY
   * @returns {{x:number,y:number,width:number,height:number}}
   */
  _padBox(box, paddingX, paddingY) {
    return {
      x: Math.max(0, box.x - paddingX),
      y: Math.max(0, box.y - paddingY),
      width: Math.max(1, box.width + paddingX * 2),
      height: Math.max(1, box.height + paddingY * 2)
    };
  }

  /**
   * Replace each diff box by the word boxes it intersects.
   *
   * Word boxes smaller than `options.minWordSize` in either dimension are
   * skipped. A diff box with no usable word is kept as-is, so non-text
   * differences stay visible. If there are no word boxes at all, `diffBoxes`
   * is returned unchanged.
   * @param {Array<{x:number,y:number,width:number,height:number}>} diffBoxes
   * @param {Array<{x:number,y:number,width:number,height:number}>} wordBoxes
   * @returns {Array<{x:number,y:number,width:number,height:number}>} De-duplicated highlight boxes.
   */
  _mapDiffsToWordBoxes(diffBoxes, wordBoxes) {
    if (!wordBoxes || !wordBoxes.length) {
      return diffBoxes;
    }

    const { minWordSize } = this.options;
    const isUsable = (word) => word.width >= minWordSize && word.height >= minWordSize;
    const matched = [];

    for (const diffBox of diffBoxes) {
      const hits = wordBoxes.filter(
        (word) => this._rectsIntersect(diffBox, word) && isUsable(word)
      );
      if (hits.length) {
        matched.push(...hits);
      } else {
        matched.push(diffBox);
      }
    }

    return this._dedupeBoxes(matched);
  }

  /**
   * Return copies of the boxes moved by (dx, dy).
   * @param {Array<{x:number,y:number,width:number,height:number}>} boxes
   * @param {number} dx
   * @param {number} dy
   * @returns {Array<{x:number,y:number,width:number,height:number}>}
   */
  _translateBoxes(boxes, dx, dy) {
    return boxes.map(({ x, y, width, height }) => ({
      x: x + dx,
      y: y + dy,
      width,
      height
    }));
  }

  /**
   * Move word boxes into the coordinate system of a cropped canvas.
   * @param {Array<{x:number,y:number,width:number,height:number}>} words
   * @param {{x:number,y:number}|undefined} crop - Crop region; falsy returns `words` unchanged.
   * @returns {Array<{x:number,y:number,width:number,height:number}>}
   */
  _offsetWordBoxes(words, crop) {
    if (!crop) return words;
    return words.map((box) => ({
      ...box,
      x: box.x - crop.x,
      y: box.y - crop.y
    }));
  }

  /**
   * Read the full pixel contents of a canvas.
   * @param {HTMLCanvasElement} canvas
   * @returns {ImageData}
   */
  _canvasToImageData(canvas) {
    return canvas.getContext('2d').getImageData(0, 0, canvas.width, canvas.height);
  }

  /**
   * Copy `region` of `canvas` into a new canvas of the region's size.
   * @param {HTMLCanvasElement} canvas
   * @param {{x:number,y:number,width:number,height:number}|undefined} region - Falsy returns `canvas` itself.
   * @returns {HTMLCanvasElement}
   */
  _applyCrop(canvas, region) {
    if (!region) return canvas;

    const { x, y, width, height } = region;
    const cropped = document.createElement('canvas');
    cropped.width = width;
    cropped.height = height;
    cropped.getContext('2d').drawImage(canvas, x, y, width, height, 0, 0, width, height);
    return cropped;
  }

  /**
   * Erase diff pixels inside the given rectangles.
   *
   * Rectangles are clamped to the image and widened to whole pixels, so a mask
   * that sticks out of the image cannot wrap into the next row or write
   * outside the buffer.
   * @param {{data: Uint8ClampedArray, width:number, height:number}} diffImage - Modified in place.
   * @param {Array<{x:number,y:number,width:number,height:number}>} [masks]
   * @returns {number} How many marked pixels (alpha != 0) were erased.
   */
  _applyMasks(diffImage, masks) {
    if (!masks?.length) return 0;

    const { data, width: imageWidth, height: imageHeight } = diffImage;
    let erased = 0;

    for (const { x, y, width, height } of masks) {
      const left = Math.max(0, Math.floor(x));
      const top = Math.max(0, Math.floor(y));
      const right = Math.min(imageWidth, Math.ceil(x + width));
      const bottom = Math.min(imageHeight, Math.ceil(y + height));

      for (let row = top; row < bottom; row++) {
        for (let col = left; col < right; col++) {
          const idx = (row * imageWidth + col) * 4;
          if (data[idx + 3] !== 0) erased++;
          data[idx] = 0;
          data[idx + 1] = 0;
          data[idx + 2] = 0;
          data[idx + 3] = 0;
        }
      }
    }

    return erased;
  }

  /**
   * Composite `diffCanvas` over `baseCanvas` on a new canvas.
   * @param {HTMLCanvasElement} baseCanvas
   * @param {HTMLCanvasElement} diffCanvas
   * @param {number} [opacity=0.3] - Global alpha used for the diff layer; it
   *   multiplies with whatever alpha the diff layer's own pixels carry.
   * @returns {HTMLCanvasElement}
   */
  _overlayDiff(baseCanvas, diffCanvas, opacity = 0.3) {
    const overlay = document.createElement('canvas');
    overlay.width = baseCanvas.width;
    overlay.height = baseCanvas.height;

    const ctx = overlay.getContext('2d');
    ctx.drawImage(baseCanvas, 0, 0);
    ctx.globalAlpha = opacity;
    ctx.drawImage(diffCanvas, 0, 0);
    ctx.globalAlpha = 1;
    ctx.imageSmoothingEnabled = false;

    return overlay;
  }
}
|
|
631
|
+
|
|
632
|
+
// Export for different module systems

// CommonJS (Node.js, bundlers): expose the class as the module's export.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = PDFDiffViewer;
}

// Browser: additionally publish the class as a global for <script> usage.
if (typeof window !== 'undefined') {
  window.PDFDiffViewer = PDFDiffViewer;
}
|