@govtechsg/oobee 0.10.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.dockerignore +22 -0
- package/.github/pull_request_template.md +11 -0
- package/.github/workflows/docker-test.yml +54 -0
- package/.github/workflows/image.yml +107 -0
- package/.github/workflows/publish.yml +18 -0
- package/.idea/modules.xml +8 -0
- package/.idea/purple-a11y.iml +9 -0
- package/.idea/vcs.xml +6 -0
- package/.prettierrc.json +12 -0
- package/.vscode/extensions.json +5 -0
- package/.vscode/settings.json +10 -0
- package/CODE_OF_CONDUCT.md +128 -0
- package/DETAILS.md +163 -0
- package/Dockerfile +60 -0
- package/INSTALLATION.md +146 -0
- package/INTEGRATION.md +785 -0
- package/LICENSE +22 -0
- package/README.md +587 -0
- package/SECURITY.md +5 -0
- package/__mocks__/mock-report.html +1431 -0
- package/__mocks__/mockFunctions.ts +32 -0
- package/__mocks__/mockIssues.ts +64 -0
- package/__mocks__/mock_all_issues/000000001.json +64 -0
- package/__mocks__/mock_all_issues/000000002.json +53 -0
- package/__mocks__/mock_all_issues/fake-file.txt +0 -0
- package/__tests__/logs.test.ts +25 -0
- package/__tests__/mergeAxeResults.test.ts +278 -0
- package/__tests__/utils.test.ts +118 -0
- package/a11y-scan-results.zip +0 -0
- package/eslint.config.js +53 -0
- package/exclusions.txt +2 -0
- package/gitlab-pipeline-template.yml +54 -0
- package/jest.config.js +1 -0
- package/package.json +96 -0
- package/scripts/copyFiles.js +44 -0
- package/scripts/install_oobee_dependencies.cmd +13 -0
- package/scripts/install_oobee_dependencies.command +101 -0
- package/scripts/install_oobee_dependencies.ps1 +110 -0
- package/scripts/oobee_shell.cmd +13 -0
- package/scripts/oobee_shell.command +11 -0
- package/scripts/oobee_shell.sh +55 -0
- package/scripts/oobee_shell_ps.ps1 +54 -0
- package/src/cli.ts +401 -0
- package/src/combine.ts +240 -0
- package/src/constants/__tests__/common.test.ts +44 -0
- package/src/constants/cliFunctions.ts +305 -0
- package/src/constants/common.ts +1840 -0
- package/src/constants/constants.ts +443 -0
- package/src/constants/errorMeta.json +319 -0
- package/src/constants/itemTypeDescription.ts +11 -0
- package/src/constants/oobeeAi.ts +141 -0
- package/src/constants/questions.ts +181 -0
- package/src/constants/sampleData.ts +187 -0
- package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
- package/src/crawlers/commonCrawlerFunc.ts +656 -0
- package/src/crawlers/crawlDomain.ts +877 -0
- package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
- package/src/crawlers/crawlLocalFile.ts +193 -0
- package/src/crawlers/crawlSitemap.ts +356 -0
- package/src/crawlers/custom/extractAndGradeText.ts +57 -0
- package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
- package/src/crawlers/custom/utils.ts +486 -0
- package/src/crawlers/customAxeFunctions.ts +82 -0
- package/src/crawlers/pdfScanFunc.ts +468 -0
- package/src/crawlers/runCustom.ts +117 -0
- package/src/index.ts +173 -0
- package/src/logs.ts +66 -0
- package/src/mergeAxeResults.ts +964 -0
- package/src/npmIndex.ts +284 -0
- package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
- package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
- package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
- package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
- package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
- package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
- package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
- package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
- package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
- package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
- package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
- package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
- package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
- package/src/static/ejs/partials/components/topFive.ejs +6 -0
- package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
- package/src/static/ejs/partials/footer.ejs +21 -0
- package/src/static/ejs/partials/header.ejs +230 -0
- package/src/static/ejs/partials/main.ejs +40 -0
- package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
- package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
- package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
- package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
- package/src/static/ejs/partials/scripts/popper.ejs +7 -0
- package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
- package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
- package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
- package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
- package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
- package/src/static/ejs/partials/scripts/utils.ejs +441 -0
- package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
- package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
- package/src/static/ejs/partials/styles/styles.ejs +1843 -0
- package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
- package/src/static/ejs/partials/summaryHeader.ejs +70 -0
- package/src/static/ejs/partials/summaryMain.ejs +75 -0
- package/src/static/ejs/report.ejs +420 -0
- package/src/static/ejs/summary.ejs +47 -0
- package/src/static/mustache/.prettierrc +4 -0
- package/src/static/mustache/Attention Deficit.mustache +11 -0
- package/src/static/mustache/Blind.mustache +11 -0
- package/src/static/mustache/Cognitive.mustache +7 -0
- package/src/static/mustache/Colorblindness.mustache +20 -0
- package/src/static/mustache/Deaf.mustache +12 -0
- package/src/static/mustache/Deafblind.mustache +7 -0
- package/src/static/mustache/Dyslexia.mustache +14 -0
- package/src/static/mustache/Low Vision.mustache +7 -0
- package/src/static/mustache/Mobility.mustache +15 -0
- package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
- package/src/static/mustache/report.mustache +1709 -0
- package/src/types/print-message.d.ts +28 -0
- package/src/types/types.ts +46 -0
- package/src/types/xpath-to-css.d.ts +3 -0
- package/src/utils.ts +332 -0
- package/tsconfig.json +15 -0
@@ -0,0 +1,762 @@
|
|
1
|
+
import _ from 'lodash';
|
2
|
+
import pdfjs, { PDFPageProxy } from 'pdfjs-dist';
|
3
|
+
import fs from 'fs';
|
4
|
+
import { Canvas, createCanvas, SKRSContext2D } from '@napi-rs/canvas';
|
5
|
+
import assert from 'assert';
|
6
|
+
import path from 'path';
|
7
|
+
import { fileURLToPath } from 'url';
|
8
|
+
import { silentLogger } from '../logs.js';
|
9
|
+
import { TransformedRuleObject } from '../crawlers/pdfScanFunc.js';
|
10
|
+
import { IBboxLocation, StructureTree, ViewportSize } from '../types/types.js';
|
11
|
+
|
12
|
+
// ES modules have no __filename/__dirname, so both are derived from import.meta.url.
const filename = fileURLToPath(import.meta.url);
const dirname = path.dirname(filename);

// CONSTANTS
// Margin, in canvas pixels, kept on every side of a highlighted box when the screenshot is cropped.
const BBOX_PADDING = 50;
|
17
|
+
|
18
|
+
// Interfaces
|
19
|
+
// Location of a violation as parsed from a veraPDF context string by convertContextToPath.
// Every field is optional because each kind of context fills in a different subset.
interface pathObject {
  // index parsed from the `pages[N]` segment
  pageIndex?: number;
  // index parsed from the `contentStream[N]` segment
  contentStream?: number;
  // index parsed from the `content[N]` segment
  content?: number;
  // indices parsed from each `contentItem[N]` segment, in path order
  contentItems?: number[];
  // marked-content id parsed from a `mcid:N` marker
  mcid?: number;
  // index parsed from the `annots[N]` segment
  annot?: number;
}
|
27
|
+
|
28
|
+
/**
 * Canvas factory passed to pdf.js render calls; canvases are allocated with `createCanvas`.
 * Exposes the `create` / `reset` / `destroy` trio operating on `{ canvas, context }` pairs.
 */
class NodeCanvasFactory {
  /** Allocates a canvas of the requested size together with its 2D context. */
  create(width: number, height: number) {
    assert(width > 0 && height > 0, 'Invalid canvas size');
    const canvas = createCanvas(width, height);
    return { canvas, context: canvas.getContext('2d') };
  }

  /** Resizes the canvas of an existing pair in place. */
  reset(pair: { canvas: Canvas; context: SKRSContext2D }, width: number, height: number) {
    assert(pair.canvas, 'Canvas is not specified');
    assert(width > 0 && height > 0, 'Invalid canvas size');
    pair.canvas.width = width;
    pair.canvas.height = height;
  }

  /** Drops the canvas and context references held by the pair. */
  destroy(pair: { canvas: Canvas; context: SKRSContext2D }) {
    assert(pair.canvas, 'Canvas is not specified');
    pair.canvas = null;
    pair.context = null;
  }
}

// Single factory instance shared by every render in this module.
const canvasFactory = new NodeCanvasFactory();
|
63
|
+
|
64
|
+
/**
 * Produces, for every violation in `items`, a cropped PNG of the page area the violation's
 * `context` points at, and records the screenshot path and page number on a copy of the item.
 *
 * Violations whose context cannot be resolved to an existing page or to a numeric bounding box
 * are skipped (and logged) instead of aborting the whole run; they are returned without
 * `screenshotPath` / `page`.
 *
 * @param pdfFilePath path of the PDF handed to pdf.js
 * @param items violations to locate; the array itself is not modified
 * @param screenshotPath path prefix of the PNG files (`-<n>.png` is appended)
 * @returns a deep copy of `items`, enriched where a screenshot could be produced
 */
export async function getPdfScreenshots(
  pdfFilePath: string,
  items: TransformedRuleObject['items'],
  screenshotPath: string,
) {
  const newItems = _.cloneDeep(items);
  const loadingTask = pdfjs.getDocument({
    url: pdfFilePath,
    // two levels up from this module's directory, the same location getPageFromContext uses
    standardFontDataUrl: path.join(dirname, '../../node_modules/pdfjs-dist/standard_fonts/'),
    disableFontFace: true,
    verbosity: 0,
  });

  try {
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;

    // page number -> canvas that already holds the rendered page; each page is rendered once
    // and the untouched canvas is reused by every violation on that page
    const pageCanvasCache = {};

    // iterate through each violation
    for (let i = 0; i < newItems.length; i++) {
      const { context } = newItems[i];
      const bbox: IBboxLocation = { location: context };
      const bboxMap = buildBboxMap([bbox], structureTree);

      for (const [pageNum, bboxList] of Object.entries(bboxMap)) {
        const pageNumber = parseInt(pageNum, 10);
        // buildBboxMap files unresolved contexts under page 0 (page index -1 + 1);
        // pdf.getPage would reject for such a number
        if (!(pageNumber >= 1 && pageNumber <= pdf.numPages)) {
          silentLogger.error(`Skipping PDF screenshot, page not found for context: ${context}`);
          continue;
        }

        const page = await pdf.getPage(pageNumber);
        try {
          const [operatorList, annotations] = await Promise.all([
            page.getOperatorList(),
            page.getAnnotations(),
          ]);
          // bboxList holds the entries of the single violation passed to buildBboxMap;
          // only the first one is used
          const target = getBboxesList(bboxList, page)([operatorList, annotations])[0];
          const location = target?.location;
          const isLocatable =
            Array.isArray(location) &&
            location.length === 4 &&
            location.every(value => Number.isFinite(value));
          if (!isLocatable) {
            // a non-numeric box would trip the canvas size assertion while cropping
            silentLogger.error(`Skipping PDF screenshot, no bounding box for context: ${context}`);
            continue;
          }

          // Render the page on a Node canvas with 200% scale.
          const viewport = page.getViewport({ scale: 2.0 });
          let canvasAndContext = pageCanvasCache[pageNum];
          if (!canvasAndContext) {
            canvasAndContext = canvasFactory.create(viewport.width, viewport.height);
            await page.render({
              canvasContext: canvasAndContext.context,
              viewport,
              canvasFactory,
            }).promise;
            // cache only after a successful render
            pageCanvasCache[pageNum] = canvasAndContext;
          }

          newItems[i].screenshotPath = annotateAndSave(
            canvasAndContext.canvas,
            screenshotPath,
            viewport,
          )(target);
          newItems[i].page = pageNumber;
        } finally {
          page.cleanup();
        }
      }
    }
    return newItems;
  } finally {
    // release the document; a clean-up failure must not mask the real outcome
    await loadingTask.destroy().catch(() => {});
  }
}
|
129
|
+
|
130
|
+
/**
 * Builds a function that highlights one bounding box on a copy of the rendered page,
 * crops the copy around the box (plus BBOX_PADDING) and writes the crop as a PNG.
 *
 * @param origCanvas canvas holding the rendered page; it is only read from
 * @param screenshotPath path prefix; the first free `<prefix>-<counter>.png` is used
 * @param viewport size of the rendered page
 * @returns a function taking `{ location }` and returning the written file's path
 *   with its first path segment removed
 */
const annotateAndSave =
  (origCanvas: Canvas, screenshotPath: string, viewport: ViewportSize) =>
  ({ location }) => {
    // scale up by 2
    const [left, bottom, width, height] = location.map(value => value * 2);
    // `bottom` is measured from the lower page edge, canvas y from the upper one
    const top = viewport.height - bottom - height;

    // annotate a copy so the original page canvas is not "polluted"
    const annotated = canvasFactory.create(viewport.width, viewport.height);
    annotated.context.drawImage(origCanvas, 0, 0);
    annotated.context.fillStyle = 'rgba(0, 255, 255, 0.2)';
    annotated.context.fillRect(left, top, width, height);

    // crop window: the highlighted box grown by the padding on every side
    const cropX = left - BBOX_PADDING;
    const cropY = top - BBOX_PADDING;
    const cropWidth = width + BBOX_PADDING * 2;
    const cropHeight = height + BBOX_PADDING * 2;

    const cropped = canvasFactory.create(cropWidth, cropHeight);
    cropped.context.drawImage(
      annotated.canvas,
      cropX,
      cropY,
      cropWidth,
      cropHeight,
      0,
      0,
      cropWidth,
      cropHeight,
    );

    // convert the canvas to an image
    const pngBuffer = cropped.canvas.toBuffer('image/png');

    // pick the first counter value that does not collide with an existing file
    let counter = 0;
    let indexedScreenshotPath = `${screenshotPath}-${counter}.png`;
    while (fs.existsSync(indexedScreenshotPath)) {
      counter += 1;
      indexedScreenshotPath = `${screenshotPath}-${counter}.png`;
    }

    try {
      fs.writeFileSync(indexedScreenshotPath, pngBuffer);
    } catch (e) {
      silentLogger.error('Error in writing screenshot:', e);
    }

    canvasFactory.destroy(cropped);
    canvasFactory.destroy(annotated);

    // current screenshot path leads to a temp dir, so drop the leading segment for the final path
    const segments = indexedScreenshotPath.split(path.sep);
    return path.join(...segments.slice(1));
  };
|
195
|
+
|
196
|
+
/**
 * Returns the viewport as seen after rotating the page.
 * For 0 and 180 the same array is returned; for any other angle the first two and the
 * last two entries are swapped pairwise in a new array.
 */
export const rotateViewport = (rotateAngle, viewport) => {
  const keepsAxes = rotateAngle === 0 || rotateAngle === 180;
  return keepsAxes ? viewport : [1, 0, 3, 2].map(position => viewport[position]);
};
|
202
|
+
|
203
|
+
/**
 * Rotates `point` by `rotateAngle` degrees and, for 90 / 180 / 270, shifts the result by
 * sums of viewport entries. Other angles get the bare rotation.
 *
 * @returns the transformed point as `[x, y]`
 */
export const rotatePoint = (rotateAngle, point, viewport) => {
  const rad = (rotateAngle * Math.PI) / 180;
  const cos = Math.cos(rad);
  const sin = Math.sin(rad);
  const x = point[0] * cos + point[1] * sin;
  const y = -point[0] * sin + point[1] * cos;

  if (rotateAngle === 90) {
    return [x, y + (viewport[2] + viewport[0])];
  }
  if (rotateAngle === 180) {
    return [x + (viewport[2] + viewport[0]), y + (viewport[3] + viewport[1])];
  }
  if (rotateAngle === 270) {
    return [x + (viewport[3] + viewport[1]), y];
  }
  return [x, y];
};
|
223
|
+
|
224
|
+
/**
 * Rotates an `[x, y, width, height]` box by rotating two opposite corners with rotatePoint
 * and rebuilding the box from them. An angle of 0 returns `coords` itself.
 */
export const rotateCoordinates = (coords, rotateAngle, viewport) => {
  if (rotateAngle === 0) {
    return coords;
  }
  const nearCorner = rotatePoint(rotateAngle, [coords[0], coords[1]], viewport);
  const farCorner = rotatePoint(
    rotateAngle,
    [coords[0] + coords[2], coords[1] + coords[3]],
    viewport,
  );
  return [
    Math.min(nearCorner[0], farCorner[0]),
    Math.min(nearCorner[1], farCorner[1]),
    Math.abs(nearCorner[0] - farCorner[0]),
    Math.abs(nearCorner[1] - farCorner[1]),
  ];
};
|
234
|
+
|
235
|
+
/**
 * Merges two `{ x, y, width, height }` boxes into the smallest box containing both.
 * A missing new box yields the old one (or `{}`); a missing old box yields a deep copy of the new one.
 */
function concatBoundingBoxes(newBoundingBox, oldBoundingBox) {
  if (_.isNil(newBoundingBox)) {
    // covers "both missing" as well: a nil old box falls back to {}
    return oldBoundingBox || {};
  }
  if (_.isNil(oldBoundingBox)) {
    return _.cloneDeep(newBoundingBox);
  }

  const minX = Math.min(newBoundingBox.x, oldBoundingBox.x);
  const minY = Math.min(newBoundingBox.y, oldBoundingBox.y);
  const maxX = Math.max(
    newBoundingBox.x + newBoundingBox.width,
    oldBoundingBox.x + oldBoundingBox.width,
  );
  const maxY = Math.max(
    newBoundingBox.y + newBoundingBox.height,
    oldBoundingBox.y + oldBoundingBox.height,
  );

  return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
}
|
257
|
+
|
258
|
+
/**
 * Resolves a violation's marked-content reference to a bounding box on the page.
 *
 * @param listOfMcid either an array of MCIDs, whose boxes are merged, or an object with an
 *   `annot` index, whose annotation rectangle is used
 * @param pageMap lookup from MCID to `{ x, y, width, height }`
 * @param annotations annotations of the page; `rect` is read from the selected one
 * @param viewport page view box, used to rotate and offset the result
 * @param rotateAngle page rotation in degrees
 * @returns `[x, y, width, height]` relative to the rotated viewport origin,
 *   or `[]` when no box could be determined
 */
export const parseMcidToBbox = (listOfMcid, pageMap, annotations, viewport, rotateAngle) => {
  type coordsObject = {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  // stays undefined until a usable box is found, so the "nothing found" check below is reachable
  let coords: coordsObject | undefined;

  if (Array.isArray(listOfMcid)) {
    listOfMcid.forEach(mcid => {
      const currentBbox = pageMap?.[mcid];
      if (
        !_.isNil(currentBbox) &&
        !_.isNaN(currentBbox.x) &&
        !_.isNaN(currentBbox.y) &&
        !_.isNaN(currentBbox.width) &&
        !_.isNaN(currentBbox.height)
      ) {
        // test for a missing x rather than a falsy one: x === 0 is a valid coordinate
        // and must not reset the box accumulated so far
        coords = concatBoundingBoxes(currentBbox, _.isNil(coords?.x) ? undefined : coords);
      }
    });
  } else if (listOfMcid != null && Object.prototype.hasOwnProperty.call(listOfMcid, 'annot')) {
    const rect = annotations[listOfMcid.annot]?.rect;
    if (rect) {
      // rect is [x1, y1, x2, y2]
      coords = {
        x: rect[0],
        y: rect[1],
        width: Math.abs(rect[0] - rect[2]),
        height: Math.abs(rect[1] - rect[3]),
      };
    }
  }
  if (!coords) return [];

  const coordsArray = rotateCoordinates(
    [coords.x, coords.y, coords.width, coords.height],
    rotateAngle,
    viewport,
  );
  const rotatedViewport = rotateViewport(rotateAngle, viewport);
  return [
    coordsArray[0] - rotatedViewport[0],
    coordsArray[1] - rotatedViewport[1],
    coordsArray[2],
    coordsArray[3],
  ];
};
|
305
|
+
|
306
|
+
/**
 * Looks up the box stored at `operationsList[operatorIndex][glyphIndex]` and returns it
 * rotated and expressed relative to the rotated viewport origin.
 *
 * @returns `[x, y, width, height]`, or `[]` when no box is stored at that position
 */
export const getBboxForGlyph = (
  operatorIndex,
  glyphIndex,
  operationsList,
  viewport,
  rotateAngle,
) => {
  const glyphBoxes = operationsList[operatorIndex];
  const glyphBox = glyphBoxes ? glyphBoxes[glyphIndex] : null;
  if (!glyphBox) {
    return [];
  }

  const rotatedBox = rotateCoordinates(glyphBox, rotateAngle, viewport);
  const origin = rotateViewport(rotateAngle, viewport);
  return [rotatedBox[0] - origin[0], rotatedBox[1] - origin[1], rotatedBox[2], rotatedBox[3]];
};
|
326
|
+
|
327
|
+
// Below are methods adapted from
|
328
|
+
// https://github.com/veraPDF/verapdf-js-viewer/blob/master/src/services/bboxService.ts
|
329
|
+
// to determine the bounding box data of the violations from the context field
|
330
|
+
|
331
|
+
/**
 * Builds the callback that fills in `location` for every entry of `bboxList`.
 *
 * The returned function takes `[operatorList, annotations]` (the results of
 * `page.getOperatorList()` and `page.getAnnotations()`), mutates the entries of `bboxList`
 * in place and returns them.
 *
 * NOTE(review): the last two entries of `operatorList.argsArray` are read as bounding-box data;
 * presumably this relies on a pdfjs-dist build that appends them - confirm against the
 * installed package.
 *
 * @param bboxList entries produced by buildBboxMap for one page
 * @param page pdf.js page; only `view` and `rotate` are read
 */
export const getBboxesList = (bboxList, page: PDFPageProxy) => {
  return ([operatorList, annotations]) => {
    // second to last entry: boxes indexed by operator and glyph, consumed by getBboxForGlyph
    const operationData = operatorList.argsArray[operatorList.argsArray.length - 2];
    // last entry: [boxes by MCID, boxes of content items without MCID]
    const [positionData, noMCIDData] = operatorList.argsArray[operatorList.argsArray.length - 1];
    const bboxes = bboxList.map(bbox => {
      if (bbox.mcidList) {
        bbox.location = parseMcidToBbox(
          bbox.mcidList,
          positionData,
          annotations,
          page.view,
          page.rotate,
        );
      } else if (bbox.contentItemPath) {
        // contentItemPath is [contentStream, content, ...contentItems]:
        // index 1 selects the root in noMCIDData, the rest is walked below
        const contentItemsPath = bbox.contentItemPath.slice(2);
        let contentItemsBBoxes = noMCIDData[bbox.contentItemPath[1]];
        try {
          contentItemsPath.forEach((ci, i) => {
            // for every step but the last, or when the current node is not flagged `final`,
            // descend through the first child before indexing with `ci`
            if (contentItemsPath.length > i + 1 || !contentItemsBBoxes.final) {
              contentItemsBBoxes = contentItemsBBoxes.contentItems[0];
            }
            contentItemsBBoxes = contentItemsBBoxes.contentItems[ci];
          });

          bbox.location = [
            contentItemsBBoxes.contentItem.x,
            contentItemsBBoxes.contentItem.y,
            contentItemsBBoxes.contentItem.w,
            contentItemsBBoxes.contentItem.h,
          ];
        } catch (err) {
          // any missing node along the path ends up here; fall back to an empty box at the origin
          console.log('NoMCIDDataParseError:', err.message || err);
          bbox.location = [0, 0, 0, 0];
        }
      }
      // not part of the else-chain: a glyph position overrides a location set above
      if (_.isNumber(bbox.operatorIndex) && _.isNumber(bbox.glyphIndex)) {
        bbox.location = getBboxForGlyph(
          bbox.operatorIndex,
          bbox.glyphIndex,
          operationData,
          page.view,
          page.rotate,
        );
      }
      return bbox;
    });
    return bboxes;
  };
};
|
380
|
+
|
381
|
+
/*
 * Walks a tag subtree and collects the MCIDs found in it, grouped by page index
 *
 * @param {Object} tagObject root of the subtree; `children` may be a single node or an array
 *
 * @return [[{Array}, {Number}]] - one [array of mcids, page index] pair per page
 */
function findAllMcid(tagObject) {
  const mcidsByPage = {};

  const visit = node => {
    if (!node) return;

    // an mcid of 0 is valid, so it is accepted explicitly next to the truthy values
    const carriesMcid = node.mcid || node.mcid === 0;
    if (carriesMcid) {
      const pageBucket = mcidsByPage[node.pageIndex] || (mcidsByPage[node.pageIndex] = []);
      pageBucket.push(node.mcid);
    }

    const { children } = node;
    if (!children) return;

    if (children instanceof Array) {
      [...children].forEach(child => visit(child));
    } else {
      visit(children);
    }
  };

  visit(tagObject);
  return _.map(mcidsByPage, (mcids, pageKey) => [mcids, _.toNumber(pageKey)]);
}
|
411
|
+
|
412
|
+
/*
 * Convert the error path returned by the veraPDF api into a structured location
 *
 * @param errorContext {string} ugly path to error
 *
 * @return one of
 *   - a pathObject with pageIndex/contentStream/content/contentItems for content items without MCID
 *   - a pathObject with pageIndex/mcid for content items carrying an MCID
 *   - a pathObject with pageIndex/annot for annotations
 *   - arrayOfNodes {array} of [index, tag name] nodes from Document to error Tag
 *   - an empty array when the context is empty or cannot be parsed
 */
type Node = [number, string];
type ConvertContextToPathReturn = pathObject | Node[];

const convertContextToPath = (errorContext = ''): ConvertContextToPathReturn => {
  if (!errorContext) {
    return [];
  }

  const contextString = errorContext;

  try {
    if (contextString.includes('contentItem') && !contextString.includes('mcid')) {
      const result = contextString.match(
        /pages\[(?<pages>\d+)\](\(.+\))?\/contentStream\[(?<contentStream>\d+)\](\(.+\))?\/content\[(?<content>\d+)\](?<contentItems>((\(.+\))?\/contentItem\[(\d+)\])+)/,
      );
      if (result) {
        try {
          // must start out as an object: assigning properties to an unassigned variable throws
          const contentItemLocation: pathObject = {};
          contentItemLocation.pageIndex = parseInt(result.groups.pages, 10);
          contentItemLocation.contentStream = parseInt(result.groups.contentStream, 10);
          contentItemLocation.content = parseInt(result.groups.content, 10);
          contentItemLocation.contentItems = result.groups.contentItems
            .split('/')
            .filter(ci => ci.includes('contentItem'))
            .map(ci => {
              const contentItemIndex = ci.match(/\[(?<contentItem>\d+)\]/);
              return parseInt(contentItemIndex?.groups?.contentItem || '-1', 10);
            });
          return contentItemLocation;
        } catch (err) {
          // on failure fall through to the generic contentItem handling below
          console.log('NoMCIDContentItemPathParseError:', err.message || err);
        }
      }
    }

    if (contextString.includes('contentItem')) {
      const mcidLocation: pathObject = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          mcidLocation.pageIndex = parseInt(nodeString.split(/[[\]]/)[1], 10);
        } else if (nodeString.includes('contentItem') && nodeString.includes('mcid')) {
          // text after "mcid:" minus its trailing character
          mcidLocation.mcid = parseInt(nodeString.split('mcid:')[1].slice(0, -1), 10);
        }
      });
      return mcidLocation;
    }

    if (contextString.includes('annots')) {
      const annotLocation: pathObject = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          annotLocation.pageIndex = parseInt(nodeString.split(/[[\]]/)[1], 10);
        } else if (nodeString.includes('annots')) {
          annotLocation.annot = parseInt(nodeString.split(/[[\]]/)[1], 10);
        }
      });
      return annotLocation;
    }

    // cut path before start of Document
    const contextStringArray: string[] = contextString.split('PDStructTreeRoot)/')[1].split('/');
    return contextStringArray.map((nodeString): Node => {
      // "K[<index>](<... tag name>)" -> [index, last word inside the parentheses]
      const nextIndex = parseInt(nodeString.split('](')[0].split('K[')[1], 10);
      const words = nodeString.split('(')[1].split(')')[0].split(' ');
      return [nextIndex, words[words.length - 1]];
    });
  } catch {
    // any context that does not match one of the shapes above
    return [];
  }
};
|
491
|
+
|
492
|
+
/**
 * Resolves a veraPDF context string to the marked content it refers to.
 *
 * @param context veraPDF context string, parsed with convertContextToPath
 * @param structure structure tree that is walked when the context is a path of tags
 * @returns an array of `[mcidList, pageIndex, contentItemPath]` entries, where `mcidList` is
 *   an array of MCIDs, an `{ annot }` object, or undefined when `contentItemPath` is set;
 *   `[[[], -1, undefined]]` when nothing could be resolved
 */
const getTagsFromErrorPlace = (context: string, structure: StructureTree) => {
  // fallback result: no MCIDs, page index -1
  const defaultValue = [[[], -1, undefined]];
  const selectedTag = convertContextToPath(context);

  if (_.isEmpty(selectedTag)) {
    return defaultValue;
  }
  // Type guard function
  function isPathObject(value: any): value is pathObject {
    return (
      value !== null &&
      typeof value === 'object' &&
      (Object.prototype.hasOwnProperty.call(value, 'mcid') ||
        Object.prototype.hasOwnProperty.call(value, 'pageIndex') ||
        Object.prototype.hasOwnProperty.call(value, 'annot') ||
        Object.prototype.hasOwnProperty.call(value, 'contentItems'))
    );
  }

  if (isPathObject(selectedTag)) {
    // direct locations: the context itself names the MCID, annotation or content item
    if (
      Object.prototype.hasOwnProperty.call(selectedTag, 'mcid') &&
      Object.prototype.hasOwnProperty.call(selectedTag, 'pageIndex')
    ) {
      return [[[selectedTag.mcid], selectedTag.pageIndex]];
    }
    if (
      Object.prototype.hasOwnProperty.call(selectedTag, 'annot') &&
      Object.prototype.hasOwnProperty.call(selectedTag, 'pageIndex')
    ) {
      return [[{ annot: selectedTag.annot }, selectedTag.pageIndex]];
    }
    if (Object.prototype.hasOwnProperty.call(selectedTag, 'contentItems')) {
      return [
        [
          undefined,
          selectedTag.pageIndex,
          [selectedTag.contentStream, selectedTag.content, ...selectedTag.contentItems],
        ],
      ];
    }
    // a path object matching none of the combinations above falls through to defaultValue
  } else if (selectedTag instanceof Array) {
    // path of [index, tag name] nodes: descend the structure tree one node at a time,
    // working on shallow copies so `structure` itself is not modified
    let objectOfErrors = { ...structure };
    selectedTag.forEach((node, index) => {
      let nextStepObject;
      if (!objectOfErrors.children) {
        // no children property: index into the current object directly
        nextStepObject = objectOfErrors[node[0]];
      } else if (!(objectOfErrors.children instanceof Array)) {
        // single child: step into it only when its name matches, otherwise stay in place
        if (objectOfErrors.children.name === node[1]) {
          nextStepObject = objectOfErrors.children;
        } else {
          nextStepObject = objectOfErrors;
        }
      } else if (objectOfErrors?.name === node[1] && index === 0) {
        // the first path node may name the root itself
        nextStepObject = objectOfErrors;
      } else {
        // index among the children that carry no truthy mcid; if that leaves nothing,
        // index among all children instead
        // NOTE(review): `!tag?.mcid` keeps children whose mcid is 0 - confirm this is intended
        const clearedChildrenArray = [...objectOfErrors.children].filter(tag => !tag?.mcid);
        nextStepObject = {
          ...(clearedChildrenArray.length ? clearedChildrenArray : objectOfErrors.children)[
            node[0]
          ],
        };
      }
      objectOfErrors = { ...nextStepObject };
    });
    return findAllMcid(objectOfErrors);
  }
  return defaultValue;
};
|
561
|
+
|
562
|
+
/**
 * Parses a `pages[<start>]/boundingBox[x,y,x1,y1]` or `pages[<start>-<end>]/boundingBox[...]`
 * location into one entry per page, with page numbers one higher than the parsed indices.
 *
 * A single page yields `[x, y, width, height]`. A page range yields, per page,
 * `[x, <y value>, width, 'bottom' | 'top']`: the first page gets y1 and 'bottom', the last
 * page y and 'top', pages in between 0 and 'top'.
 */
const calculateLocation = location => {
  const [pages, boundingBox] = location.split('/');
  const [start, end] = pages.replace('pages[', '').replace(']', '').split('-');
  const [x, y, x1, y1] = boundingBox.replace('boundingBox[', '').replace(']', '').split(',');
  const width = parseFloat(x1) - parseFloat(x);

  if (!end) {
    const height = parseFloat(y1) - parseFloat(y);
    return [
      {
        page: parseInt(start) + 1,
        location: [parseFloat(x), parseFloat(y), width, height],
      },
    ];
  }

  const firstPage = parseInt(start) + 1;
  const lastPage = parseInt(end) + 1;
  const pageBoxes = [];
  for (let page = firstPage; page <= lastPage; page++) {
    // the first-page test comes first, so a one-page range is treated as a first page
    if (page === firstPage) {
      pageBoxes.push({ page, location: [parseFloat(x), parseFloat(y1), width, 'bottom'] });
    } else if (page === lastPage) {
      pageBoxes.push({ page, location: [parseFloat(x), parseFloat(y), width, 'top'] });
    } else {
      pageBoxes.push({ page, location: [parseFloat(x), 0, width, 'top'] });
    }
  }
  return pageBoxes;
};
|
602
|
+
|
603
|
+
/**
 * Parses a JSON location of the form `{"bbox":[{"p":<page index>,"rect":[x,y,x1,y1]}, ...]}`
 * into `{ page, location: [x, y, width, height] }` entries, with `page` being `p + 1`.
 */
const calculateLocationJSON = location => {
  const parsed = JSON.parse(location);
  return parsed.bbox.map(({ p, rect }) => {
    const [x, y, x1, y1] = rect;
    const left = parseFloat(x);
    const lower = parseFloat(y);
    return {
      page: parseFloat(p) + 1,
      location: [left, lower, parseFloat(x1) - left, parseFloat(y1) - lower],
    };
  });
};
|
618
|
+
|
619
|
+
/**
 * Extracts the bracketed indices of the `pages`, `operators` and `usedGlyphs` steps of a
 * `/`-separated location. When a step occurs more than once the last occurrence wins.
 *
 * @returns `{ pageIndex, operatorIndex, glyphIndex }`, or null when any of the three is missing
 */
export const calculateLocationInStreamOperator = location => {
  // step prefix -> index found so far (-1 = not seen)
  const indices = { pages: -1, operators: -1, usedGlyphs: -1 };

  for (const step of location.split('/')) {
    for (const prefix of Object.keys(indices)) {
      if (step.startsWith(prefix)) {
        indices[prefix] = parseInt(step.split(/[\[\]]/)[1]);
      }
    }
  }

  const { pages: pageIndex, operators: operatorIndex, usedGlyphs: glyphIndex } = indices;
  const isComplete = pageIndex !== -1 && operatorIndex !== -1 && glyphIndex !== -1;
  return isComplete ? { pageIndex, operatorIndex, glyphIndex } : null;
};
|
644
|
+
|
645
|
+
/**
 * Groups the given violations by page number.
 *
 * Each location is classified as a glyph position inside a content stream, a position in
 * the structure tree, or an explicit bounding box, and an entry describing it is appended
 * under the page number(s) it belongs to. Locations that fail to parse are reported on
 * stderr and left out.
 *
 * @param bboxes violations to place; `index` in the produced entries is the position in this array
 * @param structure structure tree handed to getTagsFromErrorPlace
 * @returns object mapping page number to the array of entries on that page
 */
export const buildBboxMap = (bboxes: IBboxLocation[], structure: StructureTree) => {
  const bboxMap = {};
  const appendEntry = (pageKey, entry) => {
    bboxMap[pageKey] = [...(bboxMap[pageKey] || []), entry];
  };

  bboxes.forEach((bbox, index) => {
    try {
      const isGlyphLocation =
        bbox.location.includes('contentStream') && bbox.location.includes('operators');
      if (isGlyphLocation) {
        const glyphPosition = calculateLocationInStreamOperator(bbox.location);
        if (!glyphPosition) {
          return;
        }
        appendEntry(glyphPosition.pageIndex + 1, {
          index,
          operatorIndex: glyphPosition.operatorIndex,
          glyphIndex: glyphPosition.glyphIndex,
          bboxTitle: bbox.bboxTitle,
        });
        return;
      }

      const isStructureLocation =
        bbox.location.includes('StructTreeRoot') ||
        bbox.location.includes('root/doc') ||
        bbox.location === 'root';
      if (isStructureLocation) {
        const mcidData = getTagsFromErrorPlace(bbox.location, structure);
        mcidData.forEach(([mcidList, pageIndex, contentItemPath]) => {
          appendEntry(pageIndex + 1, {
            index,
            mcidList,
            contentItemPath,
            groupId: bbox.groupId || undefined,
            bboxTitle: bbox.bboxTitle,
          });
        });
        return;
      }

      // explicit boxes: either the "pages[...]/boundingBox[...]" form or a JSON document
      const explicitBoxes = bbox.location.includes('pages[')
        ? calculateLocation(bbox.location)
        : calculateLocationJSON(bbox.location);
      explicitBoxes.forEach(explicitBox => {
        appendEntry(explicitBox.page, {
          index,
          location: explicitBox.location,
          groupId: bbox.groupId || undefined,
          bboxTitle: bbox.bboxTitle,
        });
      });
    } catch {
      console.error(`Location not supported: ${bbox.location}`);
    }
  });

  return bboxMap;
};
|
703
|
+
|
704
|
+
/**
 * Returns the page number of a location whose last path segment is a `pages[N]` step.
 *
 * @param bboxLocation `/`-separated location string; may be null or undefined
 * @returns `N + 1` taken from the last segment containing `pages`, or -1 when the location is
 *   missing or does not end in a `pages` segment
 */
export const getSelectedPageByLocation = bboxLocation => {
  // checked before splitting, so a missing location yields -1 instead of a TypeError
  if (typeof bboxLocation !== 'string') {
    return -1;
  }

  const segments = bboxLocation.split('/');
  if (!segments[segments.length - 1].startsWith('pages')) {
    return -1;
  }

  let pageNumber = -1;
  segments.forEach(nodeString => {
    if (nodeString.includes('pages')) {
      pageNumber = parseInt(nodeString.split(/[[\]]/)[1], 10) + 1;
    }
  });
  return pageNumber;
};
|
717
|
+
|
718
|
+
/**
 * Determines the page number a single violation belongs to.
 *
 * Structure-tree locations are resolved through getTagsFromErrorPlace (page index of the first
 * entry plus one); all other locations are parsed as explicit bounding boxes and the page of
 * the first box is used, or 0 when there is none.
 *
 * @returns the page number, or -1 when resolving the location throws
 */
export const getBboxPage = (bbox, structure) => {
  try {
    const { location } = bbox;
    const isStructureLocation =
      location.includes('StructTreeRoot') || location.includes('root/doc') || location === 'root';

    if (isStructureLocation) {
      const firstEntry = getTagsFromErrorPlace(location, structure)[0];
      return (firstEntry[1] as number) + 1;
    }

    const explicitBoxes = location.includes('pages[')
      ? calculateLocation(location)
      : calculateLocationJSON(location);
    if (explicitBoxes.length) {
      return explicitBoxes[0].page;
    }
    return 0;
  } catch (e) {
    console.error(e);
    console.error(`Location not supported: ${bbox.location}`);
    return -1;
  }
};
|
739
|
+
|
740
|
+
/**
 * Loads the PDF and resolves the page number a veraPDF context refers to.
 *
 * @param context veraPDF context string of the violation
 * @param pdfFilePath path of the PDF handed to pdf.js
 * @returns the result of getBboxPage for the context; resolves to undefined (after logging
 *   the error) when the document cannot be loaded
 */
export const getPageFromContext = async (context: string, pdfFilePath: string): Promise<number> => {
  let loadingTask;
  try {
    loadingTask = pdfjs.getDocument({
      url: pdfFilePath,
      standardFontDataUrl: path.join(dirname, '../../node_modules/pdfjs-dist/standard_fonts/'),
      disableFontFace: true,
      verbosity: 0,
    });
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;

    return getBboxPage({ location: context }, structureTree);
  } catch (e) {
    // keep the "no page" outcome callers already get, but leave a trace of the cause
    silentLogger.error('Error in getting page from context:', e);
    return undefined;
  } finally {
    // release the document; a clean-up failure must not turn into a rejection
    await loadingTask?.destroy().catch(() => {});
  }
};
|
757
|
+
|
758
|
+
/**
 * Resolves the page number of every violation in `bboxes`.
 *
 * @param bboxes violations to resolve
 * @param structure structure tree passed through to getBboxPage
 * @returns the getBboxPage result for each violation, in input order
 */
export const getBboxPages = (bboxes, structure) => {
  // the callback must return the page, otherwise map yields an array of undefined
  return bboxes.map(bbox => getBboxPage(bbox, structure));
};
|