@govtechsg/oobee 0.10.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/.dockerignore +22 -0
  2. package/.github/pull_request_template.md +11 -0
  3. package/.github/workflows/docker-test.yml +54 -0
  4. package/.github/workflows/image.yml +107 -0
  5. package/.github/workflows/publish.yml +18 -0
  6. package/.idea/modules.xml +8 -0
  7. package/.idea/purple-a11y.iml +9 -0
  8. package/.idea/vcs.xml +6 -0
  9. package/.prettierrc.json +12 -0
  10. package/.vscode/extensions.json +5 -0
  11. package/.vscode/settings.json +10 -0
  12. package/CODE_OF_CONDUCT.md +128 -0
  13. package/DETAILS.md +163 -0
  14. package/Dockerfile +60 -0
  15. package/INSTALLATION.md +146 -0
  16. package/INTEGRATION.md +785 -0
  17. package/LICENSE +22 -0
  18. package/README.md +587 -0
  19. package/SECURITY.md +5 -0
  20. package/__mocks__/mock-report.html +1431 -0
  21. package/__mocks__/mockFunctions.ts +32 -0
  22. package/__mocks__/mockIssues.ts +64 -0
  23. package/__mocks__/mock_all_issues/000000001.json +64 -0
  24. package/__mocks__/mock_all_issues/000000002.json +53 -0
  25. package/__mocks__/mock_all_issues/fake-file.txt +0 -0
  26. package/__tests__/logs.test.ts +25 -0
  27. package/__tests__/mergeAxeResults.test.ts +278 -0
  28. package/__tests__/utils.test.ts +118 -0
  29. package/a11y-scan-results.zip +0 -0
  30. package/eslint.config.js +53 -0
  31. package/exclusions.txt +2 -0
  32. package/gitlab-pipeline-template.yml +54 -0
  33. package/jest.config.js +1 -0
  34. package/package.json +96 -0
  35. package/scripts/copyFiles.js +44 -0
  36. package/scripts/install_oobee_dependencies.cmd +13 -0
  37. package/scripts/install_oobee_dependencies.command +101 -0
  38. package/scripts/install_oobee_dependencies.ps1 +110 -0
  39. package/scripts/oobee_shell.cmd +13 -0
  40. package/scripts/oobee_shell.command +11 -0
  41. package/scripts/oobee_shell.sh +55 -0
  42. package/scripts/oobee_shell_ps.ps1 +54 -0
  43. package/src/cli.ts +401 -0
  44. package/src/combine.ts +240 -0
  45. package/src/constants/__tests__/common.test.ts +44 -0
  46. package/src/constants/cliFunctions.ts +305 -0
  47. package/src/constants/common.ts +1840 -0
  48. package/src/constants/constants.ts +443 -0
  49. package/src/constants/errorMeta.json +319 -0
  50. package/src/constants/itemTypeDescription.ts +11 -0
  51. package/src/constants/oobeeAi.ts +141 -0
  52. package/src/constants/questions.ts +181 -0
  53. package/src/constants/sampleData.ts +187 -0
  54. package/src/crawlers/__tests__/commonCrawlerFunc.test.ts +51 -0
  55. package/src/crawlers/commonCrawlerFunc.ts +656 -0
  56. package/src/crawlers/crawlDomain.ts +877 -0
  57. package/src/crawlers/crawlIntelligentSitemap.ts +156 -0
  58. package/src/crawlers/crawlLocalFile.ts +193 -0
  59. package/src/crawlers/crawlSitemap.ts +356 -0
  60. package/src/crawlers/custom/extractAndGradeText.ts +57 -0
  61. package/src/crawlers/custom/flagUnlabelledClickableElements.ts +964 -0
  62. package/src/crawlers/custom/utils.ts +486 -0
  63. package/src/crawlers/customAxeFunctions.ts +82 -0
  64. package/src/crawlers/pdfScanFunc.ts +468 -0
  65. package/src/crawlers/runCustom.ts +117 -0
  66. package/src/index.ts +173 -0
  67. package/src/logs.ts +66 -0
  68. package/src/mergeAxeResults.ts +964 -0
  69. package/src/npmIndex.ts +284 -0
  70. package/src/screenshotFunc/htmlScreenshotFunc.ts +411 -0
  71. package/src/screenshotFunc/pdfScreenshotFunc.ts +762 -0
  72. package/src/static/ejs/partials/components/categorySelector.ejs +4 -0
  73. package/src/static/ejs/partials/components/categorySelectorDropdown.ejs +57 -0
  74. package/src/static/ejs/partials/components/pagesScannedModal.ejs +70 -0
  75. package/src/static/ejs/partials/components/reportSearch.ejs +47 -0
  76. package/src/static/ejs/partials/components/ruleOffcanvas.ejs +105 -0
  77. package/src/static/ejs/partials/components/scanAbout.ejs +263 -0
  78. package/src/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
  79. package/src/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
  80. package/src/static/ejs/partials/components/summaryScanResults.ejs +16 -0
  81. package/src/static/ejs/partials/components/summaryTable.ejs +20 -0
  82. package/src/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
  83. package/src/static/ejs/partials/components/topFive.ejs +6 -0
  84. package/src/static/ejs/partials/components/wcagCompliance.ejs +70 -0
  85. package/src/static/ejs/partials/footer.ejs +21 -0
  86. package/src/static/ejs/partials/header.ejs +230 -0
  87. package/src/static/ejs/partials/main.ejs +40 -0
  88. package/src/static/ejs/partials/scripts/bootstrap.ejs +8 -0
  89. package/src/static/ejs/partials/scripts/categorySelectorDropdownScript.ejs +190 -0
  90. package/src/static/ejs/partials/scripts/categorySummary.ejs +141 -0
  91. package/src/static/ejs/partials/scripts/highlightjs.ejs +335 -0
  92. package/src/static/ejs/partials/scripts/popper.ejs +7 -0
  93. package/src/static/ejs/partials/scripts/reportSearch.ejs +248 -0
  94. package/src/static/ejs/partials/scripts/ruleOffcanvas.ejs +801 -0
  95. package/src/static/ejs/partials/scripts/screenshotLightbox.ejs +71 -0
  96. package/src/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
  97. package/src/static/ejs/partials/scripts/summaryTable.ejs +78 -0
  98. package/src/static/ejs/partials/scripts/utils.ejs +441 -0
  99. package/src/static/ejs/partials/styles/bootstrap.ejs +12375 -0
  100. package/src/static/ejs/partials/styles/highlightjs.ejs +54 -0
  101. package/src/static/ejs/partials/styles/styles.ejs +1843 -0
  102. package/src/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
  103. package/src/static/ejs/partials/summaryHeader.ejs +70 -0
  104. package/src/static/ejs/partials/summaryMain.ejs +75 -0
  105. package/src/static/ejs/report.ejs +420 -0
  106. package/src/static/ejs/summary.ejs +47 -0
  107. package/src/static/mustache/.prettierrc +4 -0
  108. package/src/static/mustache/Attention Deficit.mustache +11 -0
  109. package/src/static/mustache/Blind.mustache +11 -0
  110. package/src/static/mustache/Cognitive.mustache +7 -0
  111. package/src/static/mustache/Colorblindness.mustache +20 -0
  112. package/src/static/mustache/Deaf.mustache +12 -0
  113. package/src/static/mustache/Deafblind.mustache +7 -0
  114. package/src/static/mustache/Dyslexia.mustache +14 -0
  115. package/src/static/mustache/Low Vision.mustache +7 -0
  116. package/src/static/mustache/Mobility.mustache +15 -0
  117. package/src/static/mustache/Sighted Keyboard Users.mustache +42 -0
  118. package/src/static/mustache/report.mustache +1709 -0
  119. package/src/types/print-message.d.ts +28 -0
  120. package/src/types/types.ts +46 -0
  121. package/src/types/xpath-to-css.d.ts +3 -0
  122. package/src/utils.ts +332 -0
  123. package/tsconfig.json +15 -0
@@ -0,0 +1,762 @@
1
+ import _ from 'lodash';
2
+ import pdfjs, { PDFPageProxy } from 'pdfjs-dist';
3
+ import fs from 'fs';
4
+ import { Canvas, createCanvas, SKRSContext2D } from '@napi-rs/canvas';
5
+ import assert from 'assert';
6
+ import path from 'path';
7
+ import { fileURLToPath } from 'url';
8
+ import { silentLogger } from '../logs.js';
9
+ import { TransformedRuleObject } from '../crawlers/pdfScanFunc.js';
10
+ import { IBboxLocation, StructureTree, ViewportSize } from '../types/types.js';
11
+
12
+ const filename = fileURLToPath(import.meta.url);
13
+ const dirname = path.dirname(filename);
14
+
15
+ // CONSTANTS
16
+ const BBOX_PADDING = 50;
17
+
18
+ // Interfaces
19
/**
 * Location of a veraPDF error inside a PDF, as parsed from an error context path.
 * All fields are optional because each kind of context path fills in only a subset.
 */
interface pathObject {
  /** Index parsed from the `pages[N]` segment; callers add 1 to obtain the page number. */
  pageIndex?: number;
  /** Number following `mcid:` in a `contentItem` segment. */
  mcid?: number;
  /** Index parsed from the `annots[N]` segment. */
  annot?: number;
  /** Index parsed from the `contentStream[N]` segment. */
  contentStream?: number;
  /** Index parsed from the `content[N]` segment. */
  content?: number;
  /** Indices of the `contentItem[N]` segments, in path order. */
  contentItems?: number[];
}
27
+
28
/**
 * Canvas factory backed by `@napi-rs/canvas`.
 * An instance is passed to pdf.js as `canvasFactory` when rendering pages and is
 * also used directly to create the scratch canvases for highlighting and cropping.
 */
class NodeCanvasFactory {
  /**
   * Creates a canvas of the given size together with its 2D context.
   * Throws an assertion error when either dimension is not greater than zero.
   */
  create(width: number, height: number) {
    assert(width > 0 && height > 0, 'Invalid canvas size');
    const canvas = createCanvas(width, height);
    const context = canvas.getContext('2d');
    return {
      canvas,
      context,
    };
  }

  /**
   * Resizes an existing canvas in place.
   * Throws an assertion error when the canvas is missing or the size is invalid.
   */
  reset(
    canvasAndContext: { canvas: Canvas; context: SKRSContext2D },
    width: number,
    height: number,
  ): void {
    assert(canvasAndContext.canvas, 'Canvas is not specified');
    assert(width > 0 && height > 0, 'Invalid canvas size');
    canvasAndContext.canvas.width = width;
    canvasAndContext.canvas.height = height;
  }

  /**
   * Drops the references held by `canvasAndContext` by setting both fields to null.
   * Throws an assertion error when the canvas is missing.
   */
  destroy(canvasAndContext: { canvas: Canvas | null; context: SKRSContext2D | null }): void {
    assert(canvasAndContext.canvas, 'Canvas is not specified');

    canvasAndContext.canvas = null;
    canvasAndContext.context = null;
  }
}

const canvasFactory = new NodeCanvasFactory();
63
+
64
/**
 * Takes a cropped, highlighted screenshot for every violation of a PDF rule.
 *
 * @param pdfFilePath - Path of the PDF, passed to pdf.js as `url`.
 * @param items - Violations; each item's `context` is used as its location string.
 * @param screenshotPath - Path prefix handed to `annotateAndSave` for the PNG files.
 * @returns A deep copy of `items` where every item whose location could be resolved
 *   to a page of the document has `screenshotPath` and `page` set.
 */
export async function getPdfScreenshots(
  pdfFilePath: string,
  items: TransformedRuleObject['items'],
  screenshotPath: string,
) {
  const newItems = _.cloneDeep(items);
  const loadingTask = pdfjs.getDocument({
    url: pdfFilePath,
    // same relative location as the one used by getPageFromContext
    standardFontDataUrl: path.join(dirname, '../../node_modules/pdfjs-dist/standard_fonts/'),
    disableFontFace: true,
    verbosity: 0,
  });

  try {
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;

    // rendered pages keyed by page number, shared by all violations on the same page
    const pageCanvasCache = {};

    // iterate through each violation
    for (let i = 0; i < newItems.length; i++) {
      const { context } = newItems[i];
      const bbox: IBboxLocation = { location: context };
      const bboxMap = buildBboxMap([bbox], structureTree);

      for (const [pageNum, bboxList] of Object.entries(bboxMap)) {
        const pageNumber = parseInt(pageNum, 10);
        // buildBboxMap files unresolved locations under page 0 (page index -1 + 1);
        // pdf.getPage would reject for such a page, so skip instead of aborting the run
        if (!(pageNumber >= 1 && pageNumber <= pdf.numPages)) {
          continue;
        }

        const page = await pdf.getPage(pageNumber);

        // an array of length 1, containing location of current violation
        const [operatorList, annotations] = await Promise.all([
          page.getOperatorList(),
          page.getAnnotations(),
        ]);
        const bboxesWithCoords = getBboxesList(bboxList, page)([operatorList, annotations]);

        // Render the page on a Node canvas with 200% scale.
        const viewport = page.getViewport({ scale: 2.0 });

        let canvasAndContext = pageCanvasCache[pageNum];
        if (!canvasAndContext) {
          // first violation on this page: render once and keep the result for later violations
          canvasAndContext = canvasFactory.create(viewport.width, viewport.height);
          const renderTask = page.render({
            canvasContext: canvasAndContext.context,
            viewport,
            canvasFactory,
          });
          await renderTask.promise;
          pageCanvasCache[pageNum] = canvasAndContext;
        }

        const finalScreenshotPath = annotateAndSave(
          canvasAndContext.canvas,
          screenshotPath,
          viewport,
        )(bboxesWithCoords[0]);

        newItems[i].screenshotPath = finalScreenshotPath;
        newItems[i].page = pageNumber;

        page.cleanup();
      }
    }
    return newItems;
  } finally {
    // release the document and its worker; a failure to do so must not mask the outcome
    await loadingTask.destroy().catch(() => undefined);
  }
}
129
+
130
/**
 * Creates a callback that highlights one bounding box on an already rendered page,
 * crops the page around the box and saves the crop as a PNG file.
 *
 * @param origCanvas - Canvas holding the rendered page; it is only read from.
 * @param screenshotPath - Path prefix; `-<counter>.png` is appended, using the first
 *   counter for which no file exists yet.
 * @param viewport - Viewport the page was rendered with.
 * @returns A function taking `{ location }` (`[left, bottom, width, height]`) that
 *   returns the written path without its first path segment.
 */
const annotateAndSave = (origCanvas: Canvas, screenshotPath: string, viewport: ViewportSize) => {
  return ({ location }) => {
    // scale up by 2
    const [left, bottom, width, height] = location.map(value => value * 2);
    // canvas y coordinate of the box, derived from its `bottom` offset and the viewport height
    const top = viewport.height - bottom - height;

    // annotate a copy so the original page canvas stays untouched
    const highlight = canvasFactory.create(viewport.width, viewport.height);
    highlight.context.drawImage(origCanvas, 0, 0);
    highlight.context.fillStyle = 'rgba(0, 255, 255, 0.2)';
    highlight.context.fillRect(left, top, width, height);

    // region to keep: the box plus BBOX_PADDING on every side
    const cropX = left - BBOX_PADDING;
    const cropY = top - BBOX_PADDING;
    const cropWidth = width + BBOX_PADDING * 2;
    const cropHeight = height + BBOX_PADDING * 2;

    const cropped = canvasFactory.create(cropWidth, cropHeight);
    cropped.context.drawImage(
      highlight.canvas,
      cropX,
      cropY,
      cropWidth,
      cropHeight,
      0,
      0,
      cropWidth,
      cropHeight,
    );

    // convert the canvas to an image
    const pngBuffer = cropped.canvas.toBuffer('image/png');

    // pick the first index that is not taken yet
    let counter = 0;
    let indexedScreenshotPath = `${screenshotPath}-${counter}.png`;
    while (fs.existsSync(indexedScreenshotPath)) {
      counter += 1;
      indexedScreenshotPath = `${screenshotPath}-${counter}.png`;
    }

    try {
      fs.writeFileSync(indexedScreenshotPath, pngBuffer);
    } catch (e) {
      silentLogger.error('Error in writing screenshot:', e);
    }

    canvasFactory.destroy(cropped);
    canvasFactory.destroy(highlight);

    // current screenshot path leads to a temp dir, so drop the first segment for the final path
    const [, ...remainingSegments] = indexedScreenshotPath.split(path.sep);
    return path.join(...remainingSegments);
  };
};
195
+
196
/**
 * Returns the viewport as seen after rotating the page.
 * For 0 and 180 the very same viewport is returned; for any other angle the
 * first/second and third/fourth entries are swapped in a new array.
 */
export const rotateViewport = (rotateAngle, viewport) => {
  const keepsOrientation = rotateAngle === 0 || rotateAngle === 180;
  return keepsOrientation
    ? viewport
    : [viewport[1], viewport[0], viewport[3], viewport[2]];
};
202
+
203
/**
 * Rotates a point by `rotateAngle` degrees and, for 90, 180 and 270, shifts the
 * result by sums of viewport entries.
 *
 * @param rotateAngle - Rotation in degrees.
 * @param point - `[x, y]` of the point.
 * @param viewport - Four-entry viewport array used for the shift.
 * @returns The rotated `[x, y]`.
 */
export const rotatePoint = (rotateAngle, point, viewport) => {
  const rad = (rotateAngle * Math.PI) / 180;
  let x = point[0] * Math.cos(rad) + point[1] * Math.sin(rad);
  let y = -point[0] * Math.sin(rad) + point[1] * Math.cos(rad);

  if (rotateAngle === 90) {
    y += viewport[2] + viewport[0];
  } else if (rotateAngle === 180) {
    x += viewport[2] + viewport[0];
    y += viewport[3] + viewport[1];
  } else if (rotateAngle === 270) {
    x += viewport[3] + viewport[1];
  }

  return [x, y];
};
223
+
224
/**
 * Rotates a `[x, y, width, height]` box by rotating two opposite corners with
 * `rotatePoint` and rebuilding the box from them.
 * An angle of 0 returns `coords` itself.
 */
export const rotateCoordinates = (coords, rotateAngle, viewport) => {
  if (rotateAngle === 0) return coords;

  const corner = [coords[0], coords[1]];
  const oppositeCorner = [coords[0] + coords[2], coords[1] + coords[3]];

  const [x1, y1] = rotatePoint(rotateAngle, corner, viewport);
  const [x2, y2] = rotatePoint(rotateAngle, oppositeCorner, viewport);

  const x = Math.min(x1, x2);
  const y = Math.min(y1, y2);
  const width = Math.abs(x1 - x2);
  const height = Math.abs(y1 - y2);
  return [x, y, width, height];
};
234
+
235
/**
 * Merges two `{ x, y, width, height }` boxes into the smallest box covering both.
 * When only one box is given, that box is returned (the new one as a deep copy);
 * when neither is given, an empty object is returned.
 */
function concatBoundingBoxes(newBoundingBox, oldBoundingBox) {
  if (_.isNil(newBoundingBox)) {
    // covers both "nothing at all" and "only the old box"
    return oldBoundingBox || {};
  }
  if (_.isNil(oldBoundingBox)) {
    return _.cloneDeep(newBoundingBox);
  }

  const left = Math.min(newBoundingBox.x, oldBoundingBox.x);
  const lower = Math.min(newBoundingBox.y, oldBoundingBox.y);
  const right = Math.max(
    newBoundingBox.x + newBoundingBox.width,
    oldBoundingBox.x + oldBoundingBox.width,
  );
  const upper = Math.max(
    newBoundingBox.y + newBoundingBox.height,
    oldBoundingBox.y + oldBoundingBox.height,
  );

  return {
    x: left,
    y: lower,
    width: right - left,
    height: upper - lower,
  };
}
257
+
258
/**
 * Resolves a list of MCIDs, or an `{ annot }` reference, to one bounding box.
 *
 * @param listOfMcid - Either an array of MCIDs looked up in `pageMap`, or an object
 *   with an `annot` index looked up in `annotations`.
 * @param pageMap - Bounding boxes (`{ x, y, width, height }`) keyed by MCID.
 * @param annotations - Annotations whose `rect` gives two opposite corners.
 * @param viewport - Four-entry viewport array of the page.
 * @param rotateAngle - Page rotation in degrees.
 * @returns `[x, y, width, height]` after rotation and relative to the rotated viewport
 *   origin, or `[]` when nothing could be resolved.
 */
export const parseMcidToBbox = (listOfMcid, pageMap, annotations, viewport, rotateAngle) => {
  type coordsObject = {
    x: number;
    y: number;
    width: number;
    height: number;
  };
  const isUsableNumber = (value: unknown) => typeof value === 'number' && !Number.isNaN(value);

  // stays undefined until a first usable box was found, so a box at x === 0 is kept
  let coords: coordsObject | undefined;

  if (Array.isArray(listOfMcid)) {
    listOfMcid.forEach(mcid => {
      const currentBbox = pageMap[mcid];
      if (
        currentBbox != null &&
        isUsableNumber(currentBbox.x) &&
        isUsableNumber(currentBbox.y) &&
        isUsableNumber(currentBbox.width) &&
        isUsableNumber(currentBbox.height)
      ) {
        coords = concatBoundingBoxes(currentBbox, coords);
      }
    });
  } else if (listOfMcid != null && Object.prototype.hasOwnProperty.call(listOfMcid, 'annot')) {
    const rect = annotations[listOfMcid.annot]?.rect;
    if (rect) {
      coords = {
        x: rect[0],
        y: rect[1],
        width: Math.abs(rect[0] - rect[2]),
        height: Math.abs(rect[1] - rect[3]),
      };
    }
  }

  // nothing resolved: report that explicitly instead of returning NaN/undefined entries
  if (!coords) return [];

  const coordsArray = rotateCoordinates(
    [coords.x, coords.y, coords.width, coords.height],
    rotateAngle,
    viewport,
  );
  const rotatedViewport = rotateViewport(rotateAngle, viewport);
  return [
    coordsArray[0] - rotatedViewport[0],
    coordsArray[1] - rotatedViewport[1],
    coordsArray[2],
    coordsArray[3],
  ];
};
305
+
306
/**
 * Looks up the bounding box of one glyph of one operator and converts it to
 * rotated coordinates relative to the rotated viewport origin.
 *
 * @returns `[x, y, width, height]`, or `[]` when no box is stored for the given indices.
 */
export const getBboxForGlyph = (
  operatorIndex,
  glyphIndex,
  operationsList,
  viewport,
  rotateAngle,
) => {
  const glyphBoxes = operationsList[operatorIndex];
  const bbox = glyphBoxes ? glyphBoxes[glyphIndex] : null;
  if (!bbox) {
    return [];
  }

  const rotated = rotateCoordinates(bbox, rotateAngle, viewport);
  const origin = rotateViewport(rotateAngle, viewport);
  const x = rotated[0] - origin[0];
  const y = rotated[1] - origin[1];
  return [x, y, rotated[2], rotated[3]];
};
326
+
327
+ // Below are methods adapted from
328
+ // https://github.com/veraPDF/verapdf-js-viewer/blob/master/src/services/bboxService.ts
329
+ // to determine the bounding box data of the violations from the context field
330
+
331
/**
 * Creates a callback that fills in `location` for every entry of `bboxList`,
 * using the operator list and annotations of `page`.
 *
 * The second-to-last entry of `operatorList.argsArray` is read as the per-operator
 * glyph data and the last entry as the pair `[positionData, noMCIDData]`.
 * Entries are updated in place and the same objects are returned.
 *
 * @param bboxList - Entries carrying `mcidList`, `contentItemPath` and/or
 *   `operatorIndex` + `glyphIndex`.
 * @param page - Page whose `view` and `rotate` are used for the conversion.
 */
export const getBboxesList = (bboxList, page: PDFPageProxy) => {
  return ([operatorList, annotations]) => {
    const { argsArray } = operatorList;
    const operationData = argsArray[argsArray.length - 2];
    const [positionData, noMCIDData] = argsArray[argsArray.length - 1];

    return bboxList.map(bbox => {
      if (bbox.mcidList) {
        bbox.location = parseMcidToBbox(
          bbox.mcidList,
          positionData,
          annotations,
          page.view,
          page.rotate,
        );
      } else if (bbox.contentItemPath) {
        const contentItemsPath = bbox.contentItemPath.slice(2);
        let node = noMCIDData[bbox.contentItemPath[1]];
        try {
          for (let depth = 0; depth < contentItemsPath.length; depth++) {
            const isLastStep = !(contentItemsPath.length > depth + 1);
            // step through the wrapper level unless this is the last step of a final node
            if (!isLastStep || !node.final) {
              node = node.contentItems[0];
            }
            node = node.contentItems[contentItemsPath[depth]];
          }

          const { contentItem } = node;
          bbox.location = [contentItem.x, contentItem.y, contentItem.w, contentItem.h];
        } catch (err) {
          console.log('NoMCIDDataParseError:', err.message || err);
          bbox.location = [0, 0, 0, 0];
        }
      }

      // a glyph position, when present, takes precedence over whatever was set above
      const hasGlyphPosition = _.isNumber(bbox.operatorIndex) && _.isNumber(bbox.glyphIndex);
      if (hasGlyphPosition) {
        bbox.location = getBboxForGlyph(
          bbox.operatorIndex,
          bbox.glyphIndex,
          operationData,
          page.view,
          page.rotate,
        );
      }
      return bbox;
    });
  };
};
380
+
381
/*
 * Walks a tag object and all of its descendants and collects their MCIDs,
 * grouped by the page index of the tag that carries them.
 *
 * @param {Object} tagObject - tag with optional `mcid`, `pageIndex` and `children`
 *
 * @return [[{Array}, {Number}]] - one `[array of mcids, page index]` pair per page
 */
function findAllMcid(tagObject) {
  const mcidsByPage = {};

  const visit = node => {
    if (!node) return;

    // an mcid of 0 is valid, so it is checked explicitly
    const carriesMcid = node.mcid || node.mcid === 0;
    if (carriesMcid) {
      if (!mcidsByPage[node.pageIndex]) {
        mcidsByPage[node.pageIndex] = [];
      }
      mcidsByPage[node.pageIndex].push(node.mcid);
    }

    const { children } = node;
    if (!children) return;

    // `children` is either a single tag or an array of tags
    const childList = children instanceof Array ? [...children] : [children];
    childList.forEach(child => visit(child));
  };

  visit(tagObject);
  return _.map(mcidsByPage, (mcids, pageKey) => [mcids, _.toNumber(pageKey)]);
}
411
+
412
/*
 * Converts an error context path returned by veraPDF into a location.
 *
 * Depending on the path this is either
 *  - an object with the page index plus content stream / content / content item
 *    indices (path with `contentItem` but without `mcid`),
 *  - an object with the page index and the mcid (path with `contentItem` and `mcid`),
 *  - an object with the page index and the annotation index (path with `annots`), or
 *  - the array of `[index, tag name]` nodes that follow `PDStructTreeRoot)/`.
 *
 * @param errorContext {string} context path of the error
 *
 * @return location object or array of nodes; an empty array when the path is empty
 *         or cannot be parsed
 */
type Node = [number, string];
type ConvertContextToPathReturn = pathObject | Node[];

const convertContextToPath = (errorContext = ''): ConvertContextToPathReturn => {
  if (!errorContext) {
    return [];
  }

  const contextString = errorContext;
  // index between the first pair of square brackets of a path segment
  const bracketIndex = (segment: string) => parseInt(segment.split(/[[\]]/)[1], 10);

  try {
    if (contextString.includes('contentItem') && !contextString.includes('mcid')) {
      const result = contextString.match(
        /pages\[(?<pages>\d+)\](\(.+\))?\/contentStream\[(?<contentStream>\d+)\](\(.+\))?\/content\[(?<content>\d+)\](?<contentItems>((\(.+\))?\/contentItem\[(\d+)\])+)/,
      );
      if (result) {
        try {
          // the object has to exist before fields are assigned to it
          const location: pathObject = {};
          location.pageIndex = parseInt(result.groups.pages, 10);
          location.contentStream = parseInt(result.groups.contentStream, 10);
          location.content = parseInt(result.groups.content, 10);
          location.contentItems = result.groups.contentItems
            .split('/')
            .filter(ci => ci.includes('contentItem'))
            .map(ci => {
              const contentItemIndex = ci.match(/\[(?<contentItem>\d+)\]/);
              return parseInt(contentItemIndex?.groups?.contentItem || '-1', 10);
            });
          return location;
        } catch (err) {
          console.log('NoMCIDContentItemPathParseError:', err.message || err);
        }
      }
    }

    if (contextString.includes('contentItem')) {
      const location: pathObject = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          location.pageIndex = bracketIndex(nodeString);
        } else if (nodeString.includes('contentItem') && nodeString.includes('mcid')) {
          // segment ends with `mcid:<n>)`, so drop the closing parenthesis
          location.mcid = parseInt(nodeString.split('mcid:')[1].slice(0, -1), 10);
        }
      });
      return location;
    }

    if (contextString.includes('annots')) {
      const location: pathObject = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          location.pageIndex = bracketIndex(nodeString);
        } else if (nodeString.includes('annots')) {
          location.annot = bracketIndex(nodeString);
        }
      });
      return location;
    }

    // cut path before start of Document
    const contextStringArray: string[] = contextString.split('PDStructTreeRoot)/')[1].split('/');
    return contextStringArray.map((nodeString): Node => {
      const nextIndex = parseInt(nodeString.split('](')[0].split('K[')[1], 10);
      // the tag name is the last word inside the parentheses
      const words = nodeString.split('(')[1].split(')')[0].split(' ');
      return [nextIndex, words[words.length - 1]];
    });
  } catch {
    return [];
  }
};
491
+
492
/**
 * Resolves an error context path to a list of `[mcid list, page index, content item path]`
 * entries.
 *
 * Location objects produced by `convertContextToPath` are mapped directly; node
 * arrays are followed through `structure` and the MCIDs below the reached tag are
 * collected with `findAllMcid`. Anything else yields `[[[], -1, undefined]]`.
 */
const getTagsFromErrorPlace = (context: string, structure: StructureTree) => {
  const defaultValue = [[[], -1, undefined]];
  const selectedTag = convertContextToPath(context);

  if (_.isEmpty(selectedTag)) {
    return defaultValue;
  }

  const owns = (value: unknown, key: string) => Object.prototype.hasOwnProperty.call(value, key);

  // Type guard function
  function isPathObject(value: any): value is pathObject {
    if (value === null || typeof value !== 'object') {
      return false;
    }
    return ['mcid', 'pageIndex', 'annot', 'contentItems'].some(key => owns(value, key));
  }

  if (isPathObject(selectedTag)) {
    const hasPage = owns(selectedTag, 'pageIndex');
    if (owns(selectedTag, 'mcid') && hasPage) {
      return [[[selectedTag.mcid], selectedTag.pageIndex]];
    }
    if (owns(selectedTag, 'annot') && hasPage) {
      return [[{ annot: selectedTag.annot }, selectedTag.pageIndex]];
    }
    if (owns(selectedTag, 'contentItems')) {
      return [
        [
          undefined,
          selectedTag.pageIndex,
          [selectedTag.contentStream, selectedTag.content, ...selectedTag.contentItems],
        ],
      ];
    }
  } else if (selectedTag instanceof Array) {
    let objectOfErrors = { ...structure };
    selectedTag.forEach((node, index) => {
      const childIndex = node[0];
      const tagName = node[1];
      let nextStepObject;

      if (!objectOfErrors.children) {
        // no children: the node index addresses the object itself
        nextStepObject = objectOfErrors[childIndex];
      } else if (!(objectOfErrors.children instanceof Array)) {
        // single child: descend only when its name matches, otherwise stay
        nextStepObject =
          objectOfErrors.children.name === tagName ? objectOfErrors.children : objectOfErrors;
      } else if (objectOfErrors?.name === tagName && index === 0) {
        // first node names the current object, so there is nothing to descend into yet
        nextStepObject = objectOfErrors;
      } else {
        // index among the children without a truthy mcid, or among all children if none remain
        const clearedChildrenArray = [...objectOfErrors.children].filter(tag => !tag?.mcid);
        const candidates = clearedChildrenArray.length
          ? clearedChildrenArray
          : objectOfErrors.children;
        nextStepObject = { ...candidates[childIndex] };
      }
      objectOfErrors = { ...nextStepObject };
    });
    return findAllMcid(objectOfErrors);
  }
  return defaultValue;
};
561
+
562
/**
 * Parses a `pages[<start>]/boundingBox[x,y,x1,y1]` or
 * `pages[<start>-<end>]/boundingBox[x,y,x1,y1]` location string.
 *
 * A single page yields one entry with `[x, y, width, height]`. A page range yields
 * one entry per page whose fourth value is the string `'bottom'` (first page) or
 * `'top'` (all other pages) instead of a height.
 *
 * @returns Array of `{ page, location }` with `page` being the parsed index plus 1.
 */
const calculateLocation = location => {
  const bboxes = [];
  const [pages, boundingBox] = location.split('/');
  const [start, end] = pages.replace('pages[', '').replace(']', '').split('-');
  const [x, y, x1, y1] = boundingBox.replace('boundingBox[', '').replace(']', '').split(',');
  const width = parseFloat(x1) - parseFloat(x);

  if (!end) {
    const height = parseFloat(y1) - parseFloat(y);
    bboxes.push({
      page: parseInt(start) + 1,
      location: [parseFloat(x), parseFloat(y), width, height],
    });
    return bboxes;
  }

  const firstPage = parseInt(start) + 1;
  const lastPage = parseInt(end) + 1;
  for (let page = firstPage; page <= lastPage; page++) {
    let pageLocation;
    if (page === firstPage) {
      pageLocation = [parseFloat(x), parseFloat(y1), width, 'bottom'];
    } else if (page === lastPage) {
      pageLocation = [parseFloat(x), parseFloat(y), width, 'top'];
    } else {
      pageLocation = [parseFloat(x), 0, width, 'top'];
    }
    bboxes.push({ page, location: pageLocation });
  }

  return bboxes;
};
602
+
603
/**
 * Parses a JSON location of the form `{ "bbox": [{ "p": <page index>, "rect": [x, y, x1, y1] }] }`.
 *
 * @returns One `{ page, location: [x, y, width, height] }` entry per `bbox` element,
 *   with `page` being `p` plus 1.
 */
const calculateLocationJSON = location => {
  const parsed = JSON.parse(location);

  return parsed.bbox.map(({ p, rect }) => {
    const [x, y, x1, y1] = rect;
    const left = parseFloat(x);
    const lower = parseFloat(y);
    return {
      page: parseFloat(p) + 1,
      location: [left, lower, parseFloat(x1) - parseFloat(x), parseFloat(y1) - parseFloat(y)],
    };
  });
};
618
+
619
/**
 * Extracts the page, operator and glyph indices from a location path whose
 * segments start with `pages`, `operators` and `usedGlyphs`.
 *
 * @returns `{ pageIndex, operatorIndex, glyphIndex }`, or `null` when any of the
 *   three segments is missing.
 */
export const calculateLocationInStreamOperator = location => {
  // index between the first pair of square brackets of a segment
  const bracketIndex = step => parseInt(step.split(/[\[\]]/)[1]);

  let pageIndex = -1;
  let operatorIndex = -1;
  let glyphIndex = -1;

  for (const step of location.split('/')) {
    if (step.startsWith('pages')) pageIndex = bracketIndex(step);
    if (step.startsWith('operators')) operatorIndex = bracketIndex(step);
    if (step.startsWith('usedGlyphs')) glyphIndex = bracketIndex(step);
  }

  const isComplete = pageIndex !== -1 && operatorIndex !== -1 && glyphIndex !== -1;
  return isComplete ? { pageIndex, operatorIndex, glyphIndex } : null;
};
644
+
645
/**
 * Groups bounding box descriptions by page number.
 *
 * Each location string is handled in one of three ways:
 *  - content stream operator paths are parsed with `calculateLocationInStreamOperator`,
 *  - structure tree paths are resolved with `getTagsFromErrorPlace`,
 *  - everything else is parsed with `calculateLocation` (`pages[...]`) or
 *    `calculateLocationJSON`.
 * A location that makes this throw is reported on stderr and skipped.
 *
 * @returns Object mapping page number to the list of entries found for that page.
 */
export const buildBboxMap = (bboxes: IBboxLocation[], structure: StructureTree) => {
  const bboxMap = {};

  // appends to a fresh copy of the page's list, creating the list when needed
  const addToPage = (pageKey, entry) => {
    bboxMap[pageKey] = [...(bboxMap[pageKey] || []), entry];
  };

  bboxes.forEach((bbox, index) => {
    try {
      const isOperatorPath =
        bbox.location.includes('contentStream') && bbox.location.includes('operators');

      if (isOperatorPath) {
        const bboxPosition = calculateLocationInStreamOperator(bbox.location);
        if (!bboxPosition) {
          return;
        }
        addToPage(bboxPosition.pageIndex + 1, {
          index,
          operatorIndex: bboxPosition.operatorIndex,
          glyphIndex: bboxPosition.glyphIndex,
          bboxTitle: bbox.bboxTitle,
        });
        return;
      }

      const isStructurePath =
        bbox.location.includes('StructTreeRoot') ||
        bbox.location.includes('root/doc') ||
        bbox.location === 'root';

      if (isStructurePath) {
        const mcidData = getTagsFromErrorPlace(bbox.location, structure);
        mcidData.forEach(([mcidList, pageIndex, contentItemPath]) => {
          addToPage(pageIndex + 1, {
            index,
            mcidList,
            contentItemPath,
            groupId: bbox.groupId || undefined,
            bboxTitle: bbox.bboxTitle,
          });
        });
        return;
      }

      const bboxesFromLocation = bbox.location.includes('pages[')
        ? calculateLocation(bbox.location)
        : calculateLocationJSON(bbox.location);
      bboxesFromLocation.forEach(bboxWithLocation => {
        addToPage(bboxWithLocation.page, {
          index,
          location: bboxWithLocation.location,
          groupId: bbox.groupId || undefined,
          bboxTitle: bbox.bboxTitle,
        });
      });
    } catch {
      console.error(`Location not supported: ${bbox.location}`);
    }
  });

  return bboxMap;
};
703
+
704
/**
 * Returns the page number addressed by a location path that ends in a `pages[N]` segment.
 *
 * @param bboxLocation - `/`-separated location path; may be null or undefined.
 * @returns `N + 1` of the last segment containing `pages`, or -1 when the location is
 *   missing or its last segment does not start with `pages`.
 */
export const getSelectedPageByLocation = bboxLocation => {
  // guard first: the location is split right away, so a nullish value must not get that far
  if (bboxLocation == null) {
    return -1;
  }

  const steps = bboxLocation.split('/');
  if (!steps[steps.length - 1].startsWith('pages')) {
    return -1;
  }

  let pageNumber = -1;
  steps.forEach(nodeString => {
    if (nodeString.includes('pages')) {
      pageNumber = parseInt(nodeString.split(/[[\]]/)[1], 10) + 1;
    }
  });
  return pageNumber;
};
717
+
718
/**
 * Determines the page number a bounding box location refers to.
 *
 * Structure tree paths are resolved with `getTagsFromErrorPlace` (page index of the
 * first result plus 1); other locations are parsed with `calculateLocation` or
 * `calculateLocationJSON`, giving the page of the first entry or 0 when there is none.
 *
 * @returns The page number, or -1 when resolving the location throws (the error is
 *   written to stderr).
 */
export const getBboxPage = (bbox, structure) => {
  try {
    const { location } = bbox;
    const isStructurePath =
      location.includes('StructTreeRoot') || location.includes('root/doc') || location === 'root';

    if (isStructurePath) {
      const mcidData = getTagsFromErrorPlace(location, structure);
      const pageIndex = mcidData[0][1] as number;
      return pageIndex + 1;
    }

    const bboxesFromLocation = location.includes('pages[')
      ? calculateLocation(location)
      : calculateLocationJSON(location);
    if (!bboxesFromLocation.length) {
      return 0;
    }
    return bboxesFromLocation[0].page;
  } catch (e) {
    console.error(e);
    console.error(`Location not supported: ${bbox.location}`);
    return -1;
  }
};
739
+
740
/**
 * Opens a PDF and resolves the page number an error context refers to.
 *
 * @param context - Error context path, used as the bounding box location.
 * @param pdfFilePath - Path of the PDF, passed to pdf.js as `url`.
 * @returns The page number reported by `getBboxPage`. When the document cannot be
 *   loaded the error is logged and the promise resolves to `undefined`.
 */
export const getPageFromContext = async (context: string, pdfFilePath: string): Promise<number> => {
  let loadingTask: ReturnType<typeof pdfjs.getDocument> | undefined;
  try {
    loadingTask = pdfjs.getDocument({
      url: pdfFilePath,
      standardFontDataUrl: path.join(dirname, '../../node_modules/pdfjs-dist/standard_fonts/'),
      disableFontFace: true,
      verbosity: 0,
    });
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;

    const page = getBboxPage({ location: context }, structureTree);
    return page;
  } catch (e) {
    // best effort: callers still get `undefined`, but the failure is no longer invisible
    silentLogger.error('Error in getting page from context:', e);
  } finally {
    // release the document and its worker; a failure to do so must not mask the result
    await loadingTask?.destroy().catch(() => undefined);
  }
};
757
+
758
/**
 * Resolves the page number of every bounding box.
 *
 * @param bboxes - Bounding boxes, each passed to `getBboxPage`.
 * @param structure - Structure tree passed through to `getBboxPage`.
 * @returns The results of `getBboxPage`, in the order of `bboxes`.
 */
export const getBboxPages = (bboxes, structure) => {
  // the callback has to return the page, otherwise the result is a list of undefined
  return bboxes.map(bbox => getBboxPage(bbox, structure));
};