@govtechsg/oobee 0.10.76 → 0.10.77

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.github/workflows/publish.yml +8 -1
  2. package/INTEGRATION.md +7 -3
  3. package/dist/cli.js +252 -0
  4. package/dist/combine.js +221 -0
  5. package/dist/constants/cliFunctions.js +306 -0
  6. package/dist/constants/common.js +1669 -0
  7. package/dist/constants/constants.js +913 -0
  8. package/dist/constants/errorMeta.json +319 -0
  9. package/dist/constants/itemTypeDescription.js +7 -0
  10. package/dist/constants/oobeeAi.js +121 -0
  11. package/dist/constants/questions.js +151 -0
  12. package/dist/constants/sampleData.js +176 -0
  13. package/dist/crawlers/commonCrawlerFunc.js +428 -0
  14. package/dist/crawlers/crawlDomain.js +613 -0
  15. package/dist/crawlers/crawlIntelligentSitemap.js +135 -0
  16. package/dist/crawlers/crawlLocalFile.js +151 -0
  17. package/dist/crawlers/crawlSitemap.js +303 -0
  18. package/dist/crawlers/custom/escapeCssSelector.js +10 -0
  19. package/dist/crawlers/custom/evaluateAltText.js +11 -0
  20. package/dist/crawlers/custom/extractAndGradeText.js +44 -0
  21. package/dist/crawlers/custom/extractText.js +27 -0
  22. package/dist/crawlers/custom/findElementByCssSelector.js +36 -0
  23. package/dist/crawlers/custom/flagUnlabelledClickableElements.js +963 -0
  24. package/dist/crawlers/custom/framesCheck.js +37 -0
  25. package/dist/crawlers/custom/getAxeConfiguration.js +111 -0
  26. package/dist/crawlers/custom/gradeReadability.js +23 -0
  27. package/dist/crawlers/custom/utils.js +1024 -0
  28. package/dist/crawlers/custom/xPathToCss.js +147 -0
  29. package/dist/crawlers/guards/urlGuard.js +71 -0
  30. package/dist/crawlers/pdfScanFunc.js +276 -0
  31. package/dist/crawlers/runCustom.js +89 -0
  32. package/dist/exclusions.txt +7 -0
  33. package/dist/generateHtmlReport.js +144 -0
  34. package/dist/index.js +62 -0
  35. package/dist/logs.js +84 -0
  36. package/dist/mergeAxeResults.js +1571 -0
  37. package/dist/npmIndex.js +429 -0
  38. package/dist/proxyService.js +360 -0
  39. package/dist/runGenerateJustHtmlReport.js +16 -0
  40. package/dist/screenshotFunc/htmlScreenshotFunc.js +355 -0
  41. package/dist/screenshotFunc/pdfScreenshotFunc.js +645 -0
  42. package/dist/services/s3Uploader.js +127 -0
  43. package/dist/static/ejs/partials/components/allIssues/AllIssues.ejs +9 -0
  44. package/dist/static/ejs/partials/components/allIssues/CategoryBadges.ejs +82 -0
  45. package/dist/static/ejs/partials/components/allIssues/FilterBar.ejs +33 -0
  46. package/dist/static/ejs/partials/components/allIssues/IssuesTable.ejs +41 -0
  47. package/dist/static/ejs/partials/components/header/SiteInfo.ejs +119 -0
  48. package/dist/static/ejs/partials/components/header/aboutScanModal/AboutScanModal.ejs +15 -0
  49. package/dist/static/ejs/partials/components/header/aboutScanModal/ScanConfiguration.ejs +44 -0
  50. package/dist/static/ejs/partials/components/header/aboutScanModal/ScanDetails.ejs +142 -0
  51. package/dist/static/ejs/partials/components/prioritiseIssues/IssueDetailCard.ejs +36 -0
  52. package/dist/static/ejs/partials/components/prioritiseIssues/PrioritiseIssues.ejs +47 -0
  53. package/dist/static/ejs/partials/components/ruleModal/ruleOffcanvas.ejs +196 -0
  54. package/dist/static/ejs/partials/components/scannedPagesSegmentedTabs.ejs +48 -0
  55. package/dist/static/ejs/partials/components/screenshotLightbox.ejs +13 -0
  56. package/dist/static/ejs/partials/components/shared/InfoAlert.ejs +3 -0
  57. package/dist/static/ejs/partials/components/summaryScanAbout.ejs +141 -0
  58. package/dist/static/ejs/partials/components/summaryScanResults.ejs +16 -0
  59. package/dist/static/ejs/partials/components/summaryTable.ejs +20 -0
  60. package/dist/static/ejs/partials/components/summaryWcagCompliance.ejs +94 -0
  61. package/dist/static/ejs/partials/components/topTen.ejs +6 -0
  62. package/dist/static/ejs/partials/components/wcagCompliance/FailedCriteria.ejs +47 -0
  63. package/dist/static/ejs/partials/components/wcagCompliance/WcagCompliance.ejs +16 -0
  64. package/dist/static/ejs/partials/components/wcagCompliance/WcagGaugeBar.ejs +16 -0
  65. package/dist/static/ejs/partials/components/wcagCoverageDetails.ejs +18 -0
  66. package/dist/static/ejs/partials/footer.ejs +24 -0
  67. package/dist/static/ejs/partials/header.ejs +14 -0
  68. package/dist/static/ejs/partials/main.ejs +29 -0
  69. package/dist/static/ejs/partials/scripts/allIssues/AllIssues.ejs +376 -0
  70. package/dist/static/ejs/partials/scripts/bootstrap.ejs +8 -0
  71. package/dist/static/ejs/partials/scripts/categorySummary.ejs +141 -0
  72. package/dist/static/ejs/partials/scripts/decodeUnzipParse.ejs +3 -0
  73. package/dist/static/ejs/partials/scripts/header/SiteInfo.ejs +44 -0
  74. package/dist/static/ejs/partials/scripts/header/aboutScanModal/AboutScanModal.ejs +51 -0
  75. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanConfiguration.ejs +127 -0
  76. package/dist/static/ejs/partials/scripts/header/aboutScanModal/ScanDetails.ejs +60 -0
  77. package/dist/static/ejs/partials/scripts/highlightjs.ejs +335 -0
  78. package/dist/static/ejs/partials/scripts/popper.ejs +7 -0
  79. package/dist/static/ejs/partials/scripts/prioritiseIssues/IssueDetailCard.ejs +137 -0
  80. package/dist/static/ejs/partials/scripts/prioritiseIssues/PrioritiseIssues.ejs +214 -0
  81. package/dist/static/ejs/partials/scripts/prioritiseIssues/wcagSvgMap.ejs +861 -0
  82. package/dist/static/ejs/partials/scripts/ruleModal/constants.ejs +957 -0
  83. package/dist/static/ejs/partials/scripts/ruleModal/itemCardRenderer.ejs +353 -0
  84. package/dist/static/ejs/partials/scripts/ruleModal/pageAccordionBuilder.ejs +468 -0
  85. package/dist/static/ejs/partials/scripts/ruleModal/ruleOffcanvas.ejs +306 -0
  86. package/dist/static/ejs/partials/scripts/ruleModal/utilities.ejs +483 -0
  87. package/dist/static/ejs/partials/scripts/scannedPagesSegmentedTabs.ejs +35 -0
  88. package/dist/static/ejs/partials/scripts/screenshotLightbox.ejs +75 -0
  89. package/dist/static/ejs/partials/scripts/summaryScanResults.ejs +14 -0
  90. package/dist/static/ejs/partials/scripts/summaryTable.ejs +78 -0
  91. package/dist/static/ejs/partials/scripts/topTen.ejs +61 -0
  92. package/dist/static/ejs/partials/scripts/utils.ejs +453 -0
  93. package/dist/static/ejs/partials/scripts/wcagCompliance/FailedCriteria.ejs +103 -0
  94. package/dist/static/ejs/partials/scripts/wcagCompliance/WcagGaugeBar.ejs +47 -0
  95. package/dist/static/ejs/partials/scripts/wcagCompliance.ejs +15 -0
  96. package/dist/static/ejs/partials/scripts/wcagCoverageDetails.ejs +75 -0
  97. package/dist/static/ejs/partials/styles/allIssues/AllIssues.ejs +384 -0
  98. package/dist/static/ejs/partials/styles/bootstrap.ejs +12391 -0
  99. package/dist/static/ejs/partials/styles/header/SiteInfo.ejs +121 -0
  100. package/dist/static/ejs/partials/styles/header/aboutScanModal/AboutScanModal.ejs +82 -0
  101. package/dist/static/ejs/partials/styles/header/aboutScanModal/ScanConfiguration.ejs +50 -0
  102. package/dist/static/ejs/partials/styles/header/aboutScanModal/ScanDetails.ejs +149 -0
  103. package/dist/static/ejs/partials/styles/header.ejs +7 -0
  104. package/dist/static/ejs/partials/styles/highlightjs.ejs +54 -0
  105. package/dist/static/ejs/partials/styles/prioritiseIssues/IssueDetailCard.ejs +141 -0
  106. package/dist/static/ejs/partials/styles/prioritiseIssues/PrioritiseIssues.ejs +204 -0
  107. package/dist/static/ejs/partials/styles/ruleModal/ruleOffcanvas.ejs +456 -0
  108. package/dist/static/ejs/partials/styles/scannedPagesSegmentedTabs.ejs +46 -0
  109. package/dist/static/ejs/partials/styles/shared/InfoAlert.ejs +12 -0
  110. package/dist/static/ejs/partials/styles/styles.ejs +1607 -0
  111. package/dist/static/ejs/partials/styles/summaryBootstrap.ejs +12458 -0
  112. package/dist/static/ejs/partials/styles/topTenCard.ejs +44 -0
  113. package/dist/static/ejs/partials/styles/wcagCompliance/FailedCriteria.ejs +59 -0
  114. package/dist/static/ejs/partials/styles/wcagCompliance/WcagGaugeBar.ejs +62 -0
  115. package/dist/static/ejs/partials/styles/wcagCompliance.ejs +36 -0
  116. package/dist/static/ejs/partials/styles/wcagCoverageDetails.ejs +33 -0
  117. package/dist/static/ejs/partials/summaryHeader.ejs +70 -0
  118. package/dist/static/ejs/partials/summaryMain.ejs +49 -0
  119. package/dist/static/ejs/report.ejs +226 -0
  120. package/dist/static/ejs/summary.ejs +47 -0
  121. package/dist/types/types.js +1 -0
  122. package/dist/utils.js +1070 -0
  123. package/examples/oobee-cypress-integration-js/cypress/support/e2e.js +36 -6
  124. package/examples/oobee-cypress-integration-js/cypress.config.js +45 -1
  125. package/examples/oobee-cypress-integration-ts/cypress.config.ts +47 -1
  126. package/examples/oobee-cypress-integration-ts/src/cypress/support/e2e.ts +36 -6
  127. package/examples/oobee-playwright-integration-js/oobee-playwright-demo.js +2 -1
  128. package/examples/oobee-playwright-integration-ts/src/oobee-playwright-demo.ts +2 -1
  129. package/package.json +9 -3
  130. package/src/constants/common.ts +2 -2
  131. package/src/constants/constants.ts +3 -1
  132. package/src/crawlers/crawlDomain.ts +1 -0
  133. package/src/crawlers/runCustom.ts +0 -1
  134. package/src/npmIndex.ts +42 -24
@@ -0,0 +1,645 @@
1
/**
 * Monkey patch Path2D to avoid PDF.js crashing: the global is replaced with an
 * inert stand-in whose constructor and methods accept arguments and do nothing.
 * NOTE(review): static imports are hoisted in ES modules, so this assignment
 * runs after the imported modules have been evaluated — confirm PDF.js only
 * touches Path2D at render time.
 */
globalThis.Path2D = class {
  constructor(_source) {}

  rect(_left, _top, _w, _h) {}

  addPath(_other, _matrix) {}
};
7
+ import _ from 'lodash';
8
+ import { getDocument } from 'pdfjs-dist';
9
+ import fs from 'fs';
10
+ import { createCanvas } from '@napi-rs/canvas';
11
+ import assert from 'assert';
12
+ import path from 'path';
13
+ import { fileURLToPath } from 'url';
14
+ import { consoleLogger } from '../logs.js';
15
// Absolute path of this module file and of the directory that contains it;
// `dirname` is the base for the pdfjs-dist font paths built further down.
const filename = fileURLToPath(import.meta.url);
const dirname = path.dirname(filename);

// CONSTANTS
// Padding, in canvas pixels, kept around a highlighted bounding box when cropping.
const BBOX_PADDING = 50;
19
/**
 * Use safe canvas to avoid Path2D issues: creates a canvas whose 2D context
 * silently ignores path operations that receive a Path2D instance as their
 * first argument.
 *
 * @param {number} width - canvas width forwarded to createCanvas
 * @param {number} height - canvas height forwarded to createCanvas
 * @returns the created canvas (its 2D context has been patched in place)
 */
function createSafeCanvas(width, height) {
  const canvas = createCanvas(width, height);
  const context2d = canvas.getContext('2d');
  // Wrap a context method: a call whose first argument is a Path2D becomes a
  // no-op returning undefined, every other call is forwarded unchanged.
  const skipWhenGivenPath2D = original =>
    function (...callArgs) {
      const receivedPath2D = callArgs.length > 0 && callArgs[0] instanceof globalThis.Path2D;
      return receivedPath2D ? undefined : original.apply(this, callArgs);
    };
  for (const method of ['clip', 'fill', 'stroke', 'isPointInPath', 'isPointInStroke']) {
    context2d[method] = skipWhenGivenPath2D(context2d[method]);
  }
  return canvas;
}
38
// CanvasFactory for Node.js
/**
 * Canvas factory exposing create / reset / destroy. The single instance below
 * is passed to PDF.js (getDocument and page.render) and is also used directly
 * by annotateAndSave in this file.
 */
function NodeCanvasFactory() {}
NodeCanvasFactory.prototype = {
  /**
   * Allocate a patched canvas of the given size.
   * @returns {{canvas: object, context: object}} the canvas and its 2D context
   * @throws AssertionError when width or height is not greater than 0
   */
  create(width, height) {
    assert(width > 0 && height > 0, 'Invalid canvas size');
    const canvas = createSafeCanvas(width, height);
    return { canvas, context: canvas.getContext('2d') };
  },

  /**
   * Resize the canvas held by `canvasAndContext`.
   * @throws AssertionError when no canvas is held or the size is invalid
   */
  reset(canvasAndContext, width, height) {
    assert(canvasAndContext.canvas, 'Canvas is not specified');
    assert(width > 0 && height > 0, 'Invalid canvas size');
    const { canvas } = canvasAndContext;
    canvas.width = width;
    canvas.height = height;
  },

  /**
   * Drop the references held by `canvasAndContext` (both fields become null).
   * @throws AssertionError when no canvas is held
   */
  destroy(canvasAndContext) {
    assert(canvasAndContext.canvas, 'Canvas is not specified');
    Object.assign(canvasAndContext, { canvas: null, context: null });
  },
};
const canvasFactory = new NodeCanvasFactory();
63
/**
 * Produce a highlighted, cropped PNG screenshot for each violation of a PDF.
 *
 * Every item's `context` string is resolved to per-page bounding boxes
 * (buildBboxMap + getBboxesList), the page is rendered at 200% scale and
 * annotateAndSave writes the image. The input array is not modified: a deep
 * copy is returned, with `screenshotPath` and `page` set on every item whose
 * context could be mapped to a page.
 *
 * Each page is rendered only once; the rendered canvas is reused for later
 * violations on the same page. The PDF document and the cached canvases are
 * released before returning, including when an error is thrown.
 *
 * @param {string} pdfFilePath - location of the PDF, passed to PDF.js as `url`
 * @param {Array<object>} items - violations; each one is read for its `context`
 * @param {string} screenshotPath - path prefix of the image files ("-<n>.png" is appended)
 * @returns {Promise<Array<object>>} deep copy of `items` with screenshot data added
 */
export async function getPdfScreenshots(pdfFilePath, items, screenshotPath) {
  const newItems = _.cloneDeep(items);
  const loadingTask = getDocument({
    url: pdfFilePath,
    canvasFactory,
    // NOTE(review): getPageFromContext resolves the fonts from
    // '../../node_modules/...' — confirm which relative path is correct.
    standardFontDataUrl: path.join(dirname, '../node_modules/pdfjs-dist/standard_fonts/'),
    disableFontFace: true,
    verbosity: 0,
  });
  // save some resources by caching page canvases to be reused by diff violations
  const pageCanvasCache = {};
  try {
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;
    // iterate through each violation
    for (let i = 0; i < newItems.length; i++) {
      const { context } = newItems[i];
      const bboxMap = buildBboxMap([{ location: context }], structureTree);
      for (const [pageNum, bboxList] of Object.entries(bboxMap)) {
        const pageNumber = parseInt(pageNum, 10);
        const page = await pdf.getPage(pageNumber);
        // an array of length 1, containing location of current violation
        const bboxesWithCoords = await Promise.all([
          page.getOperatorList(),
          page.getAnnotations(),
        ]).then(getBboxesList(bboxList, page));
        // Render the page on a Node canvas with 200% scale.
        const viewport = page.getViewport({ scale: 2.0 });
        let canvasAndContext = pageCanvasCache[pageNum];
        if (!canvasAndContext) {
          const fresh = canvasFactory.create(viewport.width, viewport.height);
          try {
            // render pdf page onto a canvas
            await page.render({ canvasContext: fresh.context, viewport, canvasFactory }).promise;
          } catch (e) {
            canvasFactory.destroy(fresh);
            throw e;
          }
          // only fully rendered canvases are cached
          pageCanvasCache[pageNum] = fresh;
          canvasAndContext = fresh;
        }
        const finalScreenshotPath = annotateAndSave(
          canvasAndContext.canvas,
          screenshotPath,
          viewport,
        )(bboxesWithCoords[0]);
        newItems[i].screenshotPath = path.join('elemScreenshots', 'pdf', finalScreenshotPath);
        newItems[i].page = pageNumber;
        page.cleanup();
      }
    }
    return newItems;
  } finally {
    // release the cached page canvases and the PDF document
    for (const cached of Object.values(pageCanvasCache)) {
      if (cached.canvas) {
        canvasFactory.destroy(cached);
      }
    }
    try {
      await loadingTask.destroy();
    } catch (e) {
      consoleLogger.error('Error in releasing PDF document:', e);
    }
  }
}
110
/**
 * Build a function that highlights one bounding box on a copy of a rendered
 * page, crops the copy around the box (plus BBOX_PADDING on every side) and
 * writes the crop as a PNG.
 *
 * The file is written to the first path of the form
 * `${screenshotPath}-<counter>.png` that does not exist yet. A write failure
 * is logged, not thrown, and the file name is returned either way.
 *
 * @param origCanvas - canvas holding the rendered page; it is only read from
 * @param {string} screenshotPath - path prefix of the image file
 * @param viewport - page viewport; its width and height size the working canvas
 * @returns {(bbox: {location: number[]}) => string} function returning the base name of the image file
 */
const annotateAndSave = (origCanvas, screenshotPath, viewport) => ({ location }) => {
  // scale up by 2
  const [left, bottom, width, height] = location.map(value => value * 2);
  // the box origin is measured from the bottom edge, the canvas from the top
  const top = viewport.height - bottom - height;

  // create new canvas to annotate so we do not "pollute" the original
  const highlighted = canvasFactory.create(viewport.width, viewport.height);
  highlighted.context.drawImage(origCanvas, 0, 0);
  highlighted.context.fillStyle = 'rgba(0, 255, 255, 0.2)';
  highlighted.context.fillRect(left, top, width, height);

  const cropX = left - BBOX_PADDING;
  const cropY = top - BBOX_PADDING;
  const cropWidth = width + BBOX_PADDING * 2;
  const cropHeight = height + BBOX_PADDING * 2;

  // create new canvas to crop image
  const cropped = canvasFactory.create(cropWidth, cropHeight);
  cropped.context.drawImage(
    highlighted.canvas,
    cropX,
    cropY,
    cropWidth,
    cropHeight,
    0,
    0,
    cropWidth,
    cropHeight,
  );
  // convert the canvas to an image
  const pngBytes = cropped.canvas.toBuffer('image/png');

  // save image under the first free indexed name
  let counter = 0;
  let targetPath = `${screenshotPath}-${counter}.png`;
  while (fs.existsSync(targetPath)) {
    counter++;
    targetPath = `${screenshotPath}-${counter}.png`;
  }
  try {
    fs.writeFileSync(targetPath, pngBytes);
  } catch (e) {
    consoleLogger.error('Error in writing screenshot:', e);
  }

  canvasFactory.destroy(cropped);
  canvasFactory.destroy(highlighted);
  return path.basename(targetPath);
};
151
/**
 * Return the viewport as seen after rotating the page.
 *
 * @param {number} rotateAngle - page rotation; 0 and 180 leave the viewport untouched
 * @param {number[]} viewport - four-element viewport array
 * @returns {number[]} the same array for 0/180, otherwise a new array with
 *   elements 0/1 swapped and elements 2/3 swapped
 */
export const rotateViewport = (rotateAngle, viewport) => {
  const keepsOrientation = rotateAngle === 0 || rotateAngle === 180;
  if (keepsOrientation) {
    return viewport;
  }
  const [first, second, third, fourth] = viewport;
  return [second, first, fourth, third];
};
157
/**
 * Rotate a point by `rotateAngle` degrees and, for 90 / 180 / 270, shift it by
 * sums of viewport entries; any other angle gets the bare rotation.
 *
 * @param {number} rotateAngle - rotation in degrees
 * @param {number[]} point - [x, y]
 * @param {number[]} viewport - four-element viewport array
 * @returns {number[]} the rotated [x, y]
 */
export const rotatePoint = (rotateAngle, point, viewport) => {
  const radians = (rotateAngle * Math.PI) / 180;
  const cos = Math.cos(radians);
  const sin = Math.sin(radians);
  let x = point[0] * cos + point[1] * sin;
  let y = -point[0] * sin + point[1] * cos;
  if (rotateAngle === 90) {
    y += viewport[2] + viewport[0];
  } else if (rotateAngle === 180) {
    x += viewport[2] + viewport[0];
    y += viewport[3] + viewport[1];
  } else if (rotateAngle === 270) {
    x += viewport[3] + viewport[1];
  }
  return [x, y];
};
177
/**
 * Rotate a rectangle given as [x, y, width, height].
 *
 * Both opposite corners are passed through rotatePoint and the rectangle is
 * rebuilt from the rotated corners.
 *
 * @param {number[]} coords - [x, y, width, height]
 * @param {number} rotateAngle - rotation in degrees; 0 returns `coords` itself
 * @param {number[]} viewport - four-element viewport array forwarded to rotatePoint
 * @returns {number[]} [x, y, width, height] of the rotated rectangle
 */
export const rotateCoordinates = (coords, rotateAngle, viewport) => {
  if (rotateAngle === 0) {
    return coords;
  }
  const [left, bottom, width, height] = coords;
  const cornerA = rotatePoint(rotateAngle, [left, bottom], viewport);
  const cornerB = rotatePoint(rotateAngle, [left + width, bottom + height], viewport);
  return [
    Math.min(cornerA[0], cornerB[0]),
    Math.min(cornerA[1], cornerB[1]),
    Math.abs(cornerA[0] - cornerB[0]),
    Math.abs(cornerA[1] - cornerB[1]),
  ];
};
184
/**
 * Merge two bounding boxes into the smallest box that covers both.
 *
 * @param {{x:number,y:number,width:number,height:number}|null|undefined} newBoundingBox
 * @param {{x:number,y:number,width:number,height:number}|null|undefined} oldBoundingBox
 * @returns {object} `{}` when both are nil, the old box when only the new one
 *   is nil, a deep copy of the new box when only the old one is nil, otherwise
 *   the union of both
 */
function concatBoundingBoxes(newBoundingBox, oldBoundingBox) {
  const hasNew = !_.isNil(newBoundingBox);
  const hasOld = !_.isNil(oldBoundingBox);
  if (!hasNew) {
    return (hasOld && oldBoundingBox) || {};
  }
  if (!hasOld) {
    return _.cloneDeep(newBoundingBox);
  }
  const left = Math.min(newBoundingBox.x, oldBoundingBox.x);
  const bottom = Math.min(newBoundingBox.y, oldBoundingBox.y);
  const right = Math.max(
    newBoundingBox.x + newBoundingBox.width,
    oldBoundingBox.x + oldBoundingBox.width,
  );
  const top = Math.max(
    newBoundingBox.y + newBoundingBox.height,
    oldBoundingBox.y + oldBoundingBox.height,
  );
  return { x: left, y: bottom, width: right - left, height: top - bottom };
}
203
/**
 * Resolve a list of MCIDs, or an annotation reference, to one bounding box
 * relative to the (rotated) viewport origin.
 *
 * - An array of MCIDs is looked up in `pageMap`; all usable boxes are merged.
 *   A box is usable when x, y, width and height are finite numbers.
 * - An object with an `annot` property takes the `rect` of
 *   `annotations[listOfMcid.annot]`.
 *
 * Accumulation no longer relies on the truthiness of `x`, so a box starting at
 * x = 0 is merged with later boxes instead of being replaced by them.
 *
 * @param {Array|{annot: number}} listOfMcid - MCIDs or an annotation reference
 * @param {object} pageMap - bounding boxes keyed by MCID
 * @param {Array} annotations - page annotations; only `rect` is read
 * @param {number[]} viewport - four-element viewport array
 * @param {number} rotateAngle - page rotation in degrees
 * @returns {number[]} [x, y, width, height], or [] when no box could be found
 */
export const parseMcidToBbox = (listOfMcid, pageMap, annotations, viewport, rotateAngle) => {
  const isUsableBbox = box =>
    box != null && [box.x, box.y, box.width, box.height].every(Number.isFinite);

  // stays undefined until at least one bounding box has been found
  let coords;
  if (Array.isArray(listOfMcid)) {
    listOfMcid.forEach(mcid => {
      const currentBbox = pageMap[mcid];
      if (isUsableBbox(currentBbox)) {
        coords = concatBoundingBoxes(currentBbox, coords);
      }
    });
  } else if (Object.prototype.hasOwnProperty.call(listOfMcid, 'annot')) {
    const rect = annotations[listOfMcid.annot]?.rect;
    if (rect) {
      coords = {
        x: rect[0],
        y: rect[1],
        width: Math.abs(rect[0] - rect[2]),
        height: Math.abs(rect[1] - rect[3]),
      };
    }
  }
  if (!coords) {
    return [];
  }
  const coordsArray = rotateCoordinates(
    [coords.x, coords.y, coords.width, coords.height],
    rotateAngle,
    viewport,
  );
  const rotatedViewport = rotateViewport(rotateAngle, viewport);
  return [
    coordsArray[0] - rotatedViewport[0],
    coordsArray[1] - rotatedViewport[1],
    coordsArray[2],
    coordsArray[3],
  ];
};
239
/**
 * Look up the bounding box of one glyph and express it relative to the
 * (rotated) viewport origin.
 *
 * @param {number} operatorIndex - index into `operationsList`
 * @param {number} glyphIndex - index into the entry found at `operatorIndex`
 * @param {Array} operationsList - per-operator collections of glyph boxes
 * @param {number[]} viewport - four-element viewport array
 * @param {number} rotateAngle - page rotation in degrees
 * @returns {number[]} [x, y, width, height], or [] when no box is stored
 */
export const getBboxForGlyph = (operatorIndex, glyphIndex, operationsList, viewport, rotateAngle) => {
  const glyphBoxes = operationsList[operatorIndex];
  const bbox = glyphBoxes ? glyphBoxes[glyphIndex] : null;
  if (!bbox) {
    return [];
  }
  const [x, y, width, height] = rotateCoordinates(bbox, rotateAngle, viewport);
  const [originX, originY] = rotateViewport(rotateAngle, viewport);
  return [x - originX, y - originY, width, height];
};
253
+ // Below are methods adapted from
254
+ // https://github.com/veraPDF/verapdf-js-viewer/blob/master/src/services/bboxService.ts
255
+ // to determine the bounding box data of the violations from the context field
256
/**
 * Build the callback that turns `[operatorList, annotations]` of a page into
 * the given bbox entries with a `location` filled in.
 *
 * The last two entries of `operatorList.argsArray` are read as bounding-box
 * data: the second to last as per-operator glyph boxes, the last as a
 * `[positionData, noMCIDData]` pair.
 * NOTE(review): a stock operator list does not obviously carry such entries —
 * presumably a patched pdfjs-dist build is expected; confirm.
 *
 * Each entry of `bboxList` is mutated in place:
 * - `mcidList` is resolved through parseMcidToBbox;
 * - otherwise `contentItemPath` is walked through `noMCIDData`; a failure is
 *   logged and yields [0, 0, 0, 0];
 * - numeric `operatorIndex` and `glyphIndex` are resolved through
 *   getBboxForGlyph and override any earlier location.
 *
 * @param {Array<object>} bboxList - entries produced by buildBboxMap for one page
 * @param page - PDF.js page; `view` and `rotate` are read
 * @returns {(data: Array) => Array<object>} callback returning the updated entries
 */
export const getBboxesList = (bboxList, page) => ([operatorList, annotations]) => {
  const { argsArray } = operatorList;
  const operationData = argsArray[argsArray.length - 2];
  const [positionData, noMCIDData] = argsArray[argsArray.length - 1];

  return bboxList.map(bbox => {
    if (bbox.mcidList) {
      bbox.location = parseMcidToBbox(
        bbox.mcidList,
        positionData,
        annotations,
        page.view,
        page.rotate,
      );
    } else if (bbox.contentItemPath) {
      const [, rootKey, ...nestedIndexes] = bbox.contentItemPath;
      let node = noMCIDData[rootKey];
      try {
        nestedIndexes.forEach((itemIndex, depth) => {
          const isLastStep = depth + 1 >= nestedIndexes.length;
          // descend through the wrapper level unless this is the last step of a final node
          if (!isLastStep || !node.final) {
            node = node.contentItems[0];
          }
          node = node.contentItems[itemIndex];
        });
        bbox.location = [
          node.contentItem.x,
          node.contentItem.y,
          node.contentItem.w,
          node.contentItem.h,
        ];
      } catch (err) {
        console.log('NoMCIDDataParseError:', err.message || err);
        bbox.location = [0, 0, 0, 0];
      }
    }
    if (_.isNumber(bbox.operatorIndex) && _.isNumber(bbox.glyphIndex)) {
      bbox.location = getBboxForGlyph(
        bbox.operatorIndex,
        bbox.glyphIndex,
        operationData,
        page.view,
        page.rotate,
      );
    }
    return bbox;
  });
};
294
/*
 * Walk a tag object (and everything below its `children`) and collect the
 * MCIDs found, grouped by the `pageIndex` of the node that carries them.
 *
 * @param {Object} tagObject - tag node; `children` may be one node or an array
 *
 * @return [[{Array}, {Number}]] - [[[array of mcids], page of error]]
 */
function findAllMcid(tagObject) {
  const mcidsByPage = {};
  const visit = node => {
    if (!node) {
      return;
    }
    // 0 is a valid MCID, so it is checked explicitly
    const hasMcid = Boolean(node.mcid) || node.mcid === 0;
    if (hasMcid) {
      if (!mcidsByPage[node.pageIndex]) {
        mcidsByPage[node.pageIndex] = [];
      }
      mcidsByPage[node.pageIndex].push(node.mcid);
    }
    const { children } = node;
    if (!children) {
      return;
    }
    if (children instanceof Array) {
      [...children].forEach(child => visit(child));
    } else {
      visit(children);
    }
  };
  visit(tagObject);
  return _.map(mcidsByPage, (mcids, pageKey) => [mcids, _.toNumber(pageKey)]);
}
324
/**
 * Convert a violation context string into a location descriptor.
 *
 * Depending on the context the result is one of:
 * - `{ pageIndex, contentStream, content, contentItems }` for a content item
 *   path without MCID;
 * - `{ pageIndex, mcid }` for a content item path that carries an MCID;
 * - `{ pageIndex, annot }` for an annotation path;
 * - `[[childIndex, tagName], ...]` for a structure tree path (the part after
 *   "PDStructTreeRoot)/");
 * - `[]` for an empty context or one that cannot be parsed.
 *
 * The descriptor objects are created as real objects; they used to be declared
 * without a value, so every write to them threw and these contexts always
 * resolved to `[]`.
 *
 * @param {string} [errorContext=''] - context string of a violation
 * @returns {object|Array} location descriptor as described above
 */
const convertContextToPath = (errorContext = '') => {
  if (!errorContext) {
    return [];
  }
  const contextString = errorContext;
  // first bracketed number of a node, e.g. "pages[3](...)" -> 3
  const firstBracketIndex = nodeString => parseInt(nodeString.split(/[[\]]/)[1], 10);
  try {
    if (contextString.includes('contentItem') && !contextString.includes('mcid')) {
      const result = contextString.match(
        /pages\[(?<pages>\d+)\](\(.+\))?\/contentStream\[(?<contentStream>\d+)\](\(.+\))?\/content\[(?<content>\d+)\](?<contentItems>((\(.+\))?\/contentItem\[(\d+)\])+)/,
      );
      if (result) {
        try {
          const { pages, contentStream, content, contentItems } = result.groups;
          return {
            pageIndex: parseInt(pages, 10),
            contentStream: parseInt(contentStream, 10),
            content: parseInt(content, 10),
            contentItems: contentItems
              .split('/')
              .filter(ci => ci.includes('contentItem'))
              .map(ci => {
                const contentItemIndex = ci.match(/\[(?<contentItem>\d+)\]/);
                return parseInt(contentItemIndex?.groups?.contentItem || '-1', 10);
              }),
          };
        } catch (err) {
          // fall through to the generic contentItem handling below
          console.log('NoMCIDContentItemPathParseError:', err.message || err);
        }
      }
    }
    if (contextString.includes('contentItem')) {
      const contentItemPath = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          contentItemPath.pageIndex = firstBracketIndex(nodeString);
        } else if (nodeString.includes('contentItem') && nodeString.includes('mcid')) {
          // the node ends with "mcid:<n>}" — drop the closing brace
          contentItemPath.mcid = parseInt(nodeString.split('mcid:')[1].slice(0, -1), 10);
        }
      });
      return contentItemPath;
    }
    if (contextString.includes('annots')) {
      const annotationPath = {};
      contextString.split('/').forEach(nodeString => {
        if (nodeString.includes('page')) {
          annotationPath.pageIndex = firstBracketIndex(nodeString);
        } else if (nodeString.includes('annots')) {
          annotationPath.annot = firstBracketIndex(nodeString);
        }
      });
      return annotationPath;
    }
    // cut path before start of Document
    const contextStringArray = contextString.split('PDStructTreeRoot)/')[1].split('/');
    return contextStringArray.map(nodeString => {
      const nextIndex = parseInt(nodeString.split('](')[0].split('K[')[1], 10);
      const tagWords = nodeString.split('(')[1].split(')')[0].split(' ');
      return [nextIndex, tagWords[tagWords.length - 1]];
    });
  } catch {
    return [];
  }
};
390
/**
 * Resolve a violation context to `[mcidList, pageIndex, contentItemPath?]`
 * tuples.
 *
 * The context is first converted with convertContextToPath:
 * - a descriptor with `mcid` + `pageIndex` yields `[[[mcid], pageIndex]]`;
 * - a descriptor with `annot` + `pageIndex` yields `[[{ annot }, pageIndex]]`;
 * - a descriptor with `contentItems` yields
 *   `[[undefined, pageIndex, [contentStream, content, ...contentItems]]]`;
 * - a tag path (array) is followed through `structure` and the MCIDs below the
 *   node reached are collected with findAllMcid;
 * - anything else yields the default `[[[], -1, undefined]]`.
 *
 * @param {string} context - context string of a violation
 * @param {object} structure - structure tree of the document
 * @returns {Array} list of tuples as described above
 */
const getTagsFromErrorPlace = (context, structure) => {
  const defaultValue = [[[], -1, undefined]];
  const selectedTag = convertContextToPath(context);
  if (_.isEmpty(selectedTag)) {
    return defaultValue;
  }

  const owns = (target, key) => Object.prototype.hasOwnProperty.call(target, key);
  // Type guard: a descriptor object carries at least one of these keys
  const isPathObject = value =>
    value !== null &&
    typeof value === 'object' &&
    ['mcid', 'pageIndex', 'annot', 'contentItems'].some(key => owns(value, key));

  if (isPathObject(selectedTag)) {
    const hasPage = owns(selectedTag, 'pageIndex');
    if (owns(selectedTag, 'mcid') && hasPage) {
      return [[[selectedTag.mcid], selectedTag.pageIndex]];
    }
    if (owns(selectedTag, 'annot') && hasPage) {
      return [[{ annot: selectedTag.annot }, selectedTag.pageIndex]];
    }
    if (owns(selectedTag, 'contentItems')) {
      const contentItemPath = [
        selectedTag.contentStream,
        selectedTag.content,
        ...selectedTag.contentItems,
      ];
      return [[undefined, selectedTag.pageIndex, contentItemPath]];
    }
    return defaultValue;
  }

  if (!(selectedTag instanceof Array)) {
    return defaultValue;
  }

  // follow the tag path one step at a time, starting at the tree root
  let current = { ...structure };
  selectedTag.forEach(([childIndex, tagName], index) => {
    const { children } = current;
    let next;
    if (!children) {
      next = current[childIndex];
    } else if (!(children instanceof Array)) {
      next = children.name === tagName ? children : current;
    } else if (current?.name === tagName && index === 0) {
      next = current;
    } else {
      // children that carry an MCID are skipped when indexing, if any others exist
      const untagged = [...children].filter(tag => !tag?.mcid);
      const pool = untagged.length ? untagged : children;
      next = { ...pool[childIndex] };
    }
    current = { ...next };
  });
  return findAllMcid(current);
};
454
/**
 * Parse a location of the form
 * `pages[<start>]/boundingBox[x,y,x1,y1]` or
 * `pages[<start>-<end>]/boundingBox[x,y,x1,y1]` into per-page boxes.
 *
 * Page numbers in the result are the parsed indexes plus one.
 * - Single page: `location` is `[x, y, width, height]`.
 * - Page range: one entry per page; the fourth element is the marker string
 *   'bottom' (first page, anchored at y1) or 'top' (last page anchored at y,
 *   pages in between anchored at 0) instead of a height.
 *
 * Numbers are parsed once, with an explicit radix for the page indexes.
 *
 * @param {string} location - location string as described above
 * @returns {Array<{page: number, location: Array}>} boxes in page order
 */
const calculateLocation = location => {
  const [pages, boundingBox] = location.split('/');
  const [start, end] = pages.replace('pages[', '').replace(']', '').split('-');
  const [x, y, x1, y1] = boundingBox
    .replace('boundingBox[', '')
    .replace(']', '')
    .split(',')
    .map(value => parseFloat(value));
  const width = x1 - x;
  const firstPage = parseInt(start, 10) + 1;

  if (!end) {
    return [{ page: firstPage, location: [x, y, width, y1 - y] }];
  }

  const lastPage = parseInt(end, 10) + 1;
  const bboxes = [];
  for (let page = firstPage; page <= lastPage; page++) {
    // the first page wins when the range covers a single page
    if (page === firstPage) {
      bboxes.push({ page, location: [x, y1, width, 'bottom'] });
    } else if (page === lastPage) {
      bboxes.push({ page, location: [x, y, width, 'top'] });
    } else {
      bboxes.push({ page, location: [x, 0, width, 'top'] });
    }
  }
  return bboxes;
};
493
/**
 * Parse a JSON location of the form `{"bbox":[{"p":<page index>,"rect":[x,y,x1,y1]}, ...]}`
 * into per-page boxes.
 *
 * @param {string} location - JSON text with a `bbox` array
 * @returns {Array<{page: number, location: number[]}>} one entry per `bbox`
 *   item, with `page` = p + 1 and `location` = [x, y, width, height]
 * @throws SyntaxError when `location` is not valid JSON
 */
const calculateLocationJSON = location => {
  const parsed = JSON.parse(location);
  return parsed.bbox.map(({ p, rect }) => {
    const [x, y, x1, y1] = rect;
    const left = parseFloat(x);
    const bottom = parseFloat(y);
    return {
      page: parseFloat(p) + 1,
      location: [left, bottom, parseFloat(x1) - left, parseFloat(y1) - bottom],
    };
  });
};
507
/**
 * Extract the page, operator and glyph indexes from a location path such as
 * `.../pages[2](...)/contentStream[0](...)/operators[15]/usedGlyphs[3]`.
 *
 * A path segment is recognised by its prefix ("pages", "operators",
 * "usedGlyphs"); when a prefix occurs more than once the last occurrence wins.
 *
 * @param {string} location - slash-separated location path
 * @returns {{pageIndex: number, operatorIndex: number, glyphIndex: number}|null}
 *   the three indexes, or null when any of them is missing or is not a
 *   non-negative integer
 */
export const calculateLocationInStreamOperator = location => {
  const found = { pages: -1, operators: -1, usedGlyphs: -1 };
  location.split('/').forEach(step => {
    Object.keys(found).forEach(prefix => {
      if (step.startsWith(prefix)) {
        found[prefix] = parseInt(step.split(/[[\]]/)[1], 10);
      }
    });
  });

  const { pages: pageIndex, operators: operatorIndex, usedGlyphs: glyphIndex } = found;
  // a segment without a bracketed number parses to NaN and must not pass as an index
  const isValidIndex = value => Number.isInteger(value) && value >= 0;
  if (![pageIndex, operatorIndex, glyphIndex].every(isValidIndex)) {
    return null;
  }
  return {
    pageIndex,
    operatorIndex,
    glyphIndex,
  };
};
532
/**
 * Group bbox descriptors by page number.
 *
 * Each bbox is classified by its `location` string:
 * - contains "contentStream" and "operators": glyph location, resolved with
 *   calculateLocationInStreamOperator (skipped when that returns null);
 * - contains "StructTreeRoot" or "root/doc", or equals "root": structure
 *   location, resolved with getTagsFromErrorPlace;
 * - anything else: explicit coordinates, parsed with calculateLocation when it
 *   contains "pages[" and with calculateLocationJSON otherwise.
 *
 * A bbox whose location cannot be processed is reported on stderr and skipped.
 *
 * @param {Array<object>} bboxes - descriptors; `location`, `groupId` and `bboxTitle` are read
 * @param {object} structure - structure tree of the document
 * @returns {object} map from page number to the entries found on that page;
 *   each entry carries the `index` of its source bbox
 */
export const buildBboxMap = (bboxes, structure) => {
  const bboxMap = {};
  const appendTo = (pageKey, entry) => {
    bboxMap[pageKey] = [...(bboxMap[pageKey] || []), entry];
  };

  bboxes.forEach((bbox, index) => {
    try {
      const isGlyphLocation =
        bbox.location.includes('contentStream') && bbox.location.includes('operators');
      if (isGlyphLocation) {
        const bboxPosition = calculateLocationInStreamOperator(bbox.location);
        if (!bboxPosition) {
          return;
        }
        appendTo(bboxPosition.pageIndex + 1, {
          index,
          operatorIndex: bboxPosition.operatorIndex,
          glyphIndex: bboxPosition.glyphIndex,
          bboxTitle: bbox.bboxTitle,
        });
        return;
      }

      const isStructureLocation =
        bbox.location.includes('StructTreeRoot') ||
        bbox.location.includes('root/doc') ||
        bbox.location === 'root';
      if (isStructureLocation) {
        const mcidData = getTagsFromErrorPlace(bbox.location, structure);
        mcidData.forEach(([mcidList, pageIndex, contentItemPath]) => {
          appendTo(pageIndex + 1, {
            index,
            mcidList,
            contentItemPath,
            groupId: bbox.groupId || undefined,
            bboxTitle: bbox.bboxTitle,
          });
        });
        return;
      }

      const bboxesFromLocation = bbox.location.includes('pages[')
        ? calculateLocation(bbox.location)
        : calculateLocationJSON(bbox.location);
      bboxesFromLocation.forEach(bboxWithLocation => {
        appendTo(bboxWithLocation.page, {
          index,
          location: bboxWithLocation.location,
          groupId: bbox.groupId || undefined,
          bboxTitle: bbox.bboxTitle,
        });
      });
    } catch {
      console.error(`Location not supported: ${bbox.location}`);
    }
  });
  return bboxMap;
};
591
/**
 * Return the 1-based page number addressed by a location whose LAST path
 * segment is a page node, e.g. `root/document[0]/pages[4]` -> 5.
 *
 * @param {string} bboxLocation - slash-separated location path
 * @returns {number} the page number, or -1 when the location is not a string,
 *   does not end in a `pages[...]` segment, or carries no numeric index
 */
export const getSelectedPageByLocation = bboxLocation => {
  // guard first: a nullish location used to throw on split()
  if (typeof bboxLocation !== 'string') {
    return -1;
  }
  const steps = bboxLocation.split('/');
  const lastStep = steps[steps.length - 1];
  if (!lastStep.startsWith('pages')) {
    return -1;
  }
  const pageIndex = parseInt(lastStep.split(/[[\]]/)[1], 10);
  return Number.isNaN(pageIndex) ? -1 : pageIndex + 1;
};
604
/**
 * Determine the page number a bbox refers to.
 *
 * Structure locations (containing "StructTreeRoot" or "root/doc", or equal to
 * "root") are resolved with getTagsFromErrorPlace and use the page index of
 * the first result plus one. Every other location is parsed with
 * calculateLocation (when it contains "pages[") or calculateLocationJSON.
 *
 * @param {{location: string}} bbox - descriptor whose `location` is inspected
 * @param {object} structure - structure tree of the document
 * @returns {number} the page number; 0 when the location parses to no boxes;
 *   -1 when the location cannot be processed (the error is written to stderr)
 */
export const getBboxPage = (bbox, structure) => {
  try {
    const isStructureLocation =
      bbox.location.includes('StructTreeRoot') ||
      bbox.location.includes('root/doc') ||
      bbox.location === 'root';
    if (isStructureLocation) {
      const [[, pageIndex]] = getTagsFromErrorPlace(bbox.location, structure);
      return pageIndex + 1;
    }
    const parseLocation = bbox.location.includes('pages[')
      ? calculateLocation
      : calculateLocationJSON;
    const [firstBbox] = parseLocation(bbox.location);
    return firstBbox ? firstBbox.page : 0;
  } catch (e) {
    console.error(e);
    console.error(`Location not supported: ${bbox.location}`);
    return -1;
  }
};
624
/**
 * Open a PDF and determine the page number a violation context refers to.
 *
 * Failures are logged and reported as `undefined` rather than thrown, so the
 * caller-visible result is unchanged while the error is no longer lost. The
 * loading task is destroyed before returning to release the document.
 *
 * @param {string} context - context string of a violation
 * @param {string} pdfFilePath - location of the PDF, passed to PDF.js as `url`
 * @returns {Promise<number|undefined>} the result of getBboxPage, or undefined
 *   when the document could not be loaded or inspected
 */
export const getPageFromContext = async (context, pdfFilePath) => {
  let loadingTask;
  try {
    loadingTask = getDocument({
      url: pdfFilePath,
      standardFontDataUrl: path.join(dirname, '../../node_modules/pdfjs-dist/standard_fonts/'),
      disableFontFace: true,
      verbosity: 0,
    });
    const pdf = await loadingTask.promise;
    const structureTree = await pdf._pdfInfo.structureTree;
    return getBboxPage({ location: context }, structureTree);
  } catch (e) {
    consoleLogger.error('Error in getting page from context:', e);
    return undefined;
  } finally {
    if (loadingTask) {
      try {
        await loadingTask.destroy();
      } catch (e) {
        consoleLogger.error('Error in releasing PDF document:', e);
      }
    }
  }
};
641
/**
 * Determine the page number of every bbox in a list.
 *
 * The callback now returns the page; it previously computed it and discarded
 * it, so the result was always an array of `undefined`.
 *
 * @param {Array<{location: string}>} bboxes - descriptors to resolve
 * @param {object} structure - structure tree of the document
 * @returns {number[]} result of getBboxPage for each bbox, in input order
 */
export const getBboxPages = (bboxes, structure) =>
  bboxes.map(bbox => getBboxPage(bbox, structure));