hdoc-tools 0.11.6 → 0.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-validate.js CHANGED
@@ -1,549 +1,549 @@
1
- (function () {
2
- 'use strict';
3
-
4
- const axios = require('axios'),
5
- cheerio = require('cheerio'),
6
- dree = require('dree'),
7
- fs = require('fs'),
8
- path = require('path'),
9
- https = require('https'),
10
- hdoc = require(path.join(__dirname, 'hdoc-module.js')),
11
- translator = require('american-british-english-translator'),
12
- { trueCasePathSync } = require('true-case-path');
13
-
14
- const spellcheck_options = {
15
- british: true,
16
- spelling: true
17
- },
18
- regex_nav_paths = /[a-z0-9-\/]+[a-z0-9]+#{0,1}[a-z0-9-\/]+/,
19
- agent = new https.Agent({
20
- rejectUnauthorized: false
21
- });
22
-
23
- let errors = {},
24
- messages = {},
25
- warnings = {},
26
- html_to_validate = [],
27
- md_to_validate = [],
28
- exclude_links = {},
29
- exclude_spellcheck = {},
30
- exclude_h1_count = {},
31
- exclude_spellcheck_output = [];
32
-
33
- const excludeLink = async function (url) {
34
- if (exclude_links[url]) return true;
35
-
36
- for (let key in exclude_links) {
37
- if (key.endsWith('*')) {
38
- key = key.substring(0, key.length - 1);
39
- if (url.startsWith(key)) return true;
40
- }
41
- }
42
-
43
- return false;
44
- };
45
-
46
- const spellcheckContent = async function (sourceFile, excludes) {
47
- let spelling_errors = {};
48
- let words = [];
49
- const text = fs.readFileSync(sourceFile.path, 'utf8');
50
- const source_path = sourceFile.relativePath.replace('.' + sourceFile.extension, '');
51
- const translate_output = translator.translate(text, spellcheck_options);
52
- if (Object.keys(translate_output).length) {
53
- for (const key in translate_output) {
54
- if (translate_output.hasOwnProperty(key)) {
55
- let error_message = `Line ${key} - British spelling:`;
56
- for (let i = 0; i < translate_output[key].length; i++) {
57
- for (const spelling in translate_output[key][i]) {
58
- if (translate_output[key][i].hasOwnProperty(spelling) && (typeof translate_output[key][i][spelling].details === 'string')) {
59
- if (!excludes[source_path]) {
60
- errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
61
- spelling_errors[spelling] = true;
62
- } else if (!excludes[source_path].includes(spelling.toLowerCase())) {
63
- errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
64
- spelling_errors[spelling] = true;
65
- }
66
- }
67
- }
68
- }
69
- }
70
- }
71
- }
72
- if (Object.keys(spelling_errors).length) {
73
- let exclude_output = {
74
- document_path: sourceFile.relativePath.replace(path.extname(sourceFile.relativePath), ''),
75
- words: []
76
- };
77
- for (const word in spelling_errors) {
78
- if (spelling_errors.hasOwnProperty(word)) {
79
- words.push(word);
80
- exclude_output.words.push(word);
81
- }
82
- }
83
- exclude_spellcheck_output.push(exclude_output);
84
- }
85
- return words;
86
- };
87
-
88
- const checkNavigation = async function (source_path, flat_nav, excludes) {
89
- let nav_errors = [];
90
- for (let key in flat_nav) {
91
- if (flat_nav.hasOwnProperty(key)) {
92
- // doc paths should only contain a-z - characters
93
- const invalid_chars = key.replace(regex_nav_paths, '');
94
- if (invalid_chars !== '') {
95
- nav_errors.push(`Navigation path [${key}] contains the following invalid characters: [${[...invalid_chars].join('] [')}]`);
96
- }
97
- const key_split = key.split('#');
98
- const key_no_hash = key_split[0];
99
-
100
- // Validate path exists - key should be a html file at this point
101
- let file_exists = true;
102
- let file_name = path.join(source_path, key_no_hash + '.html');
103
- if (!fs.existsSync(file_name)) {
104
- file_name = path.join(source_path, key_no_hash + '.htm');
105
- if (!fs.existsSync(file_name)) {
106
- file_name = path.join(source_path, key_no_hash, 'index.html');
107
- if (!fs.existsSync(file_name)) {
108
- file_name = path.join(source_path, key_no_hash, 'index.htm');
109
- if (!fs.existsSync(file_name)) {
110
- file_exists = false;
111
- nav_errors.push(`Navigation path [${key_no_hash}] file does not exist.`);
112
- }
113
- }
114
- }
115
- }
116
-
117
- if (file_exists) {
118
- const true_file = trueCasePathSync(file_name).replace(source_path, '').replaceAll('\\', '/');
119
- const relative_file = file_name.replace(source_path, '').replaceAll('\\', '/');
120
- if (true_file !== relative_file) {
121
- nav_errors.push(`Navigation path [${key}] for filename [${relative_file}] does not match filename case [${true_file}].`);
122
- }
123
- }
124
-
125
- // Validate path spellings
126
- const paths = key.split('/');
127
- for (let i = 0; i < paths.length; i++) {
128
- const path_words = paths[i].split('-');
129
- for (let j = 0; j < path_words.length; j++) {
130
- const translate_output = translator.translate(path_words[j], spellcheck_options);
131
- if (Object.keys(translate_output).length) {
132
- for (const spell_val in translate_output) {
133
- if (translate_output.hasOwnProperty(spell_val)) {
134
- for (const spelling in translate_output[spell_val][0]) {
135
- if (translate_output[spell_val][0].hasOwnProperty(spelling)) {
136
- if (!excludes[key]) {
137
- nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
138
- } else if (!excludes[key].includes(spelling.toLowerCase())) {
139
- nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
140
- }
141
- }
142
- }
143
- }
144
- }
145
- }
146
- }
147
- }
148
-
149
- // Validate display names/bookmarks
150
- for (let i = 0; i < flat_nav[key].length; i++) {
151
- if (flat_nav[key][i].link === key) {
152
- const translate_output = translator.translate(flat_nav[key][i].text, spellcheck_options);
153
- if (Object.keys(translate_output).length) {
154
- for (const spell_val in translate_output) {
155
- if (translate_output.hasOwnProperty(spell_val)) {
156
- for (let j = 0; j < translate_output[spell_val].length; j++) {
157
- for (const spelling in translate_output[spell_val][j]) {
158
- if (translate_output[spell_val][j].hasOwnProperty(spelling)) {
159
- if (!excludes[key]) {
160
- nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
161
- } else if (!excludes[key].includes(spelling.toLowerCase())) {
162
- nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
163
- }
164
- }
165
- }
166
- }
167
- }
168
- }
169
- }
170
- }
171
- }
172
-
173
- }
174
- }
175
- return nav_errors;
176
- };
177
-
178
- const checkLinks = async function (source_path, htmlFile, links, hdocbook_config) {
179
- for (let i = 0; i < links.length; i++) {
180
-
181
- // Validate that link is a valid URL first
182
- const valid_url = hdoc.valid_url(links[i]);
183
- if (!valid_url) {
184
- // Could be a relative path, check
185
- if (links[i].startsWith('/')) {
186
- let link_root = links[i].split('/');
187
- link_root = link_root[0] !== '' ? link_root[0] : link_root[1];
188
- if (link_root !== hdocbook_config.docId) continue;
189
- isRelativePath(source_path, htmlFile, links[i]);
190
- } else if (links[i].startsWith('#')) {
191
- //Anchor - do nothing
192
- } else {
193
- errors[htmlFile.relativePath].push(`Root relative links should start with a forward-slash: ${links[i]}`);
194
- }
195
- } else {
196
- messages[htmlFile.relativePath].push(`Link is a properly formatted external URL: ${links[i]}`);
197
-
198
- // Skip if it's the auto-generated edit url, as these could be part of a private repo which would return a 404
199
- if (hdocbook_config.publicSource !== undefined && links[i] === hdoc.get_github_api_path(hdocbook_config.publicSource, htmlFile.relativePath).edit_path.replace(path.extname(htmlFile.relativePath), '.md')) {
200
- continue;
201
- }
202
-
203
- if (valid_url.protocol === 'mailto:') {
204
- continue;
205
- }
206
-
207
- // Skip if the link is excluded in the project config
208
- if (excludeLink(links[i])) {
209
- messages[htmlFile.relativePath].push(`Skipping link validation for: ${links[i]}`);
210
- continue;
211
- }
212
-
213
- if (links[i].toLowerCase().includes('docs.hornbill.com') || links[i].toLowerCase().includes('docs-internal.hornbill.com')) {
214
- errors[htmlFile.relativePath].push(`Links to Hornbill Docs should rooted and not fully-qualified: ${links[i]}`);
215
- continue;
216
- }
217
-
218
- try {
219
- await axios.get(links[i], {
220
- httpsAgent: agent
221
- });
222
- messages[htmlFile.relativePath].push(`Link is a valid external URL: ${links[i]}`);
223
- } catch (e) {
224
- // Handle errors
225
- errors[htmlFile.relativePath].push(`Link is not responding: ${links[i]} - [${e.message}]`);
226
- }
227
- }
228
- }
229
- };
230
-
231
- const checkImages = async function (source_path, htmlFile, links) {
232
- for (let i = 0; i < links.length; i++) {
233
-
234
- // Validate that image is a valid URL first
235
- if (!hdoc.valid_url(links[i])) {
236
- // Could be a relative path, check image exists
237
- doesFileExist(source_path, htmlFile, links[i]);
238
- } else {
239
- messages[htmlFile.relativePath].push(`Image link is a properly formatted external URL: ${links[i]}`);
240
- // Do a Get to the URL to see if it exists
241
- try {
242
- const res = await axios.get(links[i]);
243
- messages[htmlFile.relativePath].push(`Image link is a valid external URL: ${links[i]}`);
244
- } catch (e) {
245
- // Handle errors
246
- errors[htmlFile.relativePath].push(`Unexpected Error from external image link: ${links[i]} - ${e.message}`);
247
- }
248
- }
249
- }
250
- };
251
-
252
- const checkTags = async function (htmlFile) {
253
- // Check if file is excluded from tag check
254
- const file_no_ext = htmlFile.relativePath.replace(path.extname(htmlFile.relativePath), '');
255
- if (exclude_h1_count[file_no_ext]) return;
256
-
257
- // Check tags
258
- const htmlBody = fs.readFileSync(htmlFile.path, 'utf8');
259
- const $ = cheerio.load(htmlBody);
260
-
261
- const h1_tags = $('h1').map(function () {
262
- return $(this);
263
- }).get();
264
- if (h1_tags.length && h1_tags.length > 1) {
265
- let error_msg = `${h1_tags.length} <h1> tags found in content: `;
266
- for (let i = 0; i < h1_tags.length; i++) {
267
- error_msg += h1_tags[i].text();
268
- if (i < h1_tags.length - 1) error_msg += '; ';
269
- }
270
- errors[htmlFile.relativePath].push(error_msg);
271
- }
272
- };
273
-
274
- const dreeOptions = {
275
- descendants: true,
276
- depth: 10,
277
- extensions: ['htm', 'html', 'md'],
278
- hash: false,
279
- normalize: true,
280
- size: false,
281
- sizeInBytes: false,
282
- stat: false,
283
- symbolicLinks: false
284
- };
285
-
286
- // File scan callback
287
- const fileCallback = function (element) {
288
- if (element.extension.toLowerCase() === 'md') {
289
- md_to_validate.push(element);
290
- } else {
291
- html_to_validate.push(element);
292
- }
293
- };
294
-
295
- const isRelativePath = function (source_path, html_path, relative_path) {
296
- const rel_path_ext = path.extname(relative_path);
297
- let response = {
298
- is_rel_path: false,
299
- has_md_extension: rel_path_ext === '.md'
300
- };
301
-
302
- const supported_relpaths = [
303
- path.sep + 'index.htm',
304
- path.sep + 'index.html',
305
- '.htm',
306
- '.html',
307
- '.md'
308
- ];
309
-
310
- // Remove explicit anchor links and _books prefix
311
- relative_path = relative_path.split('#')[0].replace('_books/', '');
312
-
313
- // Make full file path
314
- const file_path = path.join(source_path, relative_path);
315
-
316
- // Does path exist?
317
- if (fs.existsSync(file_path)) {
318
- response.is_rel_path = true;
319
- } else {
320
- // Path
321
- for (let i = 0; i < supported_relpaths.length; i++) {
322
- if (fs.existsSync(`${file_path}${supported_relpaths[i]}`)) {
323
- response.is_rel_path = true;
324
- break;
325
- }
326
- }
327
- }
328
- if (response.has_md_extension) {
329
- errors[html_path.relativePath].push(`Relative link contains MD extension, but should not: ${relative_path}`);
330
- } else {
331
- if (response.is_rel_path) {
332
- messages[html_path.relativePath].push(`Relative path exists: ${relative_path}`);
333
- } else {
334
- errors[html_path.relativePath].push(`Link path does not exist: ${relative_path}`);
335
- }
336
- }
337
- }
338
-
339
- const doesFileExist = function (source_path, html_path, relative_path) {
340
- // Remove explicit anchor links and _books prefix
341
- relative_path = relative_path.split('#')[0].replace('_books/', '');
342
- const file_path = path.join(source_path, relative_path);
343
- if (!fs.existsSync(file_path) && !fs.existsSync(file_path + path.sep + 'index.htm') && !fs.existsSync(file_path + 'index.html') && !fs.existsSync(file_path + '.htm') && !fs.existsSync(file_path + '.html')) {
344
- errors[html_path.relativePath].push(`Book resource does not exist: ${relative_path}`);
345
- return false;
346
- } else {
347
- messages[html_path.relativePath].push(`Book resource exists: ${relative_path}`);
348
- }
349
- return true;
350
- };
351
-
352
- // Takes a dree element, returns an object with a pair of arrays
353
- const getLinks = function (file) {
354
- messages[file.relativePath].push('Parsing HTML file');
355
- const htmlBody = fs.readFileSync(file.path, 'utf8');
356
- let links = {
357
- href: [],
358
- img: []
359
- };
360
- const $ = cheerio.load(htmlBody);
361
- const hrefs = $('a').map(function (i) {
362
- return $(this).attr('href');
363
- }).get();
364
- const srcs = $('img').map(function (i) {
365
- return $(this).attr('src');
366
- }).get();
367
- links.href.push(...hrefs);
368
- links.img.push(...srcs);
369
- return links;
370
- };
371
-
372
- exports.run = async function (source_path, doc_id, verbose, hdocbook_config, hdocbook_project, nav_items, prod_families, prods_supported, gen_exclude) {
373
- console.log(`Performing Validation and Building SEO Link List...`);
374
-
375
- // Get a list of HTML files in source_path
376
- dree.scan(source_path, dreeOptions, fileCallback);
377
-
378
- // Check product family
379
- let valid_product = false;
380
- let meta_errors = [];
381
- for (let i = 0; i < prod_families.products.length; i++) {
382
- if (prod_families.products[i].id === hdocbook_config.productFamily) {
383
- valid_product = true;
384
- }
385
- }
386
- if (!valid_product) {
387
- let val_prod_error = `Incorrect productFamily: ${hdocbook_config.productFamily}. Supported values:`;
388
- for (let i = 0; i < prods_supported.length; i++) {
389
- val_prod_error += `\n - ${prods_supported[i]}`
390
- }
391
- meta_errors.push(val_prod_error)
392
- }
393
-
394
- if (hdocbook_config.publicSource && hdocbook_config.publicSource !== '') {
395
- // Validate publicSource
396
- if (hdocbook_config.publicSource.toLowerCase() === '--publicsource--') {
397
- meta_errors.push(`Value for publicSource in book metadata is set to its default template value`);
398
- } else {
399
- // Check URL exists
400
- if (!hdocbook_config.publicSource.startsWith('https://github.com') && !hdocbook_config.publicSource.startsWith('https://api.github.com')) {
401
- meta_errors.push(`Value for publicSource in book metadata is not a recognised GitHub URL: ${hdocbook_config.publicSource}`);
402
- }
403
- }
404
- }
405
-
406
- if (!hdocbook_config.audience || !(hdocbook_config.audience instanceof Array) || hdocbook_config.audience.length === 0) {
407
- meta_errors.push(`Property audience of type array in book metadata is mandatory.`);
408
- }
409
- if (hdocbook_project.validation) {
410
- if (hdocbook_project.validation.exclude_links && hdocbook_project.validation.exclude_links instanceof Array) {
411
- hdocbook_project.validation.exclude_links.forEach(function (excl_link) {
412
- exclude_links[excl_link] = true;
413
- });
414
- }
415
- if (hdocbook_project.validation.exclude_spellcheck && hdocbook_project.validation.exclude_spellcheck instanceof Array) {
416
- hdocbook_project.validation.exclude_spellcheck.forEach(function (excl_sc) {
417
- exclude_spellcheck[excl_sc.document_path] = excl_sc.words;
418
- });
419
- }
420
- if (hdocbook_project.validation.exclude_h1_count && hdocbook_project.validation.exclude_h1_count instanceof Array) {
421
- hdocbook_project.validation.exclude_h1_count.forEach(function (excl_h1) {
422
- exclude_h1_count[excl_h1] = true;
423
- });
424
- }
425
- }
426
-
427
- // Check navigation spellings
428
- const nav_errors = await checkNavigation(source_path, nav_items, exclude_spellcheck);
429
- if (nav_errors.length > 0) meta_errors.push(...nav_errors);
430
-
431
- if (meta_errors.length > 0) {
432
- console.log('\r\n-----------------------');
433
- console.log(' Validation Output ');
434
- console.log('-----------------------');
435
- for (let i = 0; i < meta_errors.length; i++) {
436
- console.log(`- ${meta_errors[i]}`);
437
- }
438
- console.log(`\r\n${meta_errors.length} Validation Errors Found`);
439
- return false;
440
- }
441
-
442
-
443
- let excl_output = [];
444
-
445
- // Do spellchecking on markdown files
446
- let md_files_spellchecked = {};
447
- let mdPromiseArray = [];
448
- for (let i = 0; i < md_to_validate.length; i++) {
449
- errors[md_to_validate[i].relativePath] = [];
450
- messages[md_to_validate[i].relativePath] = [];
451
- warnings[md_to_validate[i].relativePath] = [];
452
- mdPromiseArray.push(md_to_validate[i]);
453
- }
454
- await Promise.all(mdPromiseArray.map(async (file) => {
455
- // Initiate maps for errors and verbose messages for markdown file
456
- const exclusions = await spellcheckContent(file, exclude_spellcheck);
457
- if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
458
- md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')] = true;
459
- }));
460
-
461
- // Perform rest of validation against HTML files
462
- let listContent = '';
463
- let htmlPromiseArray = [];
464
- for (let i = 0; i < html_to_validate.length; i++) {
465
- errors[html_to_validate[i].relativePath] = [];
466
- messages[html_to_validate[i].relativePath] = [];
467
- warnings[html_to_validate[i].relativePath] = [];
468
- htmlPromiseArray.push(html_to_validate[i]);
469
- }
470
- await Promise.all(htmlPromiseArray.map(async (file) => {
471
- // Check for British spellings in static HTML content
472
- if (!md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')]) {
473
- const exclusions = await spellcheckContent(file, exclude_spellcheck);
474
- if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
475
- }
476
-
477
- const links = getLinks(file);
478
- if (links.href.length === 0) {
479
- messages[file.relativePath].push('No links found in file');
480
- } else {
481
- await checkLinks(source_path, file, links.href, hdocbook_config);
482
- }
483
- if (links.img.length === 0) {
484
- messages[file.relativePath].push('No images found in file');
485
- } else {
486
- await checkImages(source_path, file, links.img);
487
- }
488
-
489
- // Check for multiple H1 tags
490
- await checkTags(file);
491
-
492
- // Build list content for Google
493
- listContent += `/${file.relativePath.replace(path.extname(file.relativePath), '')}`;
494
- listContent += '\r\n';
495
- }));
496
-
497
- if (gen_exclude) console.log(JSON.stringify(excl_output, null, 2));
498
-
499
- try {
500
- // Write list
501
- const listFile = path.join(source_path, doc_id, 'links.txt');
502
- fs.writeFileSync(listFile, listContent);
503
- console.log(`\r\nLink list text file created successfully: ${listFile}`);
504
- } catch (err) {
505
- console.error(err);
506
- }
507
-
508
- if (verbose) {
509
- console.log('\r\n-------------');
510
- console.log(' Verbose ');
511
- console.log('-------------');
512
- for (const key in messages) {
513
- if (messages.hasOwnProperty(key) && messages[key].length > 0) {
514
- console.log(`\r\nMessage output for ${key}`);
515
- for (let i = 0; i < messages[key].length; i++) {
516
- console.log(` - ${messages[key][i]}`);
517
- }
518
- }
519
- }
520
- }
521
-
522
- console.log('\r\n-----------------------');
523
- console.log(' Validation Output ');
524
- console.log('-----------------------');
525
- if (Object.keys(errors).length > 0) {
526
- let error_count = 0;
527
- for (const key in errors) {
528
- if (errors.hasOwnProperty(key) && errors[key].length > 0) {
529
- console.log(`\r\n${errors[key].length} error(s) in ${key}`);
530
- for (let i = 0; i < errors[key].length; i++) {
531
- console.log(` - ${errors[key][i]}`);
532
- error_count++
533
- }
534
- }
535
- }
536
- if (error_count > 0) {
537
- console.log(`\r\n${error_count} Validation Errors Found`);
538
- if (verbose) {
539
- console.log(`\n`);
540
- console.log(JSON.stringify(exclude_spellcheck_output, null, 2));
541
- }
542
- return false;
543
- }
544
- }
545
-
546
- console.log(`\r\nNo Validation Errors Found!\n`);
547
- return true;
548
- };
1
+ (function () {
2
+ 'use strict';
3
+
4
+ const axios = require('axios'),
5
+ cheerio = require('cheerio'),
6
+ dree = require('dree'),
7
+ fs = require('fs'),
8
+ path = require('path'),
9
+ https = require('https'),
10
+ hdoc = require(path.join(__dirname, 'hdoc-module.js')),
11
+ translator = require('american-british-english-translator'),
12
+ { trueCasePathSync } = require('true-case-path');
13
+
14
+ const spellcheck_options = {
15
+ british: true,
16
+ spelling: true
17
+ },
18
+ regex_nav_paths = /[a-z0-9-\/]+[a-z0-9]+#{0,1}[a-z0-9-\/]+/,
19
+ agent = new https.Agent({
20
+ rejectUnauthorized: false
21
+ });
22
+
23
+ let errors = {},
24
+ messages = {},
25
+ warnings = {},
26
+ html_to_validate = [],
27
+ md_to_validate = [],
28
+ exclude_links = {},
29
+ exclude_spellcheck = {},
30
+ exclude_h1_count = {},
31
+ exclude_spellcheck_output = [];
32
+
33
+ const excludeLink = async function (url) {
34
+ if (exclude_links[url]) return true;
35
+
36
+ for (let key in exclude_links) {
37
+ if (key.endsWith('*')) {
38
+ key = key.substring(0, key.length - 1);
39
+ if (url.startsWith(key)) return true;
40
+ }
41
+ }
42
+
43
+ return false;
44
+ };
45
+
46
+ const spellcheckContent = async function (sourceFile, excludes) {
47
+ let spelling_errors = {};
48
+ let words = [];
49
+ const text = fs.readFileSync(sourceFile.path, 'utf8');
50
+ const source_path = sourceFile.relativePath.replace('.' + sourceFile.extension, '');
51
+ const translate_output = translator.translate(text, spellcheck_options);
52
+ if (Object.keys(translate_output).length) {
53
+ for (const key in translate_output) {
54
+ if (translate_output.hasOwnProperty(key)) {
55
+ let error_message = `Line ${key} - British spelling:`;
56
+ for (let i = 0; i < translate_output[key].length; i++) {
57
+ for (const spelling in translate_output[key][i]) {
58
+ if (translate_output[key][i].hasOwnProperty(spelling) && (typeof translate_output[key][i][spelling].details === 'string')) {
59
+ if (!excludes[source_path]) {
60
+ errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
61
+ spelling_errors[spelling] = true;
62
+ } else if (!excludes[source_path].includes(spelling.toLowerCase())) {
63
+ errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
64
+ spelling_errors[spelling] = true;
65
+ }
66
+ }
67
+ }
68
+ }
69
+ }
70
+ }
71
+ }
72
+ if (Object.keys(spelling_errors).length) {
73
+ let exclude_output = {
74
+ document_path: sourceFile.relativePath.replace(path.extname(sourceFile.relativePath), ''),
75
+ words: []
76
+ };
77
+ for (const word in spelling_errors) {
78
+ if (spelling_errors.hasOwnProperty(word)) {
79
+ words.push(word);
80
+ exclude_output.words.push(word);
81
+ }
82
+ }
83
+ exclude_spellcheck_output.push(exclude_output);
84
+ }
85
+ return words;
86
+ };
87
+
88
+ const checkNavigation = async function (source_path, flat_nav, excludes) {
89
+ let nav_errors = [];
90
+ for (let key in flat_nav) {
91
+ if (flat_nav.hasOwnProperty(key)) {
92
+ // doc paths should only contain a-z - characters
93
+ const invalid_chars = key.replace(regex_nav_paths, '');
94
+ if (invalid_chars !== '') {
95
+ nav_errors.push(`Navigation path [${key}] contains the following invalid characters: [${[...invalid_chars].join('] [')}]`);
96
+ }
97
+ const key_split = key.split('#');
98
+ const key_no_hash = key_split[0];
99
+
100
+ // Validate path exists - key should be a html file at this point
101
+ let file_exists = true;
102
+ let file_name = path.join(source_path, key_no_hash + '.html');
103
+ if (!fs.existsSync(file_name)) {
104
+ file_name = path.join(source_path, key_no_hash + '.htm');
105
+ if (!fs.existsSync(file_name)) {
106
+ file_name = path.join(source_path, key_no_hash, 'index.html');
107
+ if (!fs.existsSync(file_name)) {
108
+ file_name = path.join(source_path, key_no_hash, 'index.htm');
109
+ if (!fs.existsSync(file_name)) {
110
+ file_exists = false;
111
+ nav_errors.push(`Navigation path [${key_no_hash}] file does not exist.`);
112
+ }
113
+ }
114
+ }
115
+ }
116
+
117
+ if (file_exists) {
118
+ const true_file = trueCasePathSync(file_name).replace(source_path, '').replaceAll('\\', '/');
119
+ const relative_file = file_name.replace(source_path, '').replaceAll('\\', '/');
120
+ if (true_file !== relative_file) {
121
+ nav_errors.push(`Navigation path [${key}] for filename [${relative_file}] does not match filename case [${true_file}].`);
122
+ }
123
+ }
124
+
125
+ // Validate path spellings
126
+ const paths = key.split('/');
127
+ for (let i = 0; i < paths.length; i++) {
128
+ const path_words = paths[i].split('-');
129
+ for (let j = 0; j < path_words.length; j++) {
130
+ const translate_output = translator.translate(path_words[j], spellcheck_options);
131
+ if (Object.keys(translate_output).length) {
132
+ for (const spell_val in translate_output) {
133
+ if (translate_output.hasOwnProperty(spell_val)) {
134
+ for (const spelling in translate_output[spell_val][0]) {
135
+ if (translate_output[spell_val][0].hasOwnProperty(spelling)) {
136
+ if (!excludes[key]) {
137
+ nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
138
+ } else if (!excludes[key].includes(spelling.toLowerCase())) {
139
+ nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
140
+ }
141
+ }
142
+ }
143
+ }
144
+ }
145
+ }
146
+ }
147
+ }
148
+
149
+ // Validate display names/bookmarks
150
+ for (let i = 0; i < flat_nav[key].length; i++) {
151
+ if (flat_nav[key][i].link === key) {
152
+ const translate_output = translator.translate(flat_nav[key][i].text, spellcheck_options);
153
+ if (Object.keys(translate_output).length) {
154
+ for (const spell_val in translate_output) {
155
+ if (translate_output.hasOwnProperty(spell_val)) {
156
+ for (let j = 0; j < translate_output[spell_val].length; j++) {
157
+ for (const spelling in translate_output[spell_val][j]) {
158
+ if (translate_output[spell_val][j].hasOwnProperty(spelling)) {
159
+ if (!excludes[key]) {
160
+ nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
161
+ } else if (!excludes[key].includes(spelling.toLowerCase())) {
162
+ nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
163
+ }
164
+ }
165
+ }
166
+ }
167
+ }
168
+ }
169
+ }
170
+ }
171
+ }
172
+
173
+ }
174
+ }
175
+ return nav_errors;
176
+ };
177
+
178
+ const checkLinks = async function (source_path, htmlFile, links, hdocbook_config) {
179
+ for (let i = 0; i < links.length; i++) {
180
+
181
+ // Validate that link is a valid URL first
182
+ const valid_url = hdoc.valid_url(links[i]);
183
+ if (!valid_url) {
184
+ // Could be a relative path, check
185
+ if (links[i].startsWith('/')) {
186
+ let link_root = links[i].split('/');
187
+ link_root = link_root[0] !== '' ? link_root[0] : link_root[1];
188
+ if (link_root !== hdocbook_config.docId) continue;
189
+ isRelativePath(source_path, htmlFile, links[i]);
190
+ } else if (links[i].startsWith('#')) {
191
+ //Anchor - do nothing
192
+ } else {
193
+ errors[htmlFile.relativePath].push(`Root relative links should start with a forward-slash: ${links[i]}`);
194
+ }
195
+ } else {
196
+ messages[htmlFile.relativePath].push(`Link is a properly formatted external URL: ${links[i]}`);
197
+
198
+ // Skip if it's the auto-generated edit url, as these could be part of a private repo which would return a 404
199
+ if (hdocbook_config.publicSource !== undefined && links[i] === hdoc.get_github_api_path(hdocbook_config.publicSource, htmlFile.relativePath).edit_path.replace(path.extname(htmlFile.relativePath), '.md')) {
200
+ continue;
201
+ }
202
+
203
+ if (valid_url.protocol === 'mailto:') {
204
+ continue;
205
+ }
206
+
207
+ // Skip if the link is excluded in the project config
208
+ if (excludeLink(links[i])) {
209
+ messages[htmlFile.relativePath].push(`Skipping link validation for: ${links[i]}`);
210
+ continue;
211
+ }
212
+
213
+ if (links[i].toLowerCase().includes('docs.hornbill.com') || links[i].toLowerCase().includes('docs-internal.hornbill.com')) {
214
+ errors[htmlFile.relativePath].push(`Links to Hornbill Docs should rooted and not fully-qualified: ${links[i]}`);
215
+ continue;
216
+ }
217
+
218
+ try {
219
+ await axios.get(links[i], {
220
+ httpsAgent: agent
221
+ });
222
+ messages[htmlFile.relativePath].push(`Link is a valid external URL: ${links[i]}`);
223
+ } catch (e) {
224
+ // Handle errors
225
+ errors[htmlFile.relativePath].push(`Link is not responding: ${links[i]} - [${e.message}]`);
226
+ }
227
+ }
228
+ }
229
+ };
230
+
231
+ const checkImages = async function (source_path, htmlFile, links) {
232
+ for (let i = 0; i < links.length; i++) {
233
+
234
+ // Validate that image is a valid URL first
235
+ if (!hdoc.valid_url(links[i])) {
236
+ // Could be a relative path, check image exists
237
+ doesFileExist(source_path, htmlFile, links[i]);
238
+ } else {
239
+ messages[htmlFile.relativePath].push(`Image link is a properly formatted external URL: ${links[i]}`);
240
+ // Do a Get to the URL to see if it exists
241
+ try {
242
+ const res = await axios.get(links[i]);
243
+ messages[htmlFile.relativePath].push(`Image link is a valid external URL: ${links[i]}`);
244
+ } catch (e) {
245
+ // Handle errors
246
+ errors[htmlFile.relativePath].push(`Unexpected Error from external image link: ${links[i]} - ${e.message}`);
247
+ }
248
+ }
249
+ }
250
+ };
251
+
252
/**
 * Reports an error when an HTML file contains more than one <h1> element.
 *
 * Files whose relative path (without extension) is present in the
 * module-level exclude_h1_count map are skipped.
 *
 * @param {object} htmlFile - dree element of the HTML file being validated.
 * @returns {Promise<void>}
 */
const checkTags = async function (htmlFile) {
    // Exclusions are keyed by the relative path with its extension removed
    const extension = path.extname(htmlFile.relativePath);
    const excludeKey = htmlFile.relativePath.replace(extension, '');
    if (exclude_h1_count[excludeKey]) return;

    const $ = cheerio.load(fs.readFileSync(htmlFile.path, 'utf8'));
    const headings = $('h1').toArray();

    // Zero or one heading is fine
    if (headings.length <= 1) return;

    const headingTexts = headings.map((heading) => $(heading).text());
    errors[htmlFile.relativePath].push(`${headings.length} <h1> tags found in content: ${headingTexts.join('; ')}`);
};
273
+
274
// Option object handed to dree.scan() by run() when collecting the files to validate
const dreeOptions = {
    // File selection and traversal settings
    extensions: ['htm', 'html', 'md'],
    depth: 10,
    descendants: true,
    symbolicLinks: false,
    normalize: true,

    // Per-entry extras that are all switched off
    hash: false,
    size: false,
    sizeInBytes: false,
    stat: false
};
285
+
286
// File scan callback: Markdown files go to md_to_validate, every other
// scanned file goes to html_to_validate
const fileCallback = function (element) {
    const isMarkdown = element.extension.toLowerCase() === 'md';
    const queue = isMarkdown ? md_to_validate : html_to_validate;
    queue.push(element);
};
294
+
295
/**
 * Checks a book-relative link and records the outcome for the HTML file.
 *
 * The anchor (everything from the first '#') and the first '_books/' segment
 * are removed before the link is examined. A link whose remaining path ends
 * in '.md' is always an error. Otherwise the link is accepted when the path
 * itself exists under source_path, or when it exists with one of the
 * supported suffixes appended (index.htm / index.html inside it, or .htm,
 * .html, .md next to it).
 *
 * Results are pushed onto the module-level errors/messages maps under
 * html_path.relativePath; nothing is returned.
 *
 * @param {string} source_path - Root folder the book was scanned from.
 * @param {object} html_path - dree element of the HTML file containing the link.
 * @param {string} relative_path - The link as found in the HTML.
 */
const isRelativePath = function (source_path, html_path, relative_path) {
    // Remove explicit anchor links and _books prefix
    const target = relative_path.split('#')[0].replace('_books/', '');

    // The extension must be read AFTER the anchor is stripped: for
    // 'page.md#section' path.extname() would otherwise yield '.md#section'
    // and the Markdown link would go unreported.
    if (path.extname(target) === '.md') {
        errors[html_path.relativePath].push(`Relative link contains MD extension, but should not: ${target}`);
        return;
    }

    // Make full file path
    const file_path = path.join(source_path, target);

    // The empty suffix tests the path exactly as written
    const supported_suffixes = [
        '',
        path.sep + 'index.htm',
        path.sep + 'index.html',
        '.htm',
        '.html',
        '.md'
    ];
    const path_exists = supported_suffixes.some((suffix) => fs.existsSync(`${file_path}${suffix}`));

    if (path_exists) {
        messages[html_path.relativePath].push(`Relative path exists: ${target}`);
    } else {
        errors[html_path.relativePath].push(`Link path does not exist: ${target}`);
    }
};
338
+
339
/**
 * Checks that a resource referenced from an HTML file exists in the book.
 *
 * The anchor (everything from the first '#') and the first '_books/' segment
 * are removed, then the resource is accepted when the path exists as written,
 * contains an index.htm / index.html, or exists with .htm / .html appended.
 * The outcome is pushed onto the module-level errors/messages maps under
 * html_path.relativePath.
 *
 * @param {string} source_path - Root folder the book was scanned from.
 * @param {object} html_path - dree element of the HTML file containing the reference.
 * @param {string} relative_path - The reference as found in the HTML.
 * @returns {boolean} true when the resource was found, false otherwise.
 */
const doesFileExist = function (source_path, html_path, relative_path) {
    // Remove explicit anchor links and _books prefix
    const resource = relative_path.split('#')[0].replace('_books/', '');
    const file_path = path.join(source_path, resource);

    // Both index probes need the separator; without it 'foo' would be matched
    // by an unrelated sibling file named 'fooindex.html'.
    const candidates = [
        file_path,
        file_path + path.sep + 'index.htm',
        file_path + path.sep + 'index.html',
        file_path + '.htm',
        file_path + '.html'
    ];

    if (!candidates.some((candidate) => fs.existsSync(candidate))) {
        errors[html_path.relativePath].push(`Book resource does not exist: ${resource}`);
        return false;
    }

    messages[html_path.relativePath].push(`Book resource exists: ${resource}`);
    return true;
};
351
+
352
// Takes a dree element for an HTML file and returns an object holding two
// arrays: `href` with the href attribute of every <a> element and `img` with
// the src attribute of every <img> element. Also logs a verbose message for
// the file in the module-level messages map.
const getLinks = function (file) {
    messages[file.relativePath].push('Parsing HTML file');

    const $ = cheerio.load(fs.readFileSync(file.path, 'utf8'));

    // Collects one attribute from every element matching the selector
    const collectAttribute = (selector, attribute) =>
        $(selector).map((_, node) => $(node).attr(attribute)).get();

    const anchorTargets = collectAttribute('a', 'href');
    const imageSources = collectAttribute('img', 'src');

    return {
        href: [...anchorTargets],
        img: [...imageSources]
    };
};
371
+
372
+ exports.run = async function (source_path, doc_id, verbose, hdocbook_config, hdocbook_project, nav_items, prod_families, prods_supported, gen_exclude) {
373
+ console.log(`Performing Validation and Building SEO Link List...`);
374
+
375
+ // Get a list of HTML files in source_path
376
+ dree.scan(source_path, dreeOptions, fileCallback);
377
+
378
+ // Check product family
379
+ let valid_product = false;
380
+ let meta_errors = [];
381
+ for (let i = 0; i < prod_families.products.length; i++) {
382
+ if (prod_families.products[i].id === hdocbook_config.productFamily) {
383
+ valid_product = true;
384
+ }
385
+ }
386
+ if (!valid_product) {
387
+ let val_prod_error = `Incorrect productFamily: ${hdocbook_config.productFamily}. Supported values:`;
388
+ for (let i = 0; i < prods_supported.length; i++) {
389
+ val_prod_error += `\n - ${prods_supported[i]}`
390
+ }
391
+ meta_errors.push(val_prod_error)
392
+ }
393
+
394
+ if (hdocbook_config.publicSource && hdocbook_config.publicSource !== '') {
395
+ // Validate publicSource
396
+ if (hdocbook_config.publicSource.toLowerCase() === '--publicsource--') {
397
+ meta_errors.push(`Value for publicSource in book metadata is set to its default template value`);
398
+ } else {
399
+ // Check URL exists
400
+ if (!hdocbook_config.publicSource.startsWith('https://github.com') && !hdocbook_config.publicSource.startsWith('https://api.github.com')) {
401
+ meta_errors.push(`Value for publicSource in book metadata is not a recognised GitHub URL: ${hdocbook_config.publicSource}`);
402
+ }
403
+ }
404
+ }
405
+
406
+ if (!hdocbook_config.audience || !(hdocbook_config.audience instanceof Array) || hdocbook_config.audience.length === 0) {
407
+ meta_errors.push(`Property audience of type array in book metadata is mandatory.`);
408
+ }
409
+ if (hdocbook_project.validation) {
410
+ if (hdocbook_project.validation.exclude_links && hdocbook_project.validation.exclude_links instanceof Array) {
411
+ hdocbook_project.validation.exclude_links.forEach(function (excl_link) {
412
+ exclude_links[excl_link] = true;
413
+ });
414
+ }
415
+ if (hdocbook_project.validation.exclude_spellcheck && hdocbook_project.validation.exclude_spellcheck instanceof Array) {
416
+ hdocbook_project.validation.exclude_spellcheck.forEach(function (excl_sc) {
417
+ exclude_spellcheck[excl_sc.document_path] = excl_sc.words;
418
+ });
419
+ }
420
+ if (hdocbook_project.validation.exclude_h1_count && hdocbook_project.validation.exclude_h1_count instanceof Array) {
421
+ hdocbook_project.validation.exclude_h1_count.forEach(function (excl_h1) {
422
+ exclude_h1_count[excl_h1] = true;
423
+ });
424
+ }
425
+ }
426
+
427
+ // Check navigation spellings
428
+ const nav_errors = await checkNavigation(source_path, nav_items, exclude_spellcheck);
429
+ if (nav_errors.length > 0) meta_errors.push(...nav_errors);
430
+
431
+ if (meta_errors.length > 0) {
432
+ console.log('\r\n-----------------------');
433
+ console.log(' Validation Output ');
434
+ console.log('-----------------------');
435
+ for (let i = 0; i < meta_errors.length; i++) {
436
+ console.log(`- ${meta_errors[i]}`);
437
+ }
438
+ console.log(`\r\n${meta_errors.length} Validation Errors Found`);
439
+ return false;
440
+ }
441
+
442
+
443
+ let excl_output = [];
444
+
445
+ // Do spellchecking on markdown files
446
+ let md_files_spellchecked = {};
447
+ let mdPromiseArray = [];
448
+ for (let i = 0; i < md_to_validate.length; i++) {
449
+ errors[md_to_validate[i].relativePath] = [];
450
+ messages[md_to_validate[i].relativePath] = [];
451
+ warnings[md_to_validate[i].relativePath] = [];
452
+ mdPromiseArray.push(md_to_validate[i]);
453
+ }
454
+ await Promise.all(mdPromiseArray.map(async (file) => {
455
+ // Initiate maps for errors and verbose messages for markdown file
456
+ const exclusions = await spellcheckContent(file, exclude_spellcheck);
457
+ if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
458
+ md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')] = true;
459
+ }));
460
+
461
+ // Perform rest of validation against HTML files
462
+ let listContent = '';
463
+ let htmlPromiseArray = [];
464
+ for (let i = 0; i < html_to_validate.length; i++) {
465
+ errors[html_to_validate[i].relativePath] = [];
466
+ messages[html_to_validate[i].relativePath] = [];
467
+ warnings[html_to_validate[i].relativePath] = [];
468
+ htmlPromiseArray.push(html_to_validate[i]);
469
+ }
470
+ await Promise.all(htmlPromiseArray.map(async (file) => {
471
+ // Check for British spellings in static HTML content
472
+ if (!md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')]) {
473
+ const exclusions = await spellcheckContent(file, exclude_spellcheck);
474
+ if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
475
+ }
476
+
477
+ const links = getLinks(file);
478
+ if (links.href.length === 0) {
479
+ messages[file.relativePath].push('No links found in file');
480
+ } else {
481
+ await checkLinks(source_path, file, links.href, hdocbook_config);
482
+ }
483
+ if (links.img.length === 0) {
484
+ messages[file.relativePath].push('No images found in file');
485
+ } else {
486
+ await checkImages(source_path, file, links.img);
487
+ }
488
+
489
+ // Check for multiple H1 tags
490
+ await checkTags(file);
491
+
492
+ // Build list content for Google
493
+ listContent += `/${file.relativePath.replace(path.extname(file.relativePath), '')}`;
494
+ listContent += '\r\n';
495
+ }));
496
+
497
+ if (gen_exclude) console.log(JSON.stringify(excl_output, null, 2));
498
+
499
+ try {
500
+ // Write list
501
+ const listFile = path.join(source_path, doc_id, 'links.txt');
502
+ fs.writeFileSync(listFile, listContent);
503
+ console.log(`\r\nLink list text file created successfully: ${listFile}`);
504
+ } catch (err) {
505
+ console.error(err);
506
+ }
507
+
508
+ if (verbose) {
509
+ console.log('\r\n-------------');
510
+ console.log(' Verbose ');
511
+ console.log('-------------');
512
+ for (const key in messages) {
513
+ if (messages.hasOwnProperty(key) && messages[key].length > 0) {
514
+ console.log(`\r\nMessage output for ${key}`);
515
+ for (let i = 0; i < messages[key].length; i++) {
516
+ console.log(` - ${messages[key][i]}`);
517
+ }
518
+ }
519
+ }
520
+ }
521
+
522
+ console.log('\r\n-----------------------');
523
+ console.log(' Validation Output ');
524
+ console.log('-----------------------');
525
+ if (Object.keys(errors).length > 0) {
526
+ let error_count = 0;
527
+ for (const key in errors) {
528
+ if (errors.hasOwnProperty(key) && errors[key].length > 0) {
529
+ console.log(`\r\n${errors[key].length} error(s) in ${key}`);
530
+ for (let i = 0; i < errors[key].length; i++) {
531
+ console.log(` - ${errors[key][i]}`);
532
+ error_count++
533
+ }
534
+ }
535
+ }
536
+ if (error_count > 0) {
537
+ console.log(`\r\n${error_count} Validation Errors Found`);
538
+ if (verbose) {
539
+ console.log(`\n`);
540
+ console.log(JSON.stringify(exclude_spellcheck_output, null, 2));
541
+ }
542
+ return false;
543
+ }
544
+ }
545
+
546
+ console.log(`\r\nNo Validation Errors Found!\n`);
547
+ return true;
548
+ };
549
549
  })();