hdoc-tools 0.19.8 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-validate.js CHANGED
@@ -1,717 +1,958 @@
1
- (function () {
2
- 'use strict';
3
-
4
- const axios = require('axios'),
5
- cheerio = require('cheerio'),
6
- dree = require('dree'),
7
- fs = require('fs'),
8
- path = require('path'),
9
- https = require('https'),
10
- hdoc = require(path.join(__dirname, 'hdoc-module.js')),
11
- translator = require('american-british-english-translator'),
12
- { trueCasePathSync } = require('true-case-path');
13
-
14
- const spellcheck_options = {
15
- british: true,
16
- spelling: true
17
- },
18
- regex_nav_paths = /[a-z0-9-\/]+[a-z0-9]+#{0,1}[a-z0-9-\/]+/,
19
- agent = new https.Agent({
20
- rejectUnauthorized: false
21
- });
22
-
23
- let errors = {},
24
- messages = {},
25
- warnings = {},
26
- html_to_validate = [],
27
- md_to_validate = [],
28
- exclude_links = {},
29
- exclude_spellcheck = {},
30
- redirects = {},
31
- exclude_h1_count = {},
32
- exclude_spellcheck_output = [];
33
-
34
- const excludeLink = async function (url) {
35
- if (exclude_links[url]) return true;
36
-
37
- for (let key in exclude_links) {
38
- if (key.endsWith('*')) {
39
- key = key.substring(0, key.length - 1);
40
- if (url.startsWith(key)) return true;
41
- }
42
- }
43
-
44
- return false;
45
- };
46
-
47
- const spellcheckContent = async function (sourceFile, excludes) {
48
- let spelling_errors = {};
49
- let words = [];
50
- const text = fs.readFileSync(sourceFile.path, 'utf8');
51
- const source_path = sourceFile.relativePath.replace('.' + sourceFile.extension, '');
52
- const translate_output = translator.translate(text, spellcheck_options);
53
- if (Object.keys(translate_output).length) {
54
- for (const key in translate_output) {
55
- if (translate_output.hasOwnProperty(key)) {
56
- let error_message = `Line ${key} - British spelling:`;
57
- for (let i = 0; i < translate_output[key].length; i++) {
58
- for (const spelling in translate_output[key][i]) {
59
- if (translate_output[key][i].hasOwnProperty(spelling) && (typeof translate_output[key][i][spelling].details === 'string')) {
60
- if (!excludes[source_path]) {
61
- errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
62
- spelling_errors[spelling] = true;
63
- } else if (!excludes[source_path].includes(spelling.toLowerCase())) {
64
- errors[sourceFile.relativePath].push(`${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`);
65
- spelling_errors[spelling] = true;
66
- }
67
- }
68
- }
69
- }
70
- }
71
- }
72
- }
73
- if (Object.keys(spelling_errors).length) {
74
- let exclude_output = {
75
- document_path: sourceFile.relativePath.replace(path.extname(sourceFile.relativePath), ''),
76
- words: []
77
- };
78
- for (const word in spelling_errors) {
79
- if (spelling_errors.hasOwnProperty(word)) {
80
- words.push(word);
81
- exclude_output.words.push(word);
82
- }
83
- }
84
- exclude_spellcheck_output.push(exclude_output);
85
- }
86
- return words;
87
- };
88
-
89
- const checkInline = async function (source_path, inline, excludes) {
90
- let inline_errors = [];
91
- for (let i = 0; i < inline.length; i++) {
92
- const title = inline[i].title,
93
- link = inline[i].link;
94
-
95
- // Validate link segment spellings
96
- const paths = link.split('/');
97
- for (let i = 0; i < paths.length; i++) {
98
- const path_words = paths[i].split('-');
99
- for (let j = 0; j < path_words.length; j++) {
100
- const translate_output = translator.translate(path_words[j], spellcheck_options);
101
- if (Object.keys(translate_output).length) {
102
- for (const spell_val in translate_output) {
103
- if (translate_output.hasOwnProperty(spell_val)) {
104
- for (const spelling in translate_output[spell_val][0]) {
105
- if (translate_output[spell_val][0].hasOwnProperty(spelling)) {
106
- if (!excludes[link]) {
107
- inline_errors.push(`Inline Link [${link}] contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
108
- } else if (!excludes[link].includes(spelling.toLowerCase())) {
109
- inline_errors.push(`Inline Link [${link}] contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
110
- }
111
- }
112
- }
113
- }
114
- }
115
- }
116
- }
117
- }
118
-
119
- // Validate display names
120
- const translate_output = translator.translate(title, spellcheck_options);
121
- if (Object.keys(translate_output).length) {
122
- for (const spell_val in translate_output) {
123
- if (translate_output.hasOwnProperty(spell_val)) {
124
- for (let j = 0; j < translate_output[spell_val].length; j++) {
125
- for (const spelling in translate_output[spell_val][j]) {
126
- if (translate_output[spell_val][j].hasOwnProperty(spelling)) {
127
- if (!excludes[link]) {
128
- inline_errors.push(`Inline title for link [${link}] contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
129
- } else if (!excludes[link].includes(spelling.toLowerCase())) {
130
- inline_errors.push(`Inline title for link [${link}] contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
131
- }
132
- }
133
- }
134
- }
135
- }
136
- }
137
- }
138
-
139
- // Validate path exists - link should be a html file at this point as its after the content has been built
140
- let file_exists = true;
141
- let file_name = path.join(source_path, link + '.html');
142
- if (!fs.existsSync(file_name)) {
143
- file_name = path.join(source_path, link + '.htm');
144
- if (!fs.existsSync(file_name)) {
145
- file_name = path.join(source_path, link, 'index.html');
146
- if (!fs.existsSync(file_name)) {
147
- file_name = path.join(source_path, link, 'index.htm');
148
- if (!fs.existsSync(file_name)) {
149
- file_exists = false;
150
- inline_errors.push(`Inline link [${link}] file does not exist.`);
151
- }
152
- }
153
- }
154
- }
155
- }
156
-
157
- return inline_errors;
158
- };
159
-
160
- const checkNavigation = async function (source_path, flat_nav, excludes) {
161
- let nav_errors = [];
162
- for (let key in flat_nav) {
163
- if (flat_nav.hasOwnProperty(key)) {
164
- // doc paths should only contain a-z - characters
165
- const invalid_chars = key.replace(regex_nav_paths, '');
166
- if (invalid_chars !== '') {
167
- nav_errors.push(`Navigation path [${key}] contains the following invalid characters: [${[...invalid_chars].join('] [')}]`);
168
- }
169
- const key_split = key.split('#');
170
- const key_no_hash = key_split[0];
171
-
172
- // See if there's a redirect in place
173
- let redirected = false;
174
- let redirect_errored = false;
175
- const redir = checkRedirect(source_path, key_no_hash);
176
-
177
- if (redir.exists && redir.error !== null) {
178
- nav_errors.push(redir.error);
179
- redirect_errored = true;
180
- } else if (redir.exists && redir.error === null) {
181
- redirected = true;
182
- }
183
-
184
- // Validate path exists - key should be a html file at this point
185
- let file_exists = true;
186
- let file_name = path.join(source_path, key_no_hash + '.html');
187
- if (!fs.existsSync(file_name)) {
188
- file_name = path.join(source_path, key_no_hash + '.htm');
189
- if (!fs.existsSync(file_name)) {
190
- file_name = path.join(source_path, key_no_hash, 'index.html');
191
- if (!fs.existsSync(file_name)) {
192
- file_name = path.join(source_path, key_no_hash, 'index.htm');
193
- if (!fs.existsSync(file_name)) {
194
- file_exists = false;
195
- if (!redirected && !redirect_errored)
196
- nav_errors.push(`Navigation path [${key_no_hash}] file does not exist.`);
197
- }
198
- }
199
- }
200
- }
201
-
202
- if (file_exists) {
203
- // File exists - but is there a redirect? If so, we want to flag this as an error
204
- if (redirected)
205
- nav_errors.push(`Navigation path [${key_no_hash}] is redirected, but path still exists.`);
206
-
207
- // Check file path case match
208
- const true_file = trueCasePathSync(file_name).replace(source_path, '').replaceAll('\\', '/');
209
- const relative_file = file_name.replace(source_path, '').replaceAll('\\', '/');
210
- if (true_file !== relative_file) {
211
- nav_errors.push(`Navigation path [${key}] for filename [${relative_file}] does not match filename case [${true_file}].`);
212
- }
213
- }
214
-
215
- // Validate path spellings
216
- const paths = key.split('/');
217
- for (let i = 0; i < paths.length; i++) {
218
- const path_words = paths[i].split('-');
219
- for (let j = 0; j < path_words.length; j++) {
220
- const translate_output = translator.translate(path_words[j], spellcheck_options);
221
- if (Object.keys(translate_output).length) {
222
- for (const spell_val in translate_output) {
223
- if (translate_output.hasOwnProperty(spell_val)) {
224
- for (const spelling in translate_output[spell_val][0]) {
225
- if (translate_output[spell_val][0].hasOwnProperty(spelling)) {
226
- if (!excludes[key]) {
227
- nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
228
- } else if (!excludes[key].includes(spelling.toLowerCase())) {
229
- nav_errors.push(`Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${translate_output[spell_val][0][spelling].details}`);
230
- }
231
- }
232
- }
233
- }
234
- }
235
- }
236
- }
237
- }
238
-
239
- // Validate display names/bookmarks
240
- for (let i = 0; i < flat_nav[key].length; i++) {
241
- if (flat_nav[key][i].link === key) {
242
- const translate_output = translator.translate(flat_nav[key][i].text, spellcheck_options);
243
- if (Object.keys(translate_output).length) {
244
- for (const spell_val in translate_output) {
245
- if (translate_output.hasOwnProperty(spell_val)) {
246
- for (let j = 0; j < translate_output[spell_val].length; j++) {
247
- for (const spelling in translate_output[spell_val][j]) {
248
- if (translate_output[spell_val][j].hasOwnProperty(spelling)) {
249
- if (!excludes[key]) {
250
- nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
251
- } else if (!excludes[key].includes(spelling.toLowerCase())) {
252
- nav_errors.push(`Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${translate_output[spell_val][j][spelling].details}`);
253
- }
254
- }
255
- }
256
- }
257
- }
258
- }
259
- }
260
- }
261
- }
262
-
263
- }
264
- }
265
- return nav_errors;
266
- };
267
-
268
- const checkRedirects = async function (source_path) {
269
- let errors = [];
270
- for (const key in redirects) {
271
- if (redirects.hasOwnProperty(key)) {
272
- if (redirects[key].code !== 301 && redirects[key].code !== 308 && redirects[key].code !== 410 )
273
- errors.push(`Invalid redirect code: ${redirects[key].code}`);
274
-
275
- if (redirects[key].location) {
276
- let redir_locations = [
277
- path.join(source_path, redirects[key].location + '.md'),
278
- path.join(source_path, redirects[key].location, 'index.md'),
279
- path.join(source_path, redirects[key].location + '.html'),
280
- path.join(source_path, redirects[key].location + '.htm'),
281
- path.join(source_path, redirects[key].location, 'index.html'),
282
- path.join(source_path, redirects[key].location, 'index.htm')
283
- ];
284
- let redir_location_ok = false;
285
- for (let i = 0; i < redir_locations.length; i++) {
286
- if (fs.existsSync(redir_locations[i])) {
287
- redir_location_ok = true;
288
- break;
289
- }
290
- }
291
- if (!redir_location_ok)
292
- errors.push(`Redirect location does not exist: ${redirects[key].location}`);
293
- }
294
- }
295
- }
296
- return errors;
297
- };
298
-
299
- const checkRedirect = function (source_path, nav_path) {
300
- let response = {
301
- exists: false,
302
- error: null
303
- }
304
- if (redirects[nav_path]) {
305
- response.exists = true;
306
- if (redirects[nav_path].location) {
307
- // We have a redirect, check if it's a valid location
308
- let file_path = path.join(source_path, redirects[nav_path].location + '.html');
309
- if (!fs.existsSync(file_path)) {
310
- file_path = path.join(source_path, redirects[nav_path].location + '.htm');
311
- if (!fs.existsSync(file_path)) {
312
- file_path = path.join(source_path, redirects[nav_path].location, 'index.html');
313
- if (!fs.existsSync(file_path)) {
314
- file_path = path.join(source_path, redirects[nav_path].location, 'index.htm');
315
- if (!fs.existsSync(file_path)) {
316
- response.error = `Redirect path for [${nav_path}] does not exist: ${redirects[nav_path].location}`;
317
- }
318
- }
319
- }
320
- }
321
- }
322
- }
323
- return response;
324
- }
325
-
326
- const checkLinks = async function (source_path, htmlFile, links, hdocbook_config, redirects) {
327
- for (let i = 0; i < links.length; i++) {
328
-
329
- // Validate that link is a valid URL first
330
- const valid_url = hdoc.valid_url(links[i]);
331
- if (!valid_url) {
332
- // Could be a relative path, check
333
- if (links[i].startsWith('/')) {
334
- let link_root = links[i].split('/');
335
- link_root = link_root[0] !== '' ? link_root[0] : link_root[1];
336
- if (link_root !== hdocbook_config.docId) continue;
337
- isRelativePath(source_path, htmlFile, links[i]);
338
- } else if (links[i].startsWith('#')) {
339
- //Anchor - do nothing
340
- } else {
341
- errors[htmlFile.relativePath].push(`Root relative links should start with a forward-slash: ${links[i]}`);
342
- }
343
- } else {
344
- messages[htmlFile.relativePath].push(`Link is a properly formatted external URL: ${links[i]}`);
345
-
346
- // Skip if it's the auto-generated edit url, as these could be part of a private repo which would return a 404
347
- if (hdocbook_config.publicSource !== undefined && links[i] === hdoc.get_github_api_path(hdocbook_config.publicSource, htmlFile.relativePath).edit_path.replace(path.extname(htmlFile.relativePath), '.md')) {
348
- continue;
349
- }
350
-
351
- if (valid_url.protocol === 'mailto:') {
352
- continue;
353
- }
354
-
355
- // Skip if the link is excluded in the project config
356
- if (excludeLink(links[i])) {
357
- messages[htmlFile.relativePath].push(`Skipping link validation for: ${links[i]}`);
358
- continue;
359
- }
360
-
361
- if (links[i].toLowerCase().includes('docs.hornbill.com') || links[i].toLowerCase().includes('docs-internal.hornbill.com')) {
362
- errors[htmlFile.relativePath].push(`Links to Hornbill Docs should rooted and not fully-qualified: ${links[i]}`);
363
- continue;
364
- }
365
-
366
- try {
367
- await axios.get(links[i], {
368
- httpsAgent: agent
369
- });
370
- messages[htmlFile.relativePath].push(`Link is a valid external URL: ${links[i]}`);
371
- } catch (e) {
372
- // Handle errors
373
- errors[htmlFile.relativePath].push(`Link is not responding: ${links[i]} - [${e.message}]`);
374
- }
375
- }
376
- }
377
- };
378
-
379
- const checkImages = async function (source_path, htmlFile, links) {
380
- for (let i = 0; i < links.length; i++) {
381
-
382
- // Validate that image is a valid URL first
383
- if (!hdoc.valid_url(links[i])) {
384
- // Could be a relative path, check image exists
385
- doesFileExist(source_path, htmlFile, links[i]);
386
- } else {
387
- messages[htmlFile.relativePath].push(`Image link is a properly formatted external URL: ${links[i]}`);
388
- // Do a Get to the URL to see if it exists
389
- try {
390
- const res = await axios.get(links[i]);
391
- messages[htmlFile.relativePath].push(`Image link is a valid external URL: ${links[i]}`);
392
- } catch (e) {
393
- // Handle errors
394
- errors[htmlFile.relativePath].push(`Unexpected Error from external image link: ${links[i]} - ${e.message}`);
395
- }
396
- }
397
- }
398
- };
399
-
400
- const checkTags = async function (htmlFile) {
401
- // Check if file is excluded from tag check
402
- const file_no_ext = htmlFile.relativePath.replace(path.extname(htmlFile.relativePath), '');
403
- if (exclude_h1_count[file_no_ext]) return;
404
-
405
- // Check tags
406
- const htmlBody = fs.readFileSync(htmlFile.path, 'utf8');
407
- const $ = cheerio.load(htmlBody);
408
-
409
- const h1_tags = $('h1').map(function () {
410
- return $(this);
411
- }).get();
412
- if (h1_tags.length && h1_tags.length > 1) {
413
- let error_msg = `${h1_tags.length} <h1> tags found in content: `;
414
- for (let i = 0; i < h1_tags.length; i++) {
415
- error_msg += h1_tags[i].text();
416
- if (i < h1_tags.length - 1) error_msg += '; ';
417
- }
418
- errors[htmlFile.relativePath].push(error_msg);
419
- }
420
- };
421
-
422
- const dreeOptions = {
423
- descendants: true,
424
- excludeEmptyDirectories: true,
425
- extensions: ['htm', 'html', 'md'],
426
- hash: false,
427
- normalize: true,
428
- size: false,
429
- sizeInBytes: false,
430
- stat: false,
431
- symbolicLinks: false
432
- };
433
-
434
- // File scan callback for content type files
435
- const fileContentCallback = function (element) {
436
- if (element.extension.toLowerCase() === 'md') {
437
- md_to_validate.push(element);
438
- } else {
439
- html_to_validate.push(element);
440
- }
441
- };
442
-
443
- const isRelativePath = function (source_path, html_path, relative_path) {
444
- const rel_path_ext = path.extname(relative_path);
445
- let response = {
446
- is_rel_path: false,
447
- has_md_extension: rel_path_ext === '.md'
448
- };
449
-
450
- const supported_relpaths = [
451
- path.sep + 'index.htm',
452
- path.sep + 'index.html',
453
- '.htm',
454
- '.html',
455
- '.md'
456
- ];
457
-
458
- // Remove explicit anchor links and _books prefix
459
- relative_path = relative_path.split('#')[0].replace('_books/', '');
460
-
461
- // Make full file path
462
- const file_path = path.join(source_path, relative_path);
463
-
464
- // Does path exist?
465
- if (fs.existsSync(file_path)) {
466
- response.is_rel_path = true;
467
- } else {
468
- // Path
469
- for (let i = 0; i < supported_relpaths.length; i++) {
470
- if (fs.existsSync(`${file_path}${supported_relpaths[i]}`)) {
471
- response.is_rel_path = true;
472
- break;
473
- }
474
- }
475
- }
476
- if (response.has_md_extension) {
477
- errors[html_path.relativePath].push(`Relative link contains MD extension, but should not: ${relative_path}`);
478
- } else {
479
- if (response.is_rel_path) {
480
- messages[html_path.relativePath].push(`Relative path exists: ${relative_path}`);
481
- } else {
482
-
483
- // See if there's a redirect in place
484
- const relpath = relative_path.indexOf('/') == 0 ? relative_path.substring(1) : relative_path;
485
- const redir = checkRedirect(source_path, relpath);
486
- if (redir.exists) {
487
- if (redir.error !== null)
488
- errors[html_path.relativePath].push(redir.error);
489
- } else {
490
- errors[html_path.relativePath].push(`Link path does not exist: ${relative_path}`);
491
- }
492
- }
493
- }
494
- }
495
-
496
- const doesFileExist = function (source_path, html_path, relative_path) {
497
- // Remove explicit anchor links and _books prefix
498
- relative_path = relative_path.split('#')[0].replace('_books/', '');
499
- const file_path = path.join(source_path, relative_path);
500
- if (!fs.existsSync(file_path) && !fs.existsSync(file_path + path.sep + 'index.htm') && !fs.existsSync(file_path + 'index.html') && !fs.existsSync(file_path + '.htm') && !fs.existsSync(file_path + '.html')) {
501
- errors[html_path.relativePath].push(`Book resource does not exist: ${relative_path}`);
502
- return false;
503
- } else {
504
- messages[html_path.relativePath].push(`Book resource exists: ${relative_path}`);
505
- }
506
- return true;
507
- };
508
-
509
- // Takes a dree element, returns an object with a pair of arrays
510
- const getLinks = function (file) {
511
- messages[file.relativePath].push('Parsing HTML file');
512
- const htmlBody = fs.readFileSync(file.path, 'utf8');
513
- let links = {
514
- href: [],
515
- img: []
516
- };
517
- const $ = cheerio.load(htmlBody);
518
- const hrefs = $('a').map(function (i) {
519
- return $(this).attr('href');
520
- }).get();
521
- const srcs = $('img').map(function (i) {
522
- return $(this).attr('src');
523
- }).get();
524
- links.href.push(...hrefs);
525
- links.img.push(...srcs);
526
- return links;
527
- };
528
-
529
- exports.run = async function (source_path, doc_id, verbose, hdocbook_config, hdocbook_project, nav_items, prod_families, prods_supported, gen_exclude, gen_redirects) {
530
- console.log(`Performing Validation and Building SEO Link List...`);
531
- redirects = gen_redirects;
532
-
533
- // Get a list of HTML files in source_path
534
- dree.scan(source_path, dreeOptions, fileContentCallback);
535
-
536
- // Check product family
537
- let valid_product = false;
538
- let meta_errors = [];
539
- for (let i = 0; i < prod_families.products.length; i++) {
540
- if (prod_families.products[i].id === hdocbook_config.productFamily) {
541
- valid_product = true;
542
- }
543
- }
544
- if (!valid_product) {
545
- let val_prod_error = `Incorrect productFamily: ${hdocbook_config.productFamily}. Supported values:`;
546
- for (let i = 0; i < prods_supported.length; i++) {
547
- val_prod_error += `\n - ${prods_supported[i]}`
548
- }
549
- meta_errors.push(val_prod_error)
550
- }
551
-
552
- if (hdocbook_config.publicSource && hdocbook_config.publicSource !== '') {
553
- // Validate publicSource
554
- if (hdocbook_config.publicSource.toLowerCase() === '--publicsource--') {
555
- meta_errors.push(`Value for publicSource in book metadata is set to its default template value`);
556
- } else {
557
- // Check URL exists
558
- if (!hdocbook_config.publicSource.startsWith('https://github.com') && !hdocbook_config.publicSource.startsWith('https://api.github.com')) {
559
- meta_errors.push(`Value for publicSource in book metadata is not a recognised GitHub URL: ${hdocbook_config.publicSource}`);
560
- }
561
- }
562
- }
563
-
564
- if (!hdocbook_config.audience || !(hdocbook_config.audience instanceof Array) || hdocbook_config.audience.length === 0) {
565
- meta_errors.push(`Property audience of type array in book metadata is mandatory.`);
566
- }
567
- if (hdocbook_project.validation) {
568
- if (hdocbook_project.validation.exclude_links && hdocbook_project.validation.exclude_links instanceof Array) {
569
- hdocbook_project.validation.exclude_links.forEach(function (excl_link) {
570
- exclude_links[excl_link] = true;
571
- });
572
- }
573
- if (hdocbook_project.validation.exclude_spellcheck && hdocbook_project.validation.exclude_spellcheck instanceof Array) {
574
- hdocbook_project.validation.exclude_spellcheck.forEach(function (excl_sc) {
575
- exclude_spellcheck[excl_sc.document_path] = excl_sc.words;
576
- });
577
- }
578
- if (hdocbook_project.validation.exclude_h1_count && hdocbook_project.validation.exclude_h1_count instanceof Array) {
579
- hdocbook_project.validation.exclude_h1_count.forEach(function (excl_h1) {
580
- exclude_h1_count[excl_h1] = true;
581
- });
582
- }
583
- }
584
-
585
- // Check navigation spellings & paths exist
586
- const nav_errors = await checkNavigation(source_path, nav_items, exclude_spellcheck);
587
- if (nav_errors.length > 0) meta_errors.push(...nav_errors);
588
-
589
- // Check inline content spellings & paths exist
590
- if (hdocbook_config.inline && hdocbook_config.inline.length > 0) {
591
- const inline_errors = await checkInline(source_path, hdocbook_config.inline, exclude_spellcheck);
592
- if (inline_errors.length > 0) meta_errors.push(...inline_errors);
593
- }
594
-
595
- // Check redirects
596
- const redirect_errors = await checkRedirects(source_path);
597
- if (redirect_errors.length > 0) meta_errors.push(...redirect_errors);
598
-
599
- if (meta_errors.length > 0) {
600
- console.log('\r\n-----------------------');
601
- console.log(' Validation Output ');
602
- console.log('-----------------------');
603
- for (let i = 0; i < meta_errors.length; i++) {
604
- console.error(`- ${meta_errors[i]}`);
605
- }
606
- console.error(`\r\n${meta_errors.length} Validation Errors Found`);
607
- return false;
608
- }
609
-
610
-
611
- let excl_output = [];
612
-
613
- // Do spellchecking on markdown files
614
- let md_files_spellchecked = {};
615
- let mdPromiseArray = [];
616
- for (let i = 0; i < md_to_validate.length; i++) {
617
- errors[md_to_validate[i].relativePath] = [];
618
- messages[md_to_validate[i].relativePath] = [];
619
- warnings[md_to_validate[i].relativePath] = [];
620
- mdPromiseArray.push(md_to_validate[i]);
621
- }
622
- await Promise.all(mdPromiseArray.map(async (file) => {
623
- // Initiate maps for errors and verbose messages for markdown file
624
- const exclusions = await spellcheckContent(file, exclude_spellcheck);
625
- if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
626
- md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')] = true;
627
- }));
628
-
629
- // Perform rest of validation against HTML files
630
- let listContent = '';
631
- let htmlPromiseArray = [];
632
- for (let i = 0; i < html_to_validate.length; i++) {
633
- errors[html_to_validate[i].relativePath] = [];
634
- messages[html_to_validate[i].relativePath] = [];
635
- warnings[html_to_validate[i].relativePath] = [];
636
- htmlPromiseArray.push(html_to_validate[i]);
637
- }
638
- await Promise.all(htmlPromiseArray.map(async (file) => {
639
- // Check for British spellings in static HTML content
640
- if (!md_files_spellchecked[file.relativePath.replace('.' + file.extension, '')]) {
641
- const exclusions = await spellcheckContent(file, exclude_spellcheck);
642
- if (gen_exclude && exclusions.length > 0) excl_output.push({ document_path: file.relativePath.replace('.' + file.extension, ''), words: exclusions });
643
- }
644
-
645
- const links = getLinks(file);
646
- if (links.href.length === 0) {
647
- messages[file.relativePath].push('No links found in file');
648
- } else {
649
- await checkLinks(source_path, file, links.href, hdocbook_config);
650
- }
651
- if (links.img.length === 0) {
652
- messages[file.relativePath].push('No images found in file');
653
- } else {
654
- await checkImages(source_path, file, links.img);
655
- }
656
-
657
- // Check for multiple H1 tags
658
- await checkTags(file);
659
-
660
- // Build list content for Google
661
- listContent += `/${file.relativePath.replace(path.extname(file.relativePath), '')}`;
662
- listContent += '\r\n';
663
- }));
664
-
665
- if (gen_exclude) console.log(JSON.stringify(excl_output, null, 2));
666
-
667
- try {
668
- // Write list
669
- const listFile = path.join(source_path, doc_id, 'links.txt');
670
- fs.writeFileSync(listFile, listContent);
671
- console.log(`\r\nLink list text file created successfully: ${listFile}`);
672
- } catch (err) {
673
- console.error(err);
674
- }
675
-
676
- if (verbose) {
677
- console.log('\r\n-------------');
678
- console.log(' Verbose ');
679
- console.log('-------------');
680
- for (const key in messages) {
681
- if (messages.hasOwnProperty(key) && messages[key].length > 0) {
682
- console.log(`\r\nMessage output for ${key}`);
683
- for (let i = 0; i < messages[key].length; i++) {
684
- console.log(` - ${messages[key][i]}`);
685
- }
686
- }
687
- }
688
- }
689
-
690
- console.log('\r\n-----------------------');
691
- console.log(' Validation Output ');
692
- console.log('-----------------------');
693
- if (Object.keys(errors).length > 0) {
694
- let error_count = 0;
695
- for (const key in errors) {
696
- if (errors.hasOwnProperty(key) && errors[key].length > 0) {
697
- console.error(`\r\n${errors[key].length} error(s) in ${key}`);
698
- for (let i = 0; i < errors[key].length; i++) {
699
- console.error(` - ${errors[key][i]}`);
700
- error_count++
701
- }
702
- }
703
- }
704
- if (error_count > 0) {
705
- console.error(`\r\n${error_count} Validation Errors Found`);
706
- if (verbose) {
707
- console.log(`\n`);
708
- console.error(JSON.stringify(exclude_spellcheck_output, null, 2));
709
- }
710
- return false;
711
- }
712
- }
713
-
714
- console.log(`\r\nNo Validation Errors Found!\n`);
715
- return true;
716
- };
717
- })();
1
+ (() => {
2
+ const axios = require("axios");
3
+ const cheerio = require("cheerio");
4
+ const dree = require("dree");
5
+ const fs = require("node:fs");
6
+ const path = require("node:path");
7
+ const https = require("node:https");
8
+ const hdoc = require(path.join(__dirname, "hdoc-module.js"));
9
+ const translator = require("american-british-english-translator");
10
+ const { trueCasePathSync } = require("true-case-path");
11
+
12
+ const spellcheck_options = {
13
+ british: true,
14
+ spelling: true,
15
+ };
16
+ const regex_nav_paths = /[a-z0-9-\/]+[a-z0-9]+#{0,1}[a-z0-9-\/]+/;
17
+ const agent = new https.Agent({
18
+ rejectUnauthorized: false,
19
+ });
20
+
21
+ const errors = {};
22
+ const messages = {};
23
+ const warnings = {};
24
+ const html_to_validate = [];
25
+ const md_to_validate = [];
26
+ const exclude_links = {};
27
+ const exclude_spellcheck = {};
28
+ let redirects = {};
29
+ const exclude_h1_count = {};
30
+ const exclude_spellcheck_output = [];
31
+
32
+ const excludeLink = async (url) => {
33
+ if (exclude_links[url]) return true;
34
+
35
+ for (let key in exclude_links) {
36
+ if (key.endsWith("*")) {
37
+ key = key.substring(0, key.length - 1);
38
+ if (url.startsWith(key)) return true;
39
+ }
40
+ }
41
+
42
+ return false;
43
+ };
44
+
45
+ const spellcheckContent = async (sourceFile, excludes) => {
46
+ const spelling_errors = {};
47
+ const words = [];
48
+ const text = fs.readFileSync(sourceFile.path, "utf8");
49
+ const source_path = sourceFile.relativePath.replace(
50
+ `.${sourceFile.extension}`,
51
+ "",
52
+ );
53
+ const translate_output = translator.translate(text, spellcheck_options);
54
+ if (Object.keys(translate_output).length) {
55
+ for (const key in translate_output) {
56
+ if (Object.hasOwn(translate_output, key)) {
57
+ const error_message = `Line ${key} - British spelling:`;
58
+ for (let i = 0; i < translate_output[key].length; i++) {
59
+ for (const spelling in translate_output[key][i]) {
60
+ if (
61
+ Object.hasOwn(translate_output[key][i], spelling) &&
62
+ typeof translate_output[key][i][spelling].details === "string"
63
+ ) {
64
+ if (!excludes[source_path]) {
65
+ errors[sourceFile.relativePath].push(
66
+ `${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`,
67
+ );
68
+ spelling_errors[spelling] = true;
69
+ } else if (
70
+ !excludes[source_path].includes(spelling.toLowerCase())
71
+ ) {
72
+ errors[sourceFile.relativePath].push(
73
+ `${error_message} ${spelling} should be ${translate_output[key][i][spelling].details}`,
74
+ );
75
+ spelling_errors[spelling] = true;
76
+ }
77
+ }
78
+ }
79
+ }
80
+ }
81
+ }
82
+ }
83
+ if (Object.keys(spelling_errors).length) {
84
+ const exclude_output = {
85
+ document_path: sourceFile.relativePath.replace(
86
+ path.extname(sourceFile.relativePath),
87
+ "",
88
+ ),
89
+ words: [],
90
+ };
91
+ for (const word in spelling_errors) {
92
+ if (Object.hasOwn(spelling_errors, word)) {
93
+ words.push(word);
94
+ exclude_output.words.push(word);
95
+ }
96
+ }
97
+ exclude_spellcheck_output.push(exclude_output);
98
+ }
99
+ return words;
100
+ };
101
+
102
// Validates the book's inline link definitions. For every entry it:
//   1. spellchecks each hyphen-separated word of each "/"-separated link segment,
//   2. spellchecks the entry title,
//   3. confirms that a built HTML file exists for the link.
// Returns an array of error strings (empty when everything passes).
const checkInline = async (source_path, inline, excludes) => {
  const inline_errors = [];

  // A spelling is reported unless the exclusion list registered for this link names it
  // (compared in lower case).
  const shouldReport = (link, spelling) =>
    !excludes[link] || !excludes[link].includes(spelling.toLowerCase());

  for (const entry of inline) {
    const { title, link } = entry;

    // Validate link segment spellings
    for (const segment of link.split("/")) {
      for (const word of segment.split("-")) {
        const translated = translator.translate(word, spellcheck_options);
        for (const line_key of Object.keys(translated)) {
          // Only the first result entry is inspected for link words.
          const first_hit = translated[line_key][0];
          for (const spelling of Object.keys(first_hit ?? {})) {
            if (shouldReport(link, spelling)) {
              inline_errors.push(
                `Inline Link [${link}] contains a British English spelling: ${spelling} should be ${first_hit[spelling].details}`,
              );
            }
          }
        }
      }
    }

    // Validate display names
    const title_translation = translator.translate(title, spellcheck_options);
    for (const line_key of Object.keys(title_translation)) {
      for (const hit of title_translation[line_key]) {
        for (const spelling of Object.keys(hit ?? {})) {
          if (shouldReport(link, spelling)) {
            inline_errors.push(
              `Inline title for link [${link}] contains a British English spelling: ${spelling} should be ${hit[spelling].details}`,
            );
          }
        }
      }
    }

    // Validate path exists - the link should resolve to a built HTML file at this point.
    // Candidates are probed in this order and the first hit wins.
    const candidates = [
      path.join(source_path, `${link}.html`),
      path.join(source_path, `${link}.htm`),
      path.join(source_path, link, "index.html"),
      path.join(source_path, link, "index.htm"),
    ];
    if (!candidates.some((candidate) => fs.existsSync(candidate))) {
      inline_errors.push(`Inline link [${link}] file does not exist.`);
    }
  }

  return inline_errors;
};
185
+
186
// Validates every navigation path in flat_nav and returns an array of error strings.
//
//   source_path - root folder the built book files are resolved against
//   flat_nav    - object keyed by navigation path; each value is an array of
//                 { link, text } items
//   excludes    - object keyed by path, holding spellings to ignore for that path
//   draft_links - optional list of navigation paths allowed to have no built file;
//                 a missing (undefined/null) value is treated as an empty list
//
// Checks performed per key: allowed characters, redirect validity, existence of a built
// HTML file, file-name case match, spelling of path words, spelling of display text.
const checkNavigation = async (source_path, flat_nav, excludes, draft_links) => {
  const nav_errors = [];
  // Callers that do not supply draft links must not crash the validation run.
  const drafts = draft_links ?? [];

  // A spelling is reported unless the exclusion list for this key names it (lower case).
  const shouldReport = (nav_key, spelling) =>
    !excludes[nav_key] || !excludes[nav_key].includes(spelling.toLowerCase());

  for (const key of Object.keys(flat_nav ?? {})) {
    // Whatever regex_nav_paths does not consume is reported as invalid characters.
    const invalid_chars = key.replace(regex_nav_paths, "");
    if (invalid_chars !== "") {
      nav_errors.push(
        `Navigation path [${key}] contains the following invalid characters: [${[...invalid_chars].join("] [")}]`,
      );
    }
    const key_no_hash = key.split("#")[0];

    // See if there's a redirect in place
    const redir = checkRedirect(source_path, key_no_hash);
    const redirect_errored = redir.exists && redir.error !== null;
    const redirected = redir.exists && redir.error === null;
    if (redirect_errored) nav_errors.push(redir.error);

    // Validate path exists - key should be a html file at this point.
    // Candidates are probed in order; the first existing one is used.
    const candidates = [
      path.join(source_path, `${key_no_hash}.html`),
      path.join(source_path, `${key_no_hash}.htm`),
      path.join(source_path, key_no_hash, "index.html"),
      path.join(source_path, key_no_hash, "index.htm"),
    ];
    const file_name = candidates.find((candidate) => fs.existsSync(candidate));

    if (file_name === undefined) {
      // A missing file is acceptable when it is redirected or still a draft; a broken
      // redirect has already been reported above.
      if (!redirected && !redirect_errored && !drafts.includes(key_no_hash)) {
        nav_errors.push(`Navigation path [${key_no_hash}] file does not exist.`);
      }
    } else {
      // File exists - but is there a redirect? If so, we want to flag this as an error
      if (redirected) {
        nav_errors.push(
          `Navigation path [${key_no_hash}] is redirected, but path still exists.`,
        );
      }

      // Check file path case match
      const true_file = trueCasePathSync(file_name)
        .replace(source_path, "")
        .replaceAll("\\", "/");
      const relative_file = file_name
        .replace(source_path, "")
        .replaceAll("\\", "/");
      if (true_file !== relative_file) {
        nav_errors.push(
          `Navigation path [${key}] for filename [${relative_file}] does not match filename case [${true_file}].`,
        );
      }
    }

    // Validate path spellings (the full key, including any #fragment, is split)
    for (const segment of key.split("/")) {
      for (const word of segment.split("-")) {
        const translated = translator.translate(word, spellcheck_options);
        for (const line_key of Object.keys(translated)) {
          // Only the first result entry is inspected for path words.
          const first_hit = translated[line_key][0];
          for (const spelling of Object.keys(first_hit ?? {})) {
            if (shouldReport(key, spelling)) {
              nav_errors.push(
                `Navigation path [${key}] key contains a British English spelling: ${spelling} should be ${first_hit[spelling].details}`,
              );
            }
          }
        }
      }
    }

    // Validate display names/bookmarks
    for (const item of flat_nav[key]) {
      if (item.link !== key) continue;
      const translated = translator.translate(item.text, spellcheck_options);
      for (const line_key of Object.keys(translated)) {
        for (const hit of translated[line_key]) {
          for (const spelling of Object.keys(hit ?? {})) {
            if (shouldReport(key, spelling)) {
              nav_errors.push(
                `Navigation path [${key}] display text contains a British English spelling: ${spelling} should be ${hit[spelling].details}`,
              );
            }
          }
        }
      }
    }
  }
  return nav_errors;
};
326
+
327
// Validates every entry of the module-level redirects map and returns an array of
// error strings. An entry is reported when its code is not 301, 308 or 410, or when it
// has a location for which no markdown or HTML file exists under source_path.
const checkRedirects = async (source_path) => {
  const redirect_errors = [];
  const allowed_codes = [301, 308, 410];

  for (const redirect of Object.values(redirects ?? {})) {
    if (!allowed_codes.includes(redirect.code)) {
      redirect_errors.push(`Invalid redirect code: ${redirect.code}`);
    }

    const target = redirect.location;
    if (!target) continue;

    // The target may be either a source (.md) or a built (.htm/.html) document.
    const candidates = [
      path.join(source_path, `${target}.md`),
      path.join(source_path, target, "index.md"),
      path.join(source_path, `${target}.html`),
      path.join(source_path, `${target}.htm`),
      path.join(source_path, target, "index.html"),
      path.join(source_path, target, "index.htm"),
    ];
    const target_found = candidates.some((candidate) => fs.existsSync(candidate));
    if (!target_found) {
      redirect_errors.push(`Redirect location does not exist: ${target}`);
    }
  }
  return redirect_errors;
};
363
+
364
// Looks up nav_path in the module-level redirects map.
// Returns { exists, error }:
//   exists - true when a redirect entry is registered for nav_path
//   error  - null, or a message when the entry has a location for which no built HTML
//            file can be found under source_path
const checkRedirect = (source_path, nav_path) => {
  const redirect = redirects[nav_path];
  if (!redirect) {
    return { exists: false, error: null };
  }

  const result = { exists: true, error: null };
  const target = redirect.location;
  if (!target) {
    // Entries without a location have nothing further to verify.
    return result;
  }

  // We have a redirect, check if it's a valid location. Candidate paths are built
  // lazily and probed in order, stopping at the first one that exists.
  const candidate_builders = [
    () => path.join(source_path, `${target}.html`),
    () => path.join(source_path, `${target}.htm`),
    () => path.join(source_path, target, "index.html"),
    () => path.join(source_path, target, "index.htm"),
  ];
  const target_found = candidate_builders.some((build) => fs.existsSync(build()));
  if (!target_found) {
    result.error = `Redirect path for [${nav_path}] does not exist: ${target}`;
  }
  return result;
};
404
+
405
// Checks that the file at html_file.path contains the anchor element targeted by
// hash_anchor ("#name" or "/#name"). When it does not, an error is recorded against
// html_file.relativePath, quoting full_hash_anchor_link if one was supplied and
// hash_anchor otherwise. Any exception raised along the way is recorded as an error too.
const isHashAnchor = (html_file, hash_anchor, full_hash_anchor_link = "") => {
  try {
    const page_html = fs.readFileSync(html_file.path, { encoding: "utf-8" });

    // Strip the leading "#" (or "/#") to obtain the bare anchor name.
    const prefix_length = hash_anchor.startsWith("/") ? 2 : 1;
    const anchor_name = hash_anchor.substring(prefix_length);

    if (page_html.includes(`<div id="hb-doc-anchor-${anchor_name}"`)) {
      return;
    }

    const reported_link =
      full_hash_anchor_link !== "" ? full_hash_anchor_link : hash_anchor;
    errors[html_file.relativePath].push(
      `Target hash anchor is not present in page content: ${reported_link}`,
    );
  } catch (e) {
    errors[html_file.relativePath].push(e);
  }
};
424
+
425
// Validates every href found in one HTML file, recording results in the module-level
// messages/errors maps under htmlFile.relativePath.
//
//   - Links that are not valid URLs are treated as root-relative paths or hash anchors.
//     Root-relative links into another book (root segment differs from
//     hdocbook_config.docId) are skipped.
//   - Valid URLs are requested with axios unless they are the generated edit URL, a
//     mailto: link, excluded by configuration, or point at the Hornbill docs hosts
//     (which is reported as an error).
const checkLinks = async (source_path, htmlFile, links, hdocbook_config) => {
  const file_key = htmlFile.relativePath;

  for (const link of links) {
    // Validate that link is a valid URL first
    const valid_url = hdoc.valid_url(link);

    if (!valid_url) {
      // Could be a relative path, check
      if (link.startsWith("/") && !link.startsWith("/#")) {
        const segments = link.split("/");
        if (segments[0] === "") segments.shift();
        const link_root = segments[0] === "_books" ? segments[1] : segments[0];

        // Internal links into other books can't easily be validated here - skip them
        if (link_root !== hdocbook_config.docId) {
          continue;
        }

        isRelativePath(source_path, htmlFile, link);
      } else if (link.startsWith("#") || link.startsWith("/#")) {
        // Flat Anchor - validate we have a same-file hit
        isHashAnchor(htmlFile, link);
      } else {
        errors[file_key].push(
          `Root relative links should start with a forward-slash: ${link}`,
        );
      }
      continue;
    }

    messages[file_key].push(`Link is a properly formatted external URL: ${link}`);

    // Skip if it's the auto-generated edit url, as these could be part of a private
    // repo which would return a 404
    if (
      hdocbook_config.publicSource !== undefined &&
      link ===
        hdoc
          .get_github_api_path(hdocbook_config.publicSource, file_key)
          .edit_path.replace(path.extname(file_key), ".md")
    ) {
      continue;
    }

    if (valid_url.protocol === "mailto:") {
      continue;
    }

    // Skip if the link is excluded in the project config.
    // excludeLink may return a Promise (it is declared async); an un-awaited Promise is
    // always truthy and would cause every external link to be skipped, so await it.
    if (await excludeLink(link)) {
      messages[file_key].push(`Skipping link validation for: ${link}`);
      continue;
    }

    const lower_link = link.toLowerCase();
    if (
      lower_link.includes("docs.hornbill.com") ||
      lower_link.includes("docs-internal.hornbill.com")
    ) {
      errors[file_key].push(
        `Links to Hornbill Docs should rooted and not fully-qualified: ${link}`,
      );
      continue;
    }

    try {
      await axios.get(link, {
        httpsAgent: agent,
      });
      messages[file_key].push(`Link is a valid external URL: ${link}`);
    } catch (e) {
      // Any request failure is reported as the link not responding
      errors[file_key].push(`Link is not responding: ${link} - [${e.message}]`);
    }
  }
};
507
+
508
// Validates every image source found in one HTML file. Sources that are not valid URLs
// are checked on disk via doesFileExist; valid URLs are requested with axios. Outcomes
// are recorded in the module-level messages/errors maps under htmlFile.relativePath.
const checkImages = async (source_path, htmlFile, links) => {
  const file_key = htmlFile.relativePath;

  for (const image_src of links) {
    // Validate that image is a valid URL first
    if (!hdoc.valid_url(image_src)) {
      // Could be a relative path, check image exists
      doesFileExist(source_path, htmlFile, image_src);
      continue;
    }

    messages[file_key].push(
      `Image link is a properly formatted external URL: ${image_src}`,
    );

    // Do a Get to the URL to see if it exists
    try {
      await axios.get(image_src);
      messages[file_key].push(`Image link is a valid external URL: ${image_src}`);
    } catch (e) {
      // Any request failure is reported against the file
      errors[file_key].push(
        `Unexpected Error from external image link: ${image_src} - ${e.message}`,
      );
    }
  }
};
533
+
534
// Records an error when the HTML file contains more than one <h1> element, listing the
// text of each heading. Files whose extension-less relative path appears in
// exclude_h1_count are skipped.
const checkTags = async (htmlFile) => {
  // Check if file is excluded from tag check
  const doc_key = htmlFile.relativePath.replace(
    path.extname(htmlFile.relativePath),
    "",
  );
  if (exclude_h1_count[doc_key]) return;

  // Check tags
  const $ = cheerio.load(fs.readFileSync(htmlFile.path, "utf8"));
  const headings = $("h1")
    .map((_, element) => $(element))
    .get();

  if (headings.length > 1) {
    const heading_texts = headings.map((heading) => heading.text()).join("; ");
    errors[htmlFile.relativePath].push(
      `${headings.length} <h1> tags found in content: ${heading_texts}`,
    );
  }
};
560
+
561
// Options handed to dree.scan when collecting the files to validate.
const dreeOptions = {
  // Only markdown and HTML documents are of interest
  extensions: ["htm", "html", "md"],
  excludeEmptyDirectories: true,
  symbolicLinks: false,
  descendants: true,
  normalize: true,
  // Size, hash and stat details are not used by the validator
  hash: false,
  size: false,
  sizeInBytes: false,
  stat: false,
};
572
+
573
// File scan callback for content type files: markdown files are queued in
// md_to_validate, everything else in html_to_validate.
const fileContentCallback = (element) => {
  const is_markdown = element.extension.toLowerCase() === "md";
  const queue = is_markdown ? md_to_validate : html_to_validate;
  queue.push(element);
};
581
+
582
// Validates a root-relative link found in html_path against the files under source_path,
// recording the outcome in the module-level messages/errors maps under
// html_path.relativePath. Returns nothing.
//
// Outcomes, in priority order:
//   1. the link carries a .md extension          -> error
//   2. the link resolves to an existing path     -> message
//   3. a redirect is registered for the link     -> error only if the redirect is broken
//   4. otherwise                                 -> "does not exist" error
// When the link resolves through one of the supported suffixes and carries a #fragment,
// the fragment is additionally checked with isHashAnchor.
const isRelativePath = (source_path, html_path, relative_path) => {
  const file_key = html_path.relativePath;

  // Remove explicit anchor links and _books prefix
  const [path_part, fragment] = relative_path.split("#");
  const clean_relative_path = path_part.replace("_books/", "");
  const hash_anchor = fragment ? `#${fragment}` : null;

  // The extension is taken from the fragment-free path; reading it from the raw link
  // would yield e.g. ".md#section" and let markdown links with anchors slip through.
  const has_md_extension = path.extname(clean_relative_path) === ".md";

  const supported_relpaths = [
    `${path.sep}index.htm`,
    `${path.sep}index.html`,
    ".htm",
    ".html",
    ".md",
  ];

  // Make full file path
  const file_path = path.join(source_path, clean_relative_path);

  // Does path exist, either as-is or with one of the supported suffixes appended?
  let is_rel_path = fs.existsSync(file_path);
  if (!is_rel_path) {
    const resolved_file = supported_relpaths
      .map((suffix) => `${file_path}${suffix}`)
      .find((candidate) => fs.existsSync(candidate));
    if (resolved_file !== undefined) {
      is_rel_path = true;

      // Check for hash anchor in the resolved file, reporting against the linking file
      if (hash_anchor !== null) {
        isHashAnchor(
          { path: resolved_file, relativePath: file_key },
          hash_anchor,
          relative_path,
        );
      }
    }
  }

  if (has_md_extension) {
    errors[file_key].push(
      `Relative link contains MD extension, but should not: ${clean_relative_path}`,
    );
    return;
  }
  if (is_rel_path) {
    messages[file_key].push(`Relative path exists: ${clean_relative_path}`);
    return;
  }

  // See if there's a redirect in place (redirect keys carry no leading slash)
  const relpath = clean_relative_path.startsWith("/")
    ? clean_relative_path.substring(1)
    : clean_relative_path;
  const redir = checkRedirect(source_path, relpath);
  if (redir.exists) {
    if (redir.error !== null) errors[file_key].push(redir.error);
  } else {
    errors[file_key].push(`Link path does not exist: ${clean_relative_path}`);
  }
};
658
+
659
// Checks that a book resource referenced from html_path exists under source_path.
// The reference is accepted when the path itself exists, when it is a folder containing
// index.htm / index.html, or when appending .htm / .html yields an existing file.
// Records a message (found) or an error (missing) under html_path.relativePath and
// returns true / false accordingly.
const doesFileExist = (source_path, html_path, relative_path) => {
  // Remove explicit anchor links and _books prefix
  const clean_relative_path = relative_path.split("#")[0].replace("_books/", "");
  const file_path = path.join(source_path, clean_relative_path);

  // Both index probes are joined with a path separator; gluing "index.html" straight
  // onto the path would let "<name>index.html" satisfy a lookup for "<name>".
  const candidates = [
    file_path,
    path.join(file_path, "index.htm"),
    path.join(file_path, "index.html"),
    `${file_path}.htm`,
    `${file_path}.html`,
  ];

  if (!candidates.some((candidate) => fs.existsSync(candidate))) {
    errors[html_path.relativePath].push(
      `Book resource does not exist: ${clean_relative_path}`,
    );
    return false;
  }

  messages[html_path.relativePath].push(
    `Book resource exists: ${clean_relative_path}`,
  );
  return true;
};
682
+
683
// Takes a dree element and returns { href, img }: the href attribute values of the
// file's <a> elements and the src attribute values of its <img> elements.
// Also records a "Parsing HTML file" message for the file.
const getLinks = (file) => {
  messages[file.relativePath].push("Parsing HTML file");

  const $ = cheerio.load(fs.readFileSync(file.path, "utf8"));

  // Collects one attribute from every element matching the selector.
  const collectAttribute = (selector, attribute) =>
    $(selector)
      .map((_, element) => $(element).attr(attribute))
      .get();

  const anchor_targets = collectAttribute("a", "href");
  const image_sources = collectAttribute("img", "src");

  return {
    href: [...anchor_targets],
    img: [...image_sources],
  };
};
706
+
707
+ exports.run = async (
708
+ source_path,
709
+ doc_id,
710
+ verbose,
711
+ hdocbook_config,
712
+ hdocbook_project,
713
+ nav_items,
714
+ prod_families,
715
+ prods_supported,
716
+ gen_exclude,
717
+ gen_redirects,
718
+ draft_links,
719
+ ) => {
720
+ console.log("Performing Validation and Building SEO Link List...");
721
+ redirects = gen_redirects;
722
+
723
+ // Get a list of HTML files in source_path
724
+ dree.scan(source_path, dreeOptions, fileContentCallback);
725
+
726
+ // Check product family
727
+ let valid_product = false;
728
+ const meta_errors = [];
729
+ for (let i = 0; i < prod_families.products.length; i++) {
730
+ if (prod_families.products[i].id === hdocbook_config.productFamily) {
731
+ valid_product = true;
732
+ }
733
+ }
734
+ if (!valid_product) {
735
+ let val_prod_error = `Incorrect productFamily: ${hdocbook_config.productFamily}. Supported values:`;
736
+ for (let i = 0; i < prods_supported.length; i++) {
737
+ val_prod_error += `\n - ${prods_supported[i]}`;
738
+ }
739
+ meta_errors.push(val_prod_error);
740
+ }
741
+
742
+ if (hdocbook_config.publicSource && hdocbook_config.publicSource !== "") {
743
+ // Validate publicSource
744
+ if (hdocbook_config.publicSource.toLowerCase() === "--publicsource--") {
745
+ meta_errors.push(
746
+ "Value for publicSource in book metadata is set to its default template value",
747
+ );
748
+ } else {
749
+ // Check URL exists
750
+ if (
751
+ !hdocbook_config.publicSource.startsWith("https://github.com") &&
752
+ !hdocbook_config.publicSource.startsWith("https://api.github.com")
753
+ ) {
754
+ meta_errors.push(
755
+ `Value for publicSource in book metadata is not a recognised GitHub URL: ${hdocbook_config.publicSource}`,
756
+ );
757
+ }
758
+ }
759
+ }
760
+
761
+ if (
762
+ !hdocbook_config.audience ||
763
+ !Array.isArray(hdocbook_config.audience) ||
764
+ hdocbook_config.audience.length === 0
765
+ ) {
766
+ meta_errors.push(
767
+ "Property audience of type array in book metadata is mandatory.",
768
+ );
769
+ }
770
+ if (hdocbook_project.validation) {
771
+ if (
772
+ hdocbook_project.validation.exclude_links &&
773
+ Array.isArray(hdocbook_project.validation.exclude_links)
774
+ ) {
775
+ for (const excl_link of hdocbook_project.validation.exclude_links) {
776
+ exclude_links[excl_link] = true;
777
+ }
778
+ }
779
+ if (
780
+ hdocbook_project.validation.exclude_spellcheck &&
781
+ Array.isArray(hdocbook_project.validation.exclude_spellcheck)
782
+ ) {
783
+ for (const excl_sc of hdocbook_project.validation.exclude_spellcheck) {
784
+ exclude_spellcheck[excl_sc.document_path] = excl_sc.words;
785
+ }
786
+ }
787
+ if (
788
+ hdocbook_project.validation.exclude_h1_count &&
789
+ Array.isArray(hdocbook_project.validation.exclude_h1_count)
790
+ ) {
791
+ for (const excl_h1 of hdocbook_project.validation.exclude_h1_count) {
792
+ exclude_h1_count[excl_h1] = true;
793
+ }
794
+ }
795
+ }
796
+
797
+ // Check navigation spellings & paths exist
798
+ const nav_errors = await checkNavigation(
799
+ source_path,
800
+ nav_items,
801
+ exclude_spellcheck,
802
+ draft_links,
803
+ );
804
+ if (nav_errors.length > 0) meta_errors.push(...nav_errors);
805
+
806
+ // Check inline content spellings & paths exist
807
+ if (hdocbook_config.inline && hdocbook_config.inline.length > 0) {
808
+ const inline_errors = await checkInline(
809
+ source_path,
810
+ hdocbook_config.inline,
811
+ exclude_spellcheck,
812
+ );
813
+ if (inline_errors.length > 0) meta_errors.push(...inline_errors);
814
+ }
815
+
816
+ // Check redirects
817
+ const redirect_errors = await checkRedirects(source_path);
818
+ if (redirect_errors.length > 0) meta_errors.push(...redirect_errors);
819
+
820
+ if (meta_errors.length > 0) {
821
+ console.log("\r\n-----------------------");
822
+ console.log(" Validation Output ");
823
+ console.log("-----------------------");
824
+ for (let i = 0; i < meta_errors.length; i++) {
825
+ console.error(`- ${meta_errors[i]}`);
826
+ }
827
+ console.error(`\r\n${meta_errors.length} Validation Errors Found`);
828
+ return false;
829
+ }
830
+
831
+ const excl_output = [];
832
+
833
+ // Do spellchecking on markdown files
834
+ const md_files_spellchecked = {};
835
+ const mdPromiseArray = [];
836
+ for (let i = 0; i < md_to_validate.length; i++) {
837
+ errors[md_to_validate[i].relativePath] = [];
838
+ messages[md_to_validate[i].relativePath] = [];
839
+ warnings[md_to_validate[i].relativePath] = [];
840
+ mdPromiseArray.push(md_to_validate[i]);
841
+ }
842
+ await Promise.all(
843
+ mdPromiseArray.map(async (file) => {
844
+ // Initiate maps for errors and verbose messages for markdown file
845
+ const exclusions = await spellcheckContent(file, exclude_spellcheck);
846
+ if (gen_exclude && exclusions.length > 0)
847
+ excl_output.push({
848
+ document_path: file.relativePath.replace(`.${file.extension}`, ""),
849
+ words: exclusions,
850
+ });
851
+ md_files_spellchecked[
852
+ file.relativePath.replace(`.${file.extension}`, "")
853
+ ] = true;
854
+ }),
855
+ );
856
+
857
+ // Perform rest of validation against HTML files
858
+ let listContent = "";
859
+ const htmlPromiseArray = [];
860
+ for (let i = 0; i < html_to_validate.length; i++) {
861
+ errors[html_to_validate[i].relativePath] = [];
862
+ messages[html_to_validate[i].relativePath] = [];
863
+ warnings[html_to_validate[i].relativePath] = [];
864
+ htmlPromiseArray.push(html_to_validate[i]);
865
+ }
866
+ await Promise.all(
867
+ htmlPromiseArray.map(async (file) => {
868
+ // Check for British spellings in static HTML content
869
+ if (
870
+ !md_files_spellchecked[
871
+ file.relativePath.replace(`.${file.extension}`, "")
872
+ ]
873
+ ) {
874
+ const exclusions = await spellcheckContent(file, exclude_spellcheck);
875
+ if (gen_exclude && exclusions.length > 0)
876
+ excl_output.push({
877
+ document_path: file.relativePath.replace(
878
+ `.${file.extension}`,
879
+ "",
880
+ ),
881
+ words: exclusions,
882
+ });
883
+ }
884
+
885
+ const links = getLinks(file);
886
+ if (links.href.length === 0) {
887
+ messages[file.relativePath].push("No links found in file");
888
+ } else {
889
+ await checkLinks(source_path, file, links.href, hdocbook_config);
890
+ }
891
+ if (links.img.length === 0) {
892
+ messages[file.relativePath].push("No images found in file");
893
+ } else {
894
+ await checkImages(source_path, file, links.img);
895
+ }
896
+
897
+ // Check for multiple H1 tags
898
+ await checkTags(file);
899
+
900
+ // Build list content for Google
901
+ listContent += `/${file.relativePath.replace(path.extname(file.relativePath), "")}`;
902
+ listContent += "\r\n";
903
+ }),
904
+ );
905
+
906
+ if (gen_exclude) console.log(JSON.stringify(excl_output, null, 2));
907
+
908
+ try {
909
+ // Write list
910
+ const listFile = path.join(source_path, doc_id, "links.txt");
911
+ fs.writeFileSync(listFile, listContent);
912
+ console.log(`\r\nLink list text file created successfully: ${listFile}`);
913
+ } catch (err) {
914
+ console.error(err);
915
+ }
916
+
917
+ if (verbose) {
918
+ console.log("\r\n-------------");
919
+ console.log(" Verbose ");
920
+ console.log("-------------");
921
+ for (const key in messages) {
922
+ if (Object.hasOwn(messages, key) && messages[key].length > 0) {
923
+ console.log(`\r\nMessage output for ${key}`);
924
+ for (let i = 0; i < messages[key].length; i++) {
925
+ console.log(` - ${messages[key][i]}`);
926
+ }
927
+ }
928
+ }
929
+ }
930
+
931
+ console.log("\r\n-----------------------");
932
+ console.log(" Validation Output ");
933
+ console.log("-----------------------");
934
+ if (Object.keys(errors).length > 0) {
935
+ let error_count = 0;
936
+ for (const key in errors) {
937
+ if (Object.hasOwn(errors, key) && errors[key].length > 0) {
938
+ console.error(`\r\n${errors[key].length} error(s) in ${key}`);
939
+ for (let i = 0; i < errors[key].length; i++) {
940
+ console.error(` - ${errors[key][i]}`);
941
+ error_count++;
942
+ }
943
+ }
944
+ }
945
+ if (error_count > 0) {
946
+ console.error(`\r\n${error_count} Validation Errors Found`);
947
+ if (verbose) {
948
+ console.log("\n");
949
+ console.error(JSON.stringify(exclude_spellcheck_output, null, 2));
950
+ }
951
+ return false;
952
+ }
953
+ }
954
+
955
+ console.log("\r\nNo Validation Errors Found!\n");
956
+ return true;
957
+ };
958
+ })();