hdoc-tools 0.47.1 → 0.47.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -10
- package/hdoc-build-db.js +103 -116
- package/hdoc-build.js +43 -67
- package/hdoc-db.js +37 -15
- package/hdoc-module.js +81 -84
- package/hdoc-validate.js +84 -107
- package/npm-shrinkwrap.json +0 -41
- package/package.json +1 -2
package/README.md
CHANGED
|
@@ -35,7 +35,7 @@ Returns statistics regarding the book you are working on:
|
|
|
35
35
|
- Number of Markdown Files in the Book
|
|
36
36
|
- Number of Static HTML Files in the Book
|
|
37
37
|
|
|
38
|
-
If the
|
|
38
|
+
If the `-v` switch is provided, then more verbose output is produced, which includes a list of each MD and HTML file found, the file sizes, and file-specific word count.
|
|
39
39
|
|
|
40
40
|
The book statistics do not include counts for any externally hosted content injected into the book content using the [[INCLUDE]] tags.
|
|
41
41
|
|
|
@@ -43,25 +43,25 @@ The book statistics do not include counts for any externally hosted content inje
|
|
|
43
43
|
|
|
44
44
|
Performs a local build of the book, validates the links and static content are present and correct and outputs as a ZIP file.
|
|
45
45
|
|
|
46
|
-
If the
|
|
46
|
+
If the `-v` switch is provided, then a more verbose output is provided, which includes a list of all HTML files created and checked for embedded links and static content.
|
|
47
47
|
|
|
48
|
-
Use the
|
|
48
|
+
Use the `--set-version` argument to set the version number of the built book.
|
|
49
49
|
|
|
50
|
-
Use the
|
|
50
|
+
Use the `--no-color` argument to remove any color control characters from the output.
|
|
51
51
|
|
|
52
|
-
Use the
|
|
52
|
+
Use the `--no-links` argument to skip link output to CLI during validation.
|
|
53
53
|
|
|
54
54
|
### validate
|
|
55
55
|
|
|
56
56
|
Performs a minimum local build of the book, then validates the links and static content are present and correct.
|
|
57
57
|
|
|
58
|
-
If the
|
|
58
|
+
If the `-v` switch is provided, then a more verbose output is provided, which includes a list of all HTML files created and checked for embedded links and static content.
|
|
59
59
|
|
|
60
|
-
Use the
|
|
60
|
+
Use the `--set-version` argument to set the version number of the built book.
|
|
61
61
|
|
|
62
|
-
Use the
|
|
62
|
+
Use the `--no-color` argument to remove any color control characters from the output.
|
|
63
63
|
|
|
64
|
-
Use the
|
|
64
|
+
Use the `--no-links` argument to skip link output to CLI during validation.
|
|
65
65
|
|
|
66
66
|
### serve
|
|
67
67
|
|
|
@@ -72,4 +72,4 @@ command `hdoc serve` and in a local browser go to the URL `http://localhost:3000
|
|
|
72
72
|
|
|
73
73
|
## Installation
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
> `npm install hdoc-tools -g`
|
package/hdoc-build-db.js
CHANGED
|
@@ -87,41 +87,43 @@
|
|
|
87
87
|
return response;
|
|
88
88
|
};
|
|
89
89
|
|
|
90
|
-
exports.populate_redirects = (db, redirect_records,
|
|
90
|
+
exports.populate_redirects = (db, redirect_records, _verbose = false) => {
|
|
91
91
|
const response = {
|
|
92
92
|
success: true,
|
|
93
93
|
errors: [],
|
|
94
94
|
index_success_count: 0,
|
|
95
95
|
};
|
|
96
96
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
response.success = false;
|
|
111
|
-
response.errors.push(
|
|
112
|
-
`Redirect record creation failed - ${redirect_records[i].url}: ${index_response.error}`,
|
|
113
|
-
);
|
|
114
|
-
} else {
|
|
115
|
-
response.index_success_count++;
|
|
97
|
+
// Prepare once, insert all in one transaction
|
|
98
|
+
const cols = db_schema.hdoc_redirects.map(c => c.replace(/\b(UNINDEXED|INTEGER)\b/g, "").trim());
|
|
99
|
+
const stmt = db.prepare(`INSERT INTO hdoc_redirects (${cols.join(", ")}) VALUES (${cols.map(() => "?").join(", ")})`);
|
|
100
|
+
|
|
101
|
+
const run_all = db.transaction(() => {
|
|
102
|
+
for (const record of redirect_records) {
|
|
103
|
+
try {
|
|
104
|
+
stmt.run(record.url, record.location ? record.location : "", record.code);
|
|
105
|
+
response.index_success_count++;
|
|
106
|
+
} catch (e) {
|
|
107
|
+
response.success = false;
|
|
108
|
+
response.errors.push(`Redirect record creation failed - ${record.url}: ${e}`);
|
|
109
|
+
}
|
|
116
110
|
}
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
try {
|
|
114
|
+
run_all();
|
|
115
|
+
} catch (e) {
|
|
116
|
+
response.success = false;
|
|
117
|
+
response.errors.push(`Redirect index transaction failed: ${e}`);
|
|
117
118
|
}
|
|
119
|
+
|
|
118
120
|
console.log(
|
|
119
121
|
`\nRedirect Index Build Complete: ${response.index_success_count} document records created.`,
|
|
120
122
|
);
|
|
121
123
|
return response;
|
|
122
124
|
};
|
|
123
125
|
|
|
124
|
-
exports.populate_index =
|
|
126
|
+
exports.populate_index = (
|
|
125
127
|
db,
|
|
126
128
|
doc_id,
|
|
127
129
|
book_config,
|
|
@@ -136,124 +138,109 @@
|
|
|
136
138
|
|
|
137
139
|
if (!book_config.tags) book_config.tags = [];
|
|
138
140
|
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
141
|
+
// Build a prepared statement from a schema entry once, reusing it for every row.
|
|
142
|
+
// Previously insert_record() called db.prepare() on every single insert.
|
|
143
|
+
const make_stmt = (table) => {
|
|
144
|
+
const cols = db_schema[table].map(c => c.replace(/\b(UNINDEXED|INTEGER)\b/g, "").trim());
|
|
145
|
+
return db.prepare(`INSERT INTO ${table} (${cols.join(", ")}) VALUES (${cols.map(() => "?").join(", ")})`);
|
|
146
|
+
};
|
|
147
|
+
const stmt_index = make_stmt("hdoc_index");
|
|
148
|
+
const stmt_meta = make_stmt("hdoc_meta");
|
|
149
|
+
const stmt_contrib = make_stmt("hdoc_contributors");
|
|
150
|
+
|
|
151
|
+
// A single transaction batches all disk flushes into one — critical for
|
|
152
|
+
// FTS5 which otherwise re-indexes on every individual insert.
|
|
153
|
+
const run_all = db.transaction(() => {
|
|
154
|
+
let curr_file = "";
|
|
155
|
+
for (const file of index_records) {
|
|
146
156
|
let index_path_name = file.relative_path.replaceAll("\\", "/");
|
|
147
157
|
if (
|
|
148
158
|
index_path_name.endsWith("/index.md") ||
|
|
149
159
|
index_path_name.endsWith("/index.html") ||
|
|
150
160
|
index_path_name.endsWith("/index.htm")
|
|
151
161
|
) {
|
|
152
|
-
index_path_name = index_path_name.substring(
|
|
153
|
-
0,
|
|
154
|
-
index_path_name.lastIndexOf("/"),
|
|
155
|
-
);
|
|
162
|
+
index_path_name = index_path_name.substring(0, index_path_name.lastIndexOf("/"));
|
|
156
163
|
}
|
|
157
164
|
index_path_name = `/${index_path_name.replace(path.extname(file.relative_path), "")}`;
|
|
158
165
|
|
|
159
|
-
let
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
if (file.index_html.id !== null)
|
|
165
|
-
index_content_path += `#${file.index_html.id}`;
|
|
166
|
+
let inserted_row_id = null;
|
|
167
|
+
const index_content_path = file.index_html.id !== null
|
|
168
|
+
? `${index_path_name}#${file.index_html.id}`
|
|
169
|
+
: index_path_name;
|
|
170
|
+
|
|
166
171
|
if (!file.inline) {
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
172
|
+
try {
|
|
173
|
+
const info = stmt_index.run(
|
|
174
|
+
index_content_path,
|
|
175
|
+
doc_id,
|
|
176
|
+
book_config.audience.join(","),
|
|
177
|
+
book_config.tags.join(","),
|
|
178
|
+
file.index_html.fm_props.title,
|
|
179
|
+
file.index_html.text,
|
|
180
|
+
file.index_html.preview,
|
|
181
|
+
book_config.productFamily,
|
|
182
|
+
file.md5,
|
|
183
|
+
file.lastmod,
|
|
184
|
+
file.status,
|
|
185
|
+
file.keywords,
|
|
186
|
+
);
|
|
187
|
+
inserted_row_id = info.lastInsertRowid;
|
|
188
|
+
} catch (e) {
|
|
189
|
+
console.error(`Index record creation failed - ${doc_id}/${file.index_html.fm_props.title}: ${e}`);
|
|
190
|
+
continue;
|
|
191
|
+
}
|
|
187
192
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
curr_file = index_path_name;
|
|
195
|
-
// Now add metadata
|
|
196
|
-
const meta_vals = [
|
|
193
|
+
|
|
194
|
+
if (curr_file === index_path_name) continue;
|
|
195
|
+
curr_file = index_path_name;
|
|
196
|
+
|
|
197
|
+
try {
|
|
198
|
+
const meta_info = stmt_meta.run(
|
|
197
199
|
index_path_name,
|
|
198
200
|
doc_id,
|
|
199
201
|
file.metadata.contributor_count,
|
|
200
202
|
file.metadata.edit_url,
|
|
201
203
|
file.metadata.last_commit,
|
|
202
204
|
file.pdf_size,
|
|
203
|
-
];
|
|
204
|
-
const meta_response = await hdoc_index.insert_record(
|
|
205
|
-
db,
|
|
206
|
-
"hdoc_meta",
|
|
207
|
-
db_schema.hdoc_meta,
|
|
208
|
-
meta_vals,
|
|
209
205
|
);
|
|
210
|
-
if (
|
|
211
|
-
console.
|
|
212
|
-
|
|
206
|
+
if (verbose) {
|
|
207
|
+
console.log(`Inserted index record ${inserted_row_id}: ${doc_id} - ${file.index_html.fm_props.title}`);
|
|
208
|
+
console.log(`Inserted index metadata record for index ID: ${meta_info.lastInsertRowid}`);
|
|
209
|
+
}
|
|
210
|
+
} catch (e) {
|
|
211
|
+
console.error(`Index metadata record creation failed - ${doc_id}/${inserted_row_id}/${file.index_html.fm_props.title}: ${e}`);
|
|
212
|
+
continue;
|
|
213
|
+
}
|
|
214
|
+
|
|
215
|
+
for (const contrib of file.contributors) {
|
|
216
|
+
try {
|
|
217
|
+
const cont_info = stmt_contrib.run(
|
|
218
|
+
index_path_name,
|
|
219
|
+
doc_id,
|
|
220
|
+
contrib.login,
|
|
221
|
+
contrib.name,
|
|
222
|
+
contrib.avatar_url,
|
|
223
|
+
contrib.html_url,
|
|
213
224
|
);
|
|
214
|
-
} else {
|
|
215
225
|
if (verbose) {
|
|
216
|
-
console.log(
|
|
217
|
-
`Inserted index record ${index_response.row_id}: ${doc_id} - ${file.index_html.fm_props.title}`,
|
|
218
|
-
);
|
|
219
|
-
console.log(
|
|
220
|
-
`Inserted index metadata record for index ID: ${meta_response.row_id}`,
|
|
221
|
-
);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
// Now add contributor records
|
|
225
|
-
for (let j = 0; j < file.contributors.length; j++) {
|
|
226
|
-
const contrib_vals = [
|
|
227
|
-
index_path_name,
|
|
228
|
-
doc_id,
|
|
229
|
-
file.contributors[j].login,
|
|
230
|
-
file.contributors[j].name,
|
|
231
|
-
file.contributors[j].avatar_url,
|
|
232
|
-
file.contributors[j].html_url,
|
|
233
|
-
];
|
|
234
|
-
const cont_response = await hdoc_index.insert_record(
|
|
235
|
-
db,
|
|
236
|
-
"hdoc_contributors",
|
|
237
|
-
db_schema.hdoc_contributors,
|
|
238
|
-
contrib_vals,
|
|
239
|
-
);
|
|
240
|
-
if (!cont_response.success) {
|
|
241
|
-
console.error(
|
|
242
|
-
`Index document contributor record creation failed - ${doc_id}/${index_response.row_id}/${file.index_html.fm_props.title}: ${cont_response.error}`,
|
|
243
|
-
);
|
|
244
|
-
continue;
|
|
245
|
-
}
|
|
246
|
-
if (verbose) {
|
|
247
|
-
console.log(
|
|
248
|
-
`Inserted document contributor record ${cont_response.row_id}`,
|
|
249
|
-
);
|
|
250
|
-
}
|
|
226
|
+
console.log(`Inserted document contributor record ${cont_info.lastInsertRowid}`);
|
|
251
227
|
}
|
|
252
|
-
|
|
228
|
+
} catch (e) {
|
|
229
|
+
console.error(`Index document contributor record creation failed - ${doc_id}/${inserted_row_id}/${file.index_html.fm_props.title}: ${e}`);
|
|
253
230
|
}
|
|
254
231
|
}
|
|
255
|
-
|
|
256
|
-
|
|
232
|
+
|
|
233
|
+
response.index_success_count++;
|
|
234
|
+
}
|
|
235
|
+
});
|
|
236
|
+
|
|
237
|
+
try {
|
|
238
|
+
run_all();
|
|
239
|
+
} catch (e) {
|
|
240
|
+
response.error = e.message;
|
|
241
|
+
console.error(`Index build transaction failed: ${e}`);
|
|
242
|
+
return response;
|
|
243
|
+
}
|
|
257
244
|
|
|
258
245
|
response.success = true;
|
|
259
246
|
console.log(
|
package/hdoc-build.js
CHANGED
|
@@ -252,6 +252,11 @@
|
|
|
252
252
|
// Render markdown into HTML
|
|
253
253
|
html_txt = md.render(md_txt);
|
|
254
254
|
|
|
255
|
+
// Single pass: wrap h2/h3 divs + extract heading, paragraph, read-time.
|
|
256
|
+
// Replaces separate wrapHContent + getFirstHTMLHeading + get_html_read_time calls.
|
|
257
|
+
const extracted = hdoc.wrapAndExtract(html_txt, h_tags_to_search);
|
|
258
|
+
html_txt = extracted.html;
|
|
259
|
+
|
|
255
260
|
// Parse frontmatter properties from the YAML block
|
|
256
261
|
let fm_contains_title = false;
|
|
257
262
|
let fm_contains_reading_time = false;
|
|
@@ -297,17 +302,12 @@
|
|
|
297
302
|
|
|
298
303
|
// Title from heading if not in frontmatter
|
|
299
304
|
if (!fm_contains_title) {
|
|
300
|
-
|
|
301
|
-
html_txt,
|
|
302
|
-
h_tags_to_search,
|
|
303
|
-
);
|
|
304
|
-
|
|
305
|
-
if (html_heading?.[0]?.children?.[0]?.data) {
|
|
305
|
+
if (extracted.firstHeadingText) {
|
|
306
306
|
fm_headers.push({
|
|
307
307
|
id: "title",
|
|
308
|
-
value:
|
|
308
|
+
value: extracted.firstHeadingText,
|
|
309
309
|
});
|
|
310
|
-
doc_title =
|
|
310
|
+
doc_title = extracted.firstHeadingText;
|
|
311
311
|
} else if (
|
|
312
312
|
file_path.name !== "description_ext.md" &&
|
|
313
313
|
file_path.name !== "article_ext.md" &&
|
|
@@ -320,24 +320,19 @@
|
|
|
320
320
|
}
|
|
321
321
|
|
|
322
322
|
// Description from first paragraph if not in frontmatter
|
|
323
|
-
if (!fm_contains_description) {
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
value:
|
|
329
|
-
`${doc_title}: ${html_p_tag[0].children[0].data.split(".")[0]}.`.trim(),
|
|
330
|
-
});
|
|
331
|
-
}
|
|
323
|
+
if (!fm_contains_description && extracted.firstParagraphText) {
|
|
324
|
+
fm_headers.push({
|
|
325
|
+
id: "description",
|
|
326
|
+
value: `${doc_title}: ${extracted.firstParagraphText.split(".")[0]}.`.trim(),
|
|
327
|
+
});
|
|
332
328
|
}
|
|
333
329
|
|
|
334
330
|
// Reading time from content if not in frontmatter
|
|
335
331
|
if (!fm_contains_reading_time) {
|
|
336
|
-
|
|
337
|
-
book_read_time += read_time_mins;
|
|
332
|
+
book_read_time += extracted.readTimeMins;
|
|
338
333
|
fm_headers.push({
|
|
339
334
|
id: "reading-time",
|
|
340
|
-
value:
|
|
335
|
+
value: extracted.readTimeMins,
|
|
341
336
|
});
|
|
342
337
|
}
|
|
343
338
|
} else {
|
|
@@ -348,6 +343,12 @@
|
|
|
348
343
|
// Check if we have a frontmatter comment
|
|
349
344
|
html_fm = hdoc.getHTMLFrontmatterHeader(html_txt);
|
|
350
345
|
|
|
346
|
+
// Single pass: wrap h2/h3 divs + extract heading, paragraph, read-time.
|
|
347
|
+
// Must run after getHTMLFrontmatterHeader (which reads the top-level comment)
|
|
348
|
+
// but before any per-field extraction; the resulting html replaces html_txt.
|
|
349
|
+
const extracted = hdoc.wrapAndExtract(html_txt, h_tags_to_search);
|
|
350
|
+
html_txt = extracted.html;
|
|
351
|
+
|
|
351
352
|
if (Object.keys(html_fm.fm_properties).length > 0) {
|
|
352
353
|
existing_fm_headers = true;
|
|
353
354
|
|
|
@@ -374,9 +375,8 @@
|
|
|
374
375
|
|
|
375
376
|
// Is reading-time in the fm headers?
|
|
376
377
|
if (html_fm.fm_properties["reading-time"] === undefined) {
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
html_fm.fm_properties["reading-time"] = read_time_mins;
|
|
378
|
+
book_read_time += extracted.readTimeMins;
|
|
379
|
+
html_fm.fm_properties["reading-time"] = extracted.readTimeMins;
|
|
380
380
|
}
|
|
381
381
|
|
|
382
382
|
for (const key in html_fm.fm_properties) {
|
|
@@ -397,21 +397,13 @@
|
|
|
397
397
|
file_path.name !== "article_ext.md" &&
|
|
398
398
|
file_path.name !== "internal_ext.md"
|
|
399
399
|
) {
|
|
400
|
-
|
|
401
|
-
const html_heading = hdoc.getFirstHTMLHeading(
|
|
402
|
-
html_txt,
|
|
403
|
-
h_tags_to_search,
|
|
404
|
-
);
|
|
405
|
-
|
|
406
|
-
if (html_heading?.[0]?.children?.[0]?.data) {
|
|
407
|
-
// We've found a heading tag, add that as a title to the existing frontmatter properties
|
|
400
|
+
if (extracted.firstHeadingText) {
|
|
408
401
|
fm_headers.push({
|
|
409
402
|
id: "title",
|
|
410
|
-
value:
|
|
403
|
+
value: extracted.firstHeadingText,
|
|
411
404
|
});
|
|
412
|
-
doc_title =
|
|
405
|
+
doc_title = extracted.firstHeadingText;
|
|
413
406
|
} else {
|
|
414
|
-
// No header tag, no frontmatter title, output a warning
|
|
415
407
|
console.info(
|
|
416
408
|
`[WARNING] No frontmatter title property, or ${h_tags_to_search.join(
|
|
417
409
|
", ",
|
|
@@ -426,12 +418,10 @@
|
|
|
426
418
|
html_fm.fm_properties.description !== undefined
|
|
427
419
|
) {
|
|
428
420
|
if (html_fm.fm_properties.description === "") {
|
|
429
|
-
|
|
430
|
-
if (html_p_tag?.[0]?.children?.[0]?.data) {
|
|
421
|
+
if (extracted.firstParagraphText) {
|
|
431
422
|
fm_headers.push({
|
|
432
423
|
id: "description",
|
|
433
|
-
value:
|
|
434
|
-
`${doc_title}: ${html_p_tag[0].children[0].data.split(".")[0]}.`.trim(),
|
|
424
|
+
value: `${doc_title}: ${extracted.firstParagraphText.split(".")[0]}.`.trim(),
|
|
435
425
|
});
|
|
436
426
|
}
|
|
437
427
|
} else {
|
|
@@ -440,30 +430,22 @@
|
|
|
440
430
|
value: html_fm.fm_properties.description.trim(),
|
|
441
431
|
});
|
|
442
432
|
}
|
|
443
|
-
} else {
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
value:
|
|
449
|
-
`${doc_title}: ${html_p_tag[0].children[0].data.split(".")[0]}.`.trim(),
|
|
450
|
-
});
|
|
451
|
-
}
|
|
433
|
+
} else if (extracted.firstParagraphText) {
|
|
434
|
+
fm_headers.push({
|
|
435
|
+
id: "description",
|
|
436
|
+
value: `${doc_title}: ${extracted.firstParagraphText.split(".")[0]}.`.trim(),
|
|
437
|
+
});
|
|
452
438
|
}
|
|
453
439
|
} else {
|
|
454
440
|
// We have no frontmatter headers, get and build one from the html headings
|
|
455
|
-
const html_heading = hdoc.getFirstHTMLHeading(
|
|
456
|
-
html_txt,
|
|
457
|
-
h_tags_to_search,
|
|
458
|
-
);
|
|
459
441
|
let doc_title_local = "";
|
|
460
442
|
// Add the title
|
|
461
|
-
if (
|
|
443
|
+
if (extracted.firstHeadingText) {
|
|
462
444
|
fm_headers.push({
|
|
463
445
|
id: "title",
|
|
464
|
-
value:
|
|
446
|
+
value: extracted.firstHeadingText,
|
|
465
447
|
});
|
|
466
|
-
doc_title_local =
|
|
448
|
+
doc_title_local = extracted.firstHeadingText;
|
|
467
449
|
doc_title = doc_title_local;
|
|
468
450
|
} else if (
|
|
469
451
|
file_path.name !== "description_ext.md" &&
|
|
@@ -478,19 +460,16 @@
|
|
|
478
460
|
}
|
|
479
461
|
|
|
480
462
|
// Add the reading time
|
|
481
|
-
|
|
482
|
-
book_read_time += read_time_mins;
|
|
463
|
+
book_read_time += extracted.readTimeMins;
|
|
483
464
|
fm_headers.push({
|
|
484
465
|
id: "reading-time",
|
|
485
|
-
value:
|
|
466
|
+
value: extracted.readTimeMins,
|
|
486
467
|
});
|
|
487
468
|
|
|
488
|
-
|
|
489
|
-
if (html_p_tag?.[0]?.children?.[0]?.data) {
|
|
469
|
+
if (extracted.firstParagraphText) {
|
|
490
470
|
fm_headers.push({
|
|
491
471
|
id: "description",
|
|
492
|
-
value:
|
|
493
|
-
`${doc_title_local}: ${html_p_tag[0].children[0].data.split(".")[0]}.`.trim(),
|
|
472
|
+
value: `${doc_title_local}: ${extracted.firstParagraphText.split(".")[0]}.`.trim(),
|
|
494
473
|
});
|
|
495
474
|
}
|
|
496
475
|
}
|
|
@@ -675,9 +654,6 @@
|
|
|
675
654
|
}
|
|
676
655
|
if (pdf_size > 0) pdf_created++;
|
|
677
656
|
|
|
678
|
-
// Wrap h2 and h3 tags, plus content, in id'd divs
|
|
679
|
-
html_txt = hdoc.wrapHContent(html_txt);
|
|
680
|
-
|
|
681
657
|
if (inline_content) html_txt = `${fm_header_str}\n${html_txt}`;
|
|
682
658
|
else html_txt = `${fm_header_str}\n${doc_header}\n${html_txt}`;
|
|
683
659
|
|
|
@@ -732,7 +708,7 @@
|
|
|
732
708
|
|
|
733
709
|
const tidy_code_tags = (markdown, file) => {
|
|
734
710
|
let clean_markdown = markdown;
|
|
735
|
-
const json_to_tidy = clean_markdown.match(/```json[\r\n]
|
|
711
|
+
const json_to_tidy = clean_markdown.match(/```json[\r\n][\s\S]*?```/g);
|
|
736
712
|
if (json_to_tidy && json_to_tidy.length > 0) {
|
|
737
713
|
for (let i = 0; i < json_to_tidy.length; i++) {
|
|
738
714
|
if (json_to_tidy[i] !== "") {
|
|
@@ -754,7 +730,7 @@
|
|
|
754
730
|
}
|
|
755
731
|
}
|
|
756
732
|
|
|
757
|
-
const xml_to_tidy = clean_markdown.match(/```xml[\r\n]
|
|
733
|
+
const xml_to_tidy = clean_markdown.match(/```xml[\r\n][\s\S]*?```/g);
|
|
758
734
|
if (xml_to_tidy && xml_to_tidy.length > 0) {
|
|
759
735
|
for (let i = 0; i < xml_to_tidy.length; i++) {
|
|
760
736
|
if (xml_to_tidy[i] !== "") {
|
|
@@ -1357,7 +1333,7 @@
|
|
|
1357
1333
|
process.exit(1);
|
|
1358
1334
|
}
|
|
1359
1335
|
// Populate primary index tables
|
|
1360
|
-
const index =
|
|
1336
|
+
const index = hdoc_build_db.populate_index(
|
|
1361
1337
|
db.db,
|
|
1362
1338
|
doc_id,
|
|
1363
1339
|
hdocbook_config,
|
package/hdoc-db.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
(() => {
|
|
2
|
+
const cheerio = require("cheerio");
|
|
2
3
|
const path = require("node:path");
|
|
3
4
|
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
|
4
5
|
|
|
@@ -63,23 +64,44 @@
|
|
|
63
64
|
sections: [],
|
|
64
65
|
};
|
|
65
66
|
|
|
66
|
-
//
|
|
67
|
-
|
|
68
|
-
|
|
67
|
+
// Single parse covers frontmatter extraction, full-text, and preview —
|
|
68
|
+
// previously three separate cheerio.load() calls.
|
|
69
|
+
const $ = cheerio.load(html_txt, { decodeEntities: false });
|
|
69
70
|
|
|
70
|
-
//
|
|
71
|
-
|
|
71
|
+
// Extract frontmatter properties from the leading HTML comment
|
|
72
|
+
if ($._root?.children && Array.isArray($._root.children)) {
|
|
73
|
+
for (const child of $._root.children) {
|
|
74
|
+
if (child.type === "comment" && child.data?.startsWith("[[FRONTMATTER")) {
|
|
75
|
+
for (const line of child.data.split(/\r?\n/)) {
|
|
76
|
+
if (line.includes(":")) {
|
|
77
|
+
const parts = line.split(/:(.*)/s);
|
|
78
|
+
if (parts.length > 1) {
|
|
79
|
+
const key = parts[0].trim().toLowerCase();
|
|
80
|
+
let val = parts[1].trim();
|
|
81
|
+
if (/^".*"$/.test(val)) val = val.slice(1, -1);
|
|
82
|
+
if (key === "title") {
|
|
83
|
+
val = val.replace(
|
|
84
|
+
/&|<|>|"|'|'|&#(\d+);|&#x([0-9a-fA-F]+);/g,
|
|
85
|
+
(m, dec, hex) => dec ? String.fromCharCode(+dec) : hex ? String.fromCharCode(parseInt(hex, 16)) : ({ "&": "&", "<": "<", ">": ">", """: '"', "'": "'", "'": "'" })[m],
|
|
86
|
+
);
|
|
87
|
+
}
|
|
88
|
+
response.fm_props[key] = val;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
break;
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Full-document plain text for search indexing
|
|
98
|
+
const text = $("body").text();
|
|
99
|
+
|
|
100
|
+
// Preview: first paragraph texts joined, then truncated
|
|
101
|
+
let preview = $("p").map((_i, el) => $(el).text()).get().join("\n");
|
|
102
|
+
preview = hdoc.truncate_string(preview, 200, true).replace(/(?:\r\n|\r|\n)/g, " ");
|
|
72
103
|
|
|
73
|
-
|
|
74
|
-
let preview = hdoc.html_to_text(html_txt, { baseElement: "p" });
|
|
75
|
-
preview = hdoc
|
|
76
|
-
.truncate_string(preview, 200, true)
|
|
77
|
-
.replace(/(?:\r\n|\r|\n)/g, " ");
|
|
78
|
-
response.sections.push({
|
|
79
|
-
text: response.text,
|
|
80
|
-
preview: preview,
|
|
81
|
-
});
|
|
82
|
-
//}
|
|
104
|
+
response.sections.push({ text, preview });
|
|
83
105
|
return response;
|
|
84
106
|
};
|
|
85
107
|
})();
|
package/hdoc-module.js
CHANGED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
const crypto = require("node:crypto");
|
|
4
4
|
const fs = require("node:fs");
|
|
5
5
|
const os = require("node:os");
|
|
6
|
-
const { JSDOM } = require("jsdom");
|
|
7
6
|
const path = require("node:path");
|
|
8
7
|
|
|
9
8
|
const includesCache = {};
|
|
@@ -221,15 +220,9 @@
|
|
|
221
220
|
// Looks for h1 tags first, then hX, hY, hZ in order
|
|
222
221
|
exports.getFirstHTMLHeading = (html_body, h_to_search = ["h1"]) => {
|
|
223
222
|
const $ = cheerio.load(html_body);
|
|
224
|
-
for (
|
|
225
|
-
const
|
|
226
|
-
|
|
227
|
-
return $(this);
|
|
228
|
-
})
|
|
229
|
-
.get();
|
|
230
|
-
if (heading.length > 0) {
|
|
231
|
-
return heading[0];
|
|
232
|
-
}
|
|
223
|
+
for (const tag of h_to_search) {
|
|
224
|
+
const el = $(tag).first();
|
|
225
|
+
if (el.length > 0) return el;
|
|
233
226
|
}
|
|
234
227
|
return false;
|
|
235
228
|
};
|
|
@@ -245,82 +238,89 @@
|
|
|
245
238
|
|
|
246
239
|
// Processes HTML, wraps h2 and h3 tags and their content in divs with an id matching that of the h text
|
|
247
240
|
exports.wrapHContent = (htmlContent) => {
|
|
248
|
-
const
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
if (
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
currentH2Div.id = makeAnchorIdFriendly(node.textContent.trim());
|
|
270
|
-
currentH2Div.appendChild(node);
|
|
271
|
-
} else if (node.tagName.toLowerCase() === "h3") {
|
|
272
|
-
// When an <h3> is found, close the current <h3> div (if any) and start a new one
|
|
273
|
-
if (currentH3Div) {
|
|
274
|
-
if (currentH2Div) {
|
|
275
|
-
currentH2Div.appendChild(currentH3Div);
|
|
276
|
-
} else {
|
|
277
|
-
newContent.appendChild(currentH3Div);
|
|
278
|
-
}
|
|
279
|
-
}
|
|
280
|
-
currentH3Div = document.createElement("div");
|
|
281
|
-
currentH3Div.id = makeAnchorIdFriendly(node.textContent.trim());
|
|
282
|
-
currentH3Div.appendChild(node);
|
|
283
|
-
} else {
|
|
284
|
-
if (currentH3Div) {
|
|
285
|
-
currentH3Div.appendChild(node);
|
|
286
|
-
} else if (currentH2Div) {
|
|
287
|
-
currentH2Div.appendChild(node);
|
|
288
|
-
} else {
|
|
289
|
-
newContent.appendChild(node);
|
|
290
|
-
}
|
|
291
|
-
}
|
|
241
|
+
const $ = cheerio.load(htmlContent, { decodeEntities: false });
|
|
242
|
+
let result = '';
|
|
243
|
+
let inH2 = false;
|
|
244
|
+
let inH3 = false;
|
|
245
|
+
|
|
246
|
+
$('body').contents().each(function() {
|
|
247
|
+
const tagName = this.type === 'tag' ? this.name?.toLowerCase() : null;
|
|
248
|
+
|
|
249
|
+
if (tagName === 'h2') {
|
|
250
|
+
// Close open h3 (nested inside h2), then close h2
|
|
251
|
+
if (inH3) { result += '</div>'; inH3 = false; }
|
|
252
|
+
if (inH2) { result += '</div>'; inH2 = false; }
|
|
253
|
+
const anchorId = makeAnchorIdFriendly($(this).text().trim());
|
|
254
|
+
result += `<div id="${anchorId}">${$.html(this)}`;
|
|
255
|
+
inH2 = true;
|
|
256
|
+
} else if (tagName === 'h3') {
|
|
257
|
+
// Close previous h3 (it stays nested inside any open h2)
|
|
258
|
+
if (inH3) { result += '</div>'; inH3 = false; }
|
|
259
|
+
const anchorId = makeAnchorIdFriendly($(this).text().trim());
|
|
260
|
+
result += `<div id="${anchorId}">${$.html(this)}`;
|
|
261
|
+
inH3 = true;
|
|
292
262
|
} else {
|
|
293
|
-
|
|
294
|
-
currentH3Div.appendChild(node);
|
|
295
|
-
} else if (currentH2Div) {
|
|
296
|
-
currentH2Div.appendChild(node);
|
|
297
|
-
} else {
|
|
298
|
-
newContent.appendChild(node);
|
|
299
|
-
}
|
|
263
|
+
result += $.html(this);
|
|
300
264
|
}
|
|
301
|
-
}
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
// Flush remaining open divs — h3 is nested inside h2 so close inner first
|
|
268
|
+
if (inH3) result += '</div>';
|
|
269
|
+
if (inH2) result += '</div>';
|
|
270
|
+
|
|
271
|
+
return `<html><head></head><body>${result}</body></html>`;
|
|
272
|
+
};
|
|
302
273
|
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
274
|
+
// Combined single-pass version of wrapHContent + getFirstHTMLHeading + get_html_read_time.
|
|
275
|
+
// Iterates body contents once to wrap h2/h3 divs AND extract the first matching heading text,
|
|
276
|
+
// first paragraph text, and reading-time estimate — avoiding 3 extra cheerio.load() calls.
|
|
277
|
+
exports.wrapAndExtract = (htmlContent, h_tags_to_search = ["h1"]) => {
|
|
278
|
+
const $ = cheerio.load(htmlContent, { decodeEntities: false });
|
|
279
|
+
let result = '';
|
|
280
|
+
let inH2 = false;
|
|
281
|
+
let inH3 = false;
|
|
282
|
+
let firstHeadingText = null;
|
|
283
|
+
let firstParagraphText = null;
|
|
284
|
+
|
|
285
|
+
$('body').contents().each(function() {
|
|
286
|
+
const tagName = this.type === 'tag' ? this.name?.toLowerCase() : null;
|
|
287
|
+
const text = tagName ? $(this).text().trim() : null;
|
|
288
|
+
|
|
289
|
+
if (firstHeadingText === null && tagName && h_tags_to_search.includes(tagName)) {
|
|
290
|
+
firstHeadingText = text;
|
|
291
|
+
}
|
|
292
|
+
if (firstParagraphText === null && tagName === 'p') {
|
|
293
|
+
firstParagraphText = text;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
if (tagName === 'h2') {
|
|
297
|
+
if (inH3) { result += '</div>'; inH3 = false; }
|
|
298
|
+
if (inH2) { result += '</div>'; inH2 = false; }
|
|
299
|
+
result += `<div id="${makeAnchorIdFriendly(text)}">${$.html(this)}`;
|
|
300
|
+
inH2 = true;
|
|
301
|
+
} else if (tagName === 'h3') {
|
|
302
|
+
if (inH3) { result += '</div>'; inH3 = false; }
|
|
303
|
+
result += `<div id="${makeAnchorIdFriendly(text)}">${$.html(this)}`;
|
|
304
|
+
inH3 = true;
|
|
307
305
|
} else {
|
|
308
|
-
|
|
306
|
+
result += $.html(this);
|
|
309
307
|
}
|
|
310
|
-
}
|
|
308
|
+
});
|
|
311
309
|
|
|
312
|
-
|
|
313
|
-
if (
|
|
314
|
-
newContent.appendChild(currentH2Div);
|
|
315
|
-
}
|
|
310
|
+
if (inH3) result += '</div>';
|
|
311
|
+
if (inH2) result += '</div>';
|
|
316
312
|
|
|
317
|
-
//
|
|
318
|
-
|
|
319
|
-
|
|
313
|
+
// Word count re-uses the already-parsed DOM — no extra cheerio.load()
|
|
314
|
+
const bodyText = $("body").text();
|
|
315
|
+
const wordCount = bodyText.trim().split(/\s+/).filter(Boolean).length;
|
|
316
|
+
const readTimeMins = wordCount === 0 ? 0 : (Math.round(wordCount / 200) || 1);
|
|
320
317
|
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
318
|
+
return {
|
|
319
|
+
html: `<html><head></head><body>${result}</body></html>`,
|
|
320
|
+
firstHeadingText,
|
|
321
|
+
firstParagraphText,
|
|
322
|
+
readTimeMins,
|
|
323
|
+
};
|
|
324
324
|
};
|
|
325
325
|
|
|
326
326
|
exports.getIDDivs = (html_body) => {
|
|
@@ -407,14 +407,11 @@
|
|
|
407
407
|
};
|
|
408
408
|
|
|
409
409
|
exports.html_to_text = (html, { baseElement } = {}) => {
|
|
410
|
-
const
|
|
411
|
-
const document = dom.window.document;
|
|
410
|
+
const $ = cheerio.load(html, { decodeEntities: false });
|
|
412
411
|
if (baseElement) {
|
|
413
|
-
return
|
|
414
|
-
.map((el) => el.textContent)
|
|
415
|
-
.join("\n");
|
|
412
|
+
return $(baseElement).map((_i, el) => $(el).text()).get().join("\n");
|
|
416
413
|
}
|
|
417
|
-
return
|
|
414
|
+
return $("body").text();
|
|
418
415
|
};
|
|
419
416
|
|
|
420
417
|
exports.get_html_read_time = (html) => {
|
package/hdoc-validate.js
CHANGED
|
@@ -8,7 +8,6 @@ const { error } = require("node:console");
|
|
|
8
8
|
const path = require("node:path");
|
|
9
9
|
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
|
10
10
|
const translator = require("american-british-english-translator");
|
|
11
|
-
const puppeteer = require("puppeteer");
|
|
12
11
|
|
|
13
12
|
const spellcheck_options = {
|
|
14
13
|
british: true,
|
|
@@ -26,10 +25,11 @@ const { error } = require("node:console");
|
|
|
26
25
|
let private_repo = false;
|
|
27
26
|
let redirects = {};
|
|
28
27
|
let skip_link_file = '';
|
|
28
|
+
let _on_int_net_cached = null; // null = not yet checked; cached after first DNS lookup
|
|
29
29
|
const exclude_h1_count = {};
|
|
30
30
|
const exclude_spellcheck_output = [];
|
|
31
31
|
|
|
32
|
-
const excludeLink =
|
|
32
|
+
const excludeLink = (url) => {
|
|
33
33
|
if (exclude_links[url]) return true;
|
|
34
34
|
for (let key in exclude_links) {
|
|
35
35
|
if (Object.hasOwn(exclude_links, key)) {
|
|
@@ -465,18 +465,46 @@ const { error } = require("node:console");
|
|
|
465
465
|
return returnPaths;
|
|
466
466
|
}
|
|
467
467
|
|
|
468
|
-
const
|
|
468
|
+
const _fetch_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' };
|
|
469
|
+
|
|
470
|
+
// Checks a single external URL by sending a HEAD request (falling back to GET
|
|
471
|
+
// if the server returns 405 Method Not Allowed). Returns the HTTP status code.
|
|
472
|
+
const fetchExternalLinkStatus = async (url) => {
|
|
473
|
+
const resp = await fetch(url, { method: 'HEAD', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
|
|
474
|
+
if (resp.status === 405) {
|
|
475
|
+
const getResp = await fetch(url, { method: 'GET', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
|
|
476
|
+
return getResp.status;
|
|
477
|
+
}
|
|
478
|
+
return resp.status;
|
|
479
|
+
};
|
|
480
|
+
|
|
481
|
+
const checkLinks = async (source_path, htmlFile, links, hdocbook_config, hdocbook_project, global_links_checked, output_links) => {
|
|
469
482
|
const markdown_paths = getMDPathFromHtmlPath(htmlFile);
|
|
470
483
|
const markdown_content = fs.readFileSync(markdown_paths.markdownPath, 'utf8');
|
|
471
484
|
|
|
472
|
-
|
|
485
|
+
// Resolve the "are we on the internal network?" question once per process
|
|
486
|
+
// rather than once per internal.hornbill.com link.
|
|
487
|
+
const ensureIntNetCached = async () => {
|
|
488
|
+
if (_on_int_net_cached === null) {
|
|
489
|
+
try {
|
|
490
|
+
_on_int_net_cached = await checkHostExistsInDNS('docs-internal.hornbill.com');
|
|
491
|
+
} catch (_e) {
|
|
492
|
+
_on_int_net_cached = false;
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
return _on_int_net_cached;
|
|
496
|
+
};
|
|
497
|
+
|
|
498
|
+
// Collect external links that need an HTTP check so they can be run
|
|
499
|
+
// concurrently rather than one-at-a-time.
|
|
500
|
+
const externalChecks = [];
|
|
501
|
+
|
|
473
502
|
for (let i = 0; i < links.length; i++) {
|
|
474
|
-
// Validate that link is a valid URL first
|
|
475
503
|
if (output_links) console.log(` - ${links[i]}`);
|
|
476
504
|
if (exclude_links[links[i]]) continue;
|
|
477
505
|
if (global_links_checked.includes(links[i])) continue;
|
|
478
506
|
global_links_checked.push(links[i]);
|
|
479
|
-
|
|
507
|
+
|
|
480
508
|
const valid_url = hdoc.valid_url(links[i]);
|
|
481
509
|
if (!valid_url) {
|
|
482
510
|
// Could be a relative path, check
|
|
@@ -509,7 +537,7 @@ const { error } = require("node:console");
|
|
|
509
537
|
messages[htmlFile.relativePath].push(
|
|
510
538
|
`Link is a properly formatted external URL: ${links[i]}`,
|
|
511
539
|
);
|
|
512
|
-
|
|
540
|
+
|
|
513
541
|
// Skip if it's the auto-generated edit url, as these could be part of a private repo which would return a 404
|
|
514
542
|
if (
|
|
515
543
|
hdocbook_config.publicSource !== undefined &&
|
|
@@ -524,38 +552,14 @@ const { error } = require("node:console");
|
|
|
524
552
|
fs.appendFileSync(skip_link_file, `${links[i]}\n`);
|
|
525
553
|
continue;
|
|
526
554
|
}
|
|
527
|
-
|
|
555
|
+
|
|
528
556
|
if (valid_url.protocol === "mailto:") {
|
|
529
557
|
fs.appendFileSync(skip_link_file, `${links[i]}\n`);
|
|
530
558
|
continue;
|
|
531
559
|
}
|
|
532
560
|
|
|
533
|
-
// Skip internal.hornbill.com link validation if run outside of the Hornbill network
|
|
534
|
-
if (links[i].toLowerCase().includes("internal.hornbill.com")) {
|
|
535
|
-
// DNS lookup internal docs endpoint
|
|
536
|
-
const hostname = 'docs-internal.hornbill.com';
|
|
537
|
-
let on_int_net = false;
|
|
538
|
-
try {
|
|
539
|
-
on_int_net = await checkHostExistsInDNS(hostname);
|
|
540
|
-
} catch (e) {
|
|
541
|
-
// Don't need to do anything here
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
if (!on_int_net) {
|
|
545
|
-
messages[htmlFile.relativePath].push(
|
|
546
|
-
`Outside of Hornbill network - skipping internal link validation for: ${links[i]}`,
|
|
547
|
-
);
|
|
548
|
-
fs.appendFileSync(skip_link_file, `${links[i]}\n`);
|
|
549
|
-
continue;
|
|
550
|
-
}
|
|
551
|
-
messages[htmlFile.relativePath].push(
|
|
552
|
-
`Inside of Hornbill network - performing internal link validation for: ${links[i]}`,
|
|
553
|
-
);
|
|
554
|
-
}
|
|
555
|
-
|
|
556
561
|
// Skip if the link is excluded in the project config
|
|
557
|
-
|
|
558
|
-
if (skip_link) {
|
|
562
|
+
if (excludeLink(links[i])) {
|
|
559
563
|
messages[htmlFile.relativePath].push(
|
|
560
564
|
`Skipping link validation for: ${links[i]}`,
|
|
561
565
|
);
|
|
@@ -563,99 +567,76 @@ const { error } = require("node:console");
|
|
|
563
567
|
}
|
|
564
568
|
|
|
565
569
|
if (
|
|
566
|
-
(links[i].toLowerCase().includes("docs.hornbill.com") ||
|
|
570
|
+
(links[i].toLowerCase().includes("docs.hornbill.com") ||
|
|
567
571
|
links[i].toLowerCase().includes("docs-internal.hornbill.com")) &&
|
|
568
572
|
!markdown_paths.relativePath.includes('/_inline/')
|
|
569
573
|
) {
|
|
570
574
|
const error_message = processErrorMessage(`Hornbill Docs links should not be fully-qualified: ${links[i]}`, markdown_paths.relativePath, markdown_content, links[i]);
|
|
571
|
-
errors[htmlFile.relativePath].push(
|
|
575
|
+
errors[htmlFile.relativePath].push(error_message);
|
|
572
576
|
continue;
|
|
573
577
|
}
|
|
574
578
|
|
|
575
|
-
if (
|
|
579
|
+
if (
|
|
576
580
|
links[i].toLowerCase().includes("docs-internal.hornbill.com") &&
|
|
577
581
|
markdown_paths.relativePath.includes('/_inline/') &&
|
|
578
582
|
!private_repo
|
|
579
583
|
) {
|
|
580
584
|
// Is the parent book in a public repo? If so, flag this as an error.
|
|
581
585
|
const error_message = processErrorMessage(`Hornbill docs-internal links should not be used in public book inline content: ${links[i]}`, markdown_paths.relativePath, markdown_content, links[i]);
|
|
582
|
-
errors[htmlFile.relativePath].push(
|
|
586
|
+
errors[htmlFile.relativePath].push(error_message);
|
|
583
587
|
continue;
|
|
584
588
|
}
|
|
585
589
|
|
|
586
|
-
//
|
|
587
|
-
const
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
590
|
+
// Capture url in closure for the async check below
|
|
591
|
+
const url = links[i];
|
|
592
|
+
const isInternal = url.toLowerCase().includes("internal.hornbill.com");
|
|
593
|
+
|
|
594
|
+
externalChecks.push(async () => {
|
|
595
|
+
// For internal.hornbill.com links, check network reachability first (result cached)
|
|
596
|
+
if (isInternal) {
|
|
597
|
+
const on_int_net = await ensureIntNetCached();
|
|
598
|
+
if (!on_int_net) {
|
|
599
|
+
messages[htmlFile.relativePath].push(
|
|
600
|
+
`Outside of Hornbill network - skipping internal link validation for: ${url}`,
|
|
601
|
+
);
|
|
602
|
+
fs.appendFileSync(skip_link_file, `${url}\n`);
|
|
603
|
+
return;
|
|
604
|
+
}
|
|
605
|
+
messages[htmlFile.relativePath].push(
|
|
606
|
+
`Inside of Hornbill network - performing internal link validation for: ${url}`,
|
|
607
|
+
);
|
|
608
|
+
}
|
|
592
609
|
|
|
593
610
|
try {
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
if (request.isNavigationRequest() && request.redirectChain().length) {
|
|
599
|
-
redirectChain = request.redirectChain().map((req) => req.url());
|
|
600
|
-
}
|
|
601
|
-
});
|
|
602
|
-
|
|
603
|
-
// Capture the response
|
|
604
|
-
page.on('response', (res) => {
|
|
605
|
-
const chain = res.request().redirectChain();
|
|
606
|
-
if (chain.length > 0) {
|
|
607
|
-
redirectChain = chain.map((req) => req.url());
|
|
608
|
-
lastRedirectStatus = res.status(); // Status of the last redirect
|
|
609
|
-
}
|
|
610
|
-
});
|
|
611
|
-
|
|
612
|
-
// Try loading the URL
|
|
613
|
-
response = await page.goto(links[i], { waitUntil: 'networkidle2', timeout: 10000 });
|
|
614
|
-
|
|
615
|
-
if (response) {
|
|
616
|
-
let status = response.status();
|
|
617
|
-
const contentType = response.headers()['content-type'];
|
|
618
|
-
|
|
619
|
-
// If it's a PDF switch to direct fetching
|
|
620
|
-
if (contentType && contentType.includes('application/')) {
|
|
621
|
-
status = await page.evaluate(async (url) => {
|
|
622
|
-
const res = await fetch(url, { method: 'HEAD' });
|
|
623
|
-
return res.status;
|
|
624
|
-
}, links[i]);
|
|
625
|
-
}
|
|
626
|
-
if ((status < 200 || status > 299) && status !== 304) {
|
|
627
|
-
if (process.env.GITHUB_ACTIONS === 'true' && status === 403 && links[i].includes(".hornbill.com")) {
|
|
628
|
-
// STEVEG - do nothing here, as it always returns a 403 for Hornbill sites when accessing through GitHub Actions
|
|
629
|
-
// Works totally fine locally or in hdocpub, still trying to work out what's causing this in GitHub
|
|
630
|
-
} else {
|
|
631
|
-
throw `Unexpected Status Returned: ${status}`;
|
|
632
|
-
}
|
|
611
|
+
const status = await fetchExternalLinkStatus(url);
|
|
612
|
+
if ((status < 200 || status > 299) && status !== 304) {
|
|
613
|
+
if (process.env.GITHUB_ACTIONS === 'true' && status === 403 && url.includes(".hornbill.com")) {
|
|
614
|
+
// Always returns 403 for Hornbill sites through GitHub Actions — not a real error
|
|
633
615
|
} else {
|
|
634
|
-
|
|
616
|
+
throw `Unexpected Status Returned: ${status}`;
|
|
635
617
|
}
|
|
636
618
|
} else {
|
|
637
|
-
|
|
619
|
+
fs.appendFileSync(skip_link_file, `${url}\n`);
|
|
638
620
|
}
|
|
639
|
-
} catch (
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
621
|
+
} catch (e) {
|
|
622
|
+
let error_message;
|
|
623
|
+
if (e instanceof AggregateError) {
|
|
624
|
+
error_message = processErrorMessage(`Issue with external link [${url}]: ${e.message} - ${JSON.stringify(e.errors)}`, markdown_paths.relativePath, markdown_content, url);
|
|
625
|
+
} else {
|
|
626
|
+
error_message = processErrorMessage(`Issue with external link [${url}]: ${e}`, markdown_paths.relativePath, markdown_content, url);
|
|
627
|
+
}
|
|
628
|
+
if (hdocbook_project.validation.external_link_warnings || process.env.GITHUB_ACTIONS === 'true')
|
|
629
|
+
warnings[htmlFile.relativePath].push(error_message);
|
|
630
|
+
else
|
|
631
|
+
errors[htmlFile.relativePath].push(error_message);
|
|
648
632
|
}
|
|
649
|
-
|
|
650
|
-
warnings[htmlFile.relativePath].push(error_message);
|
|
651
|
-
else
|
|
652
|
-
errors[htmlFile.relativePath].push(error_message);
|
|
653
|
-
|
|
654
|
-
}
|
|
655
|
-
// Close the headless browser tab
|
|
656
|
-
page.close();
|
|
633
|
+
});
|
|
657
634
|
}
|
|
658
635
|
}
|
|
636
|
+
|
|
637
|
+
// Run all external HTTP checks concurrently — fetch is lightweight enough
|
|
638
|
+
// that uncapped concurrency is fine for the link counts seen in practice.
|
|
639
|
+
await Promise.all(externalChecks.map(fn => fn()));
|
|
659
640
|
};
|
|
660
641
|
|
|
661
642
|
const checkHostExistsInDNS = async (hostname) => {
|
|
@@ -1069,8 +1050,7 @@ const { error } = require("node:console");
|
|
|
1069
1050
|
|
|
1070
1051
|
|
|
1071
1052
|
const global_links_checked = [];
|
|
1072
|
-
|
|
1073
|
-
|
|
1053
|
+
|
|
1074
1054
|
for (const key in html_to_validate) {
|
|
1075
1055
|
const file = html_to_validate[key];
|
|
1076
1056
|
// Check for British spellings in static HTML content
|
|
@@ -1095,7 +1075,7 @@ const { error } = require("node:console");
|
|
|
1095
1075
|
messages[file.relativePath].push("No links found in file");
|
|
1096
1076
|
} else {
|
|
1097
1077
|
console.log(`\r\nChecking ${links.href.length} Links in ${file.relativePath}`);
|
|
1098
|
-
await checkLinks(source_path, file, links.href, hdocbook_config, hdocbook_project,
|
|
1078
|
+
await checkLinks(source_path, file, links.href, hdocbook_config, hdocbook_project, global_links_checked, output_links);
|
|
1099
1079
|
}
|
|
1100
1080
|
if (links.img.length === 0) {
|
|
1101
1081
|
messages[file.relativePath].push("No images found in file");
|
|
@@ -1107,9 +1087,6 @@ const { error } = require("node:console");
|
|
|
1107
1087
|
await checkTags(file);
|
|
1108
1088
|
}
|
|
1109
1089
|
|
|
1110
|
-
// Close the Chromium browser instance
|
|
1111
|
-
await validateBrowser.close();
|
|
1112
|
-
|
|
1113
1090
|
if (gen_exclude) console.log(JSON.stringify(excl_output, null, 2));
|
|
1114
1091
|
|
|
1115
1092
|
if (verbose) {
|
package/npm-shrinkwrap.json
CHANGED
|
@@ -16,7 +16,6 @@
|
|
|
16
16
|
"better-sqlite3": "12.8.0",
|
|
17
17
|
"cheerio": "1.2.0",
|
|
18
18
|
"express": "4.22.1",
|
|
19
|
-
"jsdom": "25.0.1",
|
|
20
19
|
"markdown-it": "14.1.1",
|
|
21
20
|
"markdown-it-container": "4.0.0",
|
|
22
21
|
"markdown-it-front-matter": "0.2.4",
|
|
@@ -3969,46 +3968,6 @@
|
|
|
3969
3968
|
"js-yaml": "bin/js-yaml.js"
|
|
3970
3969
|
}
|
|
3971
3970
|
},
|
|
3972
|
-
"node_modules/jsdom": {
|
|
3973
|
-
"version": "25.0.1",
|
|
3974
|
-
"resolved": "https://registry.npmjs.org/jsdom/-/jsdom-25.0.1.tgz",
|
|
3975
|
-
"integrity": "sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==",
|
|
3976
|
-
"license": "MIT",
|
|
3977
|
-
"dependencies": {
|
|
3978
|
-
"cssstyle": "4.1.0",
|
|
3979
|
-
"data-urls": "5.0.0",
|
|
3980
|
-
"decimal.js": "10.4.3",
|
|
3981
|
-
"form-data": "4.0.0",
|
|
3982
|
-
"html-encoding-sniffer": "4.0.0",
|
|
3983
|
-
"http-proxy-agent": "7.0.2",
|
|
3984
|
-
"https-proxy-agent": "7.0.5",
|
|
3985
|
-
"is-potential-custom-element-name": "1.0.1",
|
|
3986
|
-
"nwsapi": "2.2.12",
|
|
3987
|
-
"parse5": "7.1.2",
|
|
3988
|
-
"rrweb-cssom": "0.7.1",
|
|
3989
|
-
"saxes": "6.0.0",
|
|
3990
|
-
"symbol-tree": "3.2.4",
|
|
3991
|
-
"tough-cookie": "5.0.0",
|
|
3992
|
-
"w3c-xmlserializer": "5.0.0",
|
|
3993
|
-
"webidl-conversions": "7.0.0",
|
|
3994
|
-
"whatwg-encoding": "3.1.1",
|
|
3995
|
-
"whatwg-mimetype": "4.0.0",
|
|
3996
|
-
"whatwg-url": "14.0.0",
|
|
3997
|
-
"ws": "8.18.0",
|
|
3998
|
-
"xml-name-validator": "5.0.0"
|
|
3999
|
-
},
|
|
4000
|
-
"engines": {
|
|
4001
|
-
"node": ">=18"
|
|
4002
|
-
},
|
|
4003
|
-
"peerDependencies": {
|
|
4004
|
-
"canvas": "2.11.2"
|
|
4005
|
-
},
|
|
4006
|
-
"peerDependenciesMeta": {
|
|
4007
|
-
"canvas": {
|
|
4008
|
-
"optional": true
|
|
4009
|
-
}
|
|
4010
|
-
}
|
|
4011
|
-
},
|
|
4012
3971
|
"node_modules/json-parse-even-better-errors": {
|
|
4013
3972
|
"version": "2.3.1",
|
|
4014
3973
|
"resolved": "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "hdoc-tools",
|
|
3
|
-
"version": "0.47.1",
|
|
3
|
+
"version": "0.47.3",
|
|
4
4
|
"description": "Hornbill HDocBook Development Support Tool",
|
|
5
5
|
"main": "hdoc.js",
|
|
6
6
|
"bin": {
|
|
@@ -43,7 +43,6 @@
|
|
|
43
43
|
"better-sqlite3": "12.8.0",
|
|
44
44
|
"cheerio": "1.2.0",
|
|
45
45
|
"express": "4.22.1",
|
|
46
|
-
"jsdom": "25.0.1",
|
|
47
46
|
"markdown-it": "14.1.1",
|
|
48
47
|
"markdown-it-container": "4.0.0",
|
|
49
48
|
"markdown-it-front-matter": "0.2.4",
|