hdoc-tools 0.17.33 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hdoc-build-db.js +7 -1
- package/hdoc-build.js +33 -19
- package/hdoc-db.js +50 -20
- package/hdoc-module.js +99 -2
- package/hdoc.js +1 -1
- package/package.json +16 -19
package/hdoc-build-db.js
CHANGED
@@ -114,19 +114,23 @@
|
|
114
114
|
for (let i = 0; i < index_records.length; i++) {
|
115
115
|
indexPromises.push(index_records[i]);
|
116
116
|
}
|
117
|
+
let curr_file = '';
|
117
118
|
await Promise.all(indexPromises.map(async (file) => {
|
118
119
|
let index_path_name = file.relative_path.replaceAll('\\', '/');
|
119
120
|
if (index_path_name.endsWith('/index.md') || index_path_name.endsWith('/index.html') || index_path_name.endsWith('/index.htm')) {
|
120
121
|
index_path_name = index_path_name.substring(0, index_path_name.lastIndexOf('/'));
|
121
122
|
}
|
122
123
|
index_path_name = '/' + index_path_name.replace(path.extname(file.relative_path), '');
|
124
|
+
|
123
125
|
let index_response = {
|
124
126
|
success: true,
|
125
127
|
row_id: 0
|
126
128
|
};
|
129
|
+
let index_content_path = index_path_name;
|
130
|
+
if (file.index_html.id !== null) index_content_path += `#${file.index_html.id}`;
|
127
131
|
if (!file.inline) {
|
128
132
|
const index_vals = [
|
129
|
-
|
133
|
+
index_content_path,
|
130
134
|
doc_id,
|
131
135
|
book_config.audience.join(','),
|
132
136
|
book_config.tags.join(','),
|
@@ -142,6 +146,8 @@
|
|
142
146
|
if (!index_response.success) {
|
143
147
|
console.error(`Index record creation failed - ${doc_id}/${file.index_html.fm_props.title}: ${index_response.error}`);
|
144
148
|
} else {
|
149
|
+
if (curr_file === index_path_name) return;
|
150
|
+
curr_file = index_path_name;
|
145
151
|
// Now add metadata
|
146
152
|
const meta_vals = [
|
147
153
|
index_path_name,
|
package/hdoc-build.js
CHANGED
@@ -450,6 +450,9 @@
|
|
450
450
|
if (inline_content) html_txt = `${fm_header_content}\n${html_txt}`;
|
451
451
|
else html_txt = `${fm_header_content}\n${doc_header}\n${html_txt}`;
|
452
452
|
|
453
|
+
// Wrap h2 and h3 tags, plus content, in id'd divs
|
454
|
+
html_txt = hdoc.wrapHContent(html_txt);
|
455
|
+
|
453
456
|
let relative_path = file_path.relativePath;
|
454
457
|
if (
|
455
458
|
!bc[relative_path.replace(".html", "")] &&
|
@@ -458,15 +461,19 @@
|
|
458
461
|
relative_path = relative_path.replace("/index.html", "");
|
459
462
|
}
|
460
463
|
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
464
|
+
const index_data = hdoc_index.transform_html_for_index(html_txt);
|
465
|
+
|
466
|
+
index_data.sections.forEach(section => {
|
467
|
+
index_records.push({
|
468
|
+
relative_path: relative_path,
|
469
|
+
index_html: { fm_props: index_data.fm_props, text: section.text, preview: section.preview, id: section.id ? section.id : null},
|
470
|
+
metadata: metadata,
|
471
|
+
contributors: contribs,
|
472
|
+
pdf_size: pdf_size,
|
473
|
+
md5: file_path.hash,
|
474
|
+
lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
|
475
|
+
inline: inline_content,
|
476
|
+
});
|
470
477
|
});
|
471
478
|
|
472
479
|
// Save HTML into HTML file
|
@@ -804,6 +811,9 @@
|
|
804
811
|
if (inline_content) html_txt = `${fm_header}\n${html_txt}`;
|
805
812
|
else html_txt = `${fm_header}\n${doc_header}\n${html_txt}`;
|
806
813
|
|
814
|
+
// Wrap h2 and h3 tags, plus content, in id'd divs
|
815
|
+
html_txt = hdoc.wrapHContent(html_txt);
|
816
|
+
|
807
817
|
// Save HTML into HTML file
|
808
818
|
const target_file = file_path.path.replace(
|
809
819
|
path.extname(file_path.path),
|
@@ -826,15 +836,19 @@
|
|
826
836
|
relative_path = relative_path.replace("/index.html", "");
|
827
837
|
}
|
828
838
|
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
839
|
+
const index_data = hdoc_index.transform_html_for_index(html_txt);
|
840
|
+
|
841
|
+
index_data.sections.forEach(section => {
|
842
|
+
index_records.push({
|
843
|
+
relative_path: relative_path,
|
844
|
+
index_html: { fm_props: index_data.fm_props, text: section.text, preview: section.preview, id: section.id ? section.id : null},
|
845
|
+
metadata: metadata,
|
846
|
+
contributors: contribs,
|
847
|
+
pdf_size: pdf_size,
|
848
|
+
md5: file_path.hash,
|
849
|
+
lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
|
850
|
+
inline: inline_content,
|
851
|
+
});
|
838
852
|
});
|
839
853
|
|
840
854
|
// Add MD file to delete queue
|
@@ -1297,7 +1311,7 @@
|
|
1297
1311
|
|
1298
1312
|
if (pdf_enable) {
|
1299
1313
|
// Create a Chromium browser instance to generate PDFs with
|
1300
|
-
browser = await puppeteer.launch({ headless:
|
1314
|
+
browser = await puppeteer.launch({ headless: 'shell' });
|
1301
1315
|
}
|
1302
1316
|
|
1303
1317
|
// Work through MD files and convert to HTML
|
package/hdoc-db.js
CHANGED
@@ -62,32 +62,62 @@
|
|
62
62
|
|
63
63
|
exports.transform_html_for_index = function (html_txt) {
|
64
64
|
let response = {
|
65
|
-
|
66
|
-
|
67
|
-
fm_props: {}
|
65
|
+
fm_props: {},
|
66
|
+
sections: []
|
68
67
|
};
|
69
68
|
|
69
|
+
const divs = hdoc.getIDDivs(html_txt);
|
70
|
+
|
70
71
|
// Get frontmatter properties
|
71
72
|
const fm_headers = hdoc.getHTMLFrontmatterHeader(html_txt);
|
72
73
|
response.fm_props = fm_headers.fm_properties;
|
73
74
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
75
|
+
if (divs.length > 0) {
|
76
|
+
divs.forEach(div => {
|
77
|
+
// Convert HTML into plain text
|
78
|
+
let text = response.text = html2text.convert(div.html, {
|
79
|
+
ignoreHref: true,
|
80
|
+
ignoreImage: true,
|
81
|
+
uppercaseHeadings: false,
|
82
|
+
wordwrap: null
|
83
|
+
});
|
84
|
+
// Convert HTML into preview text
|
85
|
+
let preview = html2text.convert(div.html, {
|
86
|
+
baseElement: 'p',
|
87
|
+
ignoreHref: true,
|
88
|
+
ignoreImage: true,
|
89
|
+
uppercaseHeadings: false,
|
90
|
+
wordwrap: null
|
91
|
+
});
|
92
|
+
preview = hdoc.truncate_string(preview, 200, true).replace(/(?:\r\n|\r|\n)/g, ' ');
|
93
|
+
response.sections.push({
|
94
|
+
id: div.id.replace('hb-doc-anchor-', ''),
|
95
|
+
text: text,
|
96
|
+
preview: preview
|
97
|
+
})
|
98
|
+
});
|
99
|
+
} else {
|
100
|
+
// Convert HTML into plain text
|
101
|
+
let text = response.text = html2text.convert(html_txt, {
|
102
|
+
ignoreHref: true,
|
103
|
+
ignoreImage: true,
|
104
|
+
uppercaseHeadings: false,
|
105
|
+
wordwrap: null
|
106
|
+
});
|
107
|
+
// Convert HTML into preview text
|
108
|
+
let preview = html2text.convert(html_txt, {
|
109
|
+
baseElement: 'p',
|
110
|
+
ignoreHref: true,
|
111
|
+
ignoreImage: true,
|
112
|
+
uppercaseHeadings: false,
|
113
|
+
wordwrap: null
|
114
|
+
});
|
115
|
+
preview = hdoc.truncate_string(preview, 200, true).replace(/(?:\r\n|\r|\n)/g, ' ');
|
116
|
+
response.sections.push({
|
117
|
+
text: text,
|
118
|
+
preview: preview
|
119
|
+
})
|
120
|
+
}
|
91
121
|
return response;
|
92
122
|
};
|
93
123
|
|
package/hdoc-module.js
CHANGED
@@ -2,12 +2,13 @@
|
|
2
2
|
'use strict';
|
3
3
|
|
4
4
|
const axios = require('axios'),
|
5
|
-
axiosRetry = require('axios-retry'),
|
5
|
+
axiosRetry = require('axios-retry').default,
|
6
6
|
cheerio = require('cheerio'),
|
7
7
|
fs = require('fs'),
|
8
|
-
html2text = require('html-to-text'),
|
9
8
|
https = require('https'),
|
10
9
|
htmlentities = require('html-entities'),
|
10
|
+
html2text = require('html-to-text'),
|
11
|
+
{ JSDOM } = require('jsdom'),
|
11
12
|
path = require('path'),
|
12
13
|
wordsCount = require('words-count').default;
|
13
14
|
|
@@ -202,6 +203,102 @@
|
|
202
203
|
return false;
|
203
204
|
};
|
204
205
|
|
206
|
+
// Builds an anchor id from heading text: 'hb-doc-anchor-' followed by a
// lowercase slug containing only a-z, 0-9 and single hyphens.
// Punctuation is stripped BEFORE trimming so that whitespace exposed by the
// removed characters (e.g. "What is this ?") cannot turn into a stray leading
// or trailing hyphen. Non-string input is coerced; null/undefined yield an
// empty slug rather than throwing.
const makeAnchorIdFriendly = function(str) {
    const slug = String(str ?? '')
        .toLowerCase()                  // Convert to lowercase
        .replace(/[^a-z0-9\s-]/g, '')   // Remove all non-alphanumeric characters except whitespace and hyphens
        .trim()                         // Trim whitespace, including any exposed by the removal above
        .replace(/\s+/g, '-')           // Replace whitespace runs with hyphens
        .replace(/-+/g, '-')            // Replace multiple hyphens with a single hyphen
        .replace(/^-|-$/g, '');         // Drop a leading or trailing hyphen
    return `hb-doc-anchor-${slug}`;     // Add prefix
};
|
214
|
+
|
215
|
+
// Processes HTML, wraps h2 and h3 tags and their content in divs with an id matching that of the h text
|
216
|
+
exports.wrapHContent = function (htmlContent) {
|
217
|
+
const dom = new JSDOM(htmlContent);
|
218
|
+
const document = dom.window.document;
|
219
|
+
|
220
|
+
let nodes = Array.from(document.body.childNodes); // Convert NodeList to Array for easier manipulation
|
221
|
+
let newContent = document.createDocumentFragment(); // Create a document fragment to hold the new structure
|
222
|
+
|
223
|
+
let currentH2Div = null;
|
224
|
+
let currentH3Div = null;
|
225
|
+
|
226
|
+
nodes.forEach(node => {
|
227
|
+
if (node.nodeType === dom.window.Node.ELEMENT_NODE) {
|
228
|
+
if (node.tagName.toLowerCase() === 'h2') {
|
229
|
+
// When an <h2> is found, close the current <div> (if any) and start a new one
|
230
|
+
if (currentH2Div) {
|
231
|
+
newContent.appendChild(currentH2Div);
|
232
|
+
}
|
233
|
+
currentH2Div = document.createElement('div');
|
234
|
+
currentH2Div.id = makeAnchorIdFriendly(node.textContent.trim()); // Set the id to the anchor-friendly text content of the <h2>
|
235
|
+
currentH2Div.appendChild(node); // Move the <h2> into the new <div>
|
236
|
+
currentH3Div = null; // Reset currentH3Div
|
237
|
+
} else if (node.tagName.toLowerCase() === 'h3') {
|
238
|
+
// When an <h3> is found, close the current <div> (if any) and start a new one
|
239
|
+
if (currentH3Div) {
|
240
|
+
currentH2Div.appendChild(currentH3Div);
|
241
|
+
}
|
242
|
+
currentH3Div = document.createElement('div');
|
243
|
+
currentH3Div.id = makeAnchorIdFriendly(node.textContent.trim()); // Set the id to the anchor-friendly text content of the <h3>
|
244
|
+
currentH3Div.appendChild(node); // Move the <h3> into the new <div>
|
245
|
+
} else if (currentH3Div) {
|
246
|
+
// Append any other nodes to the current <h3> <div> if it exists
|
247
|
+
currentH3Div.appendChild(node);
|
248
|
+
} else if (currentH2Div) {
|
249
|
+
// Append any other nodes to the current <h2> <div> if no current <h3> <div> exists
|
250
|
+
currentH2Div.appendChild(node);
|
251
|
+
} else {
|
252
|
+
// If there is no current <h2> or <h3> <div>, append the node directly to the fragment
|
253
|
+
newContent.appendChild(node);
|
254
|
+
}
|
255
|
+
} else if (currentH3Div) {
|
256
|
+
// Append any text nodes to the current <h3> <div> if it exists
|
257
|
+
currentH3Div.appendChild(node);
|
258
|
+
} else if (currentH2Div) {
|
259
|
+
// Append any text nodes to the current <h2> <div> if it exists
|
260
|
+
currentH2Div.appendChild(node);
|
261
|
+
} else {
|
262
|
+
// If there is no current <h2> or <h3> <div>, append the node directly to the fragment
|
263
|
+
newContent.appendChild(node);
|
264
|
+
}
|
265
|
+
});
|
266
|
+
|
267
|
+
// Append the last <h3> <div> if any
|
268
|
+
if (currentH3Div) {
|
269
|
+
currentH2Div.appendChild(currentH3Div);
|
270
|
+
}
|
271
|
+
|
272
|
+
// Append the last <h2> <div> if any
|
273
|
+
if (currentH2Div) {
|
274
|
+
newContent.appendChild(currentH2Div);
|
275
|
+
}
|
276
|
+
|
277
|
+
// Replace the old body content with the new content
|
278
|
+
document.body.innerHTML = '';
|
279
|
+
document.body.appendChild(newContent);
|
280
|
+
|
281
|
+
// Serialize the document back to HTML and return
|
282
|
+
const outputHtml = dom.serialize();
|
283
|
+
return outputHtml;
|
284
|
+
};
|
285
|
+
|
286
|
+
|
287
|
+
exports.getIDDivs = function(html_body) {
|
288
|
+
const $ = cheerio.load(html_body, {
|
289
|
+
decodeEntities: false
|
290
|
+
});
|
291
|
+
|
292
|
+
const divs = [];
|
293
|
+
|
294
|
+
$('div').each(function(i, element){
|
295
|
+
if ($(this).attr('id') && $(this).attr('id').startsWith('hb-doc-anchor-')) {
|
296
|
+
divs.push({id: $(this).attr('id'), html: $(this).html(), text: $(this).text()})
|
297
|
+
}
|
298
|
+
});
|
299
|
+
return divs;
|
300
|
+
};
|
301
|
+
|
205
302
|
exports.getHTMLFrontmatterHeader = function (html_body) {
|
206
303
|
let response = {
|
207
304
|
fm_header: '',
|
package/hdoc.js
CHANGED
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "hdoc-tools",
|
3
|
-
"version": "0.
|
3
|
+
"version": "0.18.0",
|
4
4
|
"description": "Hornbill HDocBook Development Support Tool",
|
5
5
|
"main": "hdoc.js",
|
6
6
|
"bin": {
|
@@ -37,29 +37,26 @@
|
|
37
37
|
"dependencies": {
|
38
38
|
"american-british-english-translator": "^0.2.1",
|
39
39
|
"archiver": "7.0.1",
|
40
|
-
"axios": "^1.
|
41
|
-
"axios-retry": "^
|
42
|
-
"better-sqlite3": "^
|
43
|
-
"body-parser": "^1.20.1",
|
40
|
+
"axios": "^1.7.2",
|
41
|
+
"axios-retry": "^4.4.1",
|
42
|
+
"better-sqlite3": "^11.1.2",
|
44
43
|
"cheerio": "^1.0.0-rc.12",
|
45
|
-
"
|
46
|
-
"
|
47
|
-
"
|
48
|
-
"
|
49
|
-
"html-
|
50
|
-
"html-to-text": "^8.2.1",
|
44
|
+
"dree": "^5.0.7",
|
45
|
+
"express": "^4.19.2",
|
46
|
+
"fs-extra": "^11.2.0",
|
47
|
+
"html-entities": "^2.5.2",
|
48
|
+
"html-to-text": "^9.0.5",
|
51
49
|
"js-yaml": "^4.1.0",
|
52
|
-
"
|
53
|
-
"markdown-it
|
54
|
-
"markdown-it-
|
50
|
+
"jsdom": "^24.1.0",
|
51
|
+
"markdown-it": "14.1.0",
|
52
|
+
"markdown-it-container": "^4.0.0",
|
53
|
+
"markdown-it-front-matter": "^0.2.4",
|
55
54
|
"mime-types": "^2.1.35",
|
56
|
-
"multer": "^1.4.5-lts.1",
|
57
55
|
"prompt": "^1.3.0",
|
58
|
-
"puppeteer": "22.
|
59
|
-
"
|
60
|
-
"stream": "0.0.2",
|
56
|
+
"puppeteer": "^22.12.1",
|
57
|
+
"stream": "0.0.3",
|
61
58
|
"true-case-path": "^2.2.1",
|
62
59
|
"words-count": "^2.0.2",
|
63
|
-
"xml-formatter": "^3.6.
|
60
|
+
"xml-formatter": "^3.6.2"
|
64
61
|
}
|
65
62
|
}
|