hdoc-tools 0.17.33 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-build-db.js CHANGED
@@ -114,19 +114,23 @@
114
114
  for (let i = 0; i < index_records.length; i++) {
115
115
  indexPromises.push(index_records[i]);
116
116
  }
117
+ let curr_file = '';
117
118
  await Promise.all(indexPromises.map(async (file) => {
118
119
  let index_path_name = file.relative_path.replaceAll('\\', '/');
119
120
  if (index_path_name.endsWith('/index.md') || index_path_name.endsWith('/index.html') || index_path_name.endsWith('/index.htm')) {
120
121
  index_path_name = index_path_name.substring(0, index_path_name.lastIndexOf('/'));
121
122
  }
122
123
  index_path_name = '/' + index_path_name.replace(path.extname(file.relative_path), '');
124
+
123
125
  let index_response = {
124
126
  success: true,
125
127
  row_id: 0
126
128
  };
129
+ let index_content_path = index_path_name;
130
+ if (file.index_html.id !== null) index_content_path += `#${file.index_html.id}`;
127
131
  if (!file.inline) {
128
132
  const index_vals = [
129
- index_path_name,
133
+ index_content_path,
130
134
  doc_id,
131
135
  book_config.audience.join(','),
132
136
  book_config.tags.join(','),
@@ -142,6 +146,8 @@
142
146
  if (!index_response.success) {
143
147
  console.error(`Index record creation failed - ${doc_id}/${file.index_html.fm_props.title}: ${index_response.error}`);
144
148
  } else {
149
+ if (curr_file === index_path_name) return;
150
+ curr_file = index_path_name;
145
151
  // Now add metadata
146
152
  const meta_vals = [
147
153
  index_path_name,
package/hdoc-build.js CHANGED
@@ -450,6 +450,9 @@
450
450
  if (inline_content) html_txt = `${fm_header_content}\n${html_txt}`;
451
451
  else html_txt = `${fm_header_content}\n${doc_header}\n${html_txt}`;
452
452
 
453
+ // Wrap h2 and h3 tags, plus content, in id'd divs
454
+ html_txt = hdoc.wrapHContent(html_txt);
455
+
453
456
  let relative_path = file_path.relativePath;
454
457
  if (
455
458
  !bc[relative_path.replace(".html", "")] &&
@@ -458,15 +461,19 @@
458
461
  relative_path = relative_path.replace("/index.html", "");
459
462
  }
460
463
 
461
- index_records.push({
462
- relative_path: relative_path,
463
- index_html: hdoc_index.transform_html_for_index(html_txt),
464
- metadata: metadata,
465
- contributors: contribs,
466
- pdf_size: pdf_size,
467
- md5: file_path.hash,
468
- lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
469
- inline: inline_content,
464
+ const index_data = hdoc_index.transform_html_for_index(html_txt);
465
+
466
+ index_data.sections.forEach(section => {
467
+ index_records.push({
468
+ relative_path: relative_path,
469
+ index_html: { fm_props: index_data.fm_props, text: section.text, preview: section.preview, id: section.id ? section.id : null},
470
+ metadata: metadata,
471
+ contributors: contribs,
472
+ pdf_size: pdf_size,
473
+ md5: file_path.hash,
474
+ lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
475
+ inline: inline_content,
476
+ });
470
477
  });
471
478
 
472
479
  // Save HTML into HTML file
@@ -804,6 +811,9 @@
804
811
  if (inline_content) html_txt = `${fm_header}\n${html_txt}`;
805
812
  else html_txt = `${fm_header}\n${doc_header}\n${html_txt}`;
806
813
 
814
+ // Wrap h2 and h3 tags, plus content, in id'd divs
815
+ html_txt = hdoc.wrapHContent(html_txt);
816
+
807
817
  // Save HTML into HTML file
808
818
  const target_file = file_path.path.replace(
809
819
  path.extname(file_path.path),
@@ -826,15 +836,19 @@
826
836
  relative_path = relative_path.replace("/index.html", "");
827
837
  }
828
838
 
829
- index_records.push({
830
- relative_path: relative_path,
831
- index_html: hdoc_index.transform_html_for_index(html_txt),
832
- metadata: metadata,
833
- contributors: contribs,
834
- pdf_size: pdf_size,
835
- md5: file_path.hash,
836
- lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
837
- inline: inline_content,
839
+ const index_data = hdoc_index.transform_html_for_index(html_txt);
840
+
841
+ index_data.sections.forEach(section => {
842
+ index_records.push({
843
+ relative_path: relative_path,
844
+ index_html: { fm_props: index_data.fm_props, text: section.text, preview: section.preview, id: section.id ? section.id : null},
845
+ metadata: metadata,
846
+ contributors: contribs,
847
+ pdf_size: pdf_size,
848
+ md5: file_path.hash,
849
+ lastmod: last_commit !== null ? last_commit : file_path.hb_lastmod,
850
+ inline: inline_content,
851
+ });
838
852
  });
839
853
 
840
854
  // Add MD file to delete queue
@@ -1297,7 +1311,7 @@
1297
1311
 
1298
1312
  if (pdf_enable) {
1299
1313
  // Create a Chromium browser instance generate PDFs with
1300
- browser = await puppeteer.launch({ headless: "new" });
1314
+ browser = await puppeteer.launch({ headless: 'shell' });
1301
1315
  }
1302
1316
 
1303
1317
  // Work through MD files and convert to HTML
package/hdoc-db.js CHANGED
@@ -62,32 +62,62 @@
62
62
 
63
63
  exports.transform_html_for_index = function (html_txt) {
64
64
  let response = {
65
- text: '',
66
- preview: '',
67
- fm_props: {}
65
+ fm_props: {},
66
+ sections: []
68
67
  };
69
68
 
69
+ const divs = hdoc.getIDDivs(html_txt);
70
+
70
71
  // Get frontmatter properties
71
72
  const fm_headers = hdoc.getHTMLFrontmatterHeader(html_txt);
72
73
  response.fm_props = fm_headers.fm_properties;
73
74
 
74
- // Convert HTML into plain text
75
- response.text = html2text.convert(html_txt, {
76
- ignoreHref: true,
77
- ignoreImage: true,
78
- uppercaseHeadings: false,
79
- wordwrap: null
80
- });
81
-
82
- // Convert HTML into preview text
83
- response.preview = html2text.convert(html_txt, {
84
- baseElement: 'p',
85
- ignoreHref: true,
86
- ignoreImage: true,
87
- uppercaseHeadings: false,
88
- wordwrap: null
89
- });
90
- response.preview = hdoc.truncate_string(response.preview, 200, true).replace(/(?:\r\n|\r|\n)/g, ' ');
75
+ if (divs.length > 0) {
76
+ divs.forEach(div => {
77
+ // Convert HTML into plain text
78
+ let text = response.text = html2text.convert(div.html, {
79
+ ignoreHref: true,
80
+ ignoreImage: true,
81
+ uppercaseHeadings: false,
82
+ wordwrap: null
83
+ });
84
+ // Convert HTML into preview text
85
+ let preview = html2text.convert(div.html, {
86
+ baseElement: 'p',
87
+ ignoreHref: true,
88
+ ignoreImage: true,
89
+ uppercaseHeadings: false,
90
+ wordwrap: null
91
+ });
92
+ preview = hdoc.truncate_string(preview, 200, true).replace(/(?:\r\n|\r|\n)/g, ' ');
93
+ response.sections.push({
94
+ id: div.id.replace('hb-doc-anchor-', ''),
95
+ text: text,
96
+ preview: preview
97
+ })
98
+ });
99
+ } else {
100
+ // Convert HTML into plain text
101
+ let text = response.text = html2text.convert(html_txt, {
102
+ ignoreHref: true,
103
+ ignoreImage: true,
104
+ uppercaseHeadings: false,
105
+ wordwrap: null
106
+ });
107
+ // Convert HTML into preview text
108
+ let preview = html2text.convert(html_txt, {
109
+ baseElement: 'p',
110
+ ignoreHref: true,
111
+ ignoreImage: true,
112
+ uppercaseHeadings: false,
113
+ wordwrap: null
114
+ });
115
+ preview = hdoc.truncate_string(preview, 200, true).replace(/(?:\r\n|\r|\n)/g, ' ');
116
+ response.sections.push({
117
+ text: text,
118
+ preview: preview
119
+ })
120
+ }
91
121
  return response;
92
122
  };
93
123
 
package/hdoc-module.js CHANGED
@@ -2,12 +2,13 @@
2
2
  'use strict';
3
3
 
4
4
  const axios = require('axios'),
5
- axiosRetry = require('axios-retry'),
5
+ axiosRetry = require('axios-retry').default,
6
6
  cheerio = require('cheerio'),
7
7
  fs = require('fs'),
8
- html2text = require('html-to-text'),
9
8
  https = require('https'),
10
9
  htmlentities = require('html-entities'),
10
+ html2text = require('html-to-text'),
11
+ { JSDOM } = require('jsdom'),
11
12
  path = require('path'),
12
13
  wordsCount = require('words-count').default;
13
14
 
@@ -202,6 +203,102 @@
202
203
  return false;
203
204
  };
204
205
 
206
/**
 * Turns heading text into an anchor id of the form "hb-doc-anchor-<slug>".
 *
 * The slug is the lower-cased text with everything except a-z, 0-9, whitespace
 * and hyphens removed, whitespace runs turned into hyphens, hyphen runs
 * collapsed, and any leading/trailing hyphen stripped.
 *
 * @param {string} str - Heading text; null/undefined are treated as empty.
 * @returns {string} The prefixed, anchor-friendly id.
 */
const makeAnchorIdFriendly = function(str) {
    const slug = String(str ?? '')
        .toLowerCase()
        .trim()
        .replace(/[^a-z0-9\s-]/g, '') // Drop everything except alphanumerics, whitespace and hyphens
        .replace(/\s+/g, '-')         // Whitespace runs become a single hyphen
        .replace(/-+/g, '-')          // Collapse hyphen runs
        // Removing punctuation can expose whitespace/hyphens at the edges *after*
        // the trim above (e.g. "FAQ - "), so strip any edge hyphen that is left.
        .replace(/^-|-$/g, '');
    return `hb-doc-anchor-${slug}`;
};
214
+
215
+ // Processes HTML, wraps h2 and h3 tags and their content in divs with an id matching that of the h text
216
+ exports.wrapHContent = function (htmlContent) {
217
+ const dom = new JSDOM(htmlContent);
218
+ const document = dom.window.document;
219
+
220
+ let nodes = Array.from(document.body.childNodes); // Convert NodeList to Array for easier manipulation
221
+ let newContent = document.createDocumentFragment(); // Create a document fragment to hold the new structure
222
+
223
+ let currentH2Div = null;
224
+ let currentH3Div = null;
225
+
226
+ nodes.forEach(node => {
227
+ if (node.nodeType === dom.window.Node.ELEMENT_NODE) {
228
+ if (node.tagName.toLowerCase() === 'h2') {
229
+ // When an <h2> is found, close the current <div> (if any) and start a new one
230
+ if (currentH2Div) {
231
+ newContent.appendChild(currentH2Div);
232
+ }
233
+ currentH2Div = document.createElement('div');
234
+ currentH2Div.id = makeAnchorIdFriendly(node.textContent.trim()); // Set the id to the anchor-friendly text content of the <h2>
235
+ currentH2Div.appendChild(node); // Move the <h2> into the new <div>
236
+ currentH3Div = null; // Reset currentH3Div
237
+ } else if (node.tagName.toLowerCase() === 'h3') {
238
+ // When an <h3> is found, close the current <div> (if any) and start a new one
239
+ if (currentH3Div) {
240
+ currentH2Div.appendChild(currentH3Div);
241
+ }
242
+ currentH3Div = document.createElement('div');
243
+ currentH3Div.id = makeAnchorIdFriendly(node.textContent.trim()); // Set the id to the anchor-friendly text content of the <h3>
244
+ currentH3Div.appendChild(node); // Move the <h3> into the new <div>
245
+ } else if (currentH3Div) {
246
+ // Append any other nodes to the current <h3> <div> if it exists
247
+ currentH3Div.appendChild(node);
248
+ } else if (currentH2Div) {
249
+ // Append any other nodes to the current <h2> <div> if no current <h3> <div> exists
250
+ currentH2Div.appendChild(node);
251
+ } else {
252
+ // If there is no current <h2> or <h3> <div>, append the node directly to the fragment
253
+ newContent.appendChild(node);
254
+ }
255
+ } else if (currentH3Div) {
256
+ // Append any text nodes to the current <h3> <div> if it exists
257
+ currentH3Div.appendChild(node);
258
+ } else if (currentH2Div) {
259
+ // Append any text nodes to the current <h2> <div> if it exists
260
+ currentH2Div.appendChild(node);
261
+ } else {
262
+ // If there is no current <h2> or <h3> <div>, append the node directly to the fragment
263
+ newContent.appendChild(node);
264
+ }
265
+ });
266
+
267
+ // Append the last <h3> <div> if any
268
+ if (currentH3Div) {
269
+ currentH2Div.appendChild(currentH3Div);
270
+ }
271
+
272
+ // Append the last <h2> <div> if any
273
+ if (currentH2Div) {
274
+ newContent.appendChild(currentH2Div);
275
+ }
276
+
277
+ // Replace the old body content with the new content
278
+ document.body.innerHTML = '';
279
+ document.body.appendChild(newContent);
280
+
281
+ // Serialize the document back to HTML and return
282
+ const outputHtml = dom.serialize();
283
+ return outputHtml;
284
+ };
285
+
286
+
287
+ exports.getIDDivs = function(html_body) {
288
+ const $ = cheerio.load(html_body, {
289
+ decodeEntities: false
290
+ });
291
+
292
+ const divs = [];
293
+
294
+ $('div').each(function(i, element){
295
+ if ($(this).attr('id') && $(this).attr('id').startsWith('hb-doc-anchor-')) {
296
+ divs.push({id: $(this).attr('id'), html: $(this).html(), text: $(this).text()})
297
+ }
298
+ });
299
+ return divs;
300
+ };
301
+
205
302
  exports.getHTMLFrontmatterHeader = function (html_body) {
206
303
  let response = {
207
304
  fm_header: '',
package/hdoc.js CHANGED
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env node
1
+ #!/usr/bin/env node
2
2
  (async function () {
3
3
  'use strict';
4
4
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.17.33",
3
+ "version": "0.18.0",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {
@@ -37,29 +37,26 @@
37
37
  "dependencies": {
38
38
  "american-british-english-translator": "^0.2.1",
39
39
  "archiver": "7.0.1",
40
- "axios": "^1.3.2",
41
- "axios-retry": "^3.5.0",
42
- "better-sqlite3": "^9.4.0",
43
- "body-parser": "^1.20.1",
40
+ "axios": "^1.7.2",
41
+ "axios-retry": "^4.4.1",
42
+ "better-sqlite3": "^11.1.2",
44
43
  "cheerio": "^1.0.0-rc.12",
45
- "cookie-parser": "^1.4.6",
46
- "dree": "^3.4.2",
47
- "express": "^4.18.2",
48
- "fs-extra": "^11.1.0",
49
- "html-entities": "^2.4.0",
50
- "html-to-text": "^8.2.1",
44
+ "dree": "^5.0.7",
45
+ "express": "^4.19.2",
46
+ "fs-extra": "^11.2.0",
47
+ "html-entities": "^2.5.2",
48
+ "html-to-text": "^9.0.5",
51
49
  "js-yaml": "^4.1.0",
52
- "markdown-it": "^13.0.1",
53
- "markdown-it-container": "^3.0.0",
54
- "markdown-it-front-matter": "^0.2.3",
50
+ "jsdom": "^24.1.0",
51
+ "markdown-it": "14.1.0",
52
+ "markdown-it-container": "^4.0.0",
53
+ "markdown-it-front-matter": "^0.2.4",
55
54
  "mime-types": "^2.1.35",
56
- "multer": "^1.4.5-lts.1",
57
55
  "prompt": "^1.3.0",
58
- "puppeteer": "22.8.2",
59
- "retry": "^0.13.1",
60
- "stream": "0.0.2",
56
+ "puppeteer": "^22.12.1",
57
+ "stream": "0.0.3",
61
58
  "true-case-path": "^2.2.1",
62
59
  "words-count": "^2.0.2",
63
- "xml-formatter": "^3.6.0"
60
+ "xml-formatter": "^3.6.2"
64
61
  }
65
62
  }