hdoc-tools 0.31.0 → 0.32.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hdoc-build-pdf.js +233 -219
- package/hdoc-build.js +16 -1
- package/hdoc-module.js +54 -0
- package/hdoc-validate.js +14 -0
- package/package.json +1 -1
package/hdoc-build-pdf.js
CHANGED
@@ -1,219 +1,233 @@
|
|
1
|
-
(() => {
|
2
|
-
const axios = require("axios");
|
3
|
-
const cheerio = require("cheerio");
|
4
|
-
const fs = require("fs-extra");
|
5
|
-
const mime = require("mime-types");
|
6
|
-
const path = require("node:path");
|
7
|
-
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
8
|
-
|
9
|
-
let hb_logo = "";
|
10
|
-
let footer = "";
|
11
|
-
let header = "";
|
12
|
-
|
13
|
-
const get_footer = (template_path) => {
|
14
|
-
let footer_content = null;
|
15
|
-
try {
|
16
|
-
footer_content = fs.readFileSync(
|
17
|
-
path.join(template_path, "template-footer.html"),
|
18
|
-
"utf8",
|
19
|
-
);
|
20
|
-
} catch (err) {
|
21
|
-
console.error(`Error loading template: ${err}`);
|
22
|
-
}
|
23
|
-
return footer_content;
|
24
|
-
};
|
25
|
-
|
26
|
-
const get_header = (template_path) => {
|
27
|
-
let header_content = null;
|
28
|
-
try {
|
29
|
-
header_content = fs.readFileSync(
|
30
|
-
path.join(template_path, "template-header.html"),
|
31
|
-
"utf8",
|
32
|
-
);
|
33
|
-
} catch (err) {
|
34
|
-
console.error(`Error loading template: ${err}`);
|
35
|
-
}
|
36
|
-
return header_content;
|
37
|
-
};
|
38
|
-
|
39
|
-
exports.process_images = async (file_path, html_source, verbose) => {
|
40
|
-
const book_work_root = file_path.path.replace(file_path.relativePath, "");
|
41
|
-
if (verbose) console.log("Parsing img tags from HTML source");
|
42
|
-
|
43
|
-
let processed_html_source = html_source;
|
44
|
-
// Use cheerio to parse html
|
45
|
-
const $ = cheerio.load(processed_html_source);
|
46
|
-
|
47
|
-
// Get iFrames from HTML, to replace with a tags
|
48
|
-
const iframes = [];
|
49
|
-
const iframe_html = $("iframe")
|
50
|
-
.map(function () {
|
51
|
-
const response = {
|
52
|
-
html: $.html(this),
|
53
|
-
src: $(this).attr("src"),
|
54
|
-
title: $(this).attr("title")
|
55
|
-
? $(this).attr("title")
|
56
|
-
: "No Link Title Provided",
|
57
|
-
};
|
58
|
-
return response;
|
59
|
-
})
|
60
|
-
.get();
|
61
|
-
iframes.push(...iframe_html);
|
62
|
-
for (let i = 0; i < iframes.length; i++) {
|
63
|
-
const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
|
64
|
-
const regex = new RegExp(
|
65
|
-
`<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
|
66
|
-
);
|
67
|
-
processed_html_source = processed_html_source.replace(regex, link);
|
68
|
-
}
|
69
|
-
|
70
|
-
// Get image links from HTML, to embed into the pdf
|
71
|
-
const imgs = [];
|
72
|
-
const srcs = $("img")
|
73
|
-
.map(function (i) {
|
74
|
-
return $(this).attr("src");
|
75
|
-
})
|
76
|
-
.get();
|
77
|
-
imgs.push(...srcs);
|
78
|
-
for (let i = 0; i < imgs.length; i++) {
|
79
|
-
if (!hdoc.valid_url(imgs[i])) {
|
80
|
-
// Internal link
|
81
|
-
const image_path = path.join(
|
82
|
-
book_work_root,
|
83
|
-
imgs[i].replace("_books/", ""),
|
84
|
-
);
|
85
|
-
try {
|
86
|
-
const image_buffer = fs.readFileSync(image_path);
|
87
|
-
const mime_type = mime.lookup(image_path);
|
88
|
-
let image_b64 = image_buffer.toString("base64");
|
89
|
-
image_b64 = `data:${mime_type};base64,${image_b64}`;
|
90
|
-
processed_html_source = processed_html_source.replace(
|
91
|
-
imgs[i],
|
92
|
-
image_b64,
|
93
|
-
);
|
94
|
-
} catch (err) {
|
95
|
-
console.error(
|
96
|
-
"Error reading image from HTML source [",
|
97
|
-
image_path,
|
98
|
-
"] -",
|
99
|
-
err,
|
100
|
-
);
|
101
|
-
return null;
|
102
|
-
}
|
103
|
-
} else {
|
104
|
-
// External Link
|
105
|
-
try {
|
106
|
-
const file_response = await axios.get(imgs[i]
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
let image_b64 =
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
let
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
}
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
1
|
+
(() => {
|
2
|
+
const axios = require("axios");
|
3
|
+
const cheerio = require("cheerio");
|
4
|
+
const fs = require("fs-extra");
|
5
|
+
const mime = require("mime-types");
|
6
|
+
const path = require("node:path");
|
7
|
+
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
8
|
+
|
9
|
+
let hb_logo = "";
|
10
|
+
let footer = "";
|
11
|
+
let header = "";
|
12
|
+
|
13
|
+
/**
 * Loads the PDF footer template from a template directory.
 *
 * @param {string} template_path - Directory expected to contain template-footer.html.
 * @returns {string|null} The template text, or null when it could not be loaded
 *   (the failure is written to the console rather than thrown).
 */
const get_footer = (template_path) => {
  try {
    const footer_file = path.join(template_path, "template-footer.html");
    return fs.readFileSync(footer_file, "utf8");
  } catch (load_error) {
    console.error(`Error loading template: ${load_error}`);
    return null;
  }
};
|
25
|
+
|
26
|
+
/**
 * Loads the PDF header template from a template directory.
 *
 * @param {string} template_path - Directory expected to contain template-header.html.
 * @returns {string|null} The template text, or null when it could not be loaded
 *   (the failure is written to the console rather than thrown).
 */
const get_header = (template_path) => {
  try {
    const header_file = path.join(template_path, "template-header.html");
    return fs.readFileSync(header_file, "utf8");
  } catch (load_error) {
    console.error(`Error loading template: ${load_error}`);
    return null;
  }
};
|
38
|
+
|
39
|
+
exports.process_images = async (file_path, html_source, verbose) => {
|
40
|
+
const book_work_root = file_path.path.replace(file_path.relativePath, "");
|
41
|
+
if (verbose) console.log("Parsing img tags from HTML source");
|
42
|
+
|
43
|
+
let processed_html_source = html_source;
|
44
|
+
// Use cheerio to parse html
|
45
|
+
const $ = cheerio.load(processed_html_source);
|
46
|
+
|
47
|
+
// Get iFrames from HTML, to replace with a tags
|
48
|
+
const iframes = [];
|
49
|
+
const iframe_html = $("iframe")
|
50
|
+
.map(function () {
|
51
|
+
const response = {
|
52
|
+
html: $.html(this),
|
53
|
+
src: $(this).attr("src"),
|
54
|
+
title: $(this).attr("title")
|
55
|
+
? $(this).attr("title")
|
56
|
+
: "No Link Title Provided",
|
57
|
+
};
|
58
|
+
return response;
|
59
|
+
})
|
60
|
+
.get();
|
61
|
+
iframes.push(...iframe_html);
|
62
|
+
for (let i = 0; i < iframes.length; i++) {
|
63
|
+
const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
|
64
|
+
const regex = new RegExp(
|
65
|
+
`<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
|
66
|
+
);
|
67
|
+
processed_html_source = processed_html_source.replace(regex, link);
|
68
|
+
}
|
69
|
+
|
70
|
+
// Get image links from HTML, to embed into the pdf
|
71
|
+
const imgs = [];
|
72
|
+
const srcs = $("img")
|
73
|
+
.map(function (i) {
|
74
|
+
return $(this).attr("src");
|
75
|
+
})
|
76
|
+
.get();
|
77
|
+
imgs.push(...srcs);
|
78
|
+
for (let i = 0; i < imgs.length; i++) {
|
79
|
+
if (!hdoc.valid_url(imgs[i])) {
|
80
|
+
// Internal link
|
81
|
+
const image_path = path.join(
|
82
|
+
book_work_root,
|
83
|
+
imgs[i].replace("_books/", ""),
|
84
|
+
);
|
85
|
+
try {
|
86
|
+
const image_buffer = fs.readFileSync(image_path);
|
87
|
+
const mime_type = mime.lookup(image_path);
|
88
|
+
let image_b64 = image_buffer.toString("base64");
|
89
|
+
image_b64 = `data:${mime_type};base64,${image_b64}`;
|
90
|
+
processed_html_source = processed_html_source.replace(
|
91
|
+
imgs[i],
|
92
|
+
image_b64,
|
93
|
+
);
|
94
|
+
} catch (err) {
|
95
|
+
console.error(
|
96
|
+
"Error reading image from HTML source [",
|
97
|
+
image_path,
|
98
|
+
"] -",
|
99
|
+
err,
|
100
|
+
);
|
101
|
+
return null;
|
102
|
+
}
|
103
|
+
} else {
|
104
|
+
// External Link
|
105
|
+
try {
|
106
|
+
const file_response = await axios.get(imgs[i], {
|
107
|
+
responseType: 'arraybuffer'
|
108
|
+
});
|
109
|
+
if (file_response.status === 200) {
|
110
|
+
let image_b64 = imageEncode(file_response.data, file_response.headers['content-type']);
|
111
|
+
|
112
|
+
|
113
|
+
const regexQ = `<img\\s+[^>]*src=["']${imgs[i].replaceAll('/', '\\/').replaceAll('.', '\\.')}["'][^>]*>`;
|
114
|
+
const regex = new RegExp(regexQ);
|
115
|
+
|
116
|
+
const found_img_tag = processed_html_source.match(regex);
|
117
|
+
const new_img_tag = found_img_tag[0].replace(imgs[i], image_b64);
|
118
|
+
|
119
|
+
processed_html_source = processed_html_source.replace(
|
120
|
+
found_img_tag,
|
121
|
+
new_img_tag,
|
122
|
+
);
|
123
|
+
} else {
|
124
|
+
throw `Unexpected Status ${file_response.status}`;
|
125
|
+
}
|
126
|
+
} catch (err) {
|
127
|
+
console.error(
|
128
|
+
`Error downloading external source [${imgs[i]}] - ${err}`,
|
129
|
+
);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
|
134
|
+
return processed_html_source;
|
135
|
+
};
|
136
|
+
|
137
|
+
/**
 * Builds a base64 data URI from raw image bytes.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} arrayBuffer - The image bytes.
 * @param {string} [mimeType] - Either a full media type such as a Content-Type
 *   header value ("image/png", optionally followed by ";" parameters) or a bare
 *   image subtype ("png").
 * @returns {string} A string of the form `data:<media type>;base64,<payload>`.
 */
const imageEncode = (arrayBuffer, mimeType) => {
  const b64encoded = Buffer.from(arrayBuffer).toString("base64");

  // Drop any ";charset=..." style parameters; only the media type belongs here
  const media_type = String(mimeType ?? "").split(";")[0].trim();

  let mimetype;
  if (media_type === "") {
    mimetype = "application/octet-stream";
  } else if (media_type.includes("/")) {
    // Already a full media type: prefixing "image/" would give "image/image/png"
    mimetype = media_type;
  } else {
    mimetype = `image/${media_type}`;
  }

  return `data:${mimetype};base64,${b64encoded}`;
};
|
143
|
+
|
144
|
+
exports.generate_pdf = async (
|
145
|
+
browser,
|
146
|
+
pdf_template_path,
|
147
|
+
pdf_template_content,
|
148
|
+
book_config,
|
149
|
+
html_source,
|
150
|
+
target_file,
|
151
|
+
css_templates,
|
152
|
+
verbose = false,
|
153
|
+
) => {
|
154
|
+
let pdf_size = 0;
|
155
|
+
// Cache footer
|
156
|
+
if (footer === "") footer = get_footer(pdf_template_path);
|
157
|
+
|
158
|
+
// Read svg logo file into buffer, convert to B64 string
|
159
|
+
if (hb_logo === "") {
|
160
|
+
const hb_logo_path = path.join(
|
161
|
+
pdf_template_path,
|
162
|
+
"images",
|
163
|
+
"hornbill-logo-full.svg",
|
164
|
+
);
|
165
|
+
try {
|
166
|
+
const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
|
167
|
+
hb_logo = hb_logo_file_buffer.toString("base64");
|
168
|
+
hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
|
169
|
+
} catch (err) {
|
170
|
+
console.error("Error reading logo from template:", err);
|
171
|
+
return pdf_size;
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
// Cache header
|
176
|
+
if (header === "") {
|
177
|
+
header = get_header(pdf_template_path)
|
178
|
+
.replace("{{book_title}}", book_config.title)
|
179
|
+
.replace("{{hb_logo}}", hb_logo);
|
180
|
+
}
|
181
|
+
|
182
|
+
const processed_html_source = pdf_template_content
|
183
|
+
.replace("{{book_title}}", book_config.title)
|
184
|
+
.replace("{{document_content}}", html_source);
|
185
|
+
|
186
|
+
const page = await browser.newPage();
|
187
|
+
|
188
|
+
// To reflect CSS used for screens instead of print
|
189
|
+
await page.emulateMediaType("screen");
|
190
|
+
|
191
|
+
// Set HTML content from HTML source
|
192
|
+
await page.setContent(processed_html_source, {
|
193
|
+
waitUntil: "domcontentloaded",
|
194
|
+
});
|
195
|
+
for (let i = 0; i < css_templates.length; i++) {
|
196
|
+
try {
|
197
|
+
await page.addStyleTag({
|
198
|
+
content: css_templates[i],
|
199
|
+
});
|
200
|
+
} catch (e) {
|
201
|
+
console.error(`Error applying template for [${target_file}]: ${e}`);
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
try {
|
206
|
+
const pdf_gen = await page.pdf({
|
207
|
+
path: target_file,
|
208
|
+
printBackground: true,
|
209
|
+
format: "A4",
|
210
|
+
displayHeaderFooter: true,
|
211
|
+
headerTemplate: header,
|
212
|
+
footerTemplate: footer,
|
213
|
+
margin: {
|
214
|
+
top: "90px",
|
215
|
+
right: "30px",
|
216
|
+
bottom: "60px",
|
217
|
+
left: "30px",
|
218
|
+
},
|
219
|
+
timeout: 0,
|
220
|
+
});
|
221
|
+
const currdate = new Date();
|
222
|
+
const datetime = currdate.toISOString();
|
223
|
+
if (verbose)
|
224
|
+
console.log(`[${datetime}] PDF generation success: ${target_file}`);
|
225
|
+
|
226
|
+
pdf_size = pdf_gen.byteLength;
|
227
|
+
} catch (err) {
|
228
|
+
console.error(`Error generating PDF ${target_file} - ${err}`);
|
229
|
+
}
|
230
|
+
await page.close();
|
231
|
+
return pdf_size;
|
232
|
+
};
|
233
|
+
})();
|
package/hdoc-build.js
CHANGED
@@ -60,6 +60,7 @@
|
|
60
60
|
let conversion_failed = 0;
|
61
61
|
let doc_header_template = "";
|
62
62
|
let doc_header_template_non_git = "";
|
63
|
+
let github_repo_details = {};
|
63
64
|
let global_source_path = "";
|
64
65
|
let pdf_created = 0;
|
65
66
|
let pdf_enable = false;
|
@@ -306,7 +307,8 @@
|
|
306
307
|
hdocbook_config.publicSource,
|
307
308
|
file_path.relativePath,
|
308
309
|
);
|
309
|
-
|
310
|
+
|
311
|
+
const contributors = await hdoc.get_github_contributors(
|
310
312
|
github_paths.api_path,
|
311
313
|
git_token,
|
312
314
|
);
|
@@ -1184,6 +1186,18 @@
|
|
1184
1186
|
prods_supported = prods.prods_supported;
|
1185
1187
|
}
|
1186
1188
|
|
1189
|
+
// Derive the GitHub API URL of the book's repository from its public source URL,
// ignoring a single trailing slash
const clean_repo = hdocbook_config.publicSource.endsWith("/")
  ? hdocbook_config.publicSource.slice(0, -1)
  : hdocbook_config.publicSource;
const api_path = clean_repo.replace(
  "https://github.com/",
  "https://api.github.com/repos/",
);

// Get github repo details
github_repo_details = await hdoc.get_github_repo_details(api_path, git_token);
// Warn only when the lookup did NOT succeed
if (!github_repo_details.success) {
  console.warn(
    `Unable to retrieve GitHub Repository details: ${github_repo_details.error}`,
  );
}
|
1200
|
+
|
1187
1201
|
if (!validate) {
|
1188
1202
|
console.log("Caching CSS for PDF generation...");
|
1189
1203
|
const css_files = [
|
@@ -1394,6 +1408,7 @@
|
|
1394
1408
|
gen_exclude,
|
1395
1409
|
redirects,
|
1396
1410
|
draft_links,
|
1411
|
+
github_repo_details.data.private,
|
1397
1412
|
);
|
1398
1413
|
if (!validation_success) {
|
1399
1414
|
const end_time = Date.now();
|
package/hdoc-module.js
CHANGED
@@ -434,6 +434,60 @@
|
|
434
434
|
return github_paths;
|
435
435
|
};
|
436
436
|
|
437
|
+
exports.get_github_repo_details = async (
|
438
|
+
github_url,
|
439
|
+
github_api_token,
|
440
|
+
) => {
|
441
|
+
const response = {
|
442
|
+
success: false,
|
443
|
+
error: "",
|
444
|
+
data: {},
|
445
|
+
private: false
|
446
|
+
};
|
447
|
+
const request_options = {
|
448
|
+
headers: {
|
449
|
+
"User-Agent": "HornbillDocsBuild",
|
450
|
+
"Cache-Control": "no-cache",
|
451
|
+
Host: "api.github.com",
|
452
|
+
Accept: "application/json",
|
453
|
+
},
|
454
|
+
timeout: 5000,
|
455
|
+
};
|
456
|
+
if (github_api_token !== "") {
|
457
|
+
request_options.headers.authorization = `Bearer ${github_api_token}`;
|
458
|
+
}
|
459
|
+
|
460
|
+
let github_response;
|
461
|
+
try {
|
462
|
+
github_response = await axios.get(github_url, request_options);
|
463
|
+
if (retried) {
|
464
|
+
retried = false;
|
465
|
+
console.log("API call retry success!");
|
466
|
+
}
|
467
|
+
} catch (err) {
|
468
|
+
if (err.response) {
|
469
|
+
if (err.response.status !== 403 && err.response.status !== 401) {
|
470
|
+
response.error = err;
|
471
|
+
return response;
|
472
|
+
}
|
473
|
+
github_response = err.response;
|
474
|
+
} else {
|
475
|
+
response.error = `Unexpected response from GitHub for [${github_url}:\n${JSON.stringify(
|
476
|
+
err,
|
477
|
+
)}]`;
|
478
|
+
}
|
479
|
+
}
|
480
|
+
if (github_response.status === 200) {
|
481
|
+
response.success = true;
|
482
|
+
response.data = github_response.data;
|
483
|
+
response.private = github_response.data.private;
|
484
|
+
} else {
|
485
|
+
// Is it a 404 or 403?
|
486
|
+
response.error = `${github_response.status} : ${data.message}`;
|
487
|
+
}
|
488
|
+
return response;
|
489
|
+
};
|
490
|
+
|
437
491
|
exports.get_github_contributors = async (
|
438
492
|
github_url,
|
439
493
|
github_api_token,
|
package/hdoc-validate.js
CHANGED
@@ -28,6 +28,7 @@ const e = require("express");
|
|
28
28
|
const md_to_validate = [];
|
29
29
|
const exclude_links = {};
|
30
30
|
const exclude_spellcheck = {};
|
31
|
+
let private_repo = false;
|
31
32
|
let redirects = {};
|
32
33
|
const exclude_h1_count = {};
|
33
34
|
const exclude_spellcheck_output = [];
|
@@ -545,6 +546,17 @@ const e = require("express");
|
|
545
546
|
continue;
|
546
547
|
}
|
547
548
|
|
549
|
+
if (
|
550
|
+
links[i].toLowerCase().includes("docs-internal.hornbill.com") &&
|
551
|
+
markdown_paths.relativePath.includes('/_inline/') &&
|
552
|
+
!private_repo
|
553
|
+
) {
|
554
|
+
// Is the parent book in a public repo? If so, flag this as an error.
|
555
|
+
const error_message = processErrorMessage(`Hornbill docs-internal links should not be used in public book inline content: ${links[i]}`, markdown_paths.relativePath, markdown_content, links[i]);
|
556
|
+
errors[htmlFile.relativePath].push( error_message );
|
557
|
+
continue;
|
558
|
+
}
|
559
|
+
|
548
560
|
try {
|
549
561
|
await axios({
|
550
562
|
url: links[i],
|
@@ -810,9 +822,11 @@ const e = require("express");
|
|
810
822
|
gen_exclude,
|
811
823
|
gen_redirects,
|
812
824
|
draft_links,
|
825
|
+
is_private,
|
813
826
|
) => {
|
814
827
|
console.log("Performing Validation and Building SEO Link List...");
|
815
828
|
redirects = gen_redirects;
|
829
|
+
private_repo = is_private;
|
816
830
|
|
817
831
|
// Get a list of HTML files in source_path
|
818
832
|
dree.scan(source_path, dreeOptions, fileContentCallback);
|