hdoc-tools 0.32.0 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hdoc-build-pdf.js +233 -219
- package/hdoc-build.js +13 -9
- package/package.json +1 -1
package/hdoc-build-pdf.js
CHANGED
@@ -1,219 +1,233 @@
|
|
1
|
-
(() => {
|
2
|
-
const axios = require("axios");
|
3
|
-
const cheerio = require("cheerio");
|
4
|
-
const fs = require("fs-extra");
|
5
|
-
const mime = require("mime-types");
|
6
|
-
const path = require("node:path");
|
7
|
-
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
8
|
-
|
9
|
-
let hb_logo = "";
|
10
|
-
let footer = "";
|
11
|
-
let header = "";
|
12
|
-
|
13
|
-
const get_footer = (template_path) => {
|
14
|
-
let footer_content = null;
|
15
|
-
try {
|
16
|
-
footer_content = fs.readFileSync(
|
17
|
-
path.join(template_path, "template-footer.html"),
|
18
|
-
"utf8",
|
19
|
-
);
|
20
|
-
} catch (err) {
|
21
|
-
console.error(`Error loading template: ${err}`);
|
22
|
-
}
|
23
|
-
return footer_content;
|
24
|
-
};
|
25
|
-
|
26
|
-
const get_header = (template_path) => {
|
27
|
-
let header_content = null;
|
28
|
-
try {
|
29
|
-
header_content = fs.readFileSync(
|
30
|
-
path.join(template_path, "template-header.html"),
|
31
|
-
"utf8",
|
32
|
-
);
|
33
|
-
} catch (err) {
|
34
|
-
console.error(`Error loading template: ${err}`);
|
35
|
-
}
|
36
|
-
return header_content;
|
37
|
-
};
|
38
|
-
|
39
|
-
exports.process_images = async (file_path, html_source, verbose) => {
|
40
|
-
const book_work_root = file_path.path.replace(file_path.relativePath, "");
|
41
|
-
if (verbose) console.log("Parsing img tags from HTML source");
|
42
|
-
|
43
|
-
let processed_html_source = html_source;
|
44
|
-
// Use cheerio to parse html
|
45
|
-
const $ = cheerio.load(processed_html_source);
|
46
|
-
|
47
|
-
// Get iFrames from HTML, to replace with a tags
|
48
|
-
const iframes = [];
|
49
|
-
const iframe_html = $("iframe")
|
50
|
-
.map(function () {
|
51
|
-
const response = {
|
52
|
-
html: $.html(this),
|
53
|
-
src: $(this).attr("src"),
|
54
|
-
title: $(this).attr("title")
|
55
|
-
? $(this).attr("title")
|
56
|
-
: "No Link Title Provided",
|
57
|
-
};
|
58
|
-
return response;
|
59
|
-
})
|
60
|
-
.get();
|
61
|
-
iframes.push(...iframe_html);
|
62
|
-
for (let i = 0; i < iframes.length; i++) {
|
63
|
-
const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
|
64
|
-
const regex = new RegExp(
|
65
|
-
`<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
|
66
|
-
);
|
67
|
-
processed_html_source = processed_html_source.replace(regex, link);
|
68
|
-
}
|
69
|
-
|
70
|
-
// Get image links from HTML, to embed into the pdf
|
71
|
-
const imgs = [];
|
72
|
-
const srcs = $("img")
|
73
|
-
.map(function (i) {
|
74
|
-
return $(this).attr("src");
|
75
|
-
})
|
76
|
-
.get();
|
77
|
-
imgs.push(...srcs);
|
78
|
-
for (let i = 0; i < imgs.length; i++) {
|
79
|
-
if (!hdoc.valid_url(imgs[i])) {
|
80
|
-
// Internal link
|
81
|
-
const image_path = path.join(
|
82
|
-
book_work_root,
|
83
|
-
imgs[i].replace("_books/", ""),
|
84
|
-
);
|
85
|
-
try {
|
86
|
-
const image_buffer = fs.readFileSync(image_path);
|
87
|
-
const mime_type = mime.lookup(image_path);
|
88
|
-
let image_b64 = image_buffer.toString("base64");
|
89
|
-
image_b64 = `data:${mime_type};base64,${image_b64}`;
|
90
|
-
processed_html_source = processed_html_source.replace(
|
91
|
-
imgs[i],
|
92
|
-
image_b64,
|
93
|
-
);
|
94
|
-
} catch (err) {
|
95
|
-
console.error(
|
96
|
-
"Error reading image from HTML source [",
|
97
|
-
image_path,
|
98
|
-
"] -",
|
99
|
-
err,
|
100
|
-
);
|
101
|
-
return null;
|
102
|
-
}
|
103
|
-
} else {
|
104
|
-
// External Link
|
105
|
-
try {
|
106
|
-
const file_response = await axios.get(imgs[i]
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
let image_b64 =
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
let
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
}
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
1
|
+
(() => {
|
2
|
+
const axios = require("axios");
|
3
|
+
const cheerio = require("cheerio");
|
4
|
+
const fs = require("fs-extra");
|
5
|
+
const mime = require("mime-types");
|
6
|
+
const path = require("node:path");
|
7
|
+
const hdoc = require(path.join(__dirname, "hdoc-module.js"));
|
8
|
+
|
9
|
+
let hb_logo = "";
|
10
|
+
let footer = "";
|
11
|
+
let header = "";
|
12
|
+
|
13
|
+
/**
 * Read one template fragment from the PDF template folder.
 *
 * Read failures are logged and reported as `null` rather than thrown, so the
 * caller decides how to react to a missing or unreadable template.
 *
 * @param {string} template_path - Folder that holds the template files.
 * @param {string} file_name - Name of the file to read inside that folder.
 * @returns {string|null} UTF-8 content of the file, or null when it cannot be read.
 */
const load_template = (template_path, file_name) => {
	try {
		// path.join stays inside the try so an invalid template_path is
		// reported the same way as a missing file.
		return fs.readFileSync(path.join(template_path, file_name), "utf8");
	} catch (err) {
		console.error(`Error loading template: ${err}`);
		return null;
	}
};

/**
 * Load the PDF footer template (`template-footer.html`).
 *
 * @param {string} template_path - Folder that holds the template files.
 * @returns {string|null} Footer HTML, or null when it cannot be read.
 */
const get_footer = (template_path) =>
	load_template(template_path, "template-footer.html");

/**
 * Load the PDF header template (`template-header.html`).
 *
 * @param {string} template_path - Folder that holds the template files.
 * @returns {string|null} Header HTML, or null when it cannot be read.
 */
const get_header = (template_path) =>
	load_template(template_path, "template-header.html");
|
38
|
+
|
39
|
+
exports.process_images = async (file_path, html_source, verbose) => {
|
40
|
+
const book_work_root = file_path.path.replace(file_path.relativePath, "");
|
41
|
+
if (verbose) console.log("Parsing img tags from HTML source");
|
42
|
+
|
43
|
+
let processed_html_source = html_source;
|
44
|
+
// Use cheerio to parse html
|
45
|
+
const $ = cheerio.load(processed_html_source);
|
46
|
+
|
47
|
+
// Get iFrames from HTML, to replace with a tags
|
48
|
+
const iframes = [];
|
49
|
+
const iframe_html = $("iframe")
|
50
|
+
.map(function () {
|
51
|
+
const response = {
|
52
|
+
html: $.html(this),
|
53
|
+
src: $(this).attr("src"),
|
54
|
+
title: $(this).attr("title")
|
55
|
+
? $(this).attr("title")
|
56
|
+
: "No Link Title Provided",
|
57
|
+
};
|
58
|
+
return response;
|
59
|
+
})
|
60
|
+
.get();
|
61
|
+
iframes.push(...iframe_html);
|
62
|
+
for (let i = 0; i < iframes.length; i++) {
|
63
|
+
const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
|
64
|
+
const regex = new RegExp(
|
65
|
+
`<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
|
66
|
+
);
|
67
|
+
processed_html_source = processed_html_source.replace(regex, link);
|
68
|
+
}
|
69
|
+
|
70
|
+
// Get image links from HTML, to embed into the pdf
|
71
|
+
const imgs = [];
|
72
|
+
const srcs = $("img")
|
73
|
+
.map(function (i) {
|
74
|
+
return $(this).attr("src");
|
75
|
+
})
|
76
|
+
.get();
|
77
|
+
imgs.push(...srcs);
|
78
|
+
for (let i = 0; i < imgs.length; i++) {
|
79
|
+
if (!hdoc.valid_url(imgs[i])) {
|
80
|
+
// Internal link
|
81
|
+
const image_path = path.join(
|
82
|
+
book_work_root,
|
83
|
+
imgs[i].replace("_books/", ""),
|
84
|
+
);
|
85
|
+
try {
|
86
|
+
const image_buffer = fs.readFileSync(image_path);
|
87
|
+
const mime_type = mime.lookup(image_path);
|
88
|
+
let image_b64 = image_buffer.toString("base64");
|
89
|
+
image_b64 = `data:${mime_type};base64,${image_b64}`;
|
90
|
+
processed_html_source = processed_html_source.replace(
|
91
|
+
imgs[i],
|
92
|
+
image_b64,
|
93
|
+
);
|
94
|
+
} catch (err) {
|
95
|
+
console.error(
|
96
|
+
"Error reading image from HTML source [",
|
97
|
+
image_path,
|
98
|
+
"] -",
|
99
|
+
err,
|
100
|
+
);
|
101
|
+
return null;
|
102
|
+
}
|
103
|
+
} else {
|
104
|
+
// External Link
|
105
|
+
try {
|
106
|
+
const file_response = await axios.get(imgs[i], {
|
107
|
+
responseType: 'arraybuffer'
|
108
|
+
});
|
109
|
+
if (file_response.status === 200) {
|
110
|
+
let image_b64 = imageEncode(file_response.data, file_response.headers['content-type']);
|
111
|
+
|
112
|
+
|
113
|
+
const regexQ = `<img\\s+[^>]*src=["']${imgs[i].replaceAll('/', '\\/').replaceAll('.', '\\.')}["'][^>]*>`;
|
114
|
+
const regex = new RegExp(regexQ);
|
115
|
+
|
116
|
+
const found_img_tag = processed_html_source.match(regex);
|
117
|
+
const new_img_tag = found_img_tag[0].replace(imgs[i], image_b64);
|
118
|
+
|
119
|
+
processed_html_source = processed_html_source.replace(
|
120
|
+
found_img_tag,
|
121
|
+
new_img_tag,
|
122
|
+
);
|
123
|
+
} else {
|
124
|
+
throw `Unexpected Status ${file_response.status}`;
|
125
|
+
}
|
126
|
+
} catch (err) {
|
127
|
+
console.error(
|
128
|
+
`Error downloading external source [${imgs[i]}] - ${err}`,
|
129
|
+
);
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
|
134
|
+
return processed_html_source;
|
135
|
+
};
|
136
|
+
|
137
|
+
/**
 * Build a base64 `data:` URI from raw image bytes.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} arrayBuffer - The image bytes.
 * @param {string} [mimeType] - Either a full media type such as "image/png"
 *   (an HTTP Content-Type value; parameters after ";" are dropped) or a bare
 *   image subtype such as "png", which is prefixed with "image/".
 * @returns {string} A URI of the form `data:<media type>;base64,<payload>`.
 */
const imageEncode = (arrayBuffer, mimeType) => {
	// Drop Content-Type parameters ("; charset=...") that do not belong in
	// front of the ";base64" marker.
	const declared_type = String(mimeType ?? "").split(";")[0].trim();

	let media_type = "application/octet-stream";
	if (declared_type.includes("/")) {
		// Already a complete media type: prefixing it would yield "image/image/png".
		media_type = declared_type;
	} else if (declared_type !== "") {
		media_type = `image/${declared_type}`;
	}

	const b64encoded = Buffer.from(arrayBuffer).toString("base64");
	return `data:${media_type};base64,${b64encoded}`;
};
|
143
|
+
|
144
|
+
exports.generate_pdf = async (
|
145
|
+
browser,
|
146
|
+
pdf_template_path,
|
147
|
+
pdf_template_content,
|
148
|
+
book_config,
|
149
|
+
html_source,
|
150
|
+
target_file,
|
151
|
+
css_templates,
|
152
|
+
verbose = false,
|
153
|
+
) => {
|
154
|
+
let pdf_size = 0;
|
155
|
+
// Cache footer
|
156
|
+
if (footer === "") footer = get_footer(pdf_template_path);
|
157
|
+
|
158
|
+
// Read svg logo file into buffer, convert to B64 string
|
159
|
+
if (hb_logo === "") {
|
160
|
+
const hb_logo_path = path.join(
|
161
|
+
pdf_template_path,
|
162
|
+
"images",
|
163
|
+
"hornbill-logo-full.svg",
|
164
|
+
);
|
165
|
+
try {
|
166
|
+
const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
|
167
|
+
hb_logo = hb_logo_file_buffer.toString("base64");
|
168
|
+
hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
|
169
|
+
} catch (err) {
|
170
|
+
console.error("Error reading logo from template:", err);
|
171
|
+
return pdf_size;
|
172
|
+
}
|
173
|
+
}
|
174
|
+
|
175
|
+
// Cache header
|
176
|
+
if (header === "") {
|
177
|
+
header = get_header(pdf_template_path)
|
178
|
+
.replace("{{book_title}}", book_config.title)
|
179
|
+
.replace("{{hb_logo}}", hb_logo);
|
180
|
+
}
|
181
|
+
|
182
|
+
const processed_html_source = pdf_template_content
|
183
|
+
.replace("{{book_title}}", book_config.title)
|
184
|
+
.replace("{{document_content}}", html_source);
|
185
|
+
|
186
|
+
const page = await browser.newPage();
|
187
|
+
|
188
|
+
// To reflect CSS used for screens instead of print
|
189
|
+
await page.emulateMediaType("screen");
|
190
|
+
|
191
|
+
// Set HTML content from HTML source
|
192
|
+
await page.setContent(processed_html_source, {
|
193
|
+
waitUntil: "domcontentloaded",
|
194
|
+
});
|
195
|
+
for (let i = 0; i < css_templates.length; i++) {
|
196
|
+
try {
|
197
|
+
await page.addStyleTag({
|
198
|
+
content: css_templates[i],
|
199
|
+
});
|
200
|
+
} catch (e) {
|
201
|
+
console.error(`Error applying template for [${target_file}]: ${e}`);
|
202
|
+
}
|
203
|
+
}
|
204
|
+
|
205
|
+
try {
|
206
|
+
const pdf_gen = await page.pdf({
|
207
|
+
path: target_file,
|
208
|
+
printBackground: true,
|
209
|
+
format: "A4",
|
210
|
+
displayHeaderFooter: true,
|
211
|
+
headerTemplate: header,
|
212
|
+
footerTemplate: footer,
|
213
|
+
margin: {
|
214
|
+
top: "90px",
|
215
|
+
right: "30px",
|
216
|
+
bottom: "60px",
|
217
|
+
left: "30px",
|
218
|
+
},
|
219
|
+
timeout: 0,
|
220
|
+
});
|
221
|
+
const currdate = new Date();
|
222
|
+
const datetime = currdate.toISOString();
|
223
|
+
if (verbose)
|
224
|
+
console.log(`[${datetime}] PDF generation success: ${target_file}`);
|
225
|
+
|
226
|
+
pdf_size = pdf_gen.byteLength;
|
227
|
+
} catch (err) {
|
228
|
+
console.error(`Error generating PDF ${target_file} - ${err}`);
|
229
|
+
}
|
230
|
+
await page.close();
|
231
|
+
return pdf_size;
|
232
|
+
};
|
233
|
+
})();
|
package/hdoc-build.js
CHANGED
@@ -40,7 +40,8 @@
|
|
40
40
|
const pdf_template_file_path = path.join(pdf_template_path, "template.html");
|
41
41
|
const regex_version = /^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,6}$/;
|
42
42
|
const h1_pattern = /(<h1.*?>)\s*.*\s*(.*<\/h1>)/;
|
43
|
-
const regex_filename = /^[a-z]
|
43
|
+
const regex_filename = /^[a-z]+[-a-z0-9]+[a-z0-9]$/;
|
44
|
+
const regex_filename_img = /^[a-z]+[_\-a-z0-9]+[a-z0-9]$/;
|
44
45
|
|
45
46
|
const built_file_hashes = [];
|
46
47
|
const css_templates = [];
|
@@ -1044,15 +1045,18 @@
|
|
1044
1045
|
/**
 * Check one scanned file's name against the naming rules and record the
 * file's relative path in `errors_filename` when it does not comply.
 *
 * Files under `_inline/`, `.DS_Store` (any letter case), `article_ext.md`
 * and `description_ext.md` are exempt. Files whose extension is listed in
 * `image_extensions` are checked against `regex_filename_img`; every other
 * file is checked against `regex_filename`.
 *
 * @param {{name: string, extension: string, relativePath: string}} element -
 *   Scanned file descriptor; only these three properties are read.
 * @returns {void}
 */
const filename_validation_callback = (element) => {
	const { name, extension, relativePath } = element;

	const is_exempt =
		relativePath.startsWith("_inline/") ||
		name.toLowerCase() === ".ds_store" ||
		name === "article_ext.md" ||
		name === "description_ext.md";
	if (is_exempt) return;

	// Name with the first ".<extension>" occurrence removed.
	const stem = name.replace(`.${extension}`, "");
	const naming_rule = image_extensions.includes(extension)
		? regex_filename_img
		: regex_filename;

	if (stem.match(naming_rule) === null) {
		errors_filename.push(relativePath);
	}
};
|
1057
1061
|
|
1058
1062
|
const dreeOptions = {
|