hdoc-tools 0.32.0 → 0.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-build-pdf.js CHANGED
@@ -1,219 +1,233 @@
1
- (() => {
2
- const axios = require("axios");
3
- const cheerio = require("cheerio");
4
- const fs = require("fs-extra");
5
- const mime = require("mime-types");
6
- const path = require("node:path");
7
- const hdoc = require(path.join(__dirname, "hdoc-module.js"));
8
-
9
- let hb_logo = "";
10
- let footer = "";
11
- let header = "";
12
-
13
- const get_footer = (template_path) => {
14
- let footer_content = null;
15
- try {
16
- footer_content = fs.readFileSync(
17
- path.join(template_path, "template-footer.html"),
18
- "utf8",
19
- );
20
- } catch (err) {
21
- console.error(`Error loading template: ${err}`);
22
- }
23
- return footer_content;
24
- };
25
-
26
- const get_header = (template_path) => {
27
- let header_content = null;
28
- try {
29
- header_content = fs.readFileSync(
30
- path.join(template_path, "template-header.html"),
31
- "utf8",
32
- );
33
- } catch (err) {
34
- console.error(`Error loading template: ${err}`);
35
- }
36
- return header_content;
37
- };
38
-
39
- exports.process_images = async (file_path, html_source, verbose) => {
40
- const book_work_root = file_path.path.replace(file_path.relativePath, "");
41
- if (verbose) console.log("Parsing img tags from HTML source");
42
-
43
- let processed_html_source = html_source;
44
- // Use cheerio to parse html
45
- const $ = cheerio.load(processed_html_source);
46
-
47
- // Get iFrames from HTML, to replace with a tags
48
- const iframes = [];
49
- const iframe_html = $("iframe")
50
- .map(function () {
51
- const response = {
52
- html: $.html(this),
53
- src: $(this).attr("src"),
54
- title: $(this).attr("title")
55
- ? $(this).attr("title")
56
- : "No Link Title Provided",
57
- };
58
- return response;
59
- })
60
- .get();
61
- iframes.push(...iframe_html);
62
- for (let i = 0; i < iframes.length; i++) {
63
- const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
64
- const regex = new RegExp(
65
- `<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
66
- );
67
- processed_html_source = processed_html_source.replace(regex, link);
68
- }
69
-
70
- // Get image links from HTML, to embed into the pdf
71
- const imgs = [];
72
- const srcs = $("img")
73
- .map(function (i) {
74
- return $(this).attr("src");
75
- })
76
- .get();
77
- imgs.push(...srcs);
78
- for (let i = 0; i < imgs.length; i++) {
79
- if (!hdoc.valid_url(imgs[i])) {
80
- // Internal link
81
- const image_path = path.join(
82
- book_work_root,
83
- imgs[i].replace("_books/", ""),
84
- );
85
- try {
86
- const image_buffer = fs.readFileSync(image_path);
87
- const mime_type = mime.lookup(image_path);
88
- let image_b64 = image_buffer.toString("base64");
89
- image_b64 = `data:${mime_type};base64,${image_b64}`;
90
- processed_html_source = processed_html_source.replace(
91
- imgs[i],
92
- image_b64,
93
- );
94
- } catch (err) {
95
- console.error(
96
- "Error reading image from HTML source [",
97
- image_path,
98
- "] -",
99
- err,
100
- );
101
- return null;
102
- }
103
- } else {
104
- // External Link
105
- try {
106
- const file_response = await axios.get(imgs[i]);
107
- if (file_response.status === 200) {
108
- const image_buffer = file_response.data;
109
- const mime_type = mime.lookup(imgs[i]);
110
- let image_b64 = image_buffer.toString("base64");
111
- image_b64 = `data:${mime_type};base64,${image_b64}`;
112
- processed_html_source = processed_html_source.replace(
113
- imgs[i],
114
- image_b64,
115
- );
116
- } else {
117
- throw `Unexpected Status ${file_response.status}`;
118
- }
119
- } catch (err) {
120
- console.error(
121
- `Error downloading external source [${imgs[i]}] - ${err}`,
122
- );
123
- }
124
- }
125
- }
126
-
127
- return processed_html_source;
128
- };
129
-
130
- exports.generate_pdf = async (
131
- browser,
132
- pdf_template_path,
133
- pdf_template_content,
134
- book_config,
135
- html_source,
136
- target_file,
137
- css_templates,
138
- verbose = false,
139
- ) => {
140
- let pdf_size = 0;
141
- // Cache footer
142
- if (footer === "") footer = get_footer(pdf_template_path);
143
-
144
- // Read svg logo file into buffer, convert to B64 string
145
- if (hb_logo === "") {
146
- const hb_logo_path = path.join(
147
- pdf_template_path,
148
- "images",
149
- "hornbill-logo-full.svg",
150
- );
151
- try {
152
- const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
153
- hb_logo = hb_logo_file_buffer.toString("base64");
154
- hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
155
- } catch (err) {
156
- console.error("Error reading logo from template:", err);
157
- return pdf_size;
158
- }
159
- }
160
-
161
- // Cache header
162
- if (header === "") {
163
- header = get_header(pdf_template_path)
164
- .replace("{{book_title}}", book_config.title)
165
- .replace("{{hb_logo}}", hb_logo);
166
- }
167
-
168
- const processed_html_source = pdf_template_content
169
- .replace("{{book_title}}", book_config.title)
170
- .replace("{{document_content}}", html_source);
171
-
172
- const page = await browser.newPage();
173
-
174
- // To reflect CSS used for screens instead of print
175
- await page.emulateMediaType("screen");
176
-
177
- // Set HTML content from HTML source
178
- await page.setContent(processed_html_source, {
179
- waitUntil: "domcontentloaded",
180
- });
181
- for (let i = 0; i < css_templates.length; i++) {
182
- try {
183
- await page.addStyleTag({
184
- content: css_templates[i],
185
- });
186
- } catch (e) {
187
- console.error(`Error applying template for [${target_file}]: ${e}`);
188
- }
189
- }
190
-
191
- try {
192
- const pdf_gen = await page.pdf({
193
- path: target_file,
194
- printBackground: true,
195
- format: "A4",
196
- displayHeaderFooter: true,
197
- headerTemplate: header,
198
- footerTemplate: footer,
199
- margin: {
200
- top: "90px",
201
- right: "30px",
202
- bottom: "60px",
203
- left: "30px",
204
- },
205
- timeout: 0,
206
- });
207
- const currdate = new Date();
208
- const datetime = currdate.toISOString();
209
- if (verbose)
210
- console.log(`[${datetime}] PDF generation success: ${target_file}`);
211
-
212
- pdf_size = pdf_gen.byteLength;
213
- } catch (err) {
214
- console.error(`Error generating PDF ${target_file} - ${err}`);
215
- }
216
- await page.close();
217
- return pdf_size;
218
- };
219
- })();
1
+ (() => {
2
+ const axios = require("axios");
3
+ const cheerio = require("cheerio");
4
+ const fs = require("fs-extra");
5
+ const mime = require("mime-types");
6
+ const path = require("node:path");
7
+ const hdoc = require(path.join(__dirname, "hdoc-module.js"));
8
+
9
+ let hb_logo = "";
10
+ let footer = "";
11
+ let header = "";
12
+
13
+ const get_footer = (template_path) => {
14
+ let footer_content = null;
15
+ try {
16
+ footer_content = fs.readFileSync(
17
+ path.join(template_path, "template-footer.html"),
18
+ "utf8",
19
+ );
20
+ } catch (err) {
21
+ console.error(`Error loading template: ${err}`);
22
+ }
23
+ return footer_content;
24
+ };
25
+
26
+ const get_header = (template_path) => {
27
+ let header_content = null;
28
+ try {
29
+ header_content = fs.readFileSync(
30
+ path.join(template_path, "template-header.html"),
31
+ "utf8",
32
+ );
33
+ } catch (err) {
34
+ console.error(`Error loading template: ${err}`);
35
+ }
36
+ return header_content;
37
+ };
38
+
39
+ exports.process_images = async (file_path, html_source, verbose) => {
40
+ const book_work_root = file_path.path.replace(file_path.relativePath, "");
41
+ if (verbose) console.log("Parsing img tags from HTML source");
42
+
43
+ let processed_html_source = html_source;
44
+ // Use cheerio to parse html
45
+ const $ = cheerio.load(processed_html_source);
46
+
47
+ // Get iFrames from HTML, to replace with a tags
48
+ const iframes = [];
49
+ const iframe_html = $("iframe")
50
+ .map(function () {
51
+ const response = {
52
+ html: $.html(this),
53
+ src: $(this).attr("src"),
54
+ title: $(this).attr("title")
55
+ ? $(this).attr("title")
56
+ : "No Link Title Provided",
57
+ };
58
+ return response;
59
+ })
60
+ .get();
61
+ iframes.push(...iframe_html);
62
+ for (let i = 0; i < iframes.length; i++) {
63
+ const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
64
+ const regex = new RegExp(
65
+ `<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
66
+ );
67
+ processed_html_source = processed_html_source.replace(regex, link);
68
+ }
69
+
70
+ // Get image links from HTML, to embed into the pdf
71
+ const imgs = [];
72
+ const srcs = $("img")
73
+ .map(function (i) {
74
+ return $(this).attr("src");
75
+ })
76
+ .get();
77
+ imgs.push(...srcs);
78
+ for (let i = 0; i < imgs.length; i++) {
79
+ if (!hdoc.valid_url(imgs[i])) {
80
+ // Internal link
81
+ const image_path = path.join(
82
+ book_work_root,
83
+ imgs[i].replace("_books/", ""),
84
+ );
85
+ try {
86
+ const image_buffer = fs.readFileSync(image_path);
87
+ const mime_type = mime.lookup(image_path);
88
+ let image_b64 = image_buffer.toString("base64");
89
+ image_b64 = `data:${mime_type};base64,${image_b64}`;
90
+ processed_html_source = processed_html_source.replace(
91
+ imgs[i],
92
+ image_b64,
93
+ );
94
+ } catch (err) {
95
+ console.error(
96
+ "Error reading image from HTML source [",
97
+ image_path,
98
+ "] -",
99
+ err,
100
+ );
101
+ return null;
102
+ }
103
+ } else {
104
+ // External Link
105
+ try {
106
+ const file_response = await axios.get(imgs[i], {
107
+ responseType: 'arraybuffer'
108
+ });
109
+ if (file_response.status === 200) {
110
+ let image_b64 = imageEncode(file_response.data, file_response.headers['content-type']);
111
+
112
+
113
+ const regexQ = `<img\\s+[^>]*src=["']${imgs[i].replaceAll('/', '\\/').replaceAll('.', '\\.')}["'][^>]*>`;
114
+ const regex = new RegExp(regexQ);
115
+
116
+ const found_img_tag = processed_html_source.match(regex);
117
+ const new_img_tag = found_img_tag[0].replace(imgs[i], image_b64);
118
+
119
+ processed_html_source = processed_html_source.replace(
120
+ found_img_tag,
121
+ new_img_tag,
122
+ );
123
+ } else {
124
+ throw `Unexpected Status ${file_response.status}`;
125
+ }
126
+ } catch (err) {
127
+ console.error(
128
+ `Error downloading external source [${imgs[i]}] - ${err}`,
129
+ );
130
+ }
131
+ }
132
+ }
133
+
134
+ return processed_html_source;
135
+ };
136
+
137
+ const imageEncode = (arrayBuffer, mimeType) => {
138
+ let u8 = new Uint8Array(arrayBuffer)
139
+ let b64encoded = btoa([].reduce.call(new Uint8Array(arrayBuffer),function(p,c){return p+String.fromCharCode(c)},''))
140
+ let mimetype=`image/${mimeType}`
141
+ return "data:"+mimetype+";base64,"+b64encoded
142
+ }
143
+
144
+ exports.generate_pdf = async (
145
+ browser,
146
+ pdf_template_path,
147
+ pdf_template_content,
148
+ book_config,
149
+ html_source,
150
+ target_file,
151
+ css_templates,
152
+ verbose = false,
153
+ ) => {
154
+ let pdf_size = 0;
155
+ // Cache footer
156
+ if (footer === "") footer = get_footer(pdf_template_path);
157
+
158
+ // Read svg logo file into buffer, convert to B64 string
159
+ if (hb_logo === "") {
160
+ const hb_logo_path = path.join(
161
+ pdf_template_path,
162
+ "images",
163
+ "hornbill-logo-full.svg",
164
+ );
165
+ try {
166
+ const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
167
+ hb_logo = hb_logo_file_buffer.toString("base64");
168
+ hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
169
+ } catch (err) {
170
+ console.error("Error reading logo from template:", err);
171
+ return pdf_size;
172
+ }
173
+ }
174
+
175
+ // Cache header
176
+ if (header === "") {
177
+ header = get_header(pdf_template_path)
178
+ .replace("{{book_title}}", book_config.title)
179
+ .replace("{{hb_logo}}", hb_logo);
180
+ }
181
+
182
+ const processed_html_source = pdf_template_content
183
+ .replace("{{book_title}}", book_config.title)
184
+ .replace("{{document_content}}", html_source);
185
+
186
+ const page = await browser.newPage();
187
+
188
+ // To reflect CSS used for screens instead of print
189
+ await page.emulateMediaType("screen");
190
+
191
+ // Set HTML content from HTML source
192
+ await page.setContent(processed_html_source, {
193
+ waitUntil: "domcontentloaded",
194
+ });
195
+ for (let i = 0; i < css_templates.length; i++) {
196
+ try {
197
+ await page.addStyleTag({
198
+ content: css_templates[i],
199
+ });
200
+ } catch (e) {
201
+ console.error(`Error applying template for [${target_file}]: ${e}`);
202
+ }
203
+ }
204
+
205
+ try {
206
+ const pdf_gen = await page.pdf({
207
+ path: target_file,
208
+ printBackground: true,
209
+ format: "A4",
210
+ displayHeaderFooter: true,
211
+ headerTemplate: header,
212
+ footerTemplate: footer,
213
+ margin: {
214
+ top: "90px",
215
+ right: "30px",
216
+ bottom: "60px",
217
+ left: "30px",
218
+ },
219
+ timeout: 0,
220
+ });
221
+ const currdate = new Date();
222
+ const datetime = currdate.toISOString();
223
+ if (verbose)
224
+ console.log(`[${datetime}] PDF generation success: ${target_file}`);
225
+
226
+ pdf_size = pdf_gen.byteLength;
227
+ } catch (err) {
228
+ console.error(`Error generating PDF ${target_file} - ${err}`);
229
+ }
230
+ await page.close();
231
+ return pdf_size;
232
+ };
233
+ })();
package/hdoc-build.js CHANGED
@@ -40,7 +40,8 @@
40
40
  const pdf_template_file_path = path.join(pdf_template_path, "template.html");
41
41
  const regex_version = /^[0-9]{1,3}[.][0-9]{1,3}[.][0-9]{1,6}$/;
42
42
  const h1_pattern = /(<h1.*?>)\s*.*\s*(.*<\/h1>)/;
43
- const regex_filename = /^[a-z]+-{0,1}([-a-z0-9]+)*$/;
43
+ const regex_filename = /^[a-z]+[-a-z0-9]+[a-z0-9]$/;
44
+ const regex_filename_img = /^[a-z]+[_\-a-z0-9]+[a-z0-9]$/;
44
45
 
45
46
  const built_file_hashes = [];
46
47
  const css_templates = [];
@@ -1044,15 +1045,18 @@
1044
1045
  const filename_validation_callback = (element) => {
1045
1046
  if (element.relativePath.startsWith("_inline/")) return;
1046
1047
  if (element.name.toLowerCase() === ".ds_store") return;
1047
- if (
1048
- element.name === "article_ext.md" ||
1049
- element.name === "description_ext.md"
1050
- )
1051
- return;
1052
- if (image_extensions.includes(element.extension)) return;
1048
+ if (element.name === "article_ext.md" || element.name === "description_ext.md" ) return;
1049
+
1053
1050
  const file_no_ext = element.name.replace(`.${element.extension}`, "");
1054
- if (!file_no_ext.match(regex_filename))
1055
- errors_filename.push(element.relativePath);
1051
+
1052
+ if (image_extensions.includes(element.extension)) {
1053
+ if (!file_no_ext.match(regex_filename_img)) {
1054
+ errors_filename.push(element.relativePath);
1055
+ }
1056
+ return;
1057
+ }
1058
+
1059
+ if (!file_no_ext.match(regex_filename)) errors_filename.push(element.relativePath);
1056
1060
  };
1057
1061
 
1058
1062
  const dreeOptions = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.32.0",
3
+ "version": "0.33.0",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {