hdoc-tools 0.32.0 → 0.32.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. package/hdoc-build-pdf.js +233 -219
  2. package/package.json +1 -1
package/hdoc-build-pdf.js CHANGED
@@ -1,219 +1,233 @@
1
- (() => {
2
- const axios = require("axios");
3
- const cheerio = require("cheerio");
4
- const fs = require("fs-extra");
5
- const mime = require("mime-types");
6
- const path = require("node:path");
7
- const hdoc = require(path.join(__dirname, "hdoc-module.js"));
8
-
9
- let hb_logo = "";
10
- let footer = "";
11
- let header = "";
12
-
13
- const get_footer = (template_path) => {
14
- let footer_content = null;
15
- try {
16
- footer_content = fs.readFileSync(
17
- path.join(template_path, "template-footer.html"),
18
- "utf8",
19
- );
20
- } catch (err) {
21
- console.error(`Error loading template: ${err}`);
22
- }
23
- return footer_content;
24
- };
25
-
26
- const get_header = (template_path) => {
27
- let header_content = null;
28
- try {
29
- header_content = fs.readFileSync(
30
- path.join(template_path, "template-header.html"),
31
- "utf8",
32
- );
33
- } catch (err) {
34
- console.error(`Error loading template: ${err}`);
35
- }
36
- return header_content;
37
- };
38
-
39
- exports.process_images = async (file_path, html_source, verbose) => {
40
- const book_work_root = file_path.path.replace(file_path.relativePath, "");
41
- if (verbose) console.log("Parsing img tags from HTML source");
42
-
43
- let processed_html_source = html_source;
44
- // Use cheerio to parse html
45
- const $ = cheerio.load(processed_html_source);
46
-
47
- // Get iFrames from HTML, to replace with a tags
48
- const iframes = [];
49
- const iframe_html = $("iframe")
50
- .map(function () {
51
- const response = {
52
- html: $.html(this),
53
- src: $(this).attr("src"),
54
- title: $(this).attr("title")
55
- ? $(this).attr("title")
56
- : "No Link Title Provided",
57
- };
58
- return response;
59
- })
60
- .get();
61
- iframes.push(...iframe_html);
62
- for (let i = 0; i < iframes.length; i++) {
63
- const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
64
- const regex = new RegExp(
65
- `<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
66
- );
67
- processed_html_source = processed_html_source.replace(regex, link);
68
- }
69
-
70
- // Get image links from HTML, to embed into the pdf
71
- const imgs = [];
72
- const srcs = $("img")
73
- .map(function (i) {
74
- return $(this).attr("src");
75
- })
76
- .get();
77
- imgs.push(...srcs);
78
- for (let i = 0; i < imgs.length; i++) {
79
- if (!hdoc.valid_url(imgs[i])) {
80
- // Internal link
81
- const image_path = path.join(
82
- book_work_root,
83
- imgs[i].replace("_books/", ""),
84
- );
85
- try {
86
- const image_buffer = fs.readFileSync(image_path);
87
- const mime_type = mime.lookup(image_path);
88
- let image_b64 = image_buffer.toString("base64");
89
- image_b64 = `data:${mime_type};base64,${image_b64}`;
90
- processed_html_source = processed_html_source.replace(
91
- imgs[i],
92
- image_b64,
93
- );
94
- } catch (err) {
95
- console.error(
96
- "Error reading image from HTML source [",
97
- image_path,
98
- "] -",
99
- err,
100
- );
101
- return null;
102
- }
103
- } else {
104
- // External Link
105
- try {
106
- const file_response = await axios.get(imgs[i]);
107
- if (file_response.status === 200) {
108
- const image_buffer = file_response.data;
109
- const mime_type = mime.lookup(imgs[i]);
110
- let image_b64 = image_buffer.toString("base64");
111
- image_b64 = `data:${mime_type};base64,${image_b64}`;
112
- processed_html_source = processed_html_source.replace(
113
- imgs[i],
114
- image_b64,
115
- );
116
- } else {
117
- throw `Unexpected Status ${file_response.status}`;
118
- }
119
- } catch (err) {
120
- console.error(
121
- `Error downloading external source [${imgs[i]}] - ${err}`,
122
- );
123
- }
124
- }
125
- }
126
-
127
- return processed_html_source;
128
- };
129
-
130
- exports.generate_pdf = async (
131
- browser,
132
- pdf_template_path,
133
- pdf_template_content,
134
- book_config,
135
- html_source,
136
- target_file,
137
- css_templates,
138
- verbose = false,
139
- ) => {
140
- let pdf_size = 0;
141
- // Cache footer
142
- if (footer === "") footer = get_footer(pdf_template_path);
143
-
144
- // Read svg logo file into buffer, convert to B64 string
145
- if (hb_logo === "") {
146
- const hb_logo_path = path.join(
147
- pdf_template_path,
148
- "images",
149
- "hornbill-logo-full.svg",
150
- );
151
- try {
152
- const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
153
- hb_logo = hb_logo_file_buffer.toString("base64");
154
- hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
155
- } catch (err) {
156
- console.error("Error reading logo from template:", err);
157
- return pdf_size;
158
- }
159
- }
160
-
161
- // Cache header
162
- if (header === "") {
163
- header = get_header(pdf_template_path)
164
- .replace("{{book_title}}", book_config.title)
165
- .replace("{{hb_logo}}", hb_logo);
166
- }
167
-
168
- const processed_html_source = pdf_template_content
169
- .replace("{{book_title}}", book_config.title)
170
- .replace("{{document_content}}", html_source);
171
-
172
- const page = await browser.newPage();
173
-
174
- // To reflect CSS used for screens instead of print
175
- await page.emulateMediaType("screen");
176
-
177
- // Set HTML content from HTML source
178
- await page.setContent(processed_html_source, {
179
- waitUntil: "domcontentloaded",
180
- });
181
- for (let i = 0; i < css_templates.length; i++) {
182
- try {
183
- await page.addStyleTag({
184
- content: css_templates[i],
185
- });
186
- } catch (e) {
187
- console.error(`Error applying template for [${target_file}]: ${e}`);
188
- }
189
- }
190
-
191
- try {
192
- const pdf_gen = await page.pdf({
193
- path: target_file,
194
- printBackground: true,
195
- format: "A4",
196
- displayHeaderFooter: true,
197
- headerTemplate: header,
198
- footerTemplate: footer,
199
- margin: {
200
- top: "90px",
201
- right: "30px",
202
- bottom: "60px",
203
- left: "30px",
204
- },
205
- timeout: 0,
206
- });
207
- const currdate = new Date();
208
- const datetime = currdate.toISOString();
209
- if (verbose)
210
- console.log(`[${datetime}] PDF generation success: ${target_file}`);
211
-
212
- pdf_size = pdf_gen.byteLength;
213
- } catch (err) {
214
- console.error(`Error generating PDF ${target_file} - ${err}`);
215
- }
216
- await page.close();
217
- return pdf_size;
218
- };
219
- })();
1
+ (() => {
2
+ const axios = require("axios");
3
+ const cheerio = require("cheerio");
4
+ const fs = require("fs-extra");
5
+ const mime = require("mime-types");
6
+ const path = require("node:path");
7
+ const hdoc = require(path.join(__dirname, "hdoc-module.js"));
8
+
9
+ let hb_logo = "";
10
+ let footer = "";
11
+ let header = "";
12
+
13
/**
 * Read the PDF footer template from a template directory.
 *
 * @param {string} template_path - Directory expected to contain "template-footer.html".
 * @returns {string|null} The template text (UTF-8), or null when it could not be read
 *   (the failure is logged to the console, not thrown).
 */
const get_footer = (template_path) => {
  try {
    // Path building stays inside the try so a bad template_path is logged, not thrown.
    const footer_file = path.join(template_path, "template-footer.html");
    return fs.readFileSync(footer_file, "utf8");
  } catch (load_error) {
    console.error(`Error loading template: ${load_error}`);
    return null;
  }
};
25
+
26
/**
 * Read the PDF header template from a template directory.
 *
 * @param {string} template_path - Directory expected to contain "template-header.html".
 * @returns {string|null} The template text (UTF-8), or null when reading failed
 *   (the error is written to the console and swallowed).
 */
const get_header = (template_path) => {
  let template_text;
  try {
    const header_file = path.join(template_path, "template-header.html");
    template_text = fs.readFileSync(header_file, "utf8");
  } catch (read_failure) {
    console.error(`Error loading template: ${read_failure}`);
    template_text = null;
  }
  return template_text;
};
38
+
39
+ exports.process_images = async (file_path, html_source, verbose) => {
40
+ const book_work_root = file_path.path.replace(file_path.relativePath, "");
41
+ if (verbose) console.log("Parsing img tags from HTML source");
42
+
43
+ let processed_html_source = html_source;
44
+ // Use cheerio to parse html
45
+ const $ = cheerio.load(processed_html_source);
46
+
47
+ // Get iFrames from HTML, to replace with a tags
48
+ const iframes = [];
49
+ const iframe_html = $("iframe")
50
+ .map(function () {
51
+ const response = {
52
+ html: $.html(this),
53
+ src: $(this).attr("src"),
54
+ title: $(this).attr("title")
55
+ ? $(this).attr("title")
56
+ : "No Link Title Provided",
57
+ };
58
+ return response;
59
+ })
60
+ .get();
61
+ iframes.push(...iframe_html);
62
+ for (let i = 0; i < iframes.length; i++) {
63
+ const link = `<p><a href="${iframes[i].src}">${iframes[i].title}</a></p>`;
64
+ const regex = new RegExp(
65
+ `<iframe.*src="${iframes[i].src.replace("/", "\\/")}".*</iframe>`,
66
+ );
67
+ processed_html_source = processed_html_source.replace(regex, link);
68
+ }
69
+
70
+ // Get image links from HTML, to embed into the pdf
71
+ const imgs = [];
72
+ const srcs = $("img")
73
+ .map(function (i) {
74
+ return $(this).attr("src");
75
+ })
76
+ .get();
77
+ imgs.push(...srcs);
78
+ for (let i = 0; i < imgs.length; i++) {
79
+ if (!hdoc.valid_url(imgs[i])) {
80
+ // Internal link
81
+ const image_path = path.join(
82
+ book_work_root,
83
+ imgs[i].replace("_books/", ""),
84
+ );
85
+ try {
86
+ const image_buffer = fs.readFileSync(image_path);
87
+ const mime_type = mime.lookup(image_path);
88
+ let image_b64 = image_buffer.toString("base64");
89
+ image_b64 = `data:${mime_type};base64,${image_b64}`;
90
+ processed_html_source = processed_html_source.replace(
91
+ imgs[i],
92
+ image_b64,
93
+ );
94
+ } catch (err) {
95
+ console.error(
96
+ "Error reading image from HTML source [",
97
+ image_path,
98
+ "] -",
99
+ err,
100
+ );
101
+ return null;
102
+ }
103
+ } else {
104
+ // External Link
105
+ try {
106
+ const file_response = await axios.get(imgs[i], {
107
+ responseType: 'arraybuffer'
108
+ });
109
+ if (file_response.status === 200) {
110
+ let image_b64 = imageEncode(file_response.data, file_response.headers['content-type']);
111
+
112
+
113
+ const regexQ = `<img\\s+[^>]*src=["']${imgs[i].replaceAll('/', '\\/').replaceAll('.', '\\.')}["'][^>]*>`;
114
+ const regex = new RegExp(regexQ);
115
+
116
+ const found_img_tag = processed_html_source.match(regex);
117
+ const new_img_tag = found_img_tag[0].replace(imgs[i], image_b64);
118
+
119
+ processed_html_source = processed_html_source.replace(
120
+ found_img_tag,
121
+ new_img_tag,
122
+ );
123
+ } else {
124
+ throw `Unexpected Status ${file_response.status}`;
125
+ }
126
+ } catch (err) {
127
+ console.error(
128
+ `Error downloading external source [${imgs[i]}] - ${err}`,
129
+ );
130
+ }
131
+ }
132
+ }
133
+
134
+ return processed_html_source;
135
+ };
136
+
137
/**
 * Build a base64 `data:` URI from raw image bytes.
 *
 * @param {ArrayBuffer|Uint8Array|Buffer} arrayBuffer - The binary image content.
 * @param {string} [mimeType] - Either a full media type such as an HTTP
 *   Content-Type header value ("image/png", "image/jpeg; charset=binary") or a
 *   bare image subtype ("png"), which is prefixed with "image/".
 * @returns {string} A URI of the form `data:<media type>;base64,<payload>`.
 */
const imageEncode = (arrayBuffer, mimeType) => {
  const image_bytes = Buffer.isBuffer(arrayBuffer)
    ? arrayBuffer
    : Buffer.from(arrayBuffer);

  // Drop header parameters ("; charset=...") - only the media type belongs in the URI.
  const media_type = String(mimeType ?? "")
    .split(";")[0]
    .trim()
    .toLowerCase();

  let content_type;
  if (media_type === "") {
    content_type = "application/octet-stream";
  } else if (media_type.includes("/")) {
    // Already a full media type; prefixing again would give "image/image/png".
    content_type = media_type;
  } else {
    content_type = `image/${media_type}`;
  }

  return `data:${content_type};base64,${image_bytes.toString("base64")}`;
};
143
+
144
+ exports.generate_pdf = async (
145
+ browser,
146
+ pdf_template_path,
147
+ pdf_template_content,
148
+ book_config,
149
+ html_source,
150
+ target_file,
151
+ css_templates,
152
+ verbose = false,
153
+ ) => {
154
+ let pdf_size = 0;
155
+ // Cache footer
156
+ if (footer === "") footer = get_footer(pdf_template_path);
157
+
158
+ // Read svg logo file into buffer, convert to B64 string
159
+ if (hb_logo === "") {
160
+ const hb_logo_path = path.join(
161
+ pdf_template_path,
162
+ "images",
163
+ "hornbill-logo-full.svg",
164
+ );
165
+ try {
166
+ const hb_logo_file_buffer = fs.readFileSync(hb_logo_path);
167
+ hb_logo = hb_logo_file_buffer.toString("base64");
168
+ hb_logo = `data:image/svg+xml;base64,${hb_logo}`;
169
+ } catch (err) {
170
+ console.error("Error reading logo from template:", err);
171
+ return pdf_size;
172
+ }
173
+ }
174
+
175
+ // Cache header
176
+ if (header === "") {
177
+ header = get_header(pdf_template_path)
178
+ .replace("{{book_title}}", book_config.title)
179
+ .replace("{{hb_logo}}", hb_logo);
180
+ }
181
+
182
+ const processed_html_source = pdf_template_content
183
+ .replace("{{book_title}}", book_config.title)
184
+ .replace("{{document_content}}", html_source);
185
+
186
+ const page = await browser.newPage();
187
+
188
+ // To reflect CSS used for screens instead of print
189
+ await page.emulateMediaType("screen");
190
+
191
+ // Set HTML content from HTML source
192
+ await page.setContent(processed_html_source, {
193
+ waitUntil: "domcontentloaded",
194
+ });
195
+ for (let i = 0; i < css_templates.length; i++) {
196
+ try {
197
+ await page.addStyleTag({
198
+ content: css_templates[i],
199
+ });
200
+ } catch (e) {
201
+ console.error(`Error applying template for [${target_file}]: ${e}`);
202
+ }
203
+ }
204
+
205
+ try {
206
+ const pdf_gen = await page.pdf({
207
+ path: target_file,
208
+ printBackground: true,
209
+ format: "A4",
210
+ displayHeaderFooter: true,
211
+ headerTemplate: header,
212
+ footerTemplate: footer,
213
+ margin: {
214
+ top: "90px",
215
+ right: "30px",
216
+ bottom: "60px",
217
+ left: "30px",
218
+ },
219
+ timeout: 0,
220
+ });
221
+ const currdate = new Date();
222
+ const datetime = currdate.toISOString();
223
+ if (verbose)
224
+ console.log(`[${datetime}] PDF generation success: ${target_file}`);
225
+
226
+ pdf_size = pdf_gen.byteLength;
227
+ } catch (err) {
228
+ console.error(`Error generating PDF ${target_file} - ${err}`);
229
+ }
230
+ await page.close();
231
+ return pdf_size;
232
+ };
233
+ })();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.32.0",
3
+ "version": "0.32.1",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {