Dhalang 0.2.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
3
-
4
- RSpec::Core::RakeTask.new(:spec)
5
-
6
- task :default => :spec
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -1,4 +1,14 @@
1
- module Dhalang
2
- require 'PDF'
3
- require 'Screenshot'
4
- end
1
+ module Dhalang
2
+ require_relative 'PDF'
3
+ require_relative 'Screenshot'
4
+ require_relative 'Dhalang/version'
5
+ require_relative 'Dhalang/url_utils'
6
+ require_relative 'Dhalang/file_utils'
7
+ require_relative 'Dhalang/error'
8
+ require_relative 'Dhalang/puppeteer'
9
+ require 'uri'
10
+ require 'tempfile'
11
+ require 'shellwords'
12
+ require 'json'
13
+ require 'open3'
14
+ end
@@ -0,0 +1 @@
1
# Error type raised by Dhalang itself: Puppeteer.visit raises it when the Node
# process does not exit successfully, and Screenshot raises it when the
# unsupported "type" option is passed.
+ class DhalangError < StandardError; end
@@ -0,0 +1,37 @@
1
module Dhalang
  # Contains common logic for the (temporary) files Dhalang works with.
  class FileUtils
    # Reads the file under the given filepath as a binary.
    #
    # @param [String] file_path The absolute path of the file to read.
    #
    # @return [String] The binary content under the file_path.
    def self.read_binary(file_path)
      IO.binread(file_path)
    end

    # Creates a new temp file whose name starts with "dhalang" and ends with
    # ".<extension>".
    #
    # @param [String] extension The extension of the file, without the leading dot.
    # @param [String] content   The content of the file. (Optional)
    #
    # @return [Tempfile] The created temp file. It is still open; when content
    #   was given it has been written and the file is rewound to its start.
    def self.create_temp_file(extension, content = nil)
      temp_file = Tempfile.new(['dhalang', ".#{extension}"])
      return temp_file if content.nil?

      temp_file.write(content)
      temp_file.rewind
      temp_file
    end

    # Deletes the given file, closing it first when it is still open.
    #
    # @param [File] file The file to delete.
    def self.delete(file)
      file.close unless file.closed?
      file.unlink
    end
  end
end
@@ -0,0 +1,95 @@
1
module Dhalang
  # Contains common logic for interacting with Puppeteer.
  class Puppeteer
    # Directory handed to the Node scripts so they can resolve the puppeteer module.
    NODE_MODULES_PATH = "#{Dir.pwd}/node_modules/".freeze
    private_constant :NODE_MODULES_PATH

    # Defaults for the options that steer navigation and page set-up.
    USER_OPTIONS = {
      navigationTimeout: 10000,
      navigationWaitUntil: 'load',
      navigationWaitForSelector: '',
      navigationWaitForXPath: '',
      userAgent: '',
      isHeadless: true,
      viewPort: '',
      httpAuthenticationCredentials: '',
      isAutoHeight: false
    }.freeze
    private_constant :USER_OPTIONS

    # Defaults for the options forwarded as PDF options.
    DEFAULT_PDF_OPTIONS = {
      scale: 1,
      displayHeaderFooter: false,
      headerTemplate: '',
      footerTemplate: '',
      printBackground: true,
      landscape: false,
      pageRanges: '',
      format: 'A4',
      width: '',
      height: '',
      margin: { top: 36, right: 36, bottom: 20, left: 36 },
      preferCSSPageSize: false
    }.freeze
    private_constant :DEFAULT_PDF_OPTIONS

    # Defaults for the options forwarded as PNG screenshot options.
    DEFAULT_PNG_OPTIONS = {
      fullPage: true,
      clip: nil,
      omitBackground: false
    }.freeze
    private_constant :DEFAULT_PNG_OPTIONS

    # Defaults for the options forwarded as JPEG screenshot options.
    DEFAULT_JPEG_OPTIONS = {
      quality: 100,
      fullPage: true,
      clip: nil,
      omitBackground: false
    }.freeze
    private_constant :DEFAULT_JPEG_OPTIONS

    # Launches a new Node process, executing the (Puppeteer) script under the given script_path.
    #
    # @param [String] page_url            The url to pass to the goTo method of Puppeteer.
    # @param [String] script_path         The absolute path of the JS script to execute.
    # @param [String] temp_file_path      The absolute path of the temp file to use to write any actions from Puppeteer.
    # @param [String] temp_file_extension The extension of the temp file.
    # @param [Hash]   options             Set of options to use, configurable by the user.
    #
    # @return [nil] When the Node process exited successfully.
    # @raise [DhalangError] When the Node process could not be started or did not
    #   exit successfully. The message is the combined stdout/stderr output of the
    #   process, or its exit status when it printed nothing.
    def self.visit(page_url, script_path, temp_file_path, temp_file_extension, options)
      configuration = create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)

      begin
        # The argument-list form bypasses the shell, so neither the script path nor
        # the JSON needs escaping. capture2e drains the output while it waits, so a
        # chatty child process cannot block on a full pipe.
        output, status = Open3.capture2e('node', script_path, configuration)
      rescue SystemCallError => error
        # Keep a single error type for callers, e.g. when node is not installed.
        raise DhalangError, error.message
      end
      return nil if status.success?

      output = output.strip
      raise DhalangError, (output.empty? ? "Exited with status #{status.exitstatus}" : output)
    end

    # Returns a JSON string with the configuration to use within the Puppeteer script.
    #
    # @param [String] page_url            The url to pass to the goTo method of Puppeteer.
    # @param [String] script_path         The absolute path of the JS script to execute.
    # @param [String] temp_file_path      The absolute path of the temp file to use to write any actions from Puppeteer.
    # @param [String] temp_file_extension The extension of the temp file.
    # @param [Hash]   options             Set of options to use, configurable by the user.
    #   Keys may be symbols or strings; unknown keys are ignored.
    #
    # @return [String] The configuration serialized as JSON.
    private_class_method def self.create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)
      overrides = (options || {}).transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }

      {
        webPageUrl: page_url,
        tempFilePath: temp_file_path,
        puppeteerPath: NODE_MODULES_PATH,
        imageType: temp_file_extension,
        userOptions: apply_overrides(USER_OPTIONS, overrides),
        pdfOptions: apply_overrides(DEFAULT_PDF_OPTIONS, overrides),
        pngOptions: apply_overrides(DEFAULT_PNG_OPTIONS, overrides),
        jpegOptions: apply_overrides(DEFAULT_JPEG_OPTIONS, overrides)
      }.to_json
    end

    # Builds a copy of defaults in which every key that is also present in
    # overrides takes the overriding value.
    #
    # @param [Hash] defaults  The known option names with their default values.
    # @param [Hash] overrides The user supplied options, keyed by symbol.
    #
    # @return [Hash] The effective options, limited to the keys of defaults.
    private_class_method def self.apply_overrides(defaults, overrides)
      defaults.map { |name, default| [name, overrides.fetch(name, default)] }.to_h
    end
  end
end
@@ -0,0 +1,14 @@
1
require 'uri'

module Dhalang
  # Contains common logic for URL's.
  class UrlUtils
    # Pattern for an absolute URI, taken from the RFC 2396 parser explicitly.
    # URI::DEFAULT_PARSER is not guaranteed to be that parser on every Ruby /
    # uri version, and the RFC 3986 parser exposes no :ABS_URI entry, which
    # would make the lookup return nil and reject every url.
    ABSOLUTE_URI_PATTERN = URI::RFC2396_Parser.new.regexp[:ABS_URI]
    private_constant :ABSOLUTE_URI_PATTERN

    # Raises an error if the given URL cannot be used for navigation with Puppeteer.
    #
    # @param [String] url The url to validate
    #
    # @return [nil] When the url is an absolute URI.
    # @raise [URI::InvalidURIError] When the url is not an absolute URI.
    def self.validate(url)
      return if ABSOLUTE_URI_PATTERN.match?(url.to_s)

      raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
    end
  end
end
@@ -1,3 +1,3 @@
1
- module Dhalang
2
- VERSION = "0.2.0"
3
- end
1
+ module Dhalang
2
+ VERSION = "0.6.0"
3
+ end
data/lib/PDF.rb CHANGED
@@ -1,65 +1,54 @@
1
- require "Dhalang/version"
2
- require 'uri'
3
- require 'tempfile'
4
-
5
- module Dhalang
6
- class PDF
7
- PDF_GENERATOR_JS_PATH = File.expand_path('../js/pdfgenerator.js', __FILE__)
8
- PROJECT_PATH = Dir.pwd + '/node_modules/'
9
-
10
- def self.get_from_url(url)
11
- validate_url(url)
12
- temporary_pdf_save_file = create_temporary_pdf_file
13
- begin
14
- visit_page_with_puppeteer(url, temporary_pdf_save_file.path)
15
- binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
16
- ensure
17
- temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
18
- temporary_pdf_save_file.unlink
19
- end
20
- return binary_pdf_content
21
- end
22
-
23
- def self.get_from_html(html)
24
- html_file = create_temporary_html_file(html)
25
- temporary_pdf_save_file = create_temporary_pdf_file
26
- begin
27
- visit_page_with_puppeteer("file://" + html_file.path, temporary_pdf_save_file.path)
28
- binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
29
- ensure
30
- temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
31
- html_file.close unless html_file.closed?
32
- temporary_pdf_save_file.unlink
33
- html_file.unlink
34
- end
35
- return binary_pdf_content
36
- end
37
-
38
- private
39
- def self.validate_url(url)
40
- if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
41
- raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
42
- end
43
- end
44
-
45
- def self.create_temporary_pdf_file
46
- Tempfile.new("pdf")
47
- end
48
-
49
- ## Creates a temp .html file which can be browsed to by puppeteer for creating a pdf
50
- def self.create_temporary_html_file(content)
51
- html_file = Tempfile.new(['page', '.html'])
52
- html_file.write(content)
53
- html_file.rewind
54
- return html_file
55
- end
56
-
57
- def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to)
58
- system("node #{PDF_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)}")
59
- end
60
-
61
- def self.get_file_content_as_binary_string(file)
62
- IO.binread(file.path)
63
- end
64
- end
65
- end
1
module Dhalang
  # Entry point for consumers that want to render PDFs through Puppeteer.
  class PDF
    PUPPETEER_SCRIPT_PATH = File.expand_path('../js/pdf-generator.js', __FILE__).freeze
    private_constant :PUPPETEER_SCRIPT_PATH

    # Renders the webpage under the given url to a PDF.
    #
    # @param [String] url     The url to get as PDF; validated before use.
    # @param [Hash]   options User configurable options.
    #
    # @return [String] The created PDF as binary.
    def self.get_from_url(url, options = {})
      UrlUtils.validate(url)
      get(url, options)
    end

    # Renders the given HTML to a PDF by writing it to a temporary .html file
    # and visiting that file through a file:// url.
    # Useful when creating dynamic content, for example invoices.
    #
    # @param [String] html    The html to get as PDF.
    # @param [Hash]   options User configurable options.
    #
    # @return [String] The created PDF as binary.
    def self.get_from_html(html, options = {})
      source_file = FileUtils.create_temp_file("html", html)
      source_url = "file://" + source_file.path
      begin
        get(source_url, options)
      ensure
        # The temporary html file is removed whether or not rendering succeeded.
        FileUtils.delete(source_file)
      end
    end

    # Runs the PDF script against the url and returns what it wrote to a
    # temporary .pdf file; the temp file is always removed afterwards.
    #
    # @param [String] url     The url to create a PDF for.
    # @param [Hash]   options Set of options to use, passed by the user of this library.
    #
    # @return [String] The created PDF as binary.
    private_class_method def self.get(url, options)
      target_file = FileUtils.create_temp_file("pdf")
      begin
        Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, target_file.path, "pdf", options)
        FileUtils.read_binary(target_file.path)
      ensure
        FileUtils.delete(target_file)
      end
    end
  end
end
@@ -1,51 +1,57 @@
1
- require "Dhalang/version"
2
- require 'uri'
3
- require 'tempfile'
4
-
5
- module Dhalang
6
- class Screenshot
7
- SCREENSHOT_GENERATOR_JS_PATH = File.expand_path('../js/screenshotgenerator.js', __FILE__)
8
- PROJECT_PATH = Dir.pwd + '/node_modules/'
9
-
10
- def self.get_from_url_as_jpeg(url)
11
- validate_url(url)
12
- get_image(url, :jpeg)
13
- end
14
-
15
- def self.get_from_url_as_png(url)
16
- validate_url(url)
17
- get_image(url, :png)
18
- end
19
-
20
- private
21
- def self.validate_url(url)
22
- if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
23
- raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
24
- end
25
- end
26
-
27
- def self.create_temporary_screenshot_file
28
- Tempfile.new("png")
29
- end
30
-
31
- def self.get_image(url, type)
32
- temporary_screenshot_save_file = create_temporary_screenshot_file
33
- begin
34
- visit_page_with_puppeteer(url, temporary_screenshot_save_file.path, type)
35
- binary_image_content = get_file_content_as_binary_string(temporary_screenshot_save_file)
36
- ensure
37
- temporary_screenshot_save_file.close unless temporary_screenshot_save_file.closed?
38
- temporary_screenshot_save_file.unlink
39
- end
40
- return binary_image_content
41
- end
42
-
43
- def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to, image_save_type)
44
- system("node #{SCREENSHOT_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)} #{Shellwords.escape(image_save_type)}")
45
- end
46
-
47
- def self.get_file_content_as_binary_string(file)
48
- IO.binread(file.path)
49
- end
50
- end
51
- end
1
module Dhalang
  # Entry point for consumers that want to take screenshots through Puppeteer.
  class Screenshot
    PUPPETEER_SCRIPT_PATH = File.expand_path('../js/screenshot-generator.js', __FILE__).freeze
    private_constant :PUPPETEER_SCRIPT_PATH

    # Takes a JPEG screenshot of the webpage under the given url.
    #
    # @param [String] url     The url to take a screenshot of.
    # @param [Hash]   options User configurable options.
    #
    # @return [String] The screenshot that was taken as binary.
    def self.get_from_url_as_jpeg(url, options = {})
      get(url, "jpeg", options)
    end

    # Takes a PNG screenshot of the webpage under the given url.
    #
    # @param [String] url     The url to take a screenshot of.
    # @param [Hash]   options User configurable options.
    #
    # @return [String] The screenshot that was taken as binary.
    def self.get_from_url_as_png(url, options = {})
      get(url, "png", options)
    end

    # Validates the input, runs the screenshot script against the url and
    # returns what it wrote to a temporary file of the requested image type.
    # The temp file is always removed afterwards.
    #
    # @param [String] url        The url to take a screenshot of.
    # @param [String] image_type The image type to use for storing the screenshot.
    # @param [Hash]   options    Set of options to use, passed by the user of this library.
    #
    # @return [String] The screenshot that was taken as binary.
    private_class_method def self.get(url, image_type, options)
      UrlUtils.validate(url)
      validate_options(options)

      image_file = FileUtils.create_temp_file(image_type)
      begin
        Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, image_file.path, image_type, options)
        FileUtils.read_binary(image_file.path)
      ensure
        FileUtils.delete(image_file)
      end
    end

    # Rejects options that might conflict with the Puppeteer configuration:
    # a "type" key, given as symbol or string, is not allowed.
    #
    # @param [Hash] options The options to validate
    private_class_method def self.validate_options(options)
      return unless options.transform_keys(&:to_sym).has_key?(:type)

      raise DhalangError, 'Invalid option set: "type"'
    end
  end
end
@@ -0,0 +1,146 @@
1
+ /**
2
+ * @typedef {Object} Configuration
3
+ * @property {string} webPageUrl - The url of the webpage to visit.
4
+ * @property {string} tempFilePath - The path of the tempfile to write the screenshot/pdf to.
5
+ * @property {string} puppeteerPath - The path of the directory that contains the Puppeteer module.
6
+ * @property {string} imageType - The type of image to save ( undefined for pdfgenerator ).
7
+ * @property {UserOptions} userOptions - User defined and default parameters to use when navigating to pages.
8
+ * @property {Object} pdfOptions - User defined and default parameters to use when creating PDFs. Note: Do not use directly, rather use {@link getConfiguredPdfOptions}.
9
+ * @property {Object} pngOptions - User defined and default parameters to use when creating PNGs.
10
+ * @property {Object} jpegOptions - User defined and default parameters to use when creating JPEGs.
11
+ */
12
+
13
+ /**
14
+ * @typedef {Object} UserOptions
15
+ * @property {number} navigationTimeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
16
+ * @property {string} navigationWaitUntil - Determines when the navigation was finished, we wait here until the Window.load event is fired ( meaning all images, stylesheet, etc was loaded ).
17
+ * @property {string} navigationWaitForSelector - If set, specifies the selector Puppeteer should wait for to appear before continuing.
18
+ * @property {string} navigationWaitForXPath - If set, specifies the XPath Puppeteer should wait for to appear before continuing.
19
+ * @property {string} userAgent - The user agent to send with requests.
20
+ * @property {boolean} isHeadless - Indicates if Puppeteer should launch Chromium in headless mode.
21
+ * @property {Object} viewPort - The view port to use.
22
+ * @property {Object} httpAuthenticationCredentials - The credentials to use for HTTP authentication.
23
+ * @property {boolean} isAutoHeight - When true, the height of the PDF is derived from the height of the document body.
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} NavigationParameters
28
+ * @property {number} timeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
29
+ * @property {string} waituntil - Determines when the navigation was finished, we wait here until the Window.load event is fired ( meaning all images, stylesheet, etc was loaded ).
30
+ */
31
+
32
+ /**
33
+ * @typedef {Object} WaitingParameters
34
+ * @property {number} timeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
35
+ */
36
+
37
+ /**
38
+ * Parses the given configuration process argument from Ruby to a JS object.
39
+ * @returns {Configuration}
40
+ * The configuration object.
41
+ */
42
+ exports.getConfiguration = function () {
43
+ return JSON.parse(process.argv[2])
44
+ }
45
+
46
+ /**
47
+ * Launches Puppeteer and returns its instance.
48
+ * @param {UserOptions} configuration - The configuration to use.
49
+ * @returns {Promise<Object>}
50
+ * The launched instance of Puppeteer.
51
+ */
52
+ exports.launchPuppeteer = async function (configuration) {
53
+ module.paths.push(configuration.puppeteerPath);
54
+ const puppeteer = require('puppeteer');
55
+ const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
56
+ return await puppeteer.launch({
57
+ args: launchArgs,
58
+ headless: configuration.userOptions.isHeadless
59
+ });
60
+ }
61
+
62
+ /**
63
+ * Configures the given Puppeteer page object.
64
+ * @param {Object} page - The Puppeteer page object to configure.
65
+ * @param {UserOptions} userOptions - The user options to use.
66
+ */
67
+ exports.configure = async function (page, userOptions) {
68
+ if (userOptions.userAgent !== "") {
69
+ await page.setUserAgent(userOptions.userAgent)
70
+ }
71
+
72
+ if (userOptions.viewPort !== "") {
73
+ await page.setViewport(userOptions.viewPort)
74
+ }
75
+
76
+ if (userOptions.httpAuthenticationCredentials !== "") {
77
+ await page.authenticate(userOptions.authenticationCredentials)
78
+ }
79
+ }
80
+
81
+ /**
82
+ * Makes the Puppeteer page object open the url with the specified navigation logic as specified in the given configuration.
83
+ * @param {Object} page - The Puppeteer page object to use for navigation.
84
+ * @param {Configuration} configuration - The configuration to use.
85
+ */
86
+ exports.navigate = async function (page, configuration) {
87
+ const navigationWaitForSelector = configuration.userOptions.navigationWaitForSelector;
88
+ const navigationWaitForXPath = configuration.userOptions.navigationWaitForXPath;
89
+
90
+ await page.goto(configuration.webPageUrl, this.getNavigationParameters(configuration));
91
+
92
+ if (navigationWaitForSelector !== "") {
93
+ await page.waitForSelector(navigationWaitForSelector, this.getWaitingParameters(configuration));
94
+ } else if (navigationWaitForXPath !== "") {
95
+ await page.waitForXPath(navigationWaitForXPath, this.getWaitingParameters(configuration));
96
+ } else {
97
+ await page.waitForTimeout(250);
98
+ }
99
+ }
100
+
101
+ /**
102
+ * Returns the PDF options to pass to Puppeteer based on the set user options and the documents body.
103
+ * @param {Object} page - The Puppeteer page to configure.
104
+ * @param {UserOptions} configuration - The configuration to use.
105
+ * @returns {Object} - pdfOptions
106
+ */
107
+ exports.getConfiguredPdfOptions = async function (page, configuration) {
108
+ const pdfOptions = configuration.pdfOptions
109
+
110
+ if (configuration.userOptions.isAutoHeight === true) {
111
+ const pageHeight = await page.evaluate(() => {
112
+ return Math.max(document.body.scrollHeight, document.body.offsetHeight);
113
+ })
114
+ if (pageHeight) {
115
+ pdfOptions['height'] = pageHeight + 1 + 'px'
116
+ }
117
+ }
118
+
119
+ return pdfOptions
120
+ }
121
+
122
+ /**
123
+ * Extracts the navigation parameters from the configuration in a format that is usable by Puppeteer.
124
+ * @param {Configuration} configuration - The configuration to extract the navigation parameters from.
125
+ * @returns {NavigationParameters}
126
+ * The extracted navigation parameters.
127
+ */
128
+ exports.getNavigationParameters = function (configuration) {
129
+ return {
130
+ timeout: configuration.userOptions.navigationTimeout,
131
+ waituntil: configuration.userOptions.navigationWaitUntil
132
+ }
133
+ }
134
+
135
+
136
+ /**
137
+ * Extracts the waiting parameters from the configuration in a format that is usable by Puppeteer.
138
+ * @param {Configuration} configuration - The configuration to extract the waiting parameters from.
139
+ * @returns {WaitingParameters}
140
+ * The extracted waiting parameters.
141
+ */
142
+ exports.getWaitingParameters = function (configuration) {
143
+ return {
144
+ timeout: configuration.userOptions.navigationTimeout
145
+ }
146
+ }