Dhalang 0.2.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -1,6 +1,6 @@
1
- require "bundler/gem_tasks"
2
- require "rspec/core/rake_task"
3
-
4
- RSpec::Core::RakeTask.new(:spec)
5
-
6
- task :default => :spec
1
# Rake entry point for the gem: pulls in Bundler's gem tasks and an RSpec runner.
require "bundler/gem_tasks"
require "rspec/core/rake_task"

# Registers the `spec` task.
RSpec::Core::RakeTask.new(:spec)

# Running `rake` without arguments runs the `spec` task.
task default: :spec
@@ -1,4 +1,14 @@
1
- module Dhalang
2
- require 'PDF'
3
- require 'Screenshot'
4
- end
1
# Entry point of the gem: loads every Dhalang component followed by the
# standard-library modules they rely on at call time.
module Dhalang
  # Project files, loaded in the same order as before.
  %w[
    PDF
    Screenshot
    Dhalang/version
    Dhalang/url_utils
    Dhalang/file_utils
    Dhalang/error
    Dhalang/puppeteer
  ].each { |component| require_relative component }

  # Standard-library dependencies.
  %w[uri tempfile shellwords json open3].each { |library| require library }
end
@@ -0,0 +1 @@
1
# Error type raised by Dhalang itself, e.g. when the Node process fails or
# when an option is passed that conflicts with the Puppeteer configuration.
class DhalangError < StandardError
end
@@ -0,0 +1,37 @@
1
module Dhalang
  # File helpers shared by the PDF and screenshot generators.
  class FileUtils
    class << self
      # Returns the raw bytes stored in a file.
      #
      # @param [String] file_path Absolute path of the file to read.
      #
      # @return [String] The binary content of the file.
      def read_binary(file_path)
        IO.binread(file_path)
      end

      # Creates a temp file named "dhalang…<extension>" and optionally fills it.
      #
      # @param [String] extension Extension of the file, without the leading dot.
      # @param [String] content   Content to write to the file. (Optional)
      #
      # @return [Tempfile] The created temp file, rewound to its start when content was written.
      def create_temp_file(extension, content = nil)
        Tempfile.new(["dhalang", ".#{extension}"]).tap do |created|
          next if content.nil?

          created.write(content)
          created.rewind
        end
      end

      # Closes the given file when it is still open and removes it from disk.
      #
      # @param [Tempfile] file The file to delete.
      def delete(file)
        file.close unless file.closed?
        file.unlink
      end
    end
  end
end
@@ -0,0 +1,95 @@
1
module Dhalang
  # Contains common logic for interacting with Puppeteer.
  class Puppeteer
    # Directory passed to the Node script as `puppeteerPath`; derived from the
    # working directory at load time. The whole path is frozen (previously only
    # the '/node_modules/' literal was, because of operator precedence).
    NODE_MODULES_PATH = "#{Dir.pwd}/node_modules/".freeze
    private_constant :NODE_MODULES_PATH

    # Defaults for the options that steer navigation and page setup. An empty
    # string means "not set".
    USER_OPTIONS = {
      navigationTimeout: 10000,
      navigationWaitUntil: 'load',
      navigationWaitForSelector: '',
      navigationWaitForXPath: '',
      userAgent: '',
      isHeadless: true,
      viewPort: '',
      httpAuthenticationCredentials: '',
      isAutoHeight: false
    }.freeze
    private_constant :USER_OPTIONS

    # Defaults for the options sent to the script as `pdfOptions`.
    DEFAULT_PDF_OPTIONS = {
      scale: 1,
      displayHeaderFooter: false,
      headerTemplate: '',
      footerTemplate: '',
      printBackground: true,
      landscape: false,
      pageRanges: '',
      format: 'A4',
      width: '',
      height: '',
      margin: { top: 36, right: 36, bottom: 20, left: 36 }.freeze,
      # Was misspelled "preferCSSPageSiz", which made the option impossible to set.
      preferCSSPageSize: false
    }.freeze
    private_constant :DEFAULT_PDF_OPTIONS

    # Defaults for the options sent to the script as `pngOptions`.
    DEFAULT_PNG_OPTIONS = {
      fullPage: true,
      clip: nil,
      omitBackground: false
    }.freeze
    private_constant :DEFAULT_PNG_OPTIONS

    # Defaults for the options sent to the script as `jpegOptions`.
    DEFAULT_JPEG_OPTIONS = {
      quality: 100,
      fullPage: true,
      clip: nil,
      omitBackground: false
    }.freeze
    private_constant :DEFAULT_JPEG_OPTIONS

    # Launches a new Node process, executing the (Puppeteer) script under the given script_path.
    #
    # The process is started without a shell, so neither the script path nor the
    # JSON configuration needs escaping. Its combined stdout/stderr is read while
    # it runs, so a chatty script cannot block on a full pipe.
    #
    # @param [String] page_url            The url to pass to the goTo method of Puppeteer.
    # @param [String] script_path         The absolute path of the JS script to execute.
    # @param [String] temp_file_path      The absolute path of the temp file to use to write any actions from Puppeteer.
    # @param [String] temp_file_extension The extension of the temp file.
    # @param [Hash]   options             Set of options to use, configurable by the user.
    #
    # @return [nil] When the Node process exits successfully.
    #
    # @raise [DhalangError] When Node cannot be started or exits with a failure status; the
    #   message is the process output, or the exit status when there was no output.
    def self.visit(page_url, script_path, temp_file_path, temp_file_extension, options)
      configuration = create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)

      begin
        output, status = Open3.capture2e('node', script_path, configuration)
      rescue SystemCallError => e
        # Without a shell a missing `node` binary surfaces as Errno::ENOENT; keep
        # reporting it through the error type callers already rescue.
        raise DhalangError, "Could not start node: #{e.message}"
      end
      return nil if status.success?

      message = output.strip
      message = "Exited with status #{status.exitstatus}" if message.empty?
      raise DhalangError, message
    end

    # Returns a JSON string with the configuration to use within the Puppeteer script.
    #
    # Only options that are known in one of the default sets are forwarded; every
    # other default is kept. Option names may be given as symbols or strings.
    #
    # @param [String] page_url            The url to pass to the goTo method of Puppeteer.
    # @param [String] script_path         The absolute path of the JS script to execute (not part of the configuration).
    # @param [String] temp_file_path      The absolute path of the temp file to use to write any actions from Puppeteer.
    # @param [String] temp_file_extension The extension of the temp file.
    # @param [Hash]   options             Set of options to use, configurable by the user.
    #
    # @return [String] The configuration serialized as JSON.
    private_class_method def self.create_configuration(page_url, script_path, temp_file_path, temp_file_extension, options)
      user_settings = normalize_options(options)
      {
        webPageUrl: page_url,
        tempFilePath: temp_file_path,
        puppeteerPath: NODE_MODULES_PATH,
        imageType: temp_file_extension,
        userOptions: apply_overrides(USER_OPTIONS, user_settings),
        pdfOptions: apply_overrides(DEFAULT_PDF_OPTIONS, user_settings),
        pngOptions: apply_overrides(DEFAULT_PNG_OPTIONS, user_settings),
        jpegOptions: apply_overrides(DEFAULT_JPEG_OPTIONS, user_settings)
      }.to_json
    end

    # Returns the user options with symbol keys, so "format" and :format are treated alike.
    #
    # @param [Hash, nil] options The options as passed by the user.
    #
    # @return [Hash] A copy of the options keyed by symbols where possible.
    private_class_method def self.normalize_options(options)
      options.to_h.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
    end

    # Returns the defaults with every user-supplied value for a known option applied.
    # Explicit nil/false values count as supplied.
    #
    # @param [Hash] defaults  The default set of options.
    # @param [Hash] overrides The normalized user options.
    #
    # @return [Hash] The effective options, in the order of the defaults.
    private_class_method def self.apply_overrides(defaults, overrides)
      defaults.merge(overrides.slice(*defaults.keys))
    end
  end
end
@@ -0,0 +1,14 @@
1
module Dhalang
  # Contains common logic for URLs.
  class UrlUtils
    # Raises an error if the given URL cannot be used for navigation with Puppeteer.
    #
    # Only strings that form an absolute URI (scheme included) are accepted.
    #
    # @param [String] url The url to validate
    #
    # @return [nil] When the url is valid.
    #
    # @raise [URI::InvalidURIError] When the url is not an absolute URI.
    def self.validate(url)
      return if url.is_a?(String) && absolute_uri_pattern.match?(url)

      raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
    end

    # Returns the RFC 2396 pattern for absolute URIs.
    #
    # The RFC 2396 parser is requested explicitly instead of going through
    # URI::DEFAULT_PARSER: when the default parser is the RFC 3986 one, its
    # regexp table has no :ABS_URI entry and every url would be rejected.
    # The lookup is lazy because this file may be loaded before 'uri' is required.
    #
    # @return [Regexp] The pattern matching absolute URIs.
    private_class_method def self.absolute_uri_pattern
      @absolute_uri_pattern ||= begin
        parser = defined?(URI::RFC2396_PARSER) ? URI::RFC2396_PARSER : URI::RFC2396_Parser.new
        parser.regexp[:ABS_URI]
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
- module Dhalang
2
- VERSION = "0.2.0"
3
- end
1
module Dhalang
  # Version of the gem.
  VERSION = '0.6.0'
end
data/lib/PDF.rb CHANGED
@@ -1,65 +1,54 @@
1
- require "Dhalang/version"
2
- require 'uri'
3
- require 'tempfile'
4
-
5
- module Dhalang
6
- class PDF
7
- PDF_GENERATOR_JS_PATH = File.expand_path('../js/pdfgenerator.js', __FILE__)
8
- PROJECT_PATH = Dir.pwd + '/node_modules/'
9
-
10
- def self.get_from_url(url)
11
- validate_url(url)
12
- temporary_pdf_save_file = create_temporary_pdf_file
13
- begin
14
- visit_page_with_puppeteer(url, temporary_pdf_save_file.path)
15
- binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
16
- ensure
17
- temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
18
- temporary_pdf_save_file.unlink
19
- end
20
- return binary_pdf_content
21
- end
22
-
23
- def self.get_from_html(html)
24
- html_file = create_temporary_html_file(html)
25
- temporary_pdf_save_file = create_temporary_pdf_file
26
- begin
27
- visit_page_with_puppeteer("file://" + html_file.path, temporary_pdf_save_file.path)
28
- binary_pdf_content = get_file_content_as_binary_string(temporary_pdf_save_file)
29
- ensure
30
- temporary_pdf_save_file.close unless temporary_pdf_save_file.closed?
31
- html_file.close unless html_file.closed?
32
- temporary_pdf_save_file.unlink
33
- html_file.unlink
34
- end
35
- return binary_pdf_content
36
- end
37
-
38
- private
39
- def self.validate_url(url)
40
- if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
41
- raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
42
- end
43
- end
44
-
45
- def self.create_temporary_pdf_file
46
- Tempfile.new("pdf")
47
- end
48
-
49
- ## Creates a temp .html file which can be browsed to by puppeteer for creating a pdf
50
- def self.create_temporary_html_file(content)
51
- html_file = Tempfile.new(['page', '.html'])
52
- html_file.write(content)
53
- html_file.rewind
54
- return html_file
55
- end
56
-
57
- def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to)
58
- system("node #{PDF_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)}")
59
- end
60
-
61
- def self.get_file_content_as_binary_string(file)
62
- IO.binread(file.path)
63
- end
64
- end
65
- end
1
module Dhalang
  # Allows consumers of this library to create PDFs with Puppeteer.
  class PDF
    PUPPETEER_SCRIPT_PATH = File.expand_path('../js/pdf-generator.js', __FILE__).freeze
    private_constant :PUPPETEER_SCRIPT_PATH

    class << self
      # Renders the webpage under the given url to a PDF.
      #
      # @param [String] url     The url to get as PDF.
      # @param [Hash]   options User configurable options.
      #
      # @return [String] The binary content of the created PDF.
      def get_from_url(url, options = {})
        UrlUtils.validate(url)
        get(url, options)
      end

      # Renders the given HTML to a PDF.
      # Useful when creating dynamic content, for example invoices.
      #
      # The HTML is written to a temp file that is opened through a file:// url
      # and removed again afterwards.
      #
      # @param [String] html    The html to get as PDF.
      # @param [Hash]   options User configurable options.
      #
      # @return [String] The binary content of the created PDF.
      def get_from_html(html, options = {})
        source_page = FileUtils.create_temp_file('html', html)
        source_url = 'file://' + source_page.path
        begin
          get(source_url, options)
        ensure
          FileUtils.delete(source_page)
        end
      end

      private

      # Lets Puppeteer write the PDF for the url to a temp file and returns that
      # file's content; the temp file is always removed.
      #
      # @param [String] url     The url to create a PDF for.
      # @param [Hash]   options Set of options to use, passed by the user of this library.
      #
      # @return [String] The binary content of the created PDF.
      def get(url, options)
        output_file = FileUtils.create_temp_file('pdf')
        begin
          Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, output_file.path, 'pdf', options)
          FileUtils.read_binary(output_file.path)
        ensure
          FileUtils.delete(output_file)
        end
      end
    end
  end
end
@@ -1,51 +1,57 @@
1
- require "Dhalang/version"
2
- require 'uri'
3
- require 'tempfile'
4
-
5
- module Dhalang
6
- class Screenshot
7
- SCREENSHOT_GENERATOR_JS_PATH = File.expand_path('../js/screenshotgenerator.js', __FILE__)
8
- PROJECT_PATH = Dir.pwd + '/node_modules/'
9
-
10
- def self.get_from_url_as_jpeg(url)
11
- validate_url(url)
12
- get_image(url, :jpeg)
13
- end
14
-
15
- def self.get_from_url_as_png(url)
16
- validate_url(url)
17
- get_image(url, :png)
18
- end
19
-
20
- private
21
- def self.validate_url(url)
22
- if (url !~ URI::DEFAULT_PARSER.regexp[:ABS_URI])
23
- raise URI::InvalidURIError, 'The given url was invalid, use format http://www.example.com'
24
- end
25
- end
26
-
27
- def self.create_temporary_screenshot_file
28
- Tempfile.new("png")
29
- end
30
-
31
- def self.get_image(url, type)
32
- temporary_screenshot_save_file = create_temporary_screenshot_file
33
- begin
34
- visit_page_with_puppeteer(url, temporary_screenshot_save_file.path, type)
35
- binary_image_content = get_file_content_as_binary_string(temporary_screenshot_save_file)
36
- ensure
37
- temporary_screenshot_save_file.close unless temporary_screenshot_save_file.closed?
38
- temporary_screenshot_save_file.unlink
39
- end
40
- return binary_image_content
41
- end
42
-
43
- def self.visit_page_with_puppeteer(page_to_visit, path_to_save_pdf_to, image_save_type)
44
- system("node #{SCREENSHOT_GENERATOR_JS_PATH} #{page_to_visit} #{Shellwords.escape(path_to_save_pdf_to)} #{Shellwords.escape(PROJECT_PATH)} #{Shellwords.escape(image_save_type)}")
45
- end
46
-
47
- def self.get_file_content_as_binary_string(file)
48
- IO.binread(file.path)
49
- end
50
- end
51
- end
1
module Dhalang
  # Allows consumers of this library to take screenshots with Puppeteer.
  class Screenshot
    PUPPETEER_SCRIPT_PATH = File.expand_path('../js/screenshot-generator.js', __FILE__).freeze
    private_constant :PUPPETEER_SCRIPT_PATH

    class << self
      # Takes a JPEG screenshot of the webpage under the given url.
      #
      # @param [String] url     The url to take a screenshot of.
      # @param [Hash]   options User configurable options.
      #
      # @return [String] The binary content of the screenshot.
      def get_from_url_as_jpeg(url, options = {})
        get(url, 'jpeg', options)
      end

      # Takes a PNG screenshot of the webpage under the given url.
      #
      # @param [String] url     The url to take a screenshot of.
      # @param [Hash]   options User configurable options.
      #
      # @return [String] The binary content of the screenshot.
      def get_from_url_as_png(url, options = {})
        get(url, 'png', options)
      end

      private

      # Validates the input, lets Puppeteer write the screenshot to a temp file
      # and returns that file's content; the temp file is always removed.
      #
      # @param [String] url        The url to take a screenshot of.
      # @param [String] image_type The image type to use for storing the screenshot.
      # @param [Hash]   options    Set of options to use, passed by the user of this library.
      #
      # @return [String] The binary content of the screenshot.
      def get(url, image_type, options)
        UrlUtils.validate(url)
        validate_options(options)
        output_file = FileUtils.create_temp_file(image_type)
        begin
          Puppeteer.visit(url, PUPPETEER_SCRIPT_PATH, output_file.path, image_type, options)
          FileUtils.read_binary(output_file.path)
        ensure
          FileUtils.delete(output_file)
        end
      end

      # Raises an error if the given options might conflict with the Puppeteer configuration.
      # The "type" option is rejected, whether it is given as a string or a symbol key.
      #
      # @param [Hash] options The options to validate
      def validate_options(options)
        return unless options.transform_keys(&:to_sym).key?(:type)

        raise DhalangError, 'Invalid option set: "type"'
      end
    end
  end
end
@@ -0,0 +1,146 @@
1
+ /**
2
+ * @typedef {Object} Configuration
3
+ * @property {string} webPageUrl - The url of the webpage to visit.
4
+ * @property {string} tempFilePath - The path of the tempfile to write the screenshot/pdf to.
5
+ * @property {string} puppeteerPath - The path of the node_modules directory that contains the Puppeteer module.
6
+ * @property {string} imageType - The type of image to save ("pdf" when generating a PDF).
7
+ * @property {UserOptions} userOptions - User defined and default parameters to use when navigating to pages.
8
+ * @property {Object} pdfOptions - User defined and default parameters to use when creating PDFs. Note: Do not use directly, rather use {@link getConfiguredPdfOptions}.
9
+ * @property {Object} pngOptions - User defined and default parameters to use when creating PNGs.
10
+ * @property {Object} jpegOptions - User defined and default parameters to use when creating JPEGs.
11
+ */
12
+
13
+ /**
14
+ * @typedef {Object} UserOptions
15
+ * @property {number} navigationTimeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
16
+ * @property {string} navigationWaitUntil - Determines when navigation is considered finished; by default we wait until the Window.load event has fired (meaning all images, stylesheets, etc. have been loaded).
17
+ * @property {string} navigationWaitForSelector - If set, specifies the selector Puppeteer should wait for to appear before continuing.
18
+ * @property {string} navigationWaitForXPath - If set, specifies the XPath Puppeteer should wait for to appear before continuing.
19
+ * @property {string} userAgent - The user agent to send with requests.
20
+ * @property {boolean} isHeadless - Indicates if Puppeteer should launch Chromium in headless mode.
21
+ * @property {Object} viewPort - The view port to use.
22
+ * @property {Object} httpAuthenticationCredentials - The credentials to use for HTTP authentication.
23
+ * @property {boolean} isAutoHeight - The height is automatically set
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} NavigationParameters
28
+ * @property {number} timeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
29
+ * @property {string} waituntil - Determines when the navigation was finished, we wait here until the Window.load event is fired ( meaning all images, stylesheet, etc was loaded ).
30
+ */
31
+
32
+ /**
33
+ * @typedef {Object} WaitingParameters
34
+ * @property {number} timeout - Maximum in milliseconds until navigation times out, we use a default of 10 seconds as timeout.
35
+ */
36
+
37
+ /**
38
+ * Parses the given configuration process argument from Ruby to a JS object.
39
+ * @returns {Configuration}
40
+ * The configuration object.
41
+ */
42
+ exports.getConfiguration = function () {
43
+ return JSON.parse(process.argv[2])
44
+ }
45
+
46
+ /**
47
+ * Launches Puppeteer and returns its instance.
48
+ * @param {UserOptions} configuration - The configuration to use.
49
+ * @returns {Promise<Object>}
50
+ * The launched instance of Puppeteer.
51
+ */
52
+ exports.launchPuppeteer = async function (configuration) {
53
+ module.paths.push(configuration.puppeteerPath);
54
+ const puppeteer = require('puppeteer');
55
+ const launchArgs = ['--no-sandbox', '--disable-setuid-sandbox'];
56
+ return await puppeteer.launch({
57
+ args: launchArgs,
58
+ headless: configuration.userOptions.isHeadless
59
+ });
60
+ }
61
+
62
+ /**
63
+ * Configures the given Puppeteer page object.
64
+ * @param {Object} page - The Puppeteer page object to configure.
65
+ * @param {UserOptions} userOptions - The user options to use.
66
+ */
67
+ exports.configure = async function (page, userOptions) {
68
+ if (userOptions.userAgent !== "") {
69
+ await page.setUserAgent(userOptions.userAgent)
70
+ }
71
+
72
+ if (userOptions.viewPort !== "") {
73
+ await page.setViewport(userOptions.viewPort)
74
+ }
75
+
76
+ if (userOptions.httpAuthenticationCredentials !== "") {
77
+ await page.authenticate(userOptions.authenticationCredentials)
78
+ }
79
+ }
80
+
81
+ /**
82
+ * Makes the Puppeteer page object open the url with the specified navigation logic as specified in the given configuration.
83
+ * @param {Object} page - The Puppeteer page object to use for navigation.
84
+ * @param {Configuration} configuration - The configuration to use.
85
+ */
86
+ exports.navigate = async function (page, configuration) {
87
+ const navigationWaitForSelector = configuration.userOptions.navigationWaitForSelector;
88
+ const navigationWaitForXPath = configuration.userOptions.navigationWaitForXPath;
89
+
90
+ await page.goto(configuration.webPageUrl, this.getNavigationParameters(configuration));
91
+
92
+ if (navigationWaitForSelector !== "") {
93
+ await page.waitForSelector(navigationWaitForSelector, this.getWaitingParameters(configuration));
94
+ } else if (navigationWaitForXPath !== "") {
95
+ await page.waitForXPath(navigationWaitForXPath, this.getWaitingParameters(configuration));
96
+ } else {
97
+ await page.waitForTimeout(250);
98
+ }
99
+ }
100
+
101
+ /**
102
+ * Returns the PDF options to pass to Puppeteer based on the set user options and the documents body.
103
+ * @param {Object} page - The Puppeteer page to configure.
104
+ * @param {UserOptions} configuration - The configuration to use.
105
+ * @returns {Object} - pdfOptions
106
+ */
107
+ exports.getConfiguredPdfOptions = async function (page, configuration) {
108
+ const pdfOptions = configuration.pdfOptions
109
+
110
+ if (configuration.userOptions.isAutoHeight === true) {
111
+ const pageHeight = await page.evaluate(() => {
112
+ return Math.max(document.body.scrollHeight, document.body.offsetHeight);
113
+ })
114
+ if (pageHeight) {
115
+ pdfOptions['height'] = pageHeight + 1 + 'px'
116
+ }
117
+ }
118
+
119
+ return pdfOptions
120
+ }
121
+
122
+ /**
123
+ * Extracts the navigation parameters from the configuration in a format that is usable by Puppeteer.
124
+ * @param {Configuration} configuration - The configuration to extract the navigation parameters from.
125
+ * @returns {NavigationParameters}
126
+ * The extracted navigation parameters.
127
+ */
128
+ exports.getNavigationParameters = function (configuration) {
129
+ return {
130
+ timeout: configuration.userOptions.navigationTimeout,
131
+ waituntil: configuration.userOptions.navigationWaitUntil
132
+ }
133
+ }
134
+
135
+
136
+ /**
137
+ * Extracts the waiting parameters from the configuration in a format that is usable by Puppeteer.
138
+ * @param {Configuration} configuration - The configuration to extract the waiting parameters from.
139
+ * @returns {WaitingParameters}
140
+ * The extracted waiting parameters.
141
+ */
142
+ exports.getWaitingParameters = function (configuration) {
143
+ return {
144
+ timeout: configuration.userOptions.navigationTimeout
145
+ }
146
+ }