pdfkit 0.8.2 → 0.8.4.3.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of pdfkit might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/.github/workflows/stale.yml +19 -0
- data/.ruby-version +1 -1
- data/.travis.yml +12 -8
- data/CHANGELOG.md +30 -0
- data/Gemfile +1 -1
- data/README.md +31 -10
- data/lib/pdfkit.rb +3 -0
- data/lib/pdfkit/configuration.rb +36 -3
- data/lib/pdfkit/html_preprocessor.rb +23 -0
- data/lib/pdfkit/middleware.rb +36 -28
- data/lib/pdfkit/os.rb +19 -0
- data/lib/pdfkit/pdfkit.rb +40 -101
- data/lib/pdfkit/source.rb +2 -1
- data/lib/pdfkit/version.rb +1 -1
- data/lib/pdfkit/wkhtmltopdf.rb +80 -0
- data/pdfkit.gemspec +6 -6
- data/spec/configuration_spec.rb +83 -12
- data/spec/html_preprocessor_spec.rb +69 -0
- data/spec/middleware_spec.rb +164 -71
- data/spec/os_spec.rb +65 -0
- data/spec/pdfkit_spec.rb +33 -2
- data/spec/source_spec.rb +25 -0
- metadata +39 -44
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 1abe171f52890a9f122ae67cfc5c743ac486414bf6ffe78a72d767fccb7b43a6
|
4
|
+
data.tar.gz: c979fcb4caa6d667be6b2f636e1acff3e4edaa59bf9e5ce45a709225fd4a3491
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70ac2e72c5aab61b4cfffe9bd26da109dee62286f8486d3596522dff5c0eb9b9ae5c1886b53d2b979247b2b6f38d9e8de07344d4963b8f1109879fd077550d0a
|
7
|
+
data.tar.gz: 28c303321d80381c13e3071442cfd9652469e8c15cd977224762ddb93342ec3f5bed8be6e9b014f771490f38f365b7d3b21d771ead7931b751b7eee1cebf962f
|
@@ -0,0 +1,19 @@
|
|
1
|
+
name: Mark stale issues and pull requests
|
2
|
+
|
3
|
+
on:
|
4
|
+
schedule:
|
5
|
+
- cron: "0 0 * * *"
|
6
|
+
|
7
|
+
jobs:
|
8
|
+
stale:
|
9
|
+
|
10
|
+
runs-on: ubuntu-latest
|
11
|
+
|
12
|
+
steps:
|
13
|
+
- uses: actions/stale@v1
|
14
|
+
with:
|
15
|
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
16
|
+
stale-issue-message: 'This issue has been marked as stale and will be automatically closed.'
|
17
|
+
stale-pr-message: 'This pull request has been marked as stale and will be automatically closed.'
|
18
|
+
stale-issue-label: 'no-issue-activity'
|
19
|
+
stale-pr-label: 'no-pr-activity'
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.6.4
|
data/.travis.yml
CHANGED
@@ -1,12 +1,16 @@
|
|
1
|
+
language: ruby
|
2
|
+
|
1
3
|
rvm:
|
2
|
-
-
|
3
|
-
-
|
4
|
-
- 2.
|
5
|
-
- 2.
|
4
|
+
- 2.3
|
5
|
+
- 2.4
|
6
|
+
- 2.5
|
7
|
+
- 2.6
|
8
|
+
|
9
|
+
before_install:
|
10
|
+
- gem update --system
|
11
|
+
- gem update bundler
|
6
12
|
|
7
13
|
before_script:
|
8
|
-
- "export DISPLAY=:99.0"
|
9
|
-
- "sh -e /etc/init.d/xvfb start"
|
10
14
|
- "sudo apt-get -qq -y install fontconfig libxrender1"
|
11
|
-
- "wget
|
12
|
-
- "sudo
|
15
|
+
- "wget https://github.com/wkhtmltopdf/wkhtmltopdf/releases/download/0.12.5/wkhtmltox_0.12.5-1.xenial_amd64.deb"
|
16
|
+
- "sudo apt-get install ./wkhtmltox_0.12.5-1.xenial_amd64.deb"
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,33 @@
|
|
1
|
+
2020-07-05
|
2
|
+
=================
|
3
|
+
* Bump to 0.8.4.3.1
|
4
|
+
* Don't override request level Content-Disposition header if it exists (#466)
|
5
|
+
* Update rake (#471)
|
6
|
+
* Add missing require statements for tempfile (#467)
|
7
|
+
* Only grab last line of bundle exec which output (#464)
|
8
|
+
* Return 500 status when an exception is caught in middleware (#469)
|
9
|
+
* Update Travis CI URL for wkhtmltopdf (#473)
|
10
|
+
|
11
|
+
2020-04-01
|
12
|
+
=================
|
13
|
+
* Bump to 0.8.4.2
|
14
|
+
* Improve path detection feedback (#460)
|
15
|
+
* Fix typos (#444)
|
16
|
+
* Update readme (#439)
|
17
|
+
|
18
|
+
2019-02-22
|
19
|
+
=================
|
20
|
+
* Bump to 0.8.4.1
|
21
|
+
* Make PDFkit threadsafe (#377)
|
22
|
+
* Update activesupport (#434)
|
23
|
+
|
24
|
+
2019-02-21
|
25
|
+
=================
|
26
|
+
* Bump to 0.8.4
|
27
|
+
* Removed support for Ruby < 2.2
|
28
|
+
* Xvfb support (#277)
|
29
|
+
* Remove 'config.protocol' from the README (#389)
|
30
|
+
|
1
31
|
2015-08-26
|
2
32
|
=================
|
3
33
|
* Bump to 0.8.2
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -14,7 +14,7 @@ gem install pdfkit
|
|
14
14
|
|
15
15
|
<https://github.com/pdfkit/pdfkit/wiki/Installing-WKHTMLTOPDF>
|
16
16
|
|
17
|
-
2. Try using the `wkhtmltopdf-binary` gem (mac + linux i386)
|
17
|
+
2. Try using the `wkhtmltopdf-binary-edge` gem (mac + linux i386)
|
18
18
|
```
|
19
19
|
gem install wkhtmltopdf-binary
|
20
20
|
```
|
@@ -34,7 +34,7 @@ pdf = kit.to_pdf
|
|
34
34
|
file = kit.to_file('/path/to/save/pdf')
|
35
35
|
|
36
36
|
# PDFKit.new can optionally accept a URL or a File.
|
37
|
-
# Stylesheets can not be added when source is provided as a URL
|
37
|
+
# Stylesheets can not be added when source is provided as a URL or File.
|
38
38
|
kit = PDFKit.new('http://google.com')
|
39
39
|
kit = PDFKit.new(File.new('/path/to/html'))
|
40
40
|
|
@@ -43,8 +43,22 @@ PDFKit.new('
|
|
43
43
|
PDFKit.new('<html><head><meta name="pdfkit-cookie cookie_name1" content="cookie_value1"')
|
44
44
|
PDFKit.new('<html><head><meta name="pdfkit-cookie cookie_name2" content="cookie_value2"')
|
45
45
|
```
|
46
|
+
|
47
|
+
### Resolving relative URLs and protocols
|
48
|
+
|
49
|
+
If the source HTML has relative URLs (`/images/cat.png`) or
|
50
|
+
[protocols](https://en.wikipedia.org/wiki/Uniform_Resource_Locator#prurl)
|
51
|
+
(`//example.com/site.css`) that need to be resolved, you can pass `:root_url`
|
52
|
+
and `:protocol` options to PDFKit:
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
PDFKit.new(html, root_url: 'http://mysite.com/').to_file
|
56
|
+
# or:
|
57
|
+
PDFKit.new(html, protocol: 'https').to_file
|
58
|
+
```
|
59
|
+
|
46
60
|
### Using cookies in scraping
|
47
|
-
If you want to pass a cookie to cookie to pdfkit to scrape a website, you can
|
61
|
+
If you want to pass a cookie to pdfkit to scrape a website, you can
|
48
62
|
pass it in a hash:
|
49
63
|
```ruby
|
50
64
|
kit = PDFKit.new(url, cookie: {cookie_name: :cookie_value})
|
@@ -102,6 +116,13 @@ config.middleware.use PDFKit::Middleware, {}, :except => [%r[^/prawn], %r[^/secr
|
|
102
116
|
# conditions can be strings (either one or an array)
|
103
117
|
config.middleware.use PDFKit::Middleware, {}, :except => ['/secret']
|
104
118
|
```
|
119
|
+
**With conditions to force download**
|
120
|
+
```ruby
|
121
|
+
# force download with attachment disposition
|
122
|
+
config.middleware.use PDFKit::Middleware, {}, :disposition => 'attachment'
|
123
|
+
# conditions can force a filename
|
124
|
+
config.middleware.use PDFKit::Middleware, {}, :disposition => 'attachment; filename=report.pdf'
|
125
|
+
```
|
105
126
|
**Saving the generated .pdf to disk**
|
106
127
|
|
107
128
|
Setting the `PDFKit-save-pdf` header will cause PDFKit to write the generated .pdf to the file indicated by the value of the header.
|
@@ -111,7 +132,7 @@ For example:
|
|
111
132
|
headers['PDFKit-save-pdf'] = 'path/to/saved.pdf'
|
112
133
|
```
|
113
134
|
|
114
|
-
Will cause the .pdf to be saved to `path/to/saved.pdf` in addition to being sent back to the client. If the path is not writable/non-
|
135
|
+
Will cause the .pdf to be saved to `path/to/saved.pdf` in addition to being sent back to the client. If the path is not writable/non-existent the write will fail silently. The `PDFKit-save-pdf` header is never sent back to the client.
|
115
136
|
|
116
137
|
## Troubleshooting
|
117
138
|
|
@@ -126,13 +147,13 @@ Will cause the .pdf to be saved to `path/to/saved.pdf` in addition to being sent
|
|
126
147
|
around this issue you may want to run a server with multiple workers
|
127
148
|
like Passenger or try to embed your resources within your HTML to
|
128
149
|
avoid extra HTTP requests.
|
129
|
-
|
130
|
-
Example solution (rails / bundler), add unicorn to the development
|
131
|
-
group in your Gemfile `gem 'unicorn'` then run `bundle`. Next, add a
|
150
|
+
|
151
|
+
Example solution (rails / bundler), add unicorn to the development
|
152
|
+
group in your Gemfile `gem 'unicorn'` then run `bundle`. Next, add a
|
132
153
|
file `config/unicorn.conf` with
|
133
|
-
|
154
|
+
|
134
155
|
worker_processes 3
|
135
|
-
|
156
|
+
|
136
157
|
Then to run the app `unicorn_rails -c config/unicorn.conf` (from rails_root)
|
137
158
|
|
138
159
|
* **Resources aren't included in the PDF:** Images, CSS, or JavaScript
|
@@ -146,7 +167,7 @@ Will cause the .pdf to be saved to `path/to/saved.pdf` in addition to being sent
|
|
146
167
|
asset host.
|
147
168
|
|
148
169
|
* **Mangled output in the browser:** Be sure that your HTTP response
|
149
|
-
headers specify "Content-Type: application/pdf"
|
170
|
+
headers specify "Content-Type: application/pdf"
|
150
171
|
|
151
172
|
## Note on Patches/Pull Requests
|
152
173
|
|
data/lib/pdfkit.rb
CHANGED
data/lib/pdfkit/configuration.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
class PDFKit
|
2
2
|
class Configuration
|
3
|
-
attr_accessor :meta_tag_prefix, :
|
4
|
-
attr_writer :
|
3
|
+
attr_accessor :meta_tag_prefix, :root_url
|
4
|
+
attr_writer :use_xvfb, :verbose
|
5
|
+
attr_reader :default_options
|
5
6
|
|
6
7
|
def initialize
|
7
8
|
@verbose = false
|
9
|
+
@use_xvfb = false
|
8
10
|
@meta_tag_prefix = 'pdfkit-'
|
9
11
|
@default_options = {
|
10
12
|
:disable_smart_shrinking => false,
|
@@ -19,7 +21,33 @@ class PDFKit
|
|
19
21
|
end
|
20
22
|
|
21
23
|
def wkhtmltopdf
|
22
|
-
@wkhtmltopdf ||=
|
24
|
+
@wkhtmltopdf ||= default_wkhtmltopdf
|
25
|
+
end
|
26
|
+
|
27
|
+
def default_wkhtmltopdf
|
28
|
+
return @default_command_path if @default_command_path
|
29
|
+
if defined?(Bundler::GemfileError) && File.exists?('Gemfile')
|
30
|
+
@default_command_path = `bundle exec which wkhtmltopdf`.chomp.lines.last
|
31
|
+
end
|
32
|
+
@default_command_path = `which wkhtmltopdf`.chomp if @default_command_path.nil? || @default_command_path.empty?
|
33
|
+
@default_command_path
|
34
|
+
end
|
35
|
+
|
36
|
+
def wkhtmltopdf=(path)
|
37
|
+
if File.exist?(path)
|
38
|
+
@wkhtmltopdf = path
|
39
|
+
else
|
40
|
+
warn "No executable found at #{path}. Will fall back to #{default_wkhtmltopdf}"
|
41
|
+
@wkhtmltopdf = default_wkhtmltopdf
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def executable
|
46
|
+
using_xvfb? ? "xvfb-run #{wkhtmltopdf}" : wkhtmltopdf
|
47
|
+
end
|
48
|
+
|
49
|
+
def using_xvfb?
|
50
|
+
@use_xvfb
|
23
51
|
end
|
24
52
|
|
25
53
|
def quiet?
|
@@ -29,6 +57,10 @@ class PDFKit
|
|
29
57
|
def verbose?
|
30
58
|
@verbose
|
31
59
|
end
|
60
|
+
|
61
|
+
def default_options=(options)
|
62
|
+
@default_options.merge!(options)
|
63
|
+
end
|
32
64
|
end
|
33
65
|
|
34
66
|
class << self
|
@@ -41,6 +73,7 @@ class PDFKit
|
|
41
73
|
# @example
|
42
74
|
# PDFKit.configure do |config|
|
43
75
|
# config.wkhtmltopdf = '/usr/bin/wkhtmltopdf'
|
76
|
+
# config.use_xvfb = true
|
44
77
|
# config.verbose = true
|
45
78
|
# end
|
46
79
|
|
@@ -0,0 +1,23 @@
|
|
1
|
+
class PDFKit
|
2
|
+
module HTMLPreprocessor
|
3
|
+
|
4
|
+
# Change relative paths to absolute, and relative protocols to absolute protocols
|
5
|
+
def self.process(html, root_url, protocol)
|
6
|
+
html = translate_relative_paths(html, root_url) if root_url
|
7
|
+
html = translate_relative_protocols(html, protocol) if protocol
|
8
|
+
html
|
9
|
+
end
|
10
|
+
|
11
|
+
private
|
12
|
+
|
13
|
+
def self.translate_relative_paths(html, root_url)
|
14
|
+
# Try out this regexp using rubular http://rubular.com/r/hiAxBNX7KE
|
15
|
+
html.gsub(/(href|src)=(['"])\/([^\/"']([^\"']*|[^"']*))?['"]/, "\\1=\\2#{root_url}\\3\\2")
|
16
|
+
end
|
17
|
+
|
18
|
+
def self.translate_relative_protocols(body, protocol)
|
19
|
+
# Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
|
20
|
+
body.gsub(/(href|src)=(['"])\/\/([^\"']*|[^"']*)['"]/, "\\1=\\2#{protocol}://\\3\\2")
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/pdfkit/middleware.rb
CHANGED
@@ -9,6 +9,10 @@ class PDFKit
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def call(env)
|
12
|
+
dup._call(env)
|
13
|
+
end
|
14
|
+
|
15
|
+
def _call(env)
|
12
16
|
@request = Rack::Request.new(env)
|
13
17
|
@render_pdf = false
|
14
18
|
|
@@ -18,7 +22,16 @@ class PDFKit
|
|
18
22
|
if rendering_pdf? && headers['Content-Type'] =~ /text\/html|application\/xhtml\+xml/
|
19
23
|
body = response.respond_to?(:body) ? response.body : response.join
|
20
24
|
body = body.join if body.is_a?(Array)
|
21
|
-
|
25
|
+
|
26
|
+
root_url = root_url(env)
|
27
|
+
protocol = protocol(env)
|
28
|
+
options = @options.merge(root_url: root_url, protocol: protocol)
|
29
|
+
|
30
|
+
if headers['PDFKit-javascript-delay']
|
31
|
+
options.merge!(javascript_delay: headers.delete('PDFKit-javascript-delay').to_i)
|
32
|
+
end
|
33
|
+
|
34
|
+
body = PDFKit.new(body, options).to_pdf
|
22
35
|
response = [body]
|
23
36
|
|
24
37
|
if headers['PDFKit-save-pdf']
|
@@ -32,31 +45,28 @@ class PDFKit
|
|
32
45
|
headers.delete('Cache-Control')
|
33
46
|
end
|
34
47
|
|
35
|
-
headers['Content-Length']
|
36
|
-
headers['Content-Type']
|
48
|
+
headers['Content-Length'] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s
|
49
|
+
headers['Content-Type'] = 'application/pdf'
|
50
|
+
headers['Content-Disposition'] ||= @conditions[:disposition] || 'inline'
|
37
51
|
end
|
38
52
|
|
39
53
|
[status, headers, response]
|
40
|
-
end
|
41
54
|
|
42
|
-
|
55
|
+
rescue StandardError => e
|
56
|
+
status = 500
|
57
|
+
response = [e.message]
|
43
58
|
|
44
|
-
|
45
|
-
def translate_paths(body, env)
|
46
|
-
body = translate_relative_paths(body, env)
|
47
|
-
translate_relative_protocols(body, env)
|
59
|
+
[status, headers, response]
|
48
60
|
end
|
49
61
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
62
|
+
private
|
63
|
+
|
64
|
+
def root_url(env)
|
65
|
+
PDFKit.configuration.root_url || "#{env['rack.url_scheme']}://#{env['HTTP_HOST']}/"
|
54
66
|
end
|
55
67
|
|
56
|
-
def
|
57
|
-
|
58
|
-
# Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
|
59
|
-
body.gsub(/(href|src)=(['"])\/\/([^\"']*|[^"']*)['"]/, "\\1=\\2#{protocol}\\3\\2")
|
68
|
+
def protocol(env)
|
69
|
+
env['rack.url_scheme']
|
60
70
|
end
|
61
71
|
|
62
72
|
def rendering_pdf?
|
@@ -65,20 +75,18 @@ class PDFKit
|
|
65
75
|
|
66
76
|
def render_as_pdf?
|
67
77
|
request_path = @request.path
|
68
|
-
|
78
|
+
return false unless request_path.end_with?('.pdf')
|
69
79
|
|
70
|
-
if
|
80
|
+
if @conditions[:only]
|
71
81
|
conditions_as_regexp(@conditions[:only]).any? do |pattern|
|
72
|
-
|
82
|
+
pattern === request_path
|
73
83
|
end
|
74
|
-
elsif
|
75
|
-
conditions_as_regexp(@conditions[:except]).
|
76
|
-
|
84
|
+
elsif @conditions[:except]
|
85
|
+
conditions_as_regexp(@conditions[:except]).none? do |pattern|
|
86
|
+
pattern === request_path
|
77
87
|
end
|
78
|
-
|
79
|
-
return true
|
80
88
|
else
|
81
|
-
|
89
|
+
true
|
82
90
|
end
|
83
91
|
end
|
84
92
|
|
@@ -99,8 +107,8 @@ class PDFKit
|
|
99
107
|
end
|
100
108
|
|
101
109
|
def conditions_as_regexp(conditions)
|
102
|
-
|
103
|
-
pattern.is_a?(Regexp) ? pattern : Regexp.new(
|
110
|
+
Array(conditions).map do |pattern|
|
111
|
+
pattern.is_a?(Regexp) ? pattern : Regexp.new("^#{pattern}")
|
104
112
|
end
|
105
113
|
end
|
106
114
|
end
|
data/lib/pdfkit/os.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'rbconfig'
|
2
|
+
|
3
|
+
class PDFKit
|
4
|
+
module OS
|
5
|
+
def self.host_is_windows?
|
6
|
+
!(RbConfig::CONFIG['host_os'] =~ /mswin|msys|mingw|cygwin|bccwin|wince/).nil?
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.shell_escape_for_os(args)
|
10
|
+
if (host_is_windows?)
|
11
|
+
# Windows reserved shell characters are: & | ( ) < > ^
|
12
|
+
# See http://technet.microsoft.com/en-us/library/cc723564.aspx#XSLTsection123121120120
|
13
|
+
args.map { |arg| arg.gsub(/([&|()<>^])/,'^\1') }.join(" ")
|
14
|
+
else
|
15
|
+
args.shelljoin
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/pdfkit/pdfkit.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
require 'shellwords'
|
2
|
-
require '
|
2
|
+
require 'tempfile'
|
3
3
|
|
4
4
|
class PDFKit
|
5
|
-
class
|
5
|
+
class Error < StandardError; end
|
6
|
+
|
7
|
+
class NoExecutableError < Error
|
6
8
|
def initialize
|
7
9
|
msg = "No wkhtmltopdf executable found at #{PDFKit.configuration.wkhtmltopdf}\n"
|
8
10
|
msg << ">> Please install wkhtmltopdf - https://github.com/pdfkit/PDFKit/wiki/Installing-WKHTMLTOPDF"
|
@@ -10,31 +12,40 @@ class PDFKit
|
|
10
12
|
end
|
11
13
|
end
|
12
14
|
|
13
|
-
class ImproperSourceError <
|
15
|
+
class ImproperSourceError < Error
|
14
16
|
def initialize(msg)
|
15
17
|
super("Improper Source: #{msg}")
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
21
|
+
class ImproperWkhtmltopdfExitStatus < Error
|
22
|
+
def initialize(invoke)
|
23
|
+
super("Command failed (exitstatus=#{$?.exitstatus}): #{invoke}")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
19
27
|
attr_accessor :source, :stylesheets
|
20
|
-
attr_reader :
|
28
|
+
attr_reader :renderer
|
21
29
|
|
22
30
|
def initialize(url_file_or_html, options = {})
|
23
31
|
@source = Source.new(url_file_or_html)
|
24
32
|
|
25
33
|
@stylesheets = []
|
26
34
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
@
|
35
|
+
options = PDFKit.configuration.default_options.merge(options)
|
36
|
+
options.delete(:quiet) if PDFKit.configuration.verbose?
|
37
|
+
options.merge! find_options_in_meta(url_file_or_html) unless source.url?
|
38
|
+
@root_url = options.delete(:root_url)
|
39
|
+
@protocol = options.delete(:protocol)
|
40
|
+
@renderer = WkHTMLtoPDF.new options
|
41
|
+
@renderer.normalize_options
|
31
42
|
|
32
|
-
raise NoExecutableError
|
43
|
+
raise NoExecutableError unless File.exists?(PDFKit.configuration.wkhtmltopdf)
|
33
44
|
end
|
34
45
|
|
35
46
|
def command(path = nil)
|
36
|
-
args = @
|
37
|
-
shell_escaped_command = [executable, shell_escape_for_os(args)].join ' '
|
47
|
+
args = @renderer.options_for_command
|
48
|
+
shell_escaped_command = [executable, OS::shell_escape_for_os(args)].join ' '
|
38
49
|
|
39
50
|
# In order to allow for URL parameters (e.g. https://www.google.com/search?q=pdfkit) we do
|
40
51
|
# not escape the source. The user is responsible for ensuring that no vulnerabilities exist
|
@@ -45,17 +56,17 @@ class PDFKit
|
|
45
56
|
"#{shell_escaped_command} #{input_for_command} #{output_for_command}"
|
46
57
|
end
|
47
58
|
|
59
|
+
def options
|
60
|
+
# TODO(cdwort,sigmavirus24): Replace this with an attr_reader for @renderer instead in 1.0.0
|
61
|
+
@renderer.options
|
62
|
+
end
|
63
|
+
|
48
64
|
def executable
|
49
|
-
|
50
|
-
return default if default !~ /^\// # its not a path, so nothing we can do
|
51
|
-
if File.exist?(default)
|
52
|
-
default
|
53
|
-
else
|
54
|
-
default.split('/').last
|
55
|
-
end
|
65
|
+
PDFKit.configuration.executable
|
56
66
|
end
|
57
67
|
|
58
68
|
def to_pdf(path=nil)
|
69
|
+
preprocess_html
|
59
70
|
append_stylesheets
|
60
71
|
|
61
72
|
invoke = command(path)
|
@@ -68,7 +79,7 @@ class PDFKit
|
|
68
79
|
|
69
80
|
# $? is thread safe per
|
70
81
|
# http://stackoverflow.com/questions/2164887/thread-safe-external-process-in-ruby-plus-checking-exitstatus
|
71
|
-
raise
|
82
|
+
raise ImproperWkhtmltopdfExitStatus, invoke if empty_result?(path, result) || !successful?($?)
|
72
83
|
return result
|
73
84
|
end
|
74
85
|
|
@@ -79,14 +90,9 @@ class PDFKit
|
|
79
90
|
|
80
91
|
protected
|
81
92
|
|
82
|
-
# Pulled from:
|
83
|
-
# https://github.com/wkhtmltopdf/wkhtmltopdf/blob/ebf9b6cfc4c58a31349fb94c568b254fac37b3d3/README_WKHTMLTOIMAGE#L27
|
84
|
-
REPEATABLE_OPTIONS = %w[--allow --cookie --custom-header --post --post-file --run-script]
|
85
|
-
SPECIAL_OPTIONS = %w[cover toc]
|
86
|
-
|
87
93
|
def find_options_in_meta(content)
|
88
94
|
# Read file if content is a File
|
89
|
-
content = content.read if content.is_a?(File)
|
95
|
+
content = content.read if content.is_a?(File) || content.is_a?(Tempfile)
|
90
96
|
|
91
97
|
found = {}
|
92
98
|
content.scan(/<meta [^>]*>/) do |meta|
|
@@ -111,8 +117,15 @@ class PDFKit
|
|
111
117
|
"<style>#{File.read(stylesheet)}</style>"
|
112
118
|
end
|
113
119
|
|
120
|
+
def preprocess_html
|
121
|
+
if @source.html?
|
122
|
+
processed_html = PDFKit::HTMLPreprocessor.process(@source.to_s, @root_url, @protocol)
|
123
|
+
@source = Source.new(processed_html)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
|
114
127
|
def append_stylesheets
|
115
|
-
raise ImproperSourceError
|
128
|
+
raise ImproperSourceError, 'Stylesheets may only be added to an HTML source' if stylesheets.any? && !@source.html?
|
116
129
|
|
117
130
|
stylesheets.each do |stylesheet|
|
118
131
|
if @source.to_s.match(/<\/head>/)
|
@@ -123,65 +136,12 @@ class PDFKit
|
|
123
136
|
end
|
124
137
|
end
|
125
138
|
|
126
|
-
def normalize_options(options)
|
127
|
-
normalized_options = {}
|
128
|
-
|
129
|
-
options.each do |key, value|
|
130
|
-
next if !value
|
131
|
-
|
132
|
-
# The actual option for wkhtmltopdf
|
133
|
-
normalized_key = normalize_arg key
|
134
|
-
normalized_key = "--#{normalized_key}" unless SPECIAL_OPTIONS.include?(normalized_key)
|
135
|
-
|
136
|
-
# If the option is repeatable, attempt to normalize all values
|
137
|
-
if REPEATABLE_OPTIONS.include? normalized_key
|
138
|
-
normalize_repeatable_value(normalized_key, value) do |normalized_unique_key, normalized_value|
|
139
|
-
normalized_options[normalized_unique_key] = normalized_value
|
140
|
-
end
|
141
|
-
else # Otherwise, just normalize it like usual
|
142
|
-
normalized_options[normalized_key] = normalize_value(value)
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
normalized_options
|
147
|
-
end
|
148
|
-
|
149
|
-
def normalize_arg(arg)
|
150
|
-
arg.to_s.downcase.gsub(/[^a-z0-9]/,'-')
|
151
|
-
end
|
152
|
-
|
153
|
-
def normalize_value(value)
|
154
|
-
case value
|
155
|
-
when nil
|
156
|
-
nil
|
157
|
-
when TrueClass, 'true' #ie, ==true, see http://www.ruby-doc.org/core-1.9.3/TrueClass.html
|
158
|
-
nil
|
159
|
-
when Hash
|
160
|
-
value.to_a.flatten.collect{|x| normalize_value(x)}.compact
|
161
|
-
when Array
|
162
|
-
value.flatten.collect{|x| x.to_s}
|
163
|
-
else
|
164
|
-
(host_is_windows? && value.to_s.index(' ')) ? "'#{ value.to_s }'" : value.to_s
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
168
|
-
def normalize_repeatable_value(option_name, value)
|
169
|
-
case value
|
170
|
-
when Hash, Array
|
171
|
-
value.each do |(key, val)|
|
172
|
-
yield [[option_name, normalize_value(key)], normalize_value(val)]
|
173
|
-
end
|
174
|
-
else
|
175
|
-
yield [[option_name, normalize_value(value)], nil]
|
176
|
-
end
|
177
|
-
end
|
178
|
-
|
179
139
|
def successful?(status)
|
180
140
|
return true if status.success?
|
181
141
|
|
182
142
|
# Some of the codes: https://code.google.com/p/wkhtmltopdf/issues/detail?id=1088
|
183
143
|
# returned when assets are missing (404): https://code.google.com/p/wkhtmltopdf/issues/detail?id=548
|
184
|
-
return true if status.exitstatus == 2 && error_handling?
|
144
|
+
return true if status.exitstatus == 2 && @renderer.error_handling?
|
185
145
|
|
186
146
|
false
|
187
147
|
end
|
@@ -189,25 +149,4 @@ class PDFKit
|
|
189
149
|
def empty_result?(path, result)
|
190
150
|
(path && File.size(path) == 0) || (path.nil? && result.to_s.strip.empty?)
|
191
151
|
end
|
192
|
-
|
193
|
-
def error_handling?
|
194
|
-
@options.key?('--ignore-load-errors') ||
|
195
|
-
# wkhtmltopdf v0.10.0 beta4 replaces ignore-load-errors with load-error-handling
|
196
|
-
# https://code.google.com/p/wkhtmltopdf/issues/detail?id=55
|
197
|
-
%w(skip ignore).include?(@options['--load-error-handling'])
|
198
|
-
end
|
199
|
-
|
200
|
-
def host_is_windows?
|
201
|
-
@host_is_windows ||= !(RbConfig::CONFIG['host_os'] =~ /mswin|msys|mingw|cygwin|bccwin|wince/).nil?
|
202
|
-
end
|
203
|
-
|
204
|
-
def shell_escape_for_os(args)
|
205
|
-
if (host_is_windows?)
|
206
|
-
# Windows reserved shell characters are: & | ( ) < > ^
|
207
|
-
# See http://technet.microsoft.com/en-us/library/cc723564.aspx#XSLTsection123121120120
|
208
|
-
args.map { |arg| arg.gsub(/([&|()<>^])/,'^\1') }.join(" ")
|
209
|
-
else
|
210
|
-
args.shelljoin
|
211
|
-
end
|
212
|
-
end
|
213
152
|
end
|