my_pdfkit 0.1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
class MyPDFKit
  # Process-wide settings: the wkhtmltopdf binary to run, whether to wrap it
  # in xvfb-run, verbosity, and the default command-line options.
  class Configuration
    attr_accessor :meta_tag_prefix, :root_url
    attr_writer :use_xvfb, :verbose
    attr_reader :default_options

    def initialize
      @verbose         = false
      @use_xvfb        = false
      @meta_tag_prefix = 'pdfkit-'
      @default_options = {
        :disable_smart_shrinking => false,
        :quiet => true,
        :page_size => 'Letter',
        :margin_top => '0.75in',
        :margin_right => '0.75in',
        :margin_bottom => '0.75in',
        :margin_left => '0.75in',
        :encoding => 'UTF-8'
      }
    end

    # @return [String] the explicitly configured binary path, or the
    #   auto-detected one (memoized on first use)
    def wkhtmltopdf
      @wkhtmltopdf ||= default_wkhtmltopdf
    end

    # Locates wkhtmltopdf with `which`, going through `bundle exec` first when
    # Bundler is loaded and a Gemfile exists in the working directory.
    #
    # @return [String] the detected path; empty when nothing was found
    def default_wkhtmltopdf
      return @default_command_path if @default_command_path

      if defined?(Bundler::GemfileError) && File.exist?('Gemfile')
        # Only the last output line is used as the path.
        @default_command_path = shell_output('bundle exec which wkhtmltopdf').lines.last
      end
      if @default_command_path.nil? || @default_command_path.empty?
        @default_command_path = shell_output('which wkhtmltopdf')
      end
      @default_command_path
    end

    # Sets the binary path. A nil or non-existent path is not stored: a warning
    # is printed and the auto-detected binary is used instead.
    #
    # @param path [String, nil]
    def wkhtmltopdf=(path)
      if path && File.exist?(path)
        @wkhtmltopdf = path
      else
        fallback = default_wkhtmltopdf
        warn "No executable found at #{path}. Will fall back to #{fallback}"
        @wkhtmltopdf = fallback
      end
    end

    # @return [String, Array<String>] the binary path, prefixed with
    #   'xvfb-run' when xvfb is enabled
    def executable
      using_xvfb? ? ['xvfb-run', wkhtmltopdf] : wkhtmltopdf
    end

    def using_xvfb?
      @use_xvfb
    end

    def quiet?
      !@verbose
    end

    def verbose?
      @verbose
    end

    # Merges the given options into the existing defaults (it does not
    # replace them).
    def default_options=(options)
      @default_options.merge!(options)
    end

    private

    # Runs a lookup command and returns its chomped output. A missing lookup
    # tool (Errno::ENOENT) is treated the same as "nothing found".
    def shell_output(command)
      `#{command}`.chomp
    rescue Errno::ENOENT
      ''
    end
  end

  class << self
    # Only the writer is generated; the lazy reader is defined below.
    attr_writer :configuration
  end

  # Configure MyPDFKit someplace sensible,
  # like config/initializers/my_pdfkit.rb
  #
  # @example
  #   MyPDFKit.configure do |config|
  #     config.wkhtmltopdf = '/usr/bin/wkhtmltopdf'
  #     config.use_xvfb = true
  #     config.verbose = true
  #   end

  # @return [Configuration] the shared configuration, created on first access
  def self.configuration
    @configuration ||= Configuration.new
  end

  # Yields the shared configuration to the given block.
  def self.configure
    yield(configuration)
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
class MyPDFKit
  # Rewrites href/src attributes in an HTML string so that root-relative paths
  # and protocol-relative URLs become absolute.
  module HTMLPreprocessor
    # Change relative paths to absolute, and relative protocols to absolute protocols
    #
    # @param html [String] the markup to rewrite
    # @param root_url [String, nil] prefix for "/path" references; skipped when nil
    # @param protocol [String, nil] scheme for "//host" references; skipped when nil
    # @return [String] the rewritten markup
    def self.process(html, root_url, protocol)
      html = translate_relative_paths(html, root_url) if root_url
      html = translate_relative_protocols(html, protocol) if protocol
      html
    end

    def self.translate_relative_paths(html, root_url)
      # Try out this regexp using rubular http://rubular.com/r/hiAxBNX7KE
      # The block form keeps root_url literal: in a replacement *string*, a
      # backslash sequence inside root_url would be read as a back-reference.
      html.gsub(/(href|src)=(['"])\/([^\/"']([^\"']*|[^"']*))?['"]/) do
        attribute, quote, path = Regexp.last_match.values_at(1, 2, 3)
        "#{attribute}=#{quote}#{root_url}#{path}#{quote}"
      end
    end

    def self.translate_relative_protocols(body, protocol)
      # Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
      body.gsub(/(href|src)=(['"])\/\/([^\"']*|[^"']*)['"]/) do
        attribute, quote, rest = Regexp.last_match.values_at(1, 2, 3)
        "#{attribute}=#{quote}#{protocol}://#{rest}#{quote}"
      end
    end

    # A bare `private` does not apply to `def self.` methods, so the helpers
    # are hidden explicitly.
    private_class_method :translate_relative_paths, :translate_relative_protocols
  end
end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
class MyPDFKit
  # Rack middleware: for request paths ending in ".pdf" it asks the wrapped
  # app for the matching HTML page and replaces the response body with a PDF.
  class Middleware
    # @param app the wrapped Rack application
    # @param options [Hash] options passed to MyPDFKit.new for each render
    # @param conditions [Hash] :only / :except path filters, :disposition,
    #   and :caching (keep cache headers on the PDF response when truthy)
    def initialize(app, options = {}, conditions = {})
      @app        = app
      @options    = options
      # Work on a copy so that extracting :caching never mutates the caller's hash.
      @conditions = conditions.dup
      @render_pdf = false
      @caching    = @conditions.delete(:caching) { false }
    end

    # Handles each request on a copy of the middleware, so per-request
    # instance variables are not shared between requests.
    def call(env)
      dup._call(env)
    end

    def _call(env)
      @request = Rack::Request.new(env)
      @render_pdf = false

      set_request_to_render_as_pdf(env) if render_as_pdf?
      status, headers, response = @app.call(env)

      begin
        content_type_header = header_name(headers, 'Content-Type')
        if rendering_pdf? && headers[content_type_header] =~ /text\/html|application\/xhtml\+xml/
          body = response.respond_to?(:body) ? response.body : response.join
          body = body.join if body.is_a?(Array)

          options = @options.merge(root_url: root_url(env), protocol: protocol(env))

          if headers['MyPDFKit-javascript-delay']
            options.merge!(javascript_delay: headers.delete('MyPDFKit-javascript-delay').to_i)
          end

          body = MyPDFKit.new(body, options).to_pdf
          response = [body]

          if headers['MyPDFKit-save-pdf']
            save_copy(headers['MyPDFKit-save-pdf'], body)
            headers.delete('MyPDFKit-save-pdf')
          end

          unless @caching
            # Do not cache PDFs (both header casings are removed)
            %w[etag ETag cache-control Cache-Control].each { |name| headers.delete(name) }
          end

          pdf_size = body.respond_to?(:bytesize) ? body.bytesize : body.size
          headers[header_name(headers, 'Content-Length')] = pdf_size.to_s
          headers[content_type_header] = 'application/pdf'
          headers['content-disposition'] ||= @conditions[:disposition] || 'inline'
        end
      rescue StandardError => e
        status = 500
        response = [e.message]
        # The length announced by the app described the HTML body, not this message.
        if headers.respond_to?(:key?)
          headers[header_name(headers, 'Content-Length')] = e.message.bytesize.to_s
        end
      end

      [status, headers, response]
    end

    private

    # Returns +canonical+ when the headers already use that capitalized key,
    # otherwise its lowercase form.
    def header_name(headers, canonical)
      headers.key?(canonical) ? canonical : canonical.downcase
    end

    # Writes a copy of the PDF to +path+. Best-effort: a failed write must not
    # break the response, so the error is deliberately discarded.
    def save_copy(path, pdf)
      File.open(path, 'wb') { |file| file.write(pdf) }
    rescue StandardError
      nil
    end

    def root_url(env)
      MyPDFKit.configuration.root_url || "#{env['rack.url_scheme']}://#{env['HTTP_HOST']}/"
    end

    def protocol(env)
      env['rack.url_scheme']
    end

    def rendering_pdf?
      @render_pdf
    end

    # True when the path ends in ".pdf" and passes the :only / :except
    # conditions (:only takes precedence when both are given).
    def render_as_pdf?
      request_path = @request.path
      return false unless request_path.end_with?('.pdf')

      if @conditions[:only]
        conditions_as_regexp(@conditions[:only]).any? { |pattern| pattern.match?(request_path) }
      elsif @conditions[:except]
        conditions_as_regexp(@conditions[:except]).none? { |pattern| pattern.match?(request_path) }
      else
        true
      end
    end

    # Rewrites the Rack env so the wrapped app sees a request for the HTML
    # page: ".pdf" is stripped and the HTML MIME type is put first in Accept.
    def set_request_to_render_as_pdf(env)
      @render_pdf = true

      path = @request.path.delete_suffix('.pdf')
      path = path.sub(@request.script_name, '')

      %w[PATH_INFO REQUEST_URI].each { |e| env[e] = path }

      env['HTTP_ACCEPT'] = concat(env['HTTP_ACCEPT'], Rack::Mime.mime_type('.html'))
      env['Rack-Middleware-MyPDFKit'] = 'true'
    end

    def concat(accepts, type)
      [type, *accepts.to_s.split(',')].compact.join(',')
    end

    # Strings become regexps anchored with "^"; Regexp conditions pass through.
    def conditions_as_regexp(conditions)
      Array(conditions).map do |pattern|
        pattern.is_a?(Regexp) ? pattern : Regexp.new("^#{pattern}")
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rbconfig'
4
+
5
class MyPDFKit
  # Helpers that depend on the platform Ruby was built for.
  module OS
    # @return [Boolean] whether RbConfig reports a Windows-family host_os
    def self.host_is_windows?
      windows_hosts = /mswin|msys|mingw|cygwin|bccwin|wince/
      windows_hosts.match?(RbConfig::CONFIG['host_os'])
    end

    # Joins command arguments into a single string, escaped for the host shell.
    #
    # @param args [Array<String>]
    # @return [String]
    def self.shell_escape_for_os(args)
      return args.shelljoin unless host_is_windows?

      # Windows reserved shell characters are: & | ( ) < > ^
      # See http://technet.microsoft.com/en-us/library/cc723564.aspx#XSLTsection123121120120
      escaped = args.map { |arg| arg.gsub(/([&|()<>^])/, '^\1') }
      escaped.join(' ')
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'shellwords'
4
+ require 'tempfile'
5
+
6
class MyPDFKit
  # Base class for the errors defined by MyPDFKit.
  class Error < StandardError; end

  # Raised when the configured wkhtmltopdf path does not exist.
  class NoExecutableError < Error
    def initialize
      msg = "No wkhtmltopdf executable found at #{MyPDFKit.configuration.wkhtmltopdf}\n" \
            ">> Please install wkhtmltopdf - https://github.com/pdfkit/MyPDFKit/wiki/Installing-WKHTMLTOPDF"
      super(msg)
    end
  end

  # Raised when an operation is not valid for the kind of source given.
  class ImproperSourceError < Error
    def initialize(msg)
      super("Improper Source: #{msg}")
    end
  end

  # Raised when wkhtmltopdf fails or produces no output.
  class ImproperWkhtmltopdfExitStatus < Error
    def initialize(invoke)
      # $? is nil when no child process has run yet in this thread.
      super("Command failed (exitstatus=#{$?&.exitstatus}): #{invoke}")
    end
  end

  attr_accessor :source, :stylesheets
  attr_reader :renderer

  # @param url_file_or_html [String, File, Tempfile] what to render
  # @param options [Hash] wkhtmltopdf options layered over the configured
  #   defaults; :root_url and :protocol are consumed here for HTML rewriting
  # @raise [NoExecutableError] when the configured binary does not exist
  def initialize(url_file_or_html, options = {})
    @source = Source.new(url_file_or_html)

    @stylesheets = []

    options = MyPDFKit.configuration.default_options.merge(options)
    options.delete(:quiet) if MyPDFKit.configuration.verbose?
    options.merge! find_options_in_meta(url_file_or_html) unless source.url?
    @root_url = options.delete(:root_url)
    @protocol = options.delete(:protocol)
    @renderer = WkHTMLtoPDF.new options
    @renderer.normalize_options

    raise NoExecutableError unless File.exist?(MyPDFKit.configuration.wkhtmltopdf)
  end

  # Builds the argument list: executable, options, input, then the output
  # path ('-' when no path is given).
  #
  # @return [Array<String>]
  def command(path = nil)
    args = [*executable]
    args.concat(@renderer.options_for_command)
    args << @source.to_input_for_command
    args << (path ? path.to_s : '-')
    args
  end

  def options
    # TODO(cdwort,sigmavirus24): Replace this with an attr_reader for @renderer instead in 1.0.0
    @renderer.options
  end

  def executable
    MyPDFKit.configuration.executable
  end

  # Runs wkhtmltopdf.
  #
  # @param path [String, nil] output file; when nil the PDF is read back
  #   from the command's output
  # @return [String, nil] the PDF data when +path+ is nil, otherwise nil
  # @raise [ImproperWkhtmltopdfExitStatus] on empty output or a failing exit status
  def to_pdf(path = nil)
    preprocess_html
    append_stylesheets

    invoke = command(path)

    result = IO.popen(invoke, 'wb+') do |pdf|
      pdf.puts(@source.to_s) if @source.html?
      pdf.close_write
      pdf.gets(nil) if path.nil?
    end

    # $? is thread safe per
    # http://stackoverflow.com/questions/2164887/thread-safe-external-process-in-ruby-plus-checking-exitstatus
    raise ImproperWkhtmltopdfExitStatus, invoke if empty_result?(path, result) || !successful?($?)

    result
  end

  # Renders to +path+ and returns a File opened on it.
  def to_file(path)
    to_pdf(path)
    File.new(path)
  end

  protected

  # Collects options from <meta> tags whose name starts with the configured
  # prefix. Names are split on whitespace: the first word becomes the option
  # key and the remaining words (possibly none) key a nested hash.
  def find_options_in_meta(content)
    # Read file if content is a File
    content = content.read if content.is_a?(File) || content.is_a?(Tempfile)

    # Escaped so a prefix containing regexp metacharacters is matched literally.
    prefix = Regexp.escape(MyPDFKit.configuration.meta_tag_prefix)
    name_pattern = /name=["']#{prefix}([^"']*)/

    found = {}
    content.scan(/<meta [^>]*>/) do |meta|
      name_match = meta.match(name_pattern)
      next unless name_match

      # A prefixed tag without a usable content attribute is skipped.
      content_match = meta.match(/content=["']([^"'\\]+)["']/)
      next unless content_match

      found[name_match[1].split] = content_match[1]
    end

    tuple_keys = found.keys.select { |k| k.is_a? Array }
    tuple_keys.each do |key|
      value = found.delete key
      new_key = key.shift
      found[new_key] ||= {}
      found[new_key][key] = value
    end

    found
  end

  def style_tag_for(stylesheet)
    style = "<style>#{File.read(stylesheet)}</style>"
    style = style.html_safe if style.respond_to?(:html_safe)
    style
  end

  def preprocess_html
    return unless @source.html?

    processed_html = MyPDFKit::HTMLPreprocessor.process(@source.to_s, @root_url, @protocol)
    @source = Source.new(processed_html)
  end

  # Inlines each stylesheet before </head>, or at the very start of the
  # document when there is no </head>.
  def append_stylesheets
    raise ImproperSourceError, 'Stylesheets may only be added to an HTML source' if stylesheets.any? && !@source.html?

    stylesheets.each do |stylesheet|
      html = @source.to_s
      updated =
        if html.match?(/<\/head>/)
          html.gsub(/(<\/head>)/) do |s|
            style_tag_for(stylesheet) + (s.respond_to?(:html_safe) ? s.html_safe : s)
          end
        else
          # Insert into a copy: the source may be the caller's own string.
          html.dup.insert(0, style_tag_for(stylesheet))
        end
      @source = Source.new(updated)
    end
  end

  def successful?(status)
    return true if status.success?

    # Some of the codes: https://code.google.com/p/wkhtmltopdf/issues/detail?id=1088
    # returned when assets are missing (404): https://code.google.com/p/wkhtmltopdf/issues/detail?id=548
    return true if status.exitstatus == 2 && @renderer.error_handling?

    false
  end

  # True when nothing was produced. File.size? returns nil for both a
  # missing and a zero-byte file, so a file that was never created counts.
  def empty_result?(path, result)
    (path && !File.size?(path)) || (path.nil? && result.to_s.strip.empty?)
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'tempfile'
4
+ require 'uri'
5
+
6
class MyPDFKit
  # Wraps the thing to render - a URL string, a File/Tempfile, or raw HTML -
  # and answers how it should be handed to the wkhtmltopdf command.
  class Source
    SOURCE_FROM_STDIN = '-'

    # @param url_file_or_html [String, File, Tempfile]
    def initialize(url_file_or_html)
      # The wrapped string is assumed to be modifiable, so a frozen one is copied.
      must_copy = url_file_or_html.is_a?(String) && url_file_or_html.frozen?
      @source = must_copy ? url_file_or_html.dup : url_file_or_html
    end

    # Truthy for a String that begins with "http".
    def url?
      return @is_url if @is_url

      @is_url = @source.is_a?(String) && @source.match(/\Ahttp/)
    end

    # True for a File or Tempfile.
    def file?
      return @is_file if @is_file

      @is_file = [File, Tempfile].any? { |klass| @source.kind_of?(klass) }
    end

    # True when the source is neither a URL nor a file.
    def html?
      return @is_html if @is_html

      @is_html = !url? && !file?
    end

    # @return [String] the file path, the escaped URL, or '-' (HTML is fed on stdin)
    def to_input_for_command
      return @source.path if file?
      return escaped_url if url?

      SOURCE_FROM_STDIN
    end

    # @return the file path for a file source, otherwise the wrapped value itself
    def to_s
      return @source.path if file?

      @source
    end

    private

    # Escapes the URL only when it is not already in escaped form.
    def escaped_url
      return @source unless url_needs_escaping?

      URI::DEFAULT_PARSER.escape(@source)
    end

    # A URL needs escaping when unescaping and re-escaping does not reproduce it.
    def url_needs_escaping?
      parser = URI::DEFAULT_PARSER
      round_trip = parser.escape(parser.unescape(@source))
      round_trip != @source
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
class MyPDFKit
  # Release number of the gem; the gemspec reads this constant for its version.
  VERSION = '0.1.0.0'
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
class MyPDFKit
  # Turns a Ruby options hash into wkhtmltopdf command-line arguments.
  class WkHTMLtoPDF
    attr_reader :options
    # Pulled from:
    # https://github.com/wkhtmltopdf/wkhtmltopdf/blob/6a57c1449797d6cb915921fb747f3ac36199241f/docs/usage/wkhtmltopdf.txt#L104
    REPEATABLE_OPTIONS = %w[--allow --bypass-proxy-for --cookie --custom-header --post --post-file --run-script --replace].freeze
    SPECIAL_OPTIONS = %w[cover toc].freeze

    # @param options [Hash] raw options, e.g. { page_size: 'Letter', quiet: true }
    def initialize(options)
      @options = options
      @normalized = false
    end

    # Replaces the raw options with their command-line form: keys become
    # "--dashed-names" (except cover/toc), falsy values are dropped, and
    # repeatable options become one [flag, name] key per entry.
    #
    # Safe to call repeatedly: only the first call transforms the options.
    # (A second pass over already-normalized keys would prefix them again and
    # drop flags whose value was normalized to nil.)
    #
    # @return [Hash] the normalized options
    def normalize_options
      return @options if @normalized

      normalized_options = {}

      @options.each do |key, value|
        next unless value

        # The actual option for wkhtmltopdf
        normalized_key = normalize_arg key
        normalized_key = "--#{normalized_key}" unless SPECIAL_OPTIONS.include?(normalized_key)

        # If the option is repeatable, attempt to normalize all values
        if REPEATABLE_OPTIONS.include? normalized_key
          normalize_repeatable_value(normalized_key, value) do |normalized_unique_key, normalized_value|
            normalized_options[normalized_unique_key] = normalized_value
          end
        else # Otherwise, just normalize it like usual
          normalized_options[normalized_key] = normalize_value(value)
        end
      end

      @normalized = true
      @options = normalized_options
    end

    # @return [Boolean] whether the options ask wkhtmltopdf to tolerate load errors
    def error_handling?
      @options.key?('--ignore-load-errors') ||
        # wkhtmltopdf v0.10.0 beta4 replaces ignore-load-errors with load-error-handling
        # https://code.google.com/p/wkhtmltopdf/issues/detail?id=55
        %w[skip ignore].include?(@options['--load-error-handling'])
    end

    # @return [Array<String>] flat argument list; nil values (bare flags) are omitted
    def options_for_command
      @options.to_a.flatten.compact
    end

    private

    # Lowercases the name and turns every non-alphanumeric character into "-".
    def normalize_arg(arg)
      arg.to_s.downcase.gsub(/[^a-z0-9]/, '-')
    end

    # true / 'true' mean "flag without a value" and become nil; hashes and
    # arrays are flattened to lists of strings; anything else is stringified.
    def normalize_value(value)
      case value
      when nil
        nil
      when TrueClass, 'true' #ie, ==true, see http://www.ruby-doc.org/core-1.9.3/TrueClass.html
        nil
      when Hash
        value.to_a.flatten.map { |x| normalize_value(x) }.compact
      when Array
        value.flatten.map(&:to_s)
      else
        value.to_s
      end
    end

    # Yields one [[option_name, name], value] pair per entry so that each
    # occurrence of a repeatable option gets its own hash key.
    def normalize_repeatable_value(option_name, value)
      case value
      when Hash, Array
        value.each do |(key, val)|
          yield [[option_name, normalize_value(key)], normalize_value(val)]
        end
      else
        yield [[option_name, normalize_value(value)], nil]
      end
    end
  end
end
data/lib/my_pdfkit.rb ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'my_pdfkit/source'
4
+ require 'my_pdfkit/pdfkit'
5
+ require 'my_pdfkit/middleware'
6
+ require 'my_pdfkit/html_preprocessor'
7
+ require 'my_pdfkit/os'
8
+ require 'my_pdfkit/configuration'
9
+ require 'my_pdfkit/wkhtmltopdf'
10
+ require 'my_pdfkit/version'
data/my_pdfkit.gemspec ADDED
@@ -0,0 +1,33 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require "my_pdfkit/version"
4
+
5
# Gem metadata for my_pdfkit. The packaged file lists are taken from the
# files tracked by git, so this must be evaluated inside the repository.
Gem::Specification.new do |s|
  s.name        = "my_pdfkit"
  s.version     = MyPDFKit::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Jared Pace", "Relevance"]
  s.email       = ["jared@codewordstudios.com"]
  s.homepage    = "https://github.com/pdfkit/pdfkit"
  s.summary     = "HTML+CSS -> PDF"
  s.description = "Uses wkhtmltopdf to create PDFs using HTML"
  s.license     = "MIT"

  # No debug output here: a gemspec is evaluated on every load, so printing
  # the file list would spam the console and run git an extra time.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.require_paths = ["lib"]

  s.required_ruby_version = ">= 2.5"

  # External (non-gem) requirement
  s.requirements << "wkhtmltopdf"

  # Development Dependencies
  s.add_development_dependency(%q<activesupport>, [">= 4.1.11"])
  s.add_development_dependency(%q<mocha>, [">= 0.9.10"])
  s.add_development_dependency(%q<rack-test>, [">= 0.5.6"])
  s.add_development_dependency(%q<rake>, [">= 12.3.3"])
  s.add_development_dependency(%q<rdoc>, [">= 4.0.1"])
  s.add_development_dependency(%q<rspec>, ["~> 3.0"])
end