breezy_pdf 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF::Intercept
  # Intercept strategy for pages that the rendering service cannot fetch itself.
  #
  # The downstream Rack app is called with the matcher-stripped path, its HTML
  # body is wrapped in a PrivateAssets::HTML, uploaded through Uploads::Base,
  # and the resulting public URL is submitted for rendering. The client is then
  # redirected to the rendered PDF.
  #
  # NOTE(review): `app` and `env` are not defined here; presumably they are
  # provided by Base - confirm.
  class PrivateUrl < Base
    # Submit the uploaded HTML for rendering and redirect to the result.
    #
    # @return [Array] Rack response triplet: 302, redirect headers, empty body
    def call
      BreezyPDF.logger.info(
        "[BreezyPDF] Requesting render of #{public_url} with metadata: #{html_private_asset.metadata}"
      )

      # Named render_response so it does not shadow the private #response
      # method, which holds the downstream app's Rack response.
      render_response = BreezyPDF::RenderRequest.new(public_url, html_private_asset.metadata).submit

      BreezyPDF.logger.info("[BreezyPDF] Redirect to pdf at #{render_response.download_url}")
      [
        302,
        { "Location" => render_response.download_url, "Content-Type" => "text/html", "Content-Length" => "0" },
        []
      ]
    end

    private

    # Public URL of the uploaded HTML document (uploaded once, then memoized).
    def public_url
      @public_url ||= BreezyPDF::Uploads::Base.new(
        html_private_asset.filename, html_private_asset.content_type, html_private_asset.file_path
      ).public_url
    end

    # The downstream app's HTML wrapped as an uploadable private asset.
    def html_private_asset
      @html_private_asset ||= BreezyPDF::PrivateAssets::HTML.new(base_url, body)
    end

    # Status code of the downstream app's response.
    def status
      @status ||= response[0]
    end

    # Headers of the downstream app's response.
    def headers
      @headers ||= response[1]
    end

    # The downstream response body as a single String.
    #
    # Bodies exposing #body are read through it; any other body is consumed
    # with #each, the only iteration method the Rack spec guarantees, instead
    # of assuming the body is an Array that responds to #join.
    def body
      @body ||= begin
        rack_body = response[2]

        if rack_body.respond_to?(:body)
          rack_body.body
        else
          chunks = []
          rack_body.each { |chunk| chunks << chunk }
          chunks.join
        end
      end
    end

    # Rack response of the downstream app for the matcher-stripped path.
    def response
      @response ||= app.call(doctored_env)
    end

    # Copy of the Rack env whose PATH_INFO has the matcher patterns removed.
    def doctored_env
      env.merge("PATH_INFO" => path)
    end

    # PATH_INFO with every configured middleware path matcher removed.
    def path
      BreezyPDF.middleware_path_matchers.reduce(env["PATH_INFO"]) do |stripped, regex|
        stripped.gsub(regex, "")
      end
    end

    # scheme://host:port of the current request, built from the Rack env.
    def base_url
      "#{env['rack.url_scheme']}://#{env['SERVER_NAME']}:#{env['SERVER_PORT']}"
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF::Intercept
  # Intercept strategy for pages the rendering service can fetch directly: the
  # matcher-stripped URL of the current request is submitted for rendering and
  # the client is redirected to the resulting PDF.
  #
  # NOTE(review): `env` is not defined here; presumably it is provided by
  # Base - confirm.
  class PublicUrl < Base
    # Submit the public URL for rendering and redirect to the result.
    #
    # @return [Array] Rack response triplet: 302, redirect headers, empty body
    def call
      BreezyPDF.logger.info("[BreezyPDF] Requesting render of #{public_url}")
      response = BreezyPDF::RenderRequest.new(public_url).submit

      # Same redirect log line as PrivateUrl, so both strategies trace alike.
      BreezyPDF.logger.info("[BreezyPDF] Redirect to pdf at #{response.download_url}")
      [
        302,
        { "Location" => response.download_url, "Content-Type" => "text/html", "Content-Length" => "0" },
        []
      ]
    end

    private

    # Absolute URL of the page to render. The query string is appended only
    # when present, so a request without one no longer ends in a dangling "?".
    def public_url
      url = "#{env['rack.url_scheme']}://#{env['SERVER_NAME']}:#{env['SERVER_PORT']}#{path}"
      query = env["QUERY_STRING"].to_s

      query.empty? ? url : "#{url}?#{query}"
    end

    # PATH_INFO with every configured middleware path matcher removed.
    def path
      BreezyPDF.middleware_path_matchers.reduce(env["PATH_INFO"]) do |stripped, regex|
        stripped.gsub(regex, "")
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Namespace for the request-interception strategies. Each constant is
  # registered for autoloading and only required on first reference.
  module Intercept
    autoload(:Base, "breezy_pdf/intercept/base")
    autoload(:PublicUrl, "breezy_pdf/intercept/public_url")
    autoload(:PrivateUrl, "breezy_pdf/intercept/private_url")
  end
end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Decides whether a Rack request should be rendered as a PDF and, if so,
  # hands it to the configured intercept strategy; otherwise the request is
  # passed through to the downstream app untouched.
  class Interceptor
    attr_reader :app, :env

    # @param app [#call] downstream Rack application
    # @param env [Hash] Rack environment of the current request
    def initialize(app, env)
      @app = app
      @env = env
    end

    # Run the intercept strategy for matching GET requests, or call the
    # downstream app for everything else.
    #
    # @return [Array] Rack response triplet
    def intercept!
      return app.call(env) unless intercept?

      BreezyPDF.logger.info("[BreezyPDF] Intercepting request for PDF rendering")
      intercept.new(app, env).call
    end

    private

    def intercept?
      get? && matching_uri?
    end

    # True when any configured matcher matches the request URI.
    def matching_uri?
      uri = request_uri
      matchers.any? { |regex| uri.match?(regex) }
    end

    # REQUEST_URI is set by most servers but is not a key the Rack spec
    # requires, so fall back to PATH_INFO instead of calling #match? on nil.
    def request_uri
      (env["REQUEST_URI"] || env["PATH_INFO"]).to_s
    end

    # Case-insensitive match on the whole verb; an unanchored /get/i would also
    # accept any method name that merely contains "get".
    def get?
      env["REQUEST_METHOD"].to_s.casecmp?("GET")
    end

    def matchers
      @matchers ||= BreezyPDF.middleware_path_matchers
    end

    # Strategy class selected by the treat_urls_as_private setting.
    def intercept
      if BreezyPDF.treat_urls_as_private
        BreezyPDF::Intercept::PrivateUrl
      else
        BreezyPDF::Intercept::PublicUrl
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Rack middleware entry point. Every request is wrapped in an Interceptor,
  # which either renders it as a PDF or forwards it to the wrapped app.
  class Middleware
    # @param app [#call] the next Rack application in the stack
    # @param _options [Hash] accepted but not used
    def initialize(app, _options = {})
      @app = app
    end

    # @param env [Hash] Rack environment
    # @return [Array] Rack response triplet produced by the interceptor
    def call(env)
      interceptor = Interceptor.new(@app, env)
      interceptor.intercept!
    end
  end
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF::PrivateAssets
  # A single asset (referenced by path or absolute URL) fetched over HTTP and
  # exposed as a local file with a filename and content type.
  class Asset
    # @param base_url [String] scheme://host:port prefix used for bare paths
    # @param asset_path_or_url [String] absolute URL, or a path relative to base_url
    def initialize(base_url, asset_path_or_url)
      @base_url = base_url
      @asset_path_or_url = asset_path_or_url
    end

    # Content type reported by the HTTP response for the asset.
    def content_type
      io_object.content_type
    end

    # Last path segment of the asset URL.
    def filename
      @filename ||= URI(asset_url).path.split("/").last
    end

    # Path of a local file holding the asset's content.
    def file_path
      file.path
    end

    private

    # open-uri may hand back an in-memory StringIO, which has no path on disk.
    # In that case the content is copied into a Tempfile.
    def file
      @file ||= if io_object.is_a?(StringIO)
                  Tempfile.new.tap do |f|
                    # Binary mode so image/font bytes are never transcoded.
                    f.binmode
                    # StringIO#to_s is the inspect string, not the content.
                    f.write(io_object.string)
                    # Flush to disk so readers opening the path see the data.
                    f.rewind
                  end
                else
                  io_object
                end
    end

    # Fetch the asset. The open-uri method on the parsed URI is used rather
    # than Kernel#open, which treats a leading "|" as a command to run and no
    # longer opens URLs on Ruby 3.
    def io_object
      @io_object ||= URI.parse(asset_url).open
    end

    # Absolute URL of the asset: used as given when it already names a host,
    # otherwise prefixed with the base URL.
    def asset_url
      @asset_url ||= if URI(@asset_path_or_url).host
                       @asset_path_or_url
                     else
                       "#{@base_url}#{@asset_path_or_url}"
                     end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF::PrivateAssets
  # An HTML document prepared for upload: optionally stripped of filtered
  # elements and rewritten so its assets are public, then written to a
  # temporary file. Also exposes metadata read from breezy-pdf-* meta tags.
  class HTML
    # @param base_url [String] base URL handed to the asset publicizer
    # @param html_fragment [String] the HTML to upload
    def initialize(base_url, html_fragment)
      @base_url = base_url
      @html_fragment = html_fragment
    end

    def content_type
      "text/html"
    end

    # Random, memoized file name ending in .html.
    def filename
      @filename ||= "#{SecureRandom.hex}.html"
    end

    # Path of the temporary file holding the processed HTML.
    def file_path
      file.path
    end

    # Hash of meta tag name (without the "breezy-pdf-" prefix) to content, or
    # an empty Hash when metadata extraction is disabled.
    def metadata
      @metadata ||= if BreezyPDF.extract_metadata
                      Hash[*meta_tags]
                    else
                      {}
                    end
    end

    private

    # Temporary file containing the processed HTML, rewound after writing.
    def file
      return @file if @file

      tempfile = Tempfile.new(filename)
      tempfile.write(modified_html_fragment)
      tempfile.rewind
      @file = tempfile
    end

    def modified_html_fragment
      @modified_html_fragment ||= modify_html_fragment!
    end

    # Apply the enabled transformations in order (strip first, then publicize),
    # replacing @html_fragment with each result.
    def modify_html_fragment!
      if BreezyPDF.filter_elements
        stripper = BreezyPDF::HTML::Strip.new(@html_fragment)
        @html_fragment = stripper.stripped_fragment
      end

      if BreezyPDF.upload_assets
        publicizer = BreezyPDF::HTML::Publicize.new(@base_url, @html_fragment)
        @html_fragment = publicizer.public_fragment
      end

      @html_fragment
    end

    def parsed_document
      @parsed_document ||= Nokogiri::HTML(modified_html_fragment)
    end

    # Flat [name, content, name, content, ...] list for every meta tag whose
    # name starts with "breezy-pdf-".
    def meta_tags
      @meta_tags ||= parsed_document.css(%(meta[name^="breezy-pdf-"])).flat_map do |tag|
        [tag["name"].gsub(/^breezy\-pdf\-/, ""), tag["content"]]
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Namespace for content that has to be uploaded before it can be rendered.
  # Each constant is registered for autoloading and required on first use.
  module PrivateAssets
    autoload(:HTML, "breezy_pdf/private_assets/html")
    autoload(:Asset, "breezy_pdf/private_assets/asset")
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Asks the API to convert a publicly reachable URL into a PDF.
  class RenderRequest
    # @param public_url [String] URL the service should render
    # @param metadata [#to_h, nil] optional render metadata; nil is sent as {}
    def initialize(public_url, metadata = nil)
      @public_url = public_url
      @metadata = metadata
    end

    # POST the render request.
    #
    # @return [Object] whatever Client#post returns
    def submit
      client.post(
        "/pdf/public_urls",
        url_to_render: @public_url,
        metadata: @metadata.to_h
      )
    end

    private

    # Lazily built API client.
    def client
      return @client if @client

      @client = Client.new
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Wraps an HTTP response from the API. Top-level keys of the JSON body are
  # exposed as reader methods (e.g. a "download_url" key answers #download_url).
  class Response
    # Logs at fatal level when the status code is outside 200-399.
    #
    # @param http_response [#code, #body] the raw HTTP response
    def initialize(http_response)
      @http_response = http_response
      BreezyPDF.logger.fatal("[BreezyPDF] Network request failed: #{@http_response.body}") if failure?
    end

    # True for status codes from 200 up to (but excluding) 400.
    def success?
      code >= 200 && code < 400
    end

    def failure?
      !success?
    end

    # Answer with the JSON value when the method name is a key of the body.
    #
    # @raise [NoMethodError] when the body has no such key
    # @raise [JSON::ParserError] when the body is not valid JSON
    def method_missing(method, *_args, &_blk)
      key = method.to_s
      return body[key] if attribute?(key)

      super
    end

    # Defers to super for unknown names so the default respond_to? behaviour
    # is preserved.
    def respond_to_missing?(method, include_private = false)
      attribute?(method.to_s) || super
    end

    private

    # True when the parsed body is a JSON object containing +key+. The Hash
    # check keeps array or scalar bodies from raising on the key lookup.
    def attribute?(key)
      body.is_a?(Hash) && body.key?(key)
    end

    def code
      @code ||= @http_response.code.to_i
    end

    # Parsed JSON body; a parse failure is logged and re-raised.
    def body
      @body ||= JSON.parse(@http_response.body)
    rescue JSON::ParserError => e
      BreezyPDF.logger.fatal("[BreezyPDF] Server responded with invalid JSON: #{e}")
      raise e
    end
  end
end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF::Uploads
  # Uploads a local file in three steps: presign the upload with the API, POST
  # the file as multipart form data to the presigned URL, then tell the API
  # the upload is complete.
  class Base
    # Transport-level failures that are wrapped into the Uploads error
    # classes. The original clauses rescued Net::HTTP, which is not an
    # exception class, so they could never match.
    # NOTE(review): Client may raise its own error types; those still propagate.
    NETWORK_ERRORS = [
      Timeout::Error,
      SocketError,
      SystemCallError,
      IOError,
      Net::ProtocolError,
      Net::HTTPBadResponse,
      Net::HTTPHeaderSyntaxError
    ].freeze

    # @param filename [String] name reported to the API and in the form data
    # @param content_type [String] MIME type of the file
    # @param file_path [String] path of the local file to upload
    def initialize(filename, content_type, file_path)
      @filename = filename
      @content_type = content_type
      @file_path = file_path
    end

    # Perform the upload and return the presigned URL of the stored file.
    #
    # @return [String] the presigned URL reported by the API
    # @raise [PresignError, UploadError, CompletionError] when a step fails
    def public_url
      BreezyPDF.logger.info(%([BreezyPDF] Starting private asset upload for #{@filename}))
      upload!
      complete_upload!

      BreezyPDF.logger.info(%([BreezyPDF] Private asset upload for #{@filename} completed))
      resource.presigned_url
    ensure
      # The handle is only needed while uploading; do not leak it.
      close_file
    end

    private

    def client
      @client ||= BreezyPDF::Client.new
    end

    def file
      @file ||= File.open(@file_path)
    end

    # Close the file handle if one was opened.
    def close_file
      @file.close if @file && !@file.closed?
      @file = nil
    end

    # Tell the API that the presigned upload has finished.
    def complete_upload!
      BreezyPDF.logger.info(%([BreezyPDF] Initiating completion of private asset upload for #{@filename}))
      client.put("/uploads/#{resource.id}", {})
    rescue *NETWORK_ERRORS => error
      BreezyPDF.logger.fatal(%([BreezyPDF] Unable to complete private asset upload for #{@filename}))
      raise CompletionError, error.message
    end

    # POST the file to the presigned URL; any status other than 204 is a failure.
    def upload!
      BreezyPDF.logger.info(%([BreezyPDF] Initiating private asset upload of #{@filename}))

      # Built outside the rescue so presign or file errors are not reported
      # as upload transport failures.
      http = upload_http
      request = upload_request

      upload_response = begin
        http.request(request)
      rescue *NETWORK_ERRORS => error
        BreezyPDF.logger.fatal(%([BreezyPDF] Unable to upload private asset #{@filename}))
        raise UploadError, error.message
      end

      return if upload_response.code.to_i == 204

      raise UploadError, "HTTP Status: #{upload_response.code}: #{upload_response.body}"
    end

    # Presigned upload resource, requested once and memoized.
    def resource
      @resource ||= presign!
    end

    # Request a presigned upload from the API. Logging lives here so it fires
    # once per presign rather than on every memoized #resource read.
    def presign!
      # Read the size before the rescue so a missing file surfaces as itself.
      size = file.size
      BreezyPDF.logger.info(%([BreezyPDF] Initiating presign of private asset upload #{@filename}))

      begin
        client.post("/uploads", filename: @filename, size: size, content_type: @content_type)
      rescue *NETWORK_ERRORS => error
        BreezyPDF.logger.fatal(%([BreezyPDF] Unable to presign private asset upload for #{@filename}))
        raise PresignError, error.message
      end
    end

    def upload_uri
      @upload_uri ||= URI.parse(resource.presigned_upload_url)
    end

    # HTTP connection to the presigned host; TLS is enabled for https URLs.
    def upload_http
      @upload_http ||= Net::HTTP.new(upload_uri.host, upload_uri.port).tap do |http|
        http.use_ssl = upload_uri.scheme == "https"
      end
    end

    # Multipart POST carrying the presigned fields and the file content.
    def upload_request
      @upload_request ||= Net::HTTP::Post.new(upload_uri.request_uri).tap do |post|
        file_form_data = FileFormData.new(resource.presigned_upload_fields, @content_type, @filename, file)

        post.content_type = "multipart/form-data; boundary=#{file_form_data.boundary}"
        post.body = file_form_data.data
      end
    end
  end
end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+
5
module BreezyPDF::Uploads
  # Composes the multipart/form-data body for an HTTP POST of a file: the
  # given form fields, then the gzip-compressed file, then a trailing "submit"
  # field and the closing boundary.
  class FileFormData
    # @param fields [Hash] form field names and values sent before the file
    # @param content_type [String] MIME type declared for the file part
    # @param filename [String] file name declared for the file part
    # @param file [#read] IO whose content is uploaded
    def initialize(fields, content_type, filename, file)
      @fields = fields
      @content_type = content_type
      @filename = filename
      @file = file
    end

    # The complete multipart body (memoized).
    def data
      @data ||= [
        field_data,
        file_data,
        closing_data
      ].join
    end

    # Random boundary string shared by every part (memoized).
    def boundary
      @boundary ||= SecureRandom.hex
    end

    private

    # One part per form field. Values go through #to_s so non-String values
    # (numbers, nil) do not raise on #chomp.
    def field_data
      @fields.map do |key, value|
        [
          "--#{boundary}\r\n",
          %(Content-Disposition: form-data; name="#{key}"\r\n\r\n),
          "#{value.to_s.chomp}\r\n"
        ].join
      end.join
    end

    # The file part. The trailing CRLF belongs to the delimiter of the next
    # part: without it the following boundary is not on its own line and the
    # multipart body is malformed.
    def file_data
      BreezyPDF.logger.info(%([BreezyPDF] Compressing file contents for #{@filename}))
      [
        "--#{boundary}\r\n",
        %(Content-Disposition: form-data; name="file"; filename="#{@filename}"\r\n),
        "Content-Type: #{@content_type}\r\n\r\n",
        BreezyPDF::Gzip.compress(@file.read),
        "\r\n"
      ].join
    end

    # The trailing "submit" field followed by the closing boundary.
    def closing_data
      [
        "--#{boundary}\r\n",
        %(Content-Disposition: form-data; name="submit"\r\n\r\n),
        %(Upload) + "\r\n",
        "--#{boundary}--"
      ].join
    end
  end
end
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Namespace for uploading private assets, plus the error raised by each
  # stage of an upload.
  module Uploads
    autoload(:Base, "breezy_pdf/uploads/base")
    autoload(:FileFormData, "breezy_pdf/uploads/file_form_data")

    # Raised when presigning an upload fails.
    class PresignError < BreezyPDFError; end

    # Raised when sending the file to the presigned URL fails.
    class UploadError < BreezyPDFError; end

    # Raised when marking an upload as complete fails.
    class CompletionError < BreezyPDFError; end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Macros that define module-level attributes backed by class variables.
  # Extend a module or class with Util to make them available in its body.
  module Util
    # Define class-level and instance-level readers for each name. When a
    # block is given, its value becomes the attribute's initial value.
    #
    # @raise [NameError] when a name is not a valid identifier
    def mattr_reader(*syms)
      syms.each do |name|
        unless /\A[_A-Za-z]\w*\z/.match?(name)
          raise NameError, "invalid attribute name: #{name}"
        end

        class_eval(<<-RUBY, __FILE__, __LINE__ + 1)
          @@#{name} = nil unless defined? @@#{name}

          def self.#{name}
            @@#{name}
          end

          def #{name}
            @@#{name}
          end
        RUBY

        class_variable_set("@@#{name}", yield) if block_given?
      end
    end

    # Define class-level and instance-level writers for each name. When a
    # block is given, its value is assigned through the new writer.
    #
    # @raise [NameError] when a name is not a valid identifier
    def mattr_writer(*syms)
      syms.each do |name|
        unless /\A[_A-Za-z]\w*\z/.match?(name)
          raise NameError, "invalid attribute name: #{name}"
        end

        class_eval(<<-RUBY, __FILE__, __LINE__ + 1)
          @@#{name} = nil unless defined? @@#{name}

          def self.#{name}=(obj)
            @@#{name} = obj
          end

          def #{name}=(obj)
            @@#{name} = obj
          end
        RUBY

        send("#{name}=", yield) if block_given?
      end
    end

    # Define both readers and writers; a block is forwarded to the reader
    # macro only.
    def mattr_accessor(*syms, &blk)
      mattr_reader(*syms, &blk)
      mattr_writer(*syms)
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module BreezyPDF
  # Release number of the gem.
  VERSION = "0.0.1"
end
data/lib/breezy_pdf.rb ADDED
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
require "uri"
require "net/http"
require "json"
require "logger"
require "tempfile"
require "securerandom"
require "zlib"
require "stringio"
require "open-uri"

require "nokogiri"
require "concurrent"

require "breezy_pdf/util"
require "breezy_pdf/gzip"
17
+
18
# Top-level namespace and global configuration for the BreezyPDF client.
# Settings are module-level attributes; change them directly or inside a
# BreezyPDF.setup block.
module BreezyPDF
  extend BreezyPDF::Util

  autoload :VERSION, "breezy_pdf/version"
  autoload :RenderRequest, "breezy_pdf/render_request"
  autoload :Client, "breezy_pdf/client"
  autoload :Response, "breezy_pdf/response"
  autoload :Middleware, "breezy_pdf/middleware"
  autoload :Interceptor, "breezy_pdf/interceptor"
  autoload :Uploads, "breezy_pdf/uploads"
  autoload :Intercept, "breezy_pdf/intercept"
  autoload :PrivateAssets, "breezy_pdf/private_assets"
  autoload :HTML, "breezy_pdf/html"

  # Base class for the errors raised by this library.
  BreezyPDFError = Class.new(StandardError)

  # NOTE(review): presumably the API credential read by Client - confirm.
  mattr_accessor :secret_api_key
  @@secret_api_key = nil

  # NOTE(review): presumably the API root used by Client - confirm.
  mattr_accessor :base_url
  @@base_url = "https://www.breezypdf.com/api"

  # Patterns that mark a request for PDF rendering. The interceptor matches
  # them against the request URI and the intercept strategies strip them
  # from PATH_INFO.
  mattr_accessor :middleware_path_matchers
  @@middleware_path_matchers = [/\.pdf$/]

  # When true the interceptor uses Intercept::PrivateUrl, otherwise
  # Intercept::PublicUrl.
  mattr_accessor :treat_urls_as_private
  @@treat_urls_as_private = true

  # When true PrivateAssets::HTML runs the HTML through HTML::Publicize.
  mattr_accessor :upload_assets
  @@upload_assets = true

  # NOTE(review): presumably the CSS selectors of elements whose assets are
  # uploaded - confirm against HTML::Publicize.
  mattr_accessor :asset_selectors
  @@asset_selectors = %w(img script link[rel="stylesheet"])

  # NOTE(review): presumably the attribute patterns deciding which asset
  # references are uploaded - confirm against HTML::Publicize.
  mattr_accessor :asset_path_matchers
  @@asset_path_matchers = {
    href: %r{^\/\w+},
    src: %r{^\/\w+}
  }

  # When true PrivateAssets::HTML reads metadata from breezy-pdf-* meta tags.
  mattr_accessor :extract_metadata
  @@extract_metadata = true

  # NOTE(review): presumably the number of worker threads for asset uploads -
  # confirm.
  mattr_accessor :threads
  @@threads = 1

  # When true PrivateAssets::HTML runs the HTML through HTML::Strip.
  mattr_accessor :filter_elements
  @@filter_elements = false

  # The class variable must carry the accessor's name. It was previously
  # @@filtered_element_selectors, which left this accessor returning nil.
  # NOTE(review): presumably the selectors HTML::Strip removes - confirm.
  mattr_accessor :filter_elements_selectors
  @@filter_elements_selectors = %w[.breezy-pdf-remove]

  # Logger used throughout the library; only fatal messages by default.
  mattr_accessor :logger
  @@logger = Logger.new(STDOUT)
  @@logger.level = Logger::FATAL

  # Yields the module itself so settings can be assigned in a block.
  def self.setup
    yield self
  end
end
data/test.html ADDED
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Hello</title>
4
+ </head>
5
+ <body>
6
+ <h1>Hey there!</h1>
7
+ </body>
8
+ </html>