breezy_pdf 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF::Intercept
4
+ # :nodoc
5
+ class PrivateUrl < Base
6
+ def call
7
+ BreezyPDF.logger.info(
8
+ "[BreezyPDF] Requesting render of #{public_url} with metadata: #{html_private_asset.metadata}"
9
+ )
10
+
11
+ response = BreezyPDF::RenderRequest.new(public_url, html_private_asset.metadata).submit
12
+
13
+ BreezyPDF.logger.info("[BreezyPDF] Redirect to pdf at #{response.download_url}")
14
+ [
15
+ 302,
16
+ { "Location" => response.download_url, "Content-Type" => "text/html", "Content-Length" => "0" },
17
+ []
18
+ ]
19
+ end
20
+
21
+ private
22
+
23
+ def public_url
24
+ @public_url ||= BreezyPDF::Uploads::Base.new(
25
+ html_private_asset.filename, html_private_asset.content_type, html_private_asset.file_path
26
+ ).public_url
27
+ end
28
+
29
+ def html_private_asset
30
+ @html_private_asset ||= BreezyPDF::PrivateAssets::HTML.new(base_url, body)
31
+ end
32
+
33
+ def status
34
+ @status ||= response[0]
35
+ end
36
+
37
+ def headers
38
+ @headers ||= response[1]
39
+ end
40
+
41
+ def body
42
+ @body ||= response[2].respond_to?(:body) ? response[2].body : response[2].join
43
+ end
44
+
45
+ def response
46
+ @response ||= app.call(doctored_env)
47
+ end
48
+
49
+ def doctored_env
50
+ env.dup.tap do |hash|
51
+ hash["PATH_INFO"] = path
52
+ end
53
+ end
54
+
55
+ def path
56
+ path = env["PATH_INFO"]
57
+
58
+ BreezyPDF.middleware_path_matchers.each do |regex|
59
+ path = path.gsub(regex, "")
60
+ end
61
+
62
+ path
63
+ end
64
+
65
+ def base_url
66
+ "#{env['rack.url_scheme']}://#{env['SERVER_NAME']}:#{env['SERVER_PORT']}"
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF::Intercept
4
+ # :nodoc
5
+ class PublicUrl < Base
6
+ def call
7
+ BreezyPDF.logger.info("[BreezyPDF] Requesting render of #{public_url}")
8
+ response = BreezyPDF::RenderRequest.new(public_url).submit
9
+
10
+ [
11
+ 302,
12
+ { "Location" => response.download_url, "Content-Type" => "text/html", "Content-Length" => "0" },
13
+ []
14
+ ]
15
+ end
16
+
17
+ private
18
+
19
+ def public_url
20
+ "#{env['rack.url_scheme']}://#{env['SERVER_NAME']}:#{env['SERVER_PORT']}" \
21
+ "#{path}?#{env['QUERY_STRING']}"
22
+ end
23
+
24
+ def path
25
+ path = env["PATH_INFO"]
26
+
27
+ BreezyPDF.middleware_path_matchers.each do |regex|
28
+ path = path.gsub(regex, "")
29
+ end
30
+
31
+ path
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Namespace for the request interception strategies; each constant below is autoloaded from the named file
5
+ module Intercept
6
+ autoload :Base, "breezy_pdf/intercept/base"
7
+ autoload :PublicUrl, "breezy_pdf/intercept/public_url"
8
+ autoload :PrivateUrl, "breezy_pdf/intercept/private_url"
9
+ end
10
+ end
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Intercept a Rack request
5
+ class Interceptor
6
+ attr_reader :app, :env
7
+
8
+ def initialize(app, env)
9
+ @app = app
10
+ @env = env
11
+ end
12
+
13
+ def intercept!
14
+ if intercept?
15
+ BreezyPDF.logger.info("[BreezyPDF] Intercepting request for PDF rendering")
16
+ intercept.new(@app, @env).call
17
+ else
18
+ app.call(env)
19
+ end
20
+ end
21
+
22
+ private
23
+
24
+ def intercept?
25
+ get? && matching_uri?
26
+ end
27
+
28
+ def matching_uri?
29
+ matchers.any? { |regex| env["REQUEST_URI"].match?(regex) }
30
+ end
31
+
32
+ def get?
33
+ env["REQUEST_METHOD"].match?(/get/i)
34
+ end
35
+
36
+ def matchers
37
+ @matchers ||= BreezyPDF.middleware_path_matchers
38
+ end
39
+
40
+ def intercept
41
+ if BreezyPDF.treat_urls_as_private
42
+ BreezyPDF::Intercept::PrivateUrl
43
+ else
44
+ BreezyPDF::Intercept::PublicUrl
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Rack middleware entry point: every request is handed to a new Interceptor
5
+ class Middleware
6
+ def initialize(app, _options = {})
7
+ @app = app
8
+ end
9
+
10
+ def call(env)
11
+ Interceptor.new(@app, env).intercept!
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF::PrivateAssets
4
+ # :nodoc
5
+ class Asset
6
+ def initialize(base_url, asset_path_or_url)
7
+ @base_url = base_url
8
+ @asset_path_or_url = asset_path_or_url
9
+ end
10
+
11
+ def content_type
12
+ io_object.content_type
13
+ end
14
+
15
+ def filename
16
+ @filename ||= URI(asset_url).path.split("/").last
17
+ end
18
+
19
+ def file_path
20
+ file.path
21
+ end
22
+
23
+ private
24
+
25
+ def file
26
+ @file ||= if io_object.is_a?(StringIO)
27
+ Tempfile.new.tap do |f|
28
+ f.write io_object.to_s
29
+ end
30
+ else
31
+ io_object
32
+ end
33
+ end
34
+
35
+ def io_object
36
+ @io_object ||= open(asset_url)
37
+ end
38
+
39
+ def asset_url
40
+ @asset_url ||= if URI(@asset_path_or_url).host
41
+ @asset_path_or_url
42
+ else
43
+ "#{@base_url}#{@asset_path_or_url}"
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF::PrivateAssets
4
+ # :nodoc
5
+ class HTML
6
+ def initialize(base_url, html_fragment)
7
+ @base_url = base_url
8
+ @html_fragment = html_fragment
9
+ end
10
+
11
+ def content_type
12
+ "text/html"
13
+ end
14
+
15
+ def filename
16
+ @filename ||= "#{SecureRandom.hex}.html"
17
+ end
18
+
19
+ def file_path
20
+ file.path
21
+ end
22
+
23
+ def metadata
24
+ @metadata ||= BreezyPDF.extract_metadata ? Hash[*meta_tags] : {}
25
+ end
26
+
27
+ private
28
+
29
+ def file
30
+ @file ||= Tempfile.new(filename).tap do |f|
31
+ f.write(modified_html_fragment)
32
+ f.rewind
33
+ end
34
+ end
35
+
36
+ def modified_html_fragment
37
+ @modified_html_fragment ||= modify_html_fragment!
38
+ end
39
+
40
+ def modify_html_fragment!
41
+ if BreezyPDF.filter_elements
42
+ @html_fragment = BreezyPDF::HTML::Strip.new(
43
+ @html_fragment
44
+ ).stripped_fragment
45
+ end
46
+
47
+ if BreezyPDF.upload_assets
48
+ @html_fragment = BreezyPDF::HTML::Publicize.new(
49
+ @base_url, @html_fragment
50
+ ).public_fragment
51
+ end
52
+
53
+ @html_fragment
54
+ end
55
+
56
+ def parsed_document
57
+ @parsed_document ||= Nokogiri::HTML(modified_html_fragment)
58
+ end
59
+
60
+ def meta_tags
61
+ @meta_tags ||= parsed_document.css(%(meta[name^="breezy-pdf-"])).collect do |tag|
62
+ [tag["name"].gsub(/^breezy\-pdf\-/, ""), tag["content"]]
63
+ end.flatten
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Namespace for the wrappers around private content (page HTML and assets); constants are autoloaded
5
+ module PrivateAssets
6
+ autoload :HTML, "breezy_pdf/private_assets/html"
7
+ autoload :Asset, "breezy_pdf/private_assets/asset"
8
+ end
9
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Request conversion of a public URL to PDF, optionally passing render metadata along
5
+ class RenderRequest
6
+ def initialize(public_url, metadata = nil)
7
+ @public_url = public_url
8
+ @metadata = metadata
9
+ end
10
+
11
+ def submit # POSTs the URL and the metadata (nil becomes {} through to_h) to /pdf/public_urls
12
+ client.post("/pdf/public_urls", url_to_render: @public_url, metadata: @metadata.to_h)
13
+ end
14
+
15
+ private
16
+
17
+ def client
18
+ @client ||= Client.new
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # API HTTP Response
5
+ class Response
6
+ def initialize(http_response)
7
+ @http_response = http_response
8
+ BreezyPDF.logger.fatal("[BreezyPDF] Network request failed: #{@http_response.body}") if failure?
9
+ end
10
+
11
+ def success?
12
+ code >= 200 && code < 400
13
+ end
14
+
15
+ def failure?
16
+ !success?
17
+ end
18
+
19
+ def method_missing(method, *_args, &_blk)
20
+ if body.keys.include?(method.to_s)
21
+ body[method.to_s]
22
+ else
23
+ super
24
+ end
25
+ end
26
+
27
+ def respond_to_missing?(method, *)
28
+ body.keys.include?(method.to_s)
29
+ end
30
+
31
+ private
32
+
33
+ def code
34
+ @code ||= @http_response.code.to_i
35
+ end
36
+
37
+ def body
38
+ @body ||= JSON.parse(@http_response.body)
39
+ rescue JSON::ParserError => e
40
+ BreezyPDF.logger.fatal("[BreezyPDF] Server responded with invalid JSON: #{e}")
41
+ raise e
42
+ end
43
+ end
44
+ end
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF::Uploads
4
+ # Upload an asset
5
+ class Base
6
+ def initialize(filename, content_type, file_path)
7
+ @filename = filename
8
+ @content_type = content_type
9
+ @file_path = file_path
10
+ end
11
+
12
+ def public_url
13
+ BreezyPDF.logger.info(%([BreezyPDF] Starting private asset upload for #{@filename}))
14
+ upload!
15
+ complete_upload!
16
+
17
+ BreezyPDF.logger.info(%([BreezyPDF] Private asset upload for #{@filename} completed))
18
+ resource.presigned_url
19
+ end
20
+
21
+ private
22
+
23
+ def client
24
+ @client ||= BreezyPDF::Client.new
25
+ end
26
+
27
+ def file
28
+ @file ||= File.open(@file_path)
29
+ end
30
+
31
+ def complete_upload!
32
+ BreezyPDF.logger.info(%([BreezyPDF] Initiating completion of private asset upload for #{@filename}))
33
+ client.put("/uploads/#{resource.id}", {})
34
+ rescue Net::HTTP => error
35
+ BreezyPDF.logger.fatal(%([BreezyPDF] Unable to complete private asset upload for #{@filename}))
36
+ raise CompletionError, error.message
37
+ end
38
+
39
+ def upload!
40
+ BreezyPDF.logger.info(%([BreezyPDF] Initiating private asset upload of #{@filename}))
41
+ upload_response = upload_http.request(upload_request)
42
+
43
+ return if upload_response.code.to_i == 204
44
+
45
+ raise UploadError, "HTTP Status: #{upload_response.code}: #{upload_response.body}"
46
+ rescue Net::HTTP => error
47
+ BreezyPDF.logger.fatal(%([BreezyPDF] Unable to upload private asset #{@filename}))
48
+ raise UploadError, error.message
49
+ end
50
+
51
+ def resource
52
+ BreezyPDF.logger.info(%([BreezyPDF] Initiating presign of private asset upload #{@filename}))
53
+ @resource ||= client.post("/uploads", filename: @filename, size: file.size, content_type: @content_type)
54
+ rescue Net::HTTP => error
55
+ BreezyPDF.logger.fatal(%([BreezyPDF] Unable to presign private asset upload for #{@filename}))
56
+ raise PresignError, error.message
57
+ end
58
+
59
+ def upload_uri
60
+ @upload_uri ||= URI.parse(resource.presigned_upload_url)
61
+ end
62
+
63
+ def upload_http
64
+ @upload_http ||= Net::HTTP.new(upload_uri.host, upload_uri.port).tap { |http| http.use_ssl = true }
65
+ end
66
+
67
+ def upload_request
68
+ @upload_request ||= Net::HTTP::Post.new(upload_uri.request_uri).tap do |post|
69
+ file_form_data = FileFormData.new(resource.presigned_upload_fields, @content_type, @filename, file)
70
+
71
+ post.content_type = "multipart/form-data; boundary=#{file_form_data.boundary}"
72
+ post.body = file_form_data.data
73
+ end
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+
5
+ module BreezyPDF::Uploads
6
+ # Compose the form data for an HTTP POST of a file
7
+ class FileFormData
8
+ def initialize(fields, content_type, filename, file)
9
+ @fields = fields
10
+ @content_type = content_type
11
+ @filename = filename
12
+ @file = file
13
+ end
14
+
15
+ def data
16
+ @data ||= [
17
+ field_data,
18
+ file_data,
19
+ closing_data
20
+ ].join
21
+ end
22
+
23
+ def boundary
24
+ @boundary ||= SecureRandom.hex
25
+ end
26
+
27
+ private
28
+
29
+ def field_data
30
+ field_data = []
31
+
32
+ @fields.each do |key, value|
33
+ field_data << "--#{boundary}\r\n"
34
+ field_data << %(Content-Disposition: form-data; name="#{key}"\r\n\r\n)
35
+ field_data << value.chomp + "\r\n"
36
+ end
37
+
38
+ field_data.join
39
+ end
40
+
41
+ def file_data
42
+ BreezyPDF.logger.info(%([BreezyPDF] Compressing file contents for #{@filename}))
43
+ [
44
+ "--#{boundary}\r\n",
45
+ %(Content-Disposition: form-data; name="file"; filename="#{@filename}"\r\n),
46
+ "Content-Type: #{@content_type}\r\n\r\n",
47
+ BreezyPDF::Gzip.compress(@file.read)
48
+ ].join
49
+ end
50
+
51
+ def closing_data
52
+ [
53
+ "--#{boundary}\r\n",
54
+ %(Content-Disposition: form-data; name="submit"\r\n\r\n),
55
+ %(Upload) + "\r\n",
56
+ "--#{boundary}--"
57
+ ].join
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Namespace for the upload classes (autoloaded) and the errors they raise
5
+ module Uploads
6
+ autoload :Base, "breezy_pdf/uploads/base"
7
+ autoload :FileFormData, "breezy_pdf/uploads/file_form_data"
8
+
9
+ PresignError = Class.new(BreezyPDFError) # raised by Uploads::Base when presigning fails
10
+ UploadError = Class.new(BreezyPDFError) # raised by Uploads::Base when the file POST fails
11
+ CompletionError = Class.new(BreezyPDFError) # raised by Uploads::Base when completing the upload fails
12
+ end
13
+ end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ # Module-level attribute macros (mattr_reader, mattr_writer, mattr_accessor) backed by class variables
5
+ module Util
6
+ def mattr_reader(*syms) # defines a module-level and an instance-level reader per name; a given block supplies the initial value
7
+ syms.each do |sym|
8
+ raise NameError, "invalid attribute name: #{sym}" unless /\A[_A-Za-z]\w*\z/.match?(sym)
9
+ class_eval(<<-EOS, __FILE__, __LINE__ + 1)
10
+ @@#{sym} = nil unless defined? @@#{sym}
11
+ def self.#{sym}
12
+ @@#{sym}
13
+ end
14
+ EOS
15
+
16
+ class_eval(<<-EOS, __FILE__, __LINE__ + 1)
17
+ def #{sym}
18
+ @@#{sym}
19
+ end
20
+ EOS
21
+ class_variable_set("@@#{sym}", yield) if block_given?
22
+ end
23
+ end
24
+
25
+ def mattr_writer(*syms) # defines a module-level and an instance-level writer per name; a given block's value is assigned through the writer
26
+ syms.each do |sym|
27
+ raise NameError, "invalid attribute name: #{sym}" unless /\A[_A-Za-z]\w*\z/.match?(sym)
28
+ class_eval(<<-EOS, __FILE__, __LINE__ + 1)
29
+ @@#{sym} = nil unless defined? @@#{sym}
30
+ def self.#{sym}=(obj)
31
+ @@#{sym} = obj
32
+ end
33
+ EOS
34
+
35
+ class_eval(<<-EOS, __FILE__, __LINE__ + 1)
36
+ def #{sym}=(obj)
37
+ @@#{sym} = obj
38
+ end
39
+ EOS
40
+ send("#{sym}=", yield) if block_given?
41
+ end
42
+ end
43
+
44
+ def mattr_accessor(*syms, &blk) # reader plus writer; the block is forwarded to the reader only
45
+ mattr_reader(*syms, &blk)
46
+ mattr_writer(*syms)
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module BreezyPDF
4
+ VERSION = "0.0.1" # version string of the gem
5
+ end
data/lib/breezy_pdf.rb ADDED
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "uri"
4
+ require "net/http"
5
+ require "json"
6
+ require "tempfile"
7
+ require "securerandom"
8
+ require "zlib"
9
+ require "stringio"
10
+ require "open-uri"
11
+
12
+ require "nokogiri"
13
+ require "concurrent"
14
+
15
+ require "breezy_pdf/util"
16
+ require "breezy_pdf/gzip"
17
+
18
+ # :nodoc
19
+ module BreezyPDF
20
+ extend BreezyPDF::Util
21
+
22
+ autoload :VERSION, "breezy_pdf/version"
23
+ autoload :RenderRequest, "breezy_pdf/render_request"
24
+ autoload :Client, "breezy_pdf/client"
25
+ autoload :Response, "breezy_pdf/response"
26
+ autoload :Middleware, "breezy_pdf/middleware"
27
+ autoload :Interceptor, "breezy_pdf/interceptor"
28
+ autoload :Uploads, "breezy_pdf/uploads"
29
+ autoload :Intercept, "breezy_pdf/intercept"
30
+ autoload :PrivateAssets, "breezy_pdf/private_assets"
31
+ autoload :HTML, "breezy_pdf/html"
32
+
33
+ BreezyPDFError = Class.new(StandardError)
34
+
35
+ mattr_accessor :secret_api_key
36
+ @@secret_api_key = nil
37
+
38
+ mattr_accessor :base_url
39
+ @@base_url = "https://www.breezypdf.com/api"
40
+
41
+ mattr_accessor :middleware_path_matchers
42
+ @@middleware_path_matchers = [/\.pdf$/]
43
+
44
+ mattr_accessor :treat_urls_as_private
45
+ @@treat_urls_as_private = true
46
+
47
+ mattr_accessor :upload_assets
48
+ @@upload_assets = true
49
+
50
+ mattr_accessor :asset_selectors
51
+ @@asset_selectors = %w(img script link[rel="stylesheet"])
52
+
53
+ mattr_accessor :asset_path_matchers
54
+ @@asset_path_matchers = {
55
+ href: %r{^\/\w+},
56
+ src: %r{^\/\w+}
57
+ }
58
+
59
+ mattr_accessor :extract_metadata
60
+ @@extract_metadata = true
61
+
62
+ mattr_accessor :threads
63
+ @@threads = 1
64
+
65
+ mattr_accessor :filter_elements
66
+ @@filter_elements = false
67
+
68
+ mattr_accessor :filter_elements_selectors
69
+ @@filtered_element_selectors = %w[.breezy-pdf-remove]
70
+
71
+ mattr_accessor :logger
72
+ @@logger = Logger.new(STDOUT)
73
+ @@logger.level = Logger::FATAL
74
+
75
+ def self.setup
76
+ yield self
77
+ end
78
+ end
data/test.html ADDED
@@ -0,0 +1,8 @@
1
+ <html>
2
+ <head>
3
+ <title>Hello</title>
4
+ </head>
5
+ <body>
6
+ <h1>Hey there!</h1>
7
+ </body>
8
+ </html>