grover 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cf861057bca3ab2b9c59bf0c52b329bb70cd1e5c
4
- data.tar.gz: 910e24710cfd955ca779b600680ce7b50bc8458b
3
+ metadata.gz: ec9fbd6ca4d7cd40c4a8d3a0da2627bb599bbe20
4
+ data.tar.gz: 169db229a825e41c40914bedb74057bb9935de4a
5
5
  SHA512:
6
- metadata.gz: 6c101f3d8bf4b9cdefa35ed26994e24498c9eede363ed798450b1cc849b288547b34e28ded856387bda3e3d20335b76aed0547527baaa5632a6e3164f5bbc1a8
7
- data.tar.gz: 3fbc34457ebb9a151c85e01217ddc884baaac14c36532c0eec77856557188b7351389f6ec73cdc400ad2379589fccf158fd0b20c5f2626f27de5e850ffa7ff31
6
+ metadata.gz: c7a89544aefdf652135d81e4d8f565bd63dc131604717c158affb77a97523abc5e5ea6192263842484727a6c617215ebf92b76fc662293ee4b861e127dd06f65
7
+ data.tar.gz: 757d7d1f0df9958f29cd6f5f52fb7d2a00424570ba8e3b231cd95de40b806ffd93c105948ed24948bdb7d05b7c95cb57d703e8a1e892e6508edf8c4565e58d1e
@@ -2,6 +2,7 @@ require 'grover/version'
2
2
 
3
3
  require 'grover/utils'
4
4
  require 'grover/processor'
5
+ require 'grover/html_preprocessor'
5
6
  require 'grover/middleware'
6
7
 
7
8
  #
@@ -0,0 +1,28 @@
1
+ class Grover
2
+ #
3
+ # Helper module for preparing HTML for conversion
4
+ #
5
+ # Sourced from the PDFKit project
6
+ # @see https://github.com/pdfkit/pdfkit
7
+ #
8
+ module HTMLPreprocessor
9
+ # Change relative paths to absolute, and relative protocols to absolute protocols
10
+ def self.process(html, root_url, protocol)
11
+ html = translate_relative_paths(html, root_url) if root_url
12
+ html = translate_relative_protocols(html, protocol) if protocol
13
+ html
14
+ end
15
+
16
+ def self.translate_relative_paths(html, root_url)
17
+ # Try out this regexp using rubular http://rubular.com/r/hiAxBNX7KE
18
+ html.gsub(%r{(href|src)=(['"])/([^/"']([^\"']*|[^"']*))?['"]}, "\\1=\\2#{root_url}\\3\\2")
19
+ end
20
+ private_class_method :translate_relative_paths
21
+
22
+ def self.translate_relative_protocols(body, protocol)
23
+ # Try out this regexp using rubular http://rubular.com/r/0Ohk0wFYxV
24
+ body.gsub(%r{(href|src)=(['"])//([^\"']*|[^"']*)['"]}, "\\1=\\2#{protocol}://\\3\\2")
25
+ end
26
+ private_class_method :translate_relative_protocols
27
+ end
28
+ end
@@ -2,6 +2,9 @@ class Grover
2
2
  #
3
3
  # Rack middleware for catching PDF requests and returning the upstream HTML as a PDF
4
4
  #
5
+ # Much of this code was sourced from the PDFKit project
6
+ # @see https://github.com/pdfkit/pdfkit
7
+ #
5
8
  class Middleware
6
9
  def initialize(app)
7
10
  @app = app
@@ -12,22 +15,12 @@ class Grover
12
15
  @request = Rack::Request.new(env)
13
16
  @render_pdf = false
14
17
 
15
- set_request_to_render_as_pdf(env) if render_as_pdf?
18
+ configure_env_for_pdf_request(env) if render_as_pdf?
16
19
  status, headers, response = @app.call(env)
17
20
 
18
- if rendering_pdf? && headers['Content-Type'] =~ %r{text/html|application/xhtml\+xml}
19
- body = response.respond_to?(:body) ? response.body : response.join
20
- body = body.join if body.is_a?(Array)
21
-
22
- body = Grover.new(body).to_pdf
23
- response = [body]
24
-
25
- # Do not cache PDFs
26
- headers.delete 'ETag'
27
- headers.delete 'Cache-Control'
28
-
29
- headers['Content-Length'] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s
30
- headers['Content-Type'] = 'application/pdf'
21
+ if rendering_pdf? && html_content?(headers)
22
+ response = convert_to_pdf response
23
+ update_headers headers, body
31
24
  end
32
25
 
33
26
  [status, headers, response]
@@ -43,13 +36,49 @@ class Grover
43
36
  @request.path.end_with?('.pdf')
44
37
  end
45
38
 
46
- def set_request_to_render_as_pdf(env)
39
+ def html_content?(headers)
40
+ headers['Content-Type'] =~ %r{text/html|application/xhtml\+xml}
41
+ end
42
+
43
+ def convert_to_pdf(response)
44
+ body = response.respond_to?(:body) ? response.body : response.join
45
+ body = body.join if body.is_a?(Array)
46
+
47
+ body = HTMLPreprocessor.process body, request_url, protocol
48
+ body = Grover.new(body).to_pdf
49
+ [body]
50
+ end
51
+
52
+ def update_headers(headers, body)
53
+ # Do not cache PDFs
54
+ headers.delete 'ETag'
55
+ headers.delete 'Cache-Control'
56
+
57
+ headers['Content-Length'] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s
58
+ headers['Content-Type'] = 'application/pdf'
59
+ end
60
+
61
+ def configure_env_for_pdf_request(env)
47
62
  @render_pdf = true
63
+
64
+ path = @request.path.sub(/\.pdf$/, '')
65
+ path = path.sub(@request.script_name, '')
66
+
67
+ %w[PATH_INFO REQUEST_URI].each { |e| env[e] = path }
68
+
48
69
  env['HTTP_ACCEPT'] = concat(env['HTTP_ACCEPT'], Rack::Mime.mime_type('.html'))
49
70
  end
50
71
 
51
72
  def concat(accepts, type)
52
73
  (accepts || '').split(',').unshift(type).compact.join(',')
53
74
  end
75
+
76
+ def request_url
77
+ "#{env['rack.url_scheme']}://#{env['HTTP_HOST']}/"
78
+ end
79
+
80
+ def protocol
81
+ env['rack.url_scheme']
82
+ end
54
83
  end
55
84
  end
@@ -21,7 +21,7 @@ class Grover
21
21
  if (url.match(/^http/i)) {
22
22
  await page.goto(url, { waitUntil: 'networkidle2' });
23
23
  } else {
24
- await page.setContent(url);
24
+ await page.goto(`data:text/html,${url}`, { waitUntil: 'networkidle0' });
25
25
  }
26
26
  return await page.pdf(options);
27
27
  } finally {
@@ -1,3 +1,3 @@
1
1
  class Grover
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.2.1'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: grover
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Bromwich
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-22 00:00:00.000000000 Z
11
+ date: 2018-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: schmooze
@@ -102,6 +102,7 @@ extensions: []
102
102
  extra_rdoc_files: []
103
103
  files:
104
104
  - lib/grover.rb
105
+ - lib/grover/html_preprocessor.rb
105
106
  - lib/grover/middleware.rb
106
107
  - lib/grover/processor.rb
107
108
  - lib/grover/utils.rb