shrimp-orangejulius 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bcb06ce8ced640f4b8f73bd159d06f784a28faeb
4
+ data.tar.gz: 33f37c32163be0f438763dbcd6f99e377d410ddb
5
+ SHA512:
6
+ metadata.gz: f6586310d4c7bf3302b8ef1b36fdd8632cc76fbb6bd0eedaf01d3e116819036b0de8960add26c36de16d836bcff900fa8ad356888fcaad33a3154753bc289819
7
+ data.tar.gz: 77f7ef5d6fc83dcc5144e0d43cb8d7224da878252606997ce6a3b1e10befd1ecf12bd58757b65c0809070417179226d541bdd1284e87eac210a8c60c4a5d1e00
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - rbx-2
5
+ - 2.0.0
6
+ - 2.1.1
7
+ matrix:
8
+ allow_failures:
9
+ - rvm: rbx-2
data/ChangeLog.md ADDED
@@ -0,0 +1,9 @@
1
+ ## ChangeLog
2
+
3
+ 2012-12-18: Version 0.0.2
4
+ Improved Error handling
5
+ Improved Readme
6
+ Minor Bug fixes
7
+
8
+ 2012-12-17: Version 0.0.1
9
+ Initial launch.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in shrimp.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 adeven GmbH Manuel Kniep
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,191 @@
1
+ # Shrimp
2
+ [![Build Status](https://travis-ci.org/adjust/shrimp.png?branch=master)](https://travis-ci.org/adjust/shrimp)
3
+ Creates PDFs from URLs using phantomjs
4
+
5
+ Read our [blogpost](http://big-elephants.com/2012-12/pdf-rendering-with-phantomjs/) about how it works.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'shrimp'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install shrimp
20
+
21
+
22
+ ### Phantomjs
23
+
24
+ See http://phantomjs.org/download.html on how to install phantomjs
25
+
26
+ ## Usage
27
+
28
+ ```
29
+ require 'shrimp'
30
+ url = 'http://www.google.com'
31
+ options = { :margin => "1cm"}
32
+ Shrimp::Phantom.new(url, options).to_pdf("~/output.pdf")
33
+ ```
34
+ ## Configuration
35
+
36
+ ```
37
+ Shrimp.configure do |config|
38
+
39
+ # The path to the phantomjs executable
40
+ # defaults to the result of `which phantomjs`
41
+ # config.phantomjs = '/usr/local/bin/phantomjs'
42
+
43
+ # the default pdf output format
44
+ # e.g. "5in*7.5in", "10cm*20cm", "A4", "Letter"
45
+ # config.format = 'A4'
46
+
47
+ # the default margin
48
+ # config.margin = '1cm'
49
+
50
+ # the zoom factor
51
+ # config.zoom = 1
52
+
53
+ # the page orientation 'portrait' or 'landscape'
54
+ # config.orientation = 'portrait'
55
+
56
+ # a temporary dir used to store tempfiles
57
+ # config.tmpdir = Dir.tmpdir
58
+
59
+ # the default rendering time in ms
60
+ # increase if you need to render very complex pages
61
+ # config.rendering_time = 1000
62
+
63
+ # change the viewport size. If you are rendering pages that have
64
+ # flexible page width and height then you may need to set this
65
+ # to enforce a specific size
66
+ # config.viewport_width = 600
67
+ # config.viewport_height = 600
68
+
69
+ # the timeout for the phantomjs rendering process in ms
70
+ # this always needs to be higher than rendering_time
71
+ # config.rendering_timeout = 90000
72
+
73
+ # maximum number of redirects to follow
74
+ # by default Shrimp does not follow any redirects which means that
75
+ # if the server responds with non HTTP 200 an error will be returned
76
+ # config.max_redirect_count = 0
77
+
78
+ # the path to a json configuration file for command-line options
79
+ # config.command_config_file = "#{Rails.root.join('config', 'shrimp', 'config.json')}"
80
+ end
81
+ ```
82
+
83
+ ### Command Configuration
84
+
85
+ ```
86
+ {
87
+ "diskCache": false,
88
+ "ignoreSslErrors": false,
89
+ "loadImages": true,
90
+ "outputEncoding": "utf8",
91
+ "webSecurity": true
92
+ }
93
+ ```
94
+
95
+ ## Middleware
96
+
97
+ Shrimp comes with a middleware that allows users to get a PDF view of any page on your site by appending .pdf to the URL.
98
+
99
+ ### Middleware Setup
100
+
101
+ **Non-Rails Rack apps**
102
+
103
+ # in config.ru
104
+ require 'shrimp'
105
+ use Shrimp::Middleware
106
+
107
+ **Rails apps**
108
+
109
+ # in application.rb(Rails3) or environment.rb(Rails2)
110
+ require 'shrimp'
111
+ config.middleware.use Shrimp::Middleware
112
+
113
+ **With Shrimp options**
114
+
115
+ # options will be passed to Shrimp::Phantom.new
116
+ config.middleware.use Shrimp::Middleware, :margin => '0.5cm', :format => 'Letter'
117
+
118
+ **With conditions to limit routes that can be generated in pdf**
119
+
120
+ # conditions can be regexps (either one or an array)
121
+ config.middleware.use Shrimp::Middleware, {}, :only => %r[^/public]
122
+ config.middleware.use Shrimp::Middleware, {}, :only => [%r[^/invoice], %r[^/public]]
123
+
124
+ # conditions can be strings (either one or an array)
125
+ config.middleware.use Shrimp::Middleware, {}, :only => '/public'
126
+ config.middleware.use Shrimp::Middleware, {}, :only => ['/invoice', '/public']
127
+
128
+ # conditions can be regexps (either one or an array)
129
+ config.middleware.use Shrimp::Middleware, {}, :except => [%r[^/prawn], %r[^/secret]]
130
+
131
+ # conditions can be strings (either one or an array)
132
+ config.middleware.use Shrimp::Middleware, {}, :except => ['/secret']
133
+
134
+
135
+ ### Polling
136
+
137
+ To avoid deadlocks, Shrimp::Middleware renders the PDF in a separate process and returns a 503 response with a Retry-After header while rendering is in progress.
138
+ You can set the polling interval and the polling offset in seconds.
139
+
140
+ config.middleware.use Shrimp::Middleware, :polling_interval => 1, :polling_offset => 5
141
+
142
+ ### Caching
143
+
144
+ To avoid rendering the page on each request, you can set the cache TTL in seconds:
145
+
146
+ config.middleware.use Shrimp::Middleware, :cache_ttl => 3600, :out_path => "my/pdf/store"
147
+
148
+
149
+ ### Ajax requests
150
+
151
+ To include some fancy Ajax stuff with jquery
152
+
153
+ ```js
154
+
155
+ var url = '/my_page.pdf'
156
+ var statusCodes = {
157
+ 200: function() {
158
+ return window.location.assign(url);
159
+ },
160
+ 504: function() {
161
+ console.log("Shit's being wired")
162
+ },
163
+ 503: function(jqXHR, textStatus, errorThrown) {
164
+ var wait;
165
+ wait = parseInt(jqXHR.getResponseHeader('Retry-After'));
166
+ return setTimeout(function() {
167
+ return $.ajax({
168
+ url: url,
169
+ statusCode: statusCodes
170
+ });
171
+ }, wait * 1000);
172
+ }
173
+ }
174
+ $.ajax({
175
+ url: url,
176
+ statusCode: statusCodes
177
+ })
178
+
179
+ ```
180
+
181
+ ## Contributing
182
+
183
+ 1. Fork it
184
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
185
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
186
+ 4. Push to the branch (`git push origin my-new-feature`)
187
+ 5. Create new Pull Request
188
+
189
+ ## Copyright
190
+ Shrimp is Copyright © 2012 adeven (Manuel Kniep). It is free software, and may be redistributed under the terms
191
+ specified in the LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task default: :spec
data/lib/shrimp.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'shrimp/version'
2
+ require 'shrimp/source'
3
+ require 'shrimp/phantom'
4
+ require 'shrimp/middleware'
5
+ require 'shrimp/configuration'
@@ -0,0 +1,7 @@
1
+ {
2
+ "diskCache": false,
3
+ "ignoreSslErrors": false,
4
+ "loadImages": true,
5
+ "outputEncoding": "utf8",
6
+ "webSecurity": true
7
+ }
@@ -0,0 +1,55 @@
1
+ require 'tmpdir'
2
+
3
+ module Shrimp
4
+ class Configuration
5
+ attr_accessor :default_options
6
+ attr_writer :phantomjs
7
+
8
+ [:format, :margin, :zoom, :orientation, :tmpdir, :rendering_timeout, :rendering_time, :command_config_file, :viewport_width, :viewport_height, :max_redirect_count].each do |m|
9
+ define_method("#{m}=") do |val|
10
+ @default_options[m]=val
11
+ end
12
+ end
13
+
14
+ def initialize
15
+ @default_options = {
16
+ :format => 'A4',
17
+ :margin => '1cm',
18
+ :zoom => 1,
19
+ :orientation => 'portrait',
20
+ :tmpdir => Dir.tmpdir,
21
+ :rendering_timeout => 90000,
22
+ :rendering_time => 1000,
23
+ :command_config_file => File.expand_path('../config.json', __FILE__),
24
+ :viewport_width => 600,
25
+ :viewport_height => 600,
26
+ :max_redirect_count => 0
27
+ }
28
+ end
29
+
30
+ def phantomjs # Path to the phantomjs executable; a value assigned through phantomjs= takes precedence
31
+ @phantomjs ||= (defined?(Bundler::GemfileError) ? `bundle exec which phantomjs` : `which phantomjs`).chomp # otherwise shells out to `which` (via `bundle exec` when Bundler::GemfileError is defined) and memoizes the chomped output
32
+ end
33
+ end
34
+
35
+ class << self
36
+ attr_accessor :configuration
37
+ end
38
+
39
+ # Configure Phantomjs someplace sensible,
40
+ # like config/initializers/phantomjs.rb
41
+ #
42
+ # @example
43
+ # Shrimp.configure do |config|
44
+ # config.phantomjs = '/usr/local/bin/phantomjs'
45
+ # config.format = 'Letter'
46
+ # end
47
+
48
+ def self.configuration
49
+ @configuration ||= Configuration.new
50
+ end
51
+
52
+ def self.configure
53
+ yield(configuration)
54
+ end
55
+ end
@@ -0,0 +1,175 @@
1
+ module Shrimp
2
+ class Middleware
3
+ def initialize(app, options = { }, conditions = { })
4
+ @app = app
5
+ @options = options
6
+ @conditions = conditions
7
+ @options[:polling_interval] ||= 1
8
+ @options[:polling_offset] ||= 1
9
+ @options[:cache_ttl] ||= 1
10
+ @options[:request_timeout] ||= @options[:polling_interval] * 10
11
+ end
12
+
13
+ def call(env) # Rack entry point: serves, polls for, or starts rendering of a pdf for matching *.pdf paths; everything else is passed to @app
14
+ @request = Rack::Request.new(env) # NOTE(review): request state is kept on the middleware instance — confirm this is safe on threaded servers
15
+ if render_as_pdf? #&& headers['Content-Type'] =~ /text\/html|application\/xhtml\+xml/
16
+ if already_rendered? && (up_to_date?(@options[:cache_ttl]) || @options[:cache_ttl] == 0) # a rendered file exists and is fresh, or cache_ttl is 0
17
+ if File.size(render_to) == 0 # empty output: rasterize.js touches the file when the page could not be loaded
18
+ File.delete(render_to)
19
+ remove_rendering_flag
20
+ return error_response
21
+ end
22
+ return ready_response if env['HTTP_X_REQUESTED_WITH'] # requests carrying X-Requested-With get a small HTML page linking to the pdf instead of the binary
23
+ file = File.open(render_to, "rb")
24
+ body = file.read
25
+ file.close
26
+ File.delete(render_to) if @options[:cache_ttl] == 0 # with cache_ttl 0 the file is removed once it has been read
27
+ remove_rendering_flag
28
+ response = [body]
29
+ headers = { }
30
+ headers["Content-Length"] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s
31
+ headers["Content-Type"] = "application/pdf"
32
+ [200, headers, response]
33
+ else
34
+ if rendering_in_progress? # the session already holds a rendering flag for this file
35
+ if rendering_timed_out?
36
+ remove_rendering_flag # clearing the flag lets the next request start a fresh render
37
+ error_response
38
+ else
39
+ reload_response(@options[:polling_interval])
40
+ end
41
+ else
42
+ File.delete(render_to) if already_rendered? # drop the stale file before rendering again
43
+ set_rendering_flag
44
+ fire_phantom
45
+ reload_response(@options[:polling_offset]) # first response after starting a render uses the polling offset as Retry-After
46
+ end
47
+ end
48
+ else
49
+ @app.call(env)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ # Private: start phantom rendering in a separate process
56
+ def fire_phantom
57
+ Process::detach fork { Phantom.new(@request.url.sub(%r{\.pdf$}, ''), @options, @request.cookies).to_pdf(render_to) }
58
+ end
59
+
60
+ def render_to
61
+ file_name = Digest::MD5.hexdigest(@request.path) + ".pdf"
62
+ file_path = @options[:out_path]
63
+ "#{file_path}/#{file_name}"
64
+ end
65
+
66
+ def already_rendered?
67
+ File.exists?(render_to)
68
+ end
69
+
70
+ def up_to_date?(ttl = 30)
71
+ (Time.now - File.new(render_to).mtime) <= ttl
72
+ end
73
+
74
+
75
+ def remove_rendering_flag
76
+ @request.session["phantom-rendering"] ||={ }
77
+ @request.session["phantom-rendering"].delete(render_to)
78
+ end
79
+
80
+ def set_rendering_flag
81
+ @request.session["phantom-rendering"] ||={ }
82
+ @request.session["phantom-rendering"][render_to] = Time.now
83
+ end
84
+
85
+ def rendering_timed_out?
86
+ Time.now - @request.session["phantom-rendering"][render_to] > @options[:request_timeout]
87
+ end
88
+
89
+ def rendering_in_progress?
90
+ @request.session["phantom-rendering"]||={ }
91
+ @request.session["phantom-rendering"][render_to]
92
+ end
93
+
94
+ def render_as_pdf?
95
+ request_path_is_pdf = !!@request.path.match(%r{\.pdf$})
96
+
97
+ if request_path_is_pdf && @conditions[:only]
98
+ rules = [@conditions[:only]].flatten
99
+ rules.any? do |pattern|
100
+ if pattern.is_a?(Regexp)
101
+ @request.path =~ pattern
102
+ else
103
+ @request.path[0, pattern.length] == pattern
104
+ end
105
+ end
106
+ elsif request_path_is_pdf && @conditions[:except]
107
+ rules = [@conditions[:except]].flatten
108
+ rules.map do |pattern|
109
+ if pattern.is_a?(Regexp)
110
+ return false if @request.path =~ pattern
111
+ else
112
+ return false if @request.path[0, pattern.length] == pattern
113
+ end
114
+ end
115
+ return true
116
+ else
117
+ request_path_is_pdf
118
+ end
119
+ end
120
+
121
+ def concat(accepts, type)
122
+ (accepts || '').split(',').unshift(type).compact.join(',')
123
+ end
124
+
125
+ def reload_response(interval=1)
126
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
127
+ <html>
128
+ <head>
129
+ </head>
130
+ <body onLoad="setTimeout(function(){ window.location.reload()}, #{interval * 1000});">
131
+ <h2>Preparing pdf... </h2>
132
+ </body>
133
+ </ html>
134
+ HTML
135
+ headers = { }
136
+ headers["Content-Length"] = body.size.to_s
137
+ headers["Content-Type"] = "text/html"
138
+ headers["Retry-After"] = interval.to_s
139
+
140
+ [503, headers, [body]]
141
+ end
142
+
143
+ def ready_response
144
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
145
+ <html>
146
+ <head>
147
+ </head>
148
+ <body>
149
+ <a href="#{@request.path}">PDF ready here</a>
150
+ </body>
151
+ </ html>
152
+ HTML
153
+ headers = { }
154
+ headers["Content-Length"] = body.size.to_s
155
+ headers["Content-Type"] = "text/html"
156
+ [200, headers, [body]]
157
+ end
158
+
159
+ def error_response
160
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
161
+ <html>
162
+ <head>
163
+ </head>
164
+ <body>
165
+ <h2>Sorry request timed out... </h2>
166
+ </body>
167
+ </ html>
168
+ HTML
169
+ headers = { }
170
+ headers["Content-Length"] = body.size.to_s
171
+ headers["Content-Type"] = "text/html"
172
+ [504, headers, [body]]
173
+ end
174
+ end
175
+ end
@@ -0,0 +1,153 @@
1
+ require 'uri'
2
+ require 'json'
3
+ require 'shellwords'
4
+
5
+ module Shrimp
6
+ class NoExecutableError < StandardError
7
+ def initialize
8
+ msg = "No phantomjs executable found at #{Shrimp.configuration.phantomjs}\n"
9
+ msg << ">> Please install phantomjs - http://phantomjs.org/download.html"
10
+ super(msg)
11
+ end
12
+ end
13
+
14
+ class ImproperSourceError < StandardError
15
+ def initialize(msg = nil)
16
+ super("Improper Source: #{msg}")
17
+ end
18
+ end
19
+
20
+ class RenderingError < StandardError
21
+ def initialize(msg = nil)
22
+ super("Rendering Error: #{msg}")
23
+ end
24
+ end
25
+
26
+ class Phantom
27
+ attr_accessor :source, :configuration, :outfile
28
+ attr_reader :options, :cookies, :result, :error
29
+ SCRIPT_FILE = File.expand_path('../rasterize.js', __FILE__)
30
+
31
+ # Public: Runs the phantomjs binary
32
+ #
33
+ # Returns the stdout output of phantomjs
34
+ def run
35
+ @error = nil
36
+ @result = `#{cmd}`
37
+ unless $?.exitstatus == 0
38
+ @error = @result
39
+ @result = nil
40
+ end
41
+ @result
42
+ end
43
+
44
+ def run!
45
+ @error = nil
46
+ @result = `#{cmd}`
47
+ unless $?.exitstatus == 0
48
+ @error = @result
49
+ @result = nil
50
+ raise RenderingError.new(@error)
51
+ end
52
+ @result
53
+ end
54
+
55
+ # Public: Returns the phantom rasterize command
56
+ def cmd # builds the command line as one space-joined string; arguments are positional and read by rasterize.js via system.args
57
+ cookie_file = dump_cookies # side effect: writes the cookies to a file under options[:tmpdir] on every call
58
+ format, zoom, margin, orientation = options[:format], options[:zoom], options[:margin], options[:orientation]
59
+ rendering_time, timeout = options[:rendering_time], options[:rendering_timeout]
60
+ viewport_width, viewport_height = options[:viewport_width], options[:viewport_height]
61
+ max_redirect_count = options[:max_redirect_count]
62
+ @outfile ||= "#{options[:tmpdir]}/#{Digest::MD5.hexdigest((Time.now.to_i + rand(9001)).to_s)}.pdf" # NOTE(review): no `require 'digest/md5'` is visible in this file — confirm Digest is loaded elsewhere
63
+ command_config_file = "--config=#{options[:command_config_file]}"
64
+ [
65
+ Shrimp.configuration.phantomjs,
66
+ command_config_file,
67
+ SCRIPT_FILE,
68
+ @source.to_s.shellescape, # NOTE(review): only the source is shell-escaped; paths or option values containing spaces or shell metacharacters would break the command
69
+ @outfile,
70
+ format,
71
+ zoom,
72
+ margin,
73
+ orientation,
74
+ cookie_file,
75
+ rendering_time,
76
+ timeout,
77
+ viewport_width,
78
+ viewport_height,
79
+ max_redirect_count
80
+ ].join(" ")
81
+ end
82
+
83
+ # Public: initializes a new Phantom Object
84
+ #
85
+ # url_or_file - The url of the html document to render
86
+ # options - a hash with options for rendering
87
+ # * format - the paper format for the output eg: "5in*7.5in", "10cm*20cm", "A4", "Letter"
88
+ # * zoom - the viewport zoom factor
89
+ # * margin - the margins for the pdf
90
+ # * command_config_file - the path to a json configuration file for command-line options
91
+ # cookies - hash with cookies to use for rendering
92
+ # outfile - optional path for the output file a Tempfile will be created if not given
93
+ #
94
+ # Returns self
95
+ def initialize(url_or_file, options = { }, cookies={ }, outfile = nil)
96
+ @source = Source.new(url_or_file)
97
+ @options = Shrimp.configuration.default_options.merge(options)
98
+ @cookies = cookies
99
+ @outfile = File.expand_path(outfile) if outfile
100
+ raise NoExecutableError.new unless File.exists?(Shrimp.configuration.phantomjs)
101
+ end
102
+
103
+ # Public: renders to pdf
104
+ # path - the destination path defaults to outfile
105
+ #
106
+ # Returns the path to the pdf file
107
+ def to_pdf(path=nil)
108
+ @outfile = File.expand_path(path) if path
109
+ self.run
110
+ @outfile
111
+ end
112
+
113
+ # Public: renders to pdf
114
+ # path - the destination path defaults to outfile
115
+ #
116
+ # Returns a File Handle of the Resulting pdf
117
+ def to_file(path=nil)
118
+ self.to_pdf(path)
119
+ File.new(@outfile)
120
+ end
121
+
122
+ # Public: renders to pdf
123
+ # path - the destination path defaults to outfile
124
+ #
125
+ # Returns the binary string of the pdf
126
+ def to_string(path=nil)
127
+ File.open(self.to_pdf(path)).read
128
+ end
129
+
130
+ def to_pdf!(path=nil)
131
+ @outfile = File.expand_path(path) if path
132
+ self.run!
133
+ @outfile
134
+ end
135
+
136
+ def to_file!(path=nil)
137
+ self.to_pdf!(path)
138
+ File.new(@outfile)
139
+ end
140
+
141
+ def to_string!(path=nil)
142
+ File.open(self.to_pdf!(path)).read
143
+ end
144
+
145
+ private
146
+
147
+ def dump_cookies
148
+ host = @source.url? ? URI::parse(@source.to_s).host : "/"
149
+ json = @cookies.inject([]) { |a, (k, v)| a.push({ :name => k, :value => v, :domain => host }); a }.to_json
150
+ File.open("#{options[:tmpdir]}/#{rand}.cookies", 'w') { |f| f.puts json; f }.path
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,143 @@
1
+ var
2
+ webpage = require('webpage'),
3
+ fs = require('fs'),
4
+ system = require('system'),
5
+ margin = system.args[5] || '0cm',
6
+ orientation = system.args[6] || 'portrait',
7
+ cookie_file = system.args[7],
8
+ render_time = system.args[8] || 10000 ,
9
+ time_out = system.args[9] || 90000 ,
10
+ viewport_width = system.args[10] || 600,
11
+ viewport_height = system.args[11] || 600,
12
+ redirects_num = system.args[12] || 0,
13
+ cookies = {},
14
+ address, output, size;
15
+
16
+ function error(msg) {
17
+ msg = msg || 'Unknown error';
18
+ console.log(msg);
19
+ phantom.exit(1);
20
+ throw msg;
21
+ }
22
+
23
+ function print_usage() {
24
+ console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom] [margin] [orientation] [cookie_file] [render_time] [time_out] [viewport_width] [viewport_height] [max_redirects_count]');
25
+ console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
26
+ }
27
+
28
+ window.setTimeout(function () {
29
+ error("Shit's being weird no result within: " + time_out + "ms");
30
+ }, time_out);
31
+
32
+ function renderUrl(url, output, options) {
33
+ options = options || {};
34
+
35
+ var statusCode,
36
+ page = webpage.create();
37
+
38
+ for (var k in options) {
39
+ if (options.hasOwnProperty(k)) {
40
+ page[k] = options[k];
41
+ }
42
+ }
43
+
44
+ // determine the statusCode
45
+ page.onResourceReceived = function (resource) {
46
+ if (resource.url == url) {
47
+ statusCode = resource.status;
48
+ }
49
+ };
50
+
51
+ page.onResourceError = function (resourceErrorOrId, errorString) {
52
+ //phantomjs 1.9.1 and above uses a resourceError object
53
+ if(typeof resourceErrorOrId == Object) {
54
+ resourceError = resourceErrorOrId;
55
+ error(resourceError.errorString + ' (URL: ' + resourceError.url + ')');
56
+ //phantomjs 1.9.0 and below pass the request id and the error string
57
+ } else {
58
+ error(errorString)
59
+ }
60
+ };
61
+
62
+ page.onNavigationRequested = function (redirect_url, type, willNavigate, main) {
63
+ if (main) {
64
+ if (redirect_url !== url) {
65
+ page.close();
66
+
67
+ if (redirects_num-- >= 0) {
68
+ renderUrl(redirect_url, output, options);
69
+ } else {
70
+ error(url + ' redirects to ' + redirect_url + ' after maximum number of redirects reached');
71
+ }
72
+ }
73
+ }
74
+ };
75
+
76
+ page.open(url, function (status) {
77
+ if (status !== 'success' || (statusCode != 200 && statusCode != null)) {
78
+ if (fs.exists(output)) {
79
+ fs.remove(output);
80
+ }
81
+ try {
82
+ fs.touch(output);
83
+ } catch (e) {
84
+ console.log(e);
85
+ }
86
+
87
+ error('Unable to load the URL: ' + url + ' (HTTP ' + statusCode + ')');
88
+ } else {
89
+ window.setTimeout(function () {
90
+ page.render(output + '_tmp.pdf');
91
+
92
+ if (fs.exists(output)) {
93
+ fs.remove(output);
94
+ }
95
+
96
+ try {
97
+ fs.move(output + '_tmp.pdf', output);
98
+ } catch (e) {
99
+ error(e);
100
+ }
101
+ console.log('Rendered to: ' + output, new Date().getTime());
102
+ phantom.exit(0);
103
+ }, render_time);
104
+ }
105
+ });
106
+ }
107
+
108
+ if (cookie_file) {
109
+ try {
110
+ f = fs.open(cookie_file, "r");
111
+ cookies = JSON.parse(f.read());
112
+ fs.remove(cookie_file);
113
+ } catch (e) {
114
+ console.log(e);
115
+ }
116
+ phantom.cookiesEnabled = true;
117
+ phantom.cookies = cookies;
118
+ }
119
+
120
+ if (system.args.length < 3 || system.args.length > 13) {
121
+ print_usage() && phantom.exit(2);
122
+ } else {
123
+ address = system.args[1];
124
+ output = system.args[2];
125
+
126
+ page_options = {
127
+ viewportSize: {
128
+ width: viewport_width,
129
+ height: viewport_height
130
+ }
131
+ };
132
+
133
+ if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
134
+ size = system.args[3].split('*');
135
+ page_options.paperSize = size.length === 2 ? { width:size[0], height:size[1], margin:'0px' }
136
+ : { format:system.args[3], orientation:orientation, margin:margin };
137
+ }
138
+ if (system.args.length > 4) {
139
+ page_options.zoomFactor = system.args[4];
140
+ }
141
+
142
+ renderUrl(address, output, page_options);
143
+ }
@@ -0,0 +1,25 @@
1
+ require 'uri'
2
+ module Shrimp
3
+ class Source
4
+ def initialize(url_or_file)
5
+ @source = url_or_file
6
+ raise ImproperSourceError.new unless url? || file?
7
+ end
8
+
9
+ def url?
10
+ @source.is_a?(String) && @source.match(URI::regexp)
11
+ end
12
+
13
+ def file?
14
+ @source.kind_of?(File)
15
+ end
16
+
17
+ def html?
18
+ !(url? || file?)
19
+ end
20
+
21
+ def to_s
22
+ file? ? @source.path : @source
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,3 @@
1
+ module Shrimp
2
+ VERSION = "0.0.5"
3
+ end
data/shrimp.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'shrimp/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "shrimp-orangejulius"
8
+ gem.version = Shrimp::VERSION
9
+ gem.authors = ["Manuel Kniep"]
10
+ gem.email = %w(manuel@adeven.com)
11
+ gem.description = %q{html to pdf with phantomjs}
12
+ gem.summary = %q{a phantomjs based pdf renderer}
13
+ gem.homepage = "http://github.com/adeven/shrimp"
14
+ gem.files = `git ls-files`.split($/) # package every file tracked by git
15
+ gem.files.reject! { |fn| fn.include? "script" } # drop any tracked file whose path contains "script"
16
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = %w(lib)
19
+ gem.requirements << 'phantomjs, v1.6 or greater'
20
+ gem.add_runtime_dependency "json"
21
+
22
+ # Development Dependencies
23
+ gem.add_development_dependency(%q<rake>, [">=0.9.2"])
24
+ gem.add_development_dependency(%q<rspec>, [">= 2.2.0"])
25
+ gem.add_development_dependency(%q<rack-test>, [">= 0.5.6"])
26
+ gem.add_development_dependency(%q<rack>, ["= 1.4.1"]) # NOTE(review): Shrimp::Middleware uses Rack::Request at runtime, yet rack is only a development dependency — confirm this is intended
27
+ end
@@ -0,0 +1,123 @@
1
+ require 'spec_helper'
2
+
3
+ def app;
4
+ Rack::Lint.new(@app)
5
+ end
6
+
7
+ def options
8
+ { :margin => "1cm", :out_path => Dir.tmpdir,
9
+ :polling_offset => 10, :polling_interval => 1, :cache_ttl => 3600,
10
+ :request_timeout => 1 }
11
+ end
12
+
13
+ def mock_app(options = { }, conditions = { })
14
+ main_app = lambda { |env|
15
+ headers = { 'Content-Type' => "text/html" }
16
+ [200, headers, ['Hello world!']]
17
+ }
18
+
19
+ @middleware = Shrimp::Middleware.new(main_app, options, conditions)
20
+ @app = Rack::Session::Cookie.new(@middleware, :key => 'rack.session')
21
+ end
22
+
23
+
24
+ describe Shrimp::Middleware do
25
+ before { mock_app(options) }
26
+
27
+ context "matching pdf" do
28
+ it "should render as pdf" do
29
+ get '/test.pdf'
30
+ @middleware.send(:'render_as_pdf?').should be true
31
+ end
32
+ it "should return 503 the first time" do
33
+ get '/test.pdf'
34
+ last_response.status.should eq 503
35
+ last_response.header["Retry-After"].should eq "10"
36
+ end
37
+
38
+ it "should return 503 the with polling interval the second time" do
39
+ get '/test.pdf'
40
+ get '/test.pdf'
41
+ last_response.status.should eq 503
42
+ last_response.header["Retry-After"].should eq "1"
43
+ end
44
+
45
+ it "should set render to to outpath" do
46
+ get '/test.pdf'
47
+ @middleware.send(:render_to).should match (Regexp.new("^#{options[:out_path]}"))
48
+ end
49
+
50
+ it "should return 504 on timeout" do
51
+ get '/test.pdf'
52
+ sleep 1
53
+ get '/test.pdf'
54
+ last_response.status.should eq 504
55
+ end
56
+
57
+ it "should retry rendering after timeout" do
58
+ get '/test.pdf'
59
+ sleep 1
60
+ get '/test.pdf'
61
+ get '/test.pdf'
62
+ last_response.status.should eq 503
63
+ end
64
+
65
+ it "should return a pdf with 200 after rendering" do
66
+ mock_file = double(File, :read => "Hello World", :close => true, :mtime => Time.now)
67
+ File.should_receive(:'exists?').and_return true
68
+ File.should_receive(:'size').and_return 1000
69
+ File.should_receive(:'open').and_return mock_file
70
+ File.should_receive(:'new').and_return mock_file
71
+ get '/test.pdf'
72
+ last_response.status.should eq 200
73
+ last_response.body.should eq "Hello World"
74
+ end
75
+
76
+
77
+ end
78
+ context "not matching pdf" do
79
+ it "should skip pdf rendering" do
80
+ get 'http://www.example.org/test'
81
+ last_response.body.should include "Hello world!"
82
+ @middleware.send(:'render_as_pdf?').should be false
83
+ end
84
+ end
85
+ end
86
+
87
+ describe "Conditions" do
88
+ context "only" do
89
+ before { mock_app(options, :only => [%r[^/invoice], %r[^/public]]) }
90
+ it "render pdf for set only option" do
91
+ get '/invoice/test.pdf'
92
+ @middleware.send(:'render_as_pdf?').should be true
93
+ end
94
+
95
+ it "render pdf for set only option" do
96
+ get '/public/test.pdf'
97
+ @middleware.send(:'render_as_pdf?').should be true
98
+ end
99
+
100
+ it "not render pdf for any other path" do
101
+ get '/secret/test.pdf'
102
+ @middleware.send(:'render_as_pdf?').should be false
103
+ end
104
+ end
105
+
106
+ context "except" do
107
+ before { mock_app(options, :except => %w(/secret)) }
108
+ it "render pdf for set only option" do
109
+ get '/invoice/test.pdf'
110
+ @middleware.send(:'render_as_pdf?').should be true
111
+ end
112
+
113
+ it "render pdf for set only option" do
114
+ get '/public/test.pdf'
115
+ @middleware.send(:'render_as_pdf?').should be true
116
+ end
117
+
118
+ it "not render pdf for any other path" do
119
+ get '/secret/test.pdf'
120
+ @middleware.send(:'render_as_pdf?').should be false
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,155 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ def valid_pdf(io)
5
+ case io
6
+ when File
7
+ io.read[0...4] == "%PDF"
8
+ when String
9
+ io[0...4] == "%PDF" || File.open(io).read[0...4] == "%PDF"
10
+ end
11
+ end
12
+
13
+ describe Shrimp::Phantom do
14
+ let(:testfile) { File.expand_path('../test_file.html', __FILE__) }
15
+
16
+ before do
17
+ Shrimp.configure { |config| config.rendering_time = 1000 }
18
+ end
19
+
20
+ # describe ".quote_arg" do
21
+ # subject { described_class }
22
+
23
+ # let(:arg) { "test" }
24
+
25
+ # it "wraps the argument with single quotes" do
26
+ # subject.quote_arg(arg).should eq "'test'"
27
+ # end
28
+
29
+ # context "when the argument contains single quotes" do
30
+ # let(:arg) { "'te''st'" }
31
+
32
+ # it "escapes them" do
33
+ # %x(echo #{subject.quote_arg(arg)}).strip.should eq arg
34
+ # end
35
+ # end
36
+ # end
37
+
38
+ it "should initialize attributes" do # constructor arguments must be readable back through source/options/outfile
39
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { }, "#{Dir.tmpdir}/test.pdf") # args: source, options hash, an empty third hash (purpose not visible here), output path
40
+ phantom.source.to_s.should eq "file://#{testfile}"
41
+ phantom.options[:margin].should eq "2cm"
42
+ phantom.outfile.should eq "#{Dir.tmpdir}/test.pdf"
43
+ end
44
+
45
+ it "should render a pdf file" do
46
+ #phantom = Shrimp::Phantom.new("file://#{@path}")
47
+ #phantom.to_pdf("#{Dir.tmpdir}/test.pdf").first should eq "#{Dir.tmpdir}/test.pdf"
48
+ end
49
+
50
+ it "should accept a local file url" do # a file:// source built from the fixture path must report url?
51
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
52
+ phantom.source.should be_url # be_url delegates to source.url?
53
+ end
54
+
55
+ it "should accept a URL as source" do # an http:// source must report url?; only the object is built here, nothing is rendered
56
+ phantom = Shrimp::Phantom.new("http://google.com")
57
+ phantom.source.should be_url # be_url delegates to source.url?
58
+ end
59
+
60
+ it "should parse options into a cmd line" do # inspects the generated command string only; nothing is executed
61
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm", :max_redirect_count => 10 }, { }, "#{Dir.tmpdir}/test.pdf")
62
+ phantom.cmd.should include "test.pdf A4 1 2cm portrait" # outfile followed by what look like format, zoom, margin, orientation — TODO confirm the order against rasterize.js
63
+ phantom.cmd.should include "file://#{testfile}"
64
+ phantom.cmd.should include "lib/shrimp/rasterize.js"
65
+ phantom.cmd.should end_with " 10" # the :max_redirect_count value is expected as the final argument
66
+ end
67
+
68
+ it "should properly escape arguments" do # guards against shell injection through the source URI
69
+ malicious_uri = "file:///hello';shutdown" # the embedded ' and ; would terminate a naively single-quoted shell argument
70
+ bogus_phantom = Shrimp::Phantom.new(malicious_uri)
71
+
72
+ bogus_phantom.cmd.should_not include malicious_uri # the raw string must not appear verbatim in the command
73
+
74
+ Shrimp.configuration.stub(:phantomjs).and_return "echo" # replace the phantomjs executable with echo so the command can be run safely
75
+ %x(#{bogus_phantom.cmd}).strip.should include malicious_uri # after the shell parses the command, the original URI must come back intact
76
+ end
77
+
78
+ context "rendering to a file" do
79
+ before do
80
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { }, "#{Dir.tmpdir}/test.pdf")
81
+ @result = phantom.to_file
82
+ end
83
+
84
+ it "should return a File" do
85
+ @result.should be_a File
86
+ end
87
+
88
+ it "should be a valid pdf" do
89
+ valid_pdf(@result)
90
+ end
91
+ end
92
+
93
+ context "rendering to a pdf" do
94
+ before do
95
+ @phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { })
96
+ @result = @phantom.to_pdf("#{Dir.tmpdir}/test.pdf")
97
+ end
98
+
99
+ it "should return a path to pdf" do
100
+ @result.should be_a String
101
+ @result.should eq "#{Dir.tmpdir}/test.pdf"
102
+ end
103
+
104
+ it "should be a valid pdf" do
105
+ valid_pdf(@result)
106
+ end
107
+ end
108
+
109
+ context "rendering to a String" do
110
+ before do
111
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { })
112
+ @result = phantom.to_string("#{Dir.tmpdir}/test.pdf")
113
+ end
114
+
115
+ it "should return the File IO String" do
116
+ @result.should be_a String
117
+ end
118
+
119
+ it "should be a valid pdf" do
120
+ valid_pdf(@result)
121
+ end
122
+ end
123
+
124
+ context "Error" do # failures are checked through the nil result and phantom.error rather than through exceptions
125
+ it "should return result nil" do
126
+ phantom = Shrimp::Phantom.new("file://foo/bar") # NOTE(review): two slashes here vs. three in the examples below — confirm this is intentional
127
+ @result = phantom.run
128
+ @result.should be_nil
129
+ end
130
+
131
+ it "should be unable to load the address" do
132
+ phantom = Shrimp::Phantom.new("file:///foo/bar") # a path expected not to exist
133
+ phantom.run
134
+ phantom.error.should include "Error opening /foo/bar: No such file or directory (URL: file:///foo/bar)"
135
+ end
136
+
137
+ it "should be unable to copy file" do
138
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
139
+ phantom.to_pdf("/foo/bar/") # destination expected not to be writable/existent
140
+ phantom.error.should include "Unable to copy file "
141
+ end
142
+ end
143
+
144
+ context "Error Bang!" do # the bang variants are expected to raise Shrimp::RenderingError on failure
145
+ it "should be unable to load the address" do
146
+ phantom = Shrimp::Phantom.new("file:///foo/bar") # a path expected not to exist
147
+ expect { phantom.run! }.to raise_error Shrimp::RenderingError
148
+ end
149
+
150
+ it "should be unable to copy file" do
151
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
152
+ expect { phantom.to_pdf!("/foo/bar/") }.to raise_error Shrimp::RenderingError # destination expected not to be writable/existent
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,16 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Shrimp::Source do
5
+ context "url" do
6
+ it "should match file urls" do
7
+ source = Shrimp::Source.new("file:///test/test.html")
8
+ source.should be_url
9
+ end
10
+
11
+ it "should match http urls" do
12
+ source = Shrimp::Source.new("http://test/test.html")
13
+ source.should be_url
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,6 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <h1>Hello World!</h1>
5
+ </body>
6
+ </html>
@@ -0,0 +1,7 @@
1
+ require 'rack/test'
2
+ require 'shrimp'
3
+
4
+ RSpec.configure do |config|
5
+ include Rack::Test::Methods
6
+ end
7
+
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shrimp-orangejulius
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Kniep
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.2.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 2.2.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: rack-test
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.6
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.5.6
69
+ - !ruby/object:Gem::Dependency
70
+ name: rack
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.4.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.4.1
83
+ description: html to pdf with phantomjs
84
+ email:
85
+ - manuel@adeven.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".travis.yml"
92
+ - ChangeLog.md
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - lib/shrimp.rb
98
+ - lib/shrimp/config.json
99
+ - lib/shrimp/configuration.rb
100
+ - lib/shrimp/middleware.rb
101
+ - lib/shrimp/phantom.rb
102
+ - lib/shrimp/rasterize.js
103
+ - lib/shrimp/source.rb
104
+ - lib/shrimp/version.rb
105
+ - shrimp.gemspec
106
+ - spec/shrimp/middleware_spec.rb
107
+ - spec/shrimp/phantom_spec.rb
108
+ - spec/shrimp/source_spec.rb
109
+ - spec/shrimp/test_file.html
110
+ - spec/spec_helper.rb
111
+ homepage: http://github.com/adeven/shrimp
112
+ licenses: []
113
+ metadata: {}
114
+ post_install_message:
115
+ rdoc_options: []
116
+ require_paths:
117
+ - lib
118
+ required_ruby_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ required_rubygems_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements:
129
+ - phantomjs, v1.6 or greater
130
+ rubyforge_project:
131
+ rubygems_version: 2.2.2
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: a phantomjs based pdf renderer
135
+ test_files:
136
+ - spec/shrimp/middleware_spec.rb
137
+ - spec/shrimp/phantom_spec.rb
138
+ - spec/shrimp/source_spec.rb
139
+ - spec/shrimp/test_file.html
140
+ - spec/spec_helper.rb