shrimp-orangejulius 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: bcb06ce8ced640f4b8f73bd159d06f784a28faeb
4
+ data.tar.gz: 33f37c32163be0f438763dbcd6f99e377d410ddb
5
+ SHA512:
6
+ metadata.gz: f6586310d4c7bf3302b8ef1b36fdd8632cc76fbb6bd0eedaf01d3e116819036b0de8960add26c36de16d836bcff900fa8ad356888fcaad33a3154753bc289819
7
+ data.tar.gz: 77f7ef5d6fc83dcc5144e0d43cb8d7224da878252606997ce6a3b1e10befd1ecf12bd58757b65c0809070417179226d541bdd1284e87eac210a8c60c4a5d1e00
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.travis.yml ADDED
@@ -0,0 +1,9 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - rbx-2
5
+ - 2.0.0
6
+ - 2.1.1
7
+ matrix:
8
+ allow_failures:
9
+ - rvm: rbx-2
data/ChangeLog.md ADDED
@@ -0,0 +1,9 @@
1
+ ## ChangeLog
2
+
3
+ 2012-12-18: Version 0.0.2
4
+ Improved Error handling
5
+ Improved Readme
6
+ Minor Bug fixes
7
+
8
+ 2012-12-17: Version 0.0.1
9
+ Initial launch.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in shrimp.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 adeven GmbH Manuel Kniep
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,191 @@
1
+ # Shrimp
2
+ [![Build Status](https://travis-ci.org/adjust/shrimp.png?branch=master)](https://travis-ci.org/adjust/shrimp)
3
+ Creates PDFs from URLs using phantomjs
4
+
5
+ Read our [blogpost](http://big-elephants.com/2012-12/pdf-rendering-with-phantomjs/) about how it works.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'shrimp'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install shrimp
20
+
21
+
22
+ ### Phantomjs
23
+
24
+ See http://phantomjs.org/download.html on how to install phantomjs
25
+
26
+ ## Usage
27
+
28
+ ```
29
+ require 'shrimp'
30
+ url = 'http://www.google.com'
31
+ options = { :margin => "1cm"}
32
+ Shrimp::Phantom.new(url, options).to_pdf("~/output.pdf")
33
+ ```
34
+ ## Configuration
35
+
36
+ ```
37
+ Shrimp.configure do |config|
38
+
39
+ # The path to the phantomjs executable
40
+ # defaults to the output of `which phantomjs`
41
+ # config.phantomjs = '/usr/local/bin/phantomjs'
42
+
43
+ # the default pdf output format
44
+ # e.g. "5in*7.5in", "10cm*20cm", "A4", "Letter"
45
+ # config.format = 'A4'
46
+
47
+ # the default margin
48
+ # config.margin = '1cm'
49
+
50
+ # the zoom factor
51
+ # config.zoom = 1
52
+
53
+ # the page orientation 'portrait' or 'landscape'
54
+ # config.orientation = 'portrait'
55
+
56
+ # a temporary dir used to store tempfiles
57
+ # config.tmpdir = Dir.tmpdir
58
+
59
+ # the default rendering time in ms
60
+ # increase if you need to render very complex pages
61
+ # config.rendering_time = 1000
62
+
63
+ # change the viewport size. If you are rendering pages that have
64
+ # flexible page width and height then you may need to set this
65
+ # to enforce a specific size
66
+ # config.viewport_width = 600
67
+ # config.viewport_height = 600
68
+
69
+ # the timeout for the phantomjs rendering process in ms
70
+ # this always needs to be higher than rendering_time
71
+ # config.rendering_timeout = 90000
72
+
73
+ # maximum number of redirects to follow
74
+ # by default Shrimp does not follow any redirects which means that
75
+ # if the server responds with non HTTP 200 an error will be returned
76
+ # config.max_redirect_count = 0
77
+
78
+ # the path to a json configuration file for command-line options
79
+ # config.command_config_file = "#{Rails.root.join('config', 'shrimp', 'config.json')}"
80
+ end
81
+ ```
82
+
83
+ ### Command Configuration
84
+
85
+ ```
86
+ {
87
+ "diskCache": false,
88
+ "ignoreSslErrors": false,
89
+ "loadImages": true,
90
+ "outputEncoding": "utf8",
91
+ "webSecurity": true
92
+ }
93
+ ```
94
+
95
+ ## Middleware
96
+
97
+ Shrimp comes with a middleware that allows users to get a PDF view of any page on your site by appending .pdf to the URL.
98
+
99
+ ### Middleware Setup
100
+
101
+ **Non-Rails Rack apps**
102
+
103
+ # in config.ru
104
+ require 'shrimp'
105
+ use Shrimp::Middleware
106
+
107
+ **Rails apps**
108
+
109
+ # in application.rb(Rails3) or environment.rb(Rails2)
110
+ require 'shrimp'
111
+ config.middleware.use Shrimp::Middleware
112
+
113
+ **With Shrimp options**
114
+
115
+ # options will be passed to Shrimp::Phantom.new
116
+ config.middleware.use Shrimp::Middleware, :margin => '0.5cm', :format => 'Letter'
117
+
118
+ **With conditions to limit routes that can be generated in pdf**
119
+
120
+ # conditions can be regexps (either one or an array)
121
+ config.middleware.use Shrimp::Middleware, {}, :only => %r[^/public]
122
+ config.middleware.use Shrimp::Middleware, {}, :only => [%r[^/invoice], %r[^/public]]
123
+
124
+ # conditions can be strings (either one or an array)
125
+ config.middleware.use Shrimp::Middleware, {}, :only => '/public'
126
+ config.middleware.use Shrimp::Middleware, {}, :only => ['/invoice', '/public']
127
+
128
+ # conditions can be regexps (either one or an array)
129
+ config.middleware.use Shrimp::Middleware, {}, :except => [%r[^/prawn], %r[^/secret]]
130
+
131
+ # conditions can be strings (either one or an array)
132
+ config.middleware.use Shrimp::Middleware, {}, :except => ['/secret']
133
+
134
+
135
+ ### Polling
136
+
137
+ To avoid deadlocks, Shrimp::Middleware renders the PDF in a separate process, returning a 503 response with a Retry-After header.
138
+ You can set up the polling interval and the polling offset in seconds.
139
+
140
+ config.middleware.use Shrimp::Middleware, :polling_interval => 1, :polling_offset => 5
141
+
142
+ ### Caching
143
+
144
+ To avoid rendering the page on each request, you can set the cache TTL in seconds:
145
+
146
+ config.middleware.use Shrimp::Middleware, :cache_ttl => 3600, :out_path => "my/pdf/store"
147
+
148
+
149
+ ### Ajax requests
150
+
151
+ To include some fancy Ajax stuff with jquery
152
+
153
+ ```js
154
+
155
+ var url = '/my_page.pdf'
156
+ var statusCodes = {
157
+ 200: function() {
158
+ return window.location.assign(url);
159
+ },
160
+ 504: function() {
161
+ console.log("Shit's being wired")
162
+ },
163
+ 503: function(jqXHR, textStatus, errorThrown) {
164
+ var wait;
165
+ wait = parseInt(jqXHR.getResponseHeader('Retry-After'));
166
+ return setTimeout(function() {
167
+ return $.ajax({
168
+ url: url,
169
+ statusCode: statusCodes
170
+ });
171
+ }, wait * 1000);
172
+ }
173
+ }
174
+ $.ajax({
175
+ url: url,
176
+ statusCode: statusCodes
177
+ })
178
+
179
+ ```
180
+
181
+ ## Contributing
182
+
183
+ 1. Fork it
184
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
185
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
186
+ 4. Push to the branch (`git push origin my-new-feature`)
187
+ 5. Create new Pull Request
188
+
189
+ ## Copyright
190
+ Shrimp is Copyright © 2012 adeven (Manuel Kniep). It is free software, and may be redistributed under the terms
191
+ specified in the LICENSE file.
data/Rakefile ADDED
@@ -0,0 +1,5 @@
1
+ require "bundler/gem_tasks"
2
+ require 'rspec/core/rake_task'
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+ task default: :spec
data/lib/shrimp.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'shrimp/version'
2
+ require 'shrimp/source'
3
+ require 'shrimp/phantom'
4
+ require 'shrimp/middleware'
5
+ require 'shrimp/configuration'
@@ -0,0 +1,7 @@
1
+ {
2
+ "diskCache": false,
3
+ "ignoreSslErrors": false,
4
+ "loadImages": true,
5
+ "outputEncoding": "utf8",
6
+ "webSecurity": true
7
+ }
@@ -0,0 +1,55 @@
1
+ require 'tmpdir'
2
+
3
+ module Shrimp
4
+ class Configuration
5
+ attr_accessor :default_options
6
+ attr_writer :phantomjs
7
+
8
+ [:format, :margin, :zoom, :orientation, :tmpdir, :rendering_timeout, :rendering_time, :command_config_file, :viewport_width, :viewport_height, :max_redirect_count].each do |m|
9
+ define_method("#{m}=") do |val|
10
+ @default_options[m]=val
11
+ end
12
+ end
13
+
14
+ def initialize
15
+ @default_options = {
16
+ :format => 'A4',
17
+ :margin => '1cm',
18
+ :zoom => 1,
19
+ :orientation => 'portrait',
20
+ :tmpdir => Dir.tmpdir,
21
+ :rendering_timeout => 90000,
22
+ :rendering_time => 1000,
23
+ :command_config_file => File.expand_path('../config.json', __FILE__),
24
+ :viewport_width => 600,
25
+ :viewport_height => 600,
26
+ :max_redirect_count => 0
27
+ }
28
+ end
29
+
30
+ def phantomjs
31
+ @phantomjs ||= (defined?(Bundler::GemfileError) ? `bundle exec which phantomjs` : `which phantomjs`).chomp
32
+ end
33
+ end
34
+
35
+ class << self
36
+ attr_accessor :configuration
37
+ end
38
+
39
+ # Configure Phantomjs someplace sensible,
40
+ # like config/initializers/phantomjs.rb
41
+ #
42
+ # @example
43
+ # Shrimp.configure do |config|
44
+ # config.phantomjs = '/usr/local/bin/phantomjs'
45
+ # config.format = 'Letter'
46
+ # end
47
+
48
+ def self.configuration
49
+ @configuration ||= Configuration.new
50
+ end
51
+
52
+ def self.configure
53
+ yield(configuration)
54
+ end
55
+ end
@@ -0,0 +1,175 @@
1
+ module Shrimp
2
+ class Middleware
3
+ def initialize(app, options = { }, conditions = { })
4
+ @app = app
5
+ @options = options
6
+ @conditions = conditions
7
+ @options[:polling_interval] ||= 1
8
+ @options[:polling_offset] ||= 1
9
+ @options[:cache_ttl] ||= 1
10
+ @options[:request_timeout] ||= @options[:polling_interval] * 10
11
+ end
12
+
13
+ def call(env)
14
+ @request = Rack::Request.new(env)
15
+ if render_as_pdf? #&& headers['Content-Type'] =~ /text\/html|application\/xhtml\+xml/
16
+ if already_rendered? && (up_to_date?(@options[:cache_ttl]) || @options[:cache_ttl] == 0)
17
+ if File.size(render_to) == 0
18
+ File.delete(render_to)
19
+ remove_rendering_flag
20
+ return error_response
21
+ end
22
+ return ready_response if env['HTTP_X_REQUESTED_WITH']
23
+ file = File.open(render_to, "rb")
24
+ body = file.read
25
+ file.close
26
+ File.delete(render_to) if @options[:cache_ttl] == 0
27
+ remove_rendering_flag
28
+ response = [body]
29
+ headers = { }
30
+ headers["Content-Length"] = (body.respond_to?(:bytesize) ? body.bytesize : body.size).to_s
31
+ headers["Content-Type"] = "application/pdf"
32
+ [200, headers, response]
33
+ else
34
+ if rendering_in_progress?
35
+ if rendering_timed_out?
36
+ remove_rendering_flag
37
+ error_response
38
+ else
39
+ reload_response(@options[:polling_interval])
40
+ end
41
+ else
42
+ File.delete(render_to) if already_rendered?
43
+ set_rendering_flag
44
+ fire_phantom
45
+ reload_response(@options[:polling_offset])
46
+ end
47
+ end
48
+ else
49
+ @app.call(env)
50
+ end
51
+ end
52
+
53
+ private
54
+
55
+ # Private: start phantom rendering in a separate process
56
+ def fire_phantom
57
+ Process::detach fork { Phantom.new(@request.url.sub(%r{\.pdf$}, ''), @options, @request.cookies).to_pdf(render_to) }
58
+ end
59
+
60
+ def render_to
61
+ file_name = Digest::MD5.hexdigest(@request.path) + ".pdf"
62
+ file_path = @options[:out_path]
63
+ "#{file_path}/#{file_name}"
64
+ end
65
+
66
+ def already_rendered?
67
+ File.exists?(render_to)
68
+ end
69
+
70
+ def up_to_date?(ttl = 30)
71
+ (Time.now - File.new(render_to).mtime) <= ttl
72
+ end
73
+
74
+
75
+ def remove_rendering_flag
76
+ @request.session["phantom-rendering"] ||={ }
77
+ @request.session["phantom-rendering"].delete(render_to)
78
+ end
79
+
80
+ def set_rendering_flag
81
+ @request.session["phantom-rendering"] ||={ }
82
+ @request.session["phantom-rendering"][render_to] = Time.now
83
+ end
84
+
85
+ def rendering_timed_out?
86
+ Time.now - @request.session["phantom-rendering"][render_to] > @options[:request_timeout]
87
+ end
88
+
89
+ def rendering_in_progress?
90
+ @request.session["phantom-rendering"]||={ }
91
+ @request.session["phantom-rendering"][render_to]
92
+ end
93
+
94
+ def render_as_pdf?
95
+ request_path_is_pdf = !!@request.path.match(%r{\.pdf$})
96
+
97
+ if request_path_is_pdf && @conditions[:only]
98
+ rules = [@conditions[:only]].flatten
99
+ rules.any? do |pattern|
100
+ if pattern.is_a?(Regexp)
101
+ @request.path =~ pattern
102
+ else
103
+ @request.path[0, pattern.length] == pattern
104
+ end
105
+ end
106
+ elsif request_path_is_pdf && @conditions[:except]
107
+ rules = [@conditions[:except]].flatten
108
+ rules.map do |pattern|
109
+ if pattern.is_a?(Regexp)
110
+ return false if @request.path =~ pattern
111
+ else
112
+ return false if @request.path[0, pattern.length] == pattern
113
+ end
114
+ end
115
+ return true
116
+ else
117
+ request_path_is_pdf
118
+ end
119
+ end
120
+
121
+ def concat(accepts, type)
122
+ (accepts || '').split(',').unshift(type).compact.join(',')
123
+ end
124
+
125
+ def reload_response(interval=1)
126
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
127
+ <html>
128
+ <head>
129
+ </head>
130
+ <body onLoad="setTimeout(function(){ window.location.reload()}, #{interval * 1000});">
131
+ <h2>Preparing pdf... </h2>
132
+ </body>
133
+ </ html>
134
+ HTML
135
+ headers = { }
136
+ headers["Content-Length"] = body.size.to_s
137
+ headers["Content-Type"] = "text/html"
138
+ headers["Retry-After"] = interval.to_s
139
+
140
+ [503, headers, [body]]
141
+ end
142
+
143
+ def ready_response
144
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
145
+ <html>
146
+ <head>
147
+ </head>
148
+ <body>
149
+ <a href="#{@request.path}">PDF ready here</a>
150
+ </body>
151
+ </ html>
152
+ HTML
153
+ headers = { }
154
+ headers["Content-Length"] = body.size.to_s
155
+ headers["Content-Type"] = "text/html"
156
+ [200, headers, [body]]
157
+ end
158
+
159
+ def error_response
160
+ body = <<-HTML.gsub(/[ \n]+/, ' ').strip
161
+ <html>
162
+ <head>
163
+ </head>
164
+ <body>
165
+ <h2>Sorry request timed out... </h2>
166
+ </body>
167
+ </ html>
168
+ HTML
169
+ headers = { }
170
+ headers["Content-Length"] = body.size.to_s
171
+ headers["Content-Type"] = "text/html"
172
+ [504, headers, [body]]
173
+ end
174
+ end
175
+ end
@@ -0,0 +1,153 @@
1
+ require 'uri'
2
+ require 'json'
3
+ require 'shellwords'
4
+
5
+ module Shrimp
6
+ class NoExecutableError < StandardError
7
+ def initialize
8
+ msg = "No phantomjs executable found at #{Shrimp.configuration.phantomjs}\n"
9
+ msg << ">> Please install phantomjs - http://phantomjs.org/download.html"
10
+ super(msg)
11
+ end
12
+ end
13
+
14
+ class ImproperSourceError < StandardError
15
+ def initialize(msg = nil)
16
+ super("Improper Source: #{msg}")
17
+ end
18
+ end
19
+
20
+ class RenderingError < StandardError
21
+ def initialize(msg = nil)
22
+ super("Rendering Error: #{msg}")
23
+ end
24
+ end
25
+
26
+ class Phantom
27
+ attr_accessor :source, :configuration, :outfile
28
+ attr_reader :options, :cookies, :result, :error
29
+ SCRIPT_FILE = File.expand_path('../rasterize.js', __FILE__)
30
+
31
+ # Public: Runs the phantomjs binary
32
+ #
33
+ # Returns the stdout output of phantomjs, or nil if phantomjs exits with a non-zero status (the output is then available via #error)
34
+ def run
35
+ @error = nil
36
+ @result = `#{cmd}`
37
+ unless $?.exitstatus == 0
38
+ @error = @result
39
+ @result = nil
40
+ end
41
+ @result
42
+ end
43
+
44
+ def run!
45
+ @error = nil
46
+ @result = `#{cmd}`
47
+ unless $?.exitstatus == 0
48
+ @error = @result
49
+ @result = nil
50
+ raise RenderingError.new(@error)
51
+ end
52
+ @result
53
+ end
54
+
55
+ # Public: Returns the phantom rasterize command
56
+ def cmd
57
+ cookie_file = dump_cookies
58
+ format, zoom, margin, orientation = options[:format], options[:zoom], options[:margin], options[:orientation]
59
+ rendering_time, timeout = options[:rendering_time], options[:rendering_timeout]
60
+ viewport_width, viewport_height = options[:viewport_width], options[:viewport_height]
61
+ max_redirect_count = options[:max_redirect_count]
62
+ @outfile ||= "#{options[:tmpdir]}/#{Digest::MD5.hexdigest((Time.now.to_i + rand(9001)).to_s)}.pdf"
63
+ command_config_file = "--config=#{options[:command_config_file]}"
64
+ [
65
+ Shrimp.configuration.phantomjs,
66
+ command_config_file,
67
+ SCRIPT_FILE,
68
+ @source.to_s.shellescape,
69
+ @outfile,
70
+ format,
71
+ zoom,
72
+ margin,
73
+ orientation,
74
+ cookie_file,
75
+ rendering_time,
76
+ timeout,
77
+ viewport_width,
78
+ viewport_height,
79
+ max_redirect_count
80
+ ].join(" ")
81
+ end
82
+
83
+ # Public: initializes a new Phantom Object
84
+ #
85
+ # url_or_file - The url of the html document to render
86
+ # options - a hash with options for rendering
87
+ # * format - the paper format for the output eg: "5in*7.5in", "10cm*20cm", "A4", "Letter"
88
+ # * zoom - the viewport zoom factor
89
+ # * margin - the margins for the pdf
90
+ # * command_config_file - the path to a json configuration file for command-line options
91
+ # cookies - hash with cookies to use for rendering
92
+ # outfile - optional path for the output file; if not given, a randomly named .pdf path inside options[:tmpdir] is used
93
+ #
94
+ # Returns self
95
+ def initialize(url_or_file, options = { }, cookies={ }, outfile = nil)
96
+ @source = Source.new(url_or_file)
97
+ @options = Shrimp.configuration.default_options.merge(options)
98
+ @cookies = cookies
99
+ @outfile = File.expand_path(outfile) if outfile
100
+ raise NoExecutableError.new unless File.exists?(Shrimp.configuration.phantomjs)
101
+ end
102
+
103
+ # Public: renders to pdf
104
+ # path - the destination path defaults to outfile
105
+ #
106
+ # Returns the path to the pdf file
107
+ def to_pdf(path=nil)
108
+ @outfile = File.expand_path(path) if path
109
+ self.run
110
+ @outfile
111
+ end
112
+
113
+ # Public: renders to pdf
114
+ # path - the destination path defaults to outfile
115
+ #
116
+ # Returns a File Handle of the Resulting pdf
117
+ def to_file(path=nil)
118
+ self.to_pdf(path)
119
+ File.new(@outfile)
120
+ end
121
+
122
+ # Public: renders to pdf
123
+ # path - the destination path defaults to outfile
124
+ #
125
+ # Returns the binary string of the pdf
126
+ def to_string(path=nil)
127
+ File.open(self.to_pdf(path)).read
128
+ end
129
+
130
+ def to_pdf!(path=nil)
131
+ @outfile = File.expand_path(path) if path
132
+ self.run!
133
+ @outfile
134
+ end
135
+
136
+ def to_file!(path=nil)
137
+ self.to_pdf!(path)
138
+ File.new(@outfile)
139
+ end
140
+
141
+ def to_string!(path=nil)
142
+ File.open(self.to_pdf!(path)).read
143
+ end
144
+
145
+ private
146
+
147
+ def dump_cookies
148
+ host = @source.url? ? URI::parse(@source.to_s).host : "/"
149
+ json = @cookies.inject([]) { |a, (k, v)| a.push({ :name => k, :value => v, :domain => host }); a }.to_json
150
+ File.open("#{options[:tmpdir]}/#{rand}.cookies", 'w') { |f| f.puts json; f }.path
151
+ end
152
+ end
153
+ end
@@ -0,0 +1,143 @@
1
+ var
2
+ webpage = require('webpage'),
3
+ fs = require('fs'),
4
+ system = require('system'),
5
+ margin = system.args[5] || '0cm',
6
+ orientation = system.args[6] || 'portrait',
7
+ cookie_file = system.args[7],
8
+ render_time = system.args[8] || 10000 ,
9
+ time_out = system.args[9] || 90000 ,
10
+ viewport_width = system.args[10] || 600,
11
+ viewport_height = system.args[11] || 600,
12
+ redirects_num = system.args[12] || 0,
13
+ cookies = {},
14
+ address, output, size;
15
+
16
+ function error(msg) {
17
+ msg = msg || 'Unknown error';
18
+ console.log(msg);
19
+ phantom.exit(1);
20
+ throw msg;
21
+ }
22
+
23
+ function print_usage() {
24
+ console.log('Usage: rasterize.js URL filename [paperwidth*paperheight|paperformat] [zoom] [margin] [orientation] [cookie_file] [render_time] [time_out] [viewport_width] [viewport_height] [max_redirects_count]');
25
+ console.log(' paper (pdf output) examples: "5in*7.5in", "10cm*20cm", "A4", "Letter"');
26
+ }
27
+
28
+ window.setTimeout(function () {
29
+ error("Shit's being weird no result within: " + time_out + "ms");
30
+ }, time_out);
31
+
32
+ function renderUrl(url, output, options) {
33
+ options = options || {};
34
+
35
+ var statusCode,
36
+ page = webpage.create();
37
+
38
+ for (var k in options) {
39
+ if (options.hasOwnProperty(k)) {
40
+ page[k] = options[k];
41
+ }
42
+ }
43
+
44
+ // determine the statusCode
45
+ page.onResourceReceived = function (resource) {
46
+ if (resource.url == url) {
47
+ statusCode = resource.status;
48
+ }
49
+ };
50
+
51
+ page.onResourceError = function (resourceErrorOrId, errorString) {
52
+ //phantomjs 1.9.1 and above uses a resourceError object
53
+ if(typeof resourceErrorOrId == Object) {
54
+ resourceError = resourceErrorOrId;
55
+ error(resourceError.errorString + ' (URL: ' + resourceError.url + ')');
56
+ //phantomjs 1.9.0 and below pass the request id and the error string
57
+ } else {
58
+ error(errorString)
59
+ }
60
+ };
61
+
62
+ page.onNavigationRequested = function (redirect_url, type, willNavigate, main) {
63
+ if (main) {
64
+ if (redirect_url !== url) {
65
+ page.close();
66
+
67
+ if (redirects_num-- >= 0) {
68
+ renderUrl(redirect_url, output, options);
69
+ } else {
70
+ error(url + ' redirects to ' + redirect_url + ' after maximum number of redirects reached');
71
+ }
72
+ }
73
+ }
74
+ };
75
+
76
+ page.open(url, function (status) {
77
+ if (status !== 'success' || (statusCode != 200 && statusCode != null)) {
78
+ if (fs.exists(output)) {
79
+ fs.remove(output);
80
+ }
81
+ try {
82
+ fs.touch(output);
83
+ } catch (e) {
84
+ console.log(e);
85
+ }
86
+
87
+ error('Unable to load the URL: ' + url + ' (HTTP ' + statusCode + ')');
88
+ } else {
89
+ window.setTimeout(function () {
90
+ page.render(output + '_tmp.pdf');
91
+
92
+ if (fs.exists(output)) {
93
+ fs.remove(output);
94
+ }
95
+
96
+ try {
97
+ fs.move(output + '_tmp.pdf', output);
98
+ } catch (e) {
99
+ error(e);
100
+ }
101
+ console.log('Rendered to: ' + output, new Date().getTime());
102
+ phantom.exit(0);
103
+ }, render_time);
104
+ }
105
+ });
106
+ }
107
+
108
+ if (cookie_file) {
109
+ try {
110
+ f = fs.open(cookie_file, "r");
111
+ cookies = JSON.parse(f.read());
112
+ fs.remove(cookie_file);
113
+ } catch (e) {
114
+ console.log(e);
115
+ }
116
+ phantom.cookiesEnabled = true;
117
+ phantom.cookies = cookies;
118
+ }
119
+
120
+ if (system.args.length < 3 || system.args.length > 13) {
121
+ print_usage() && phantom.exit(2);
122
+ } else {
123
+ address = system.args[1];
124
+ output = system.args[2];
125
+
126
+ page_options = {
127
+ viewportSize: {
128
+ width: viewport_width,
129
+ height: viewport_height
130
+ }
131
+ };
132
+
133
+ if (system.args.length > 3 && system.args[2].substr(-4) === ".pdf") {
134
+ size = system.args[3].split('*');
135
+ page_options.paperSize = size.length === 2 ? { width:size[0], height:size[1], margin:'0px' }
136
+ : { format:system.args[3], orientation:orientation, margin:margin };
137
+ }
138
+ if (system.args.length > 4) {
139
+ page_options.zoomFactor = system.args[4];
140
+ }
141
+
142
+ renderUrl(address, output, page_options);
143
+ }
@@ -0,0 +1,25 @@
1
+ require 'uri'
2
+ module Shrimp
3
+ class Source
4
+ def initialize(url_or_file)
5
+ @source = url_or_file
6
+ raise ImproperSourceError.new unless url? || file?
7
+ end
8
+
9
+ def url?
10
+ @source.is_a?(String) && @source.match(URI::regexp)
11
+ end
12
+
13
+ def file?
14
+ @source.kind_of?(File)
15
+ end
16
+
17
+ def html?
18
+ !(url? || file?)
19
+ end
20
+
21
+ def to_s
22
+ file? ? @source.path : @source
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,3 @@
1
+ module Shrimp
2
+ VERSION = "0.0.5"
3
+ end
data/shrimp.gemspec ADDED
@@ -0,0 +1,27 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'shrimp/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "shrimp-orangejulius"
8
+ gem.version = Shrimp::VERSION
9
+ gem.authors = ["Manuel Kniep"]
10
+ gem.email = %w(manuel@adeven.com)
11
+ gem.description = %q{html to pdf with phantomjs}
12
+ gem.summary = %q{a phantomjs based pdf renderer}
13
+ gem.homepage = "http://github.com/adeven/shrimp"
14
+ gem.files = `git ls-files`.split($/)
15
+ gem.files.reject! { |fn| fn.include? "script" }
16
+ gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = %w(lib)
19
+ gem.requirements << 'phantomjs, v1.6 or greater'
20
+ gem.add_runtime_dependency "json"
21
+
22
+ # Development Dependencies
23
+ gem.add_development_dependency(%q<rake>, [">=0.9.2"])
24
+ gem.add_development_dependency(%q<rspec>, [">= 2.2.0"])
25
+ gem.add_development_dependency(%q<rack-test>, [">= 0.5.6"])
26
+ gem.add_development_dependency(%q<rack>, ["= 1.4.1"])
27
+ end
@@ -0,0 +1,123 @@
1
+ require 'spec_helper'
2
+
3
+ def app;
4
+ Rack::Lint.new(@app)
5
+ end
6
+
7
+ def options
8
+ { :margin => "1cm", :out_path => Dir.tmpdir,
9
+ :polling_offset => 10, :polling_interval => 1, :cache_ttl => 3600,
10
+ :request_timeout => 1 }
11
+ end
12
+
13
+ def mock_app(options = { }, conditions = { })
14
+ main_app = lambda { |env|
15
+ headers = { 'Content-Type' => "text/html" }
16
+ [200, headers, ['Hello world!']]
17
+ }
18
+
19
+ @middleware = Shrimp::Middleware.new(main_app, options, conditions)
20
+ @app = Rack::Session::Cookie.new(@middleware, :key => 'rack.session')
21
+ end
22
+
23
+
24
+ describe Shrimp::Middleware do
25
+ before { mock_app(options) }
26
+
27
+ context "matching pdf" do
28
+ it "should render as pdf" do
29
+ get '/test.pdf'
30
+ @middleware.send(:'render_as_pdf?').should be true
31
+ end
32
+ it "should return 503 the first time" do
33
+ get '/test.pdf'
34
+ last_response.status.should eq 503
35
+ last_response.header["Retry-After"].should eq "10"
36
+ end
37
+
38
+ it "should return 503 the with polling interval the second time" do
39
+ get '/test.pdf'
40
+ get '/test.pdf'
41
+ last_response.status.should eq 503
42
+ last_response.header["Retry-After"].should eq "1"
43
+ end
44
+
45
+ it "should set render to to outpath" do
46
+ get '/test.pdf'
47
+ @middleware.send(:render_to).should match (Regexp.new("^#{options[:out_path]}"))
48
+ end
49
+
50
+ it "should return 504 on timeout" do
51
+ get '/test.pdf'
52
+ sleep 1
53
+ get '/test.pdf'
54
+ last_response.status.should eq 504
55
+ end
56
+
57
+ it "should retry rendering after timeout" do
58
+ get '/test.pdf'
59
+ sleep 1
60
+ get '/test.pdf'
61
+ get '/test.pdf'
62
+ last_response.status.should eq 503
63
+ end
64
+
65
+ it "should return a pdf with 200 after rendering" do
66
+ mock_file = double(File, :read => "Hello World", :close => true, :mtime => Time.now)
67
+ File.should_receive(:'exists?').and_return true
68
+ File.should_receive(:'size').and_return 1000
69
+ File.should_receive(:'open').and_return mock_file
70
+ File.should_receive(:'new').and_return mock_file
71
+ get '/test.pdf'
72
+ last_response.status.should eq 200
73
+ last_response.body.should eq "Hello World"
74
+ end
75
+
76
+
77
+ end
78
+ context "not matching pdf" do
79
+ it "should skip pdf rendering" do
80
+ get 'http://www.example.org/test'
81
+ last_response.body.should include "Hello world!"
82
+ @middleware.send(:'render_as_pdf?').should be false
83
+ end
84
+ end
85
+ end
86
+
87
+ describe "Conditions" do
88
+ context "only" do
89
+ before { mock_app(options, :only => [%r[^/invoice], %r[^/public]]) }
90
+ it "render pdf for set only option" do
91
+ get '/invoice/test.pdf'
92
+ @middleware.send(:'render_as_pdf?').should be true
93
+ end
94
+
95
+ it "render pdf for set only option" do
96
+ get '/public/test.pdf'
97
+ @middleware.send(:'render_as_pdf?').should be true
98
+ end
99
+
100
+ it "not render pdf for any other path" do
101
+ get '/secret/test.pdf'
102
+ @middleware.send(:'render_as_pdf?').should be false
103
+ end
104
+ end
105
+
106
+ context "except" do
107
+ before { mock_app(options, :except => %w(/secret)) }
108
+ it "render pdf for set only option" do
109
+ get '/invoice/test.pdf'
110
+ @middleware.send(:'render_as_pdf?').should be true
111
+ end
112
+
113
+ it "render pdf for set only option" do
114
+ get '/public/test.pdf'
115
+ @middleware.send(:'render_as_pdf?').should be true
116
+ end
117
+
118
+ it "not render pdf for any other path" do
119
+ get '/secret/test.pdf'
120
+ @middleware.send(:'render_as_pdf?').should be false
121
+ end
122
+ end
123
+ end
@@ -0,0 +1,155 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ def valid_pdf(io)
5
+ case io
6
+ when File
7
+ io.read[0...4] == "%PDF"
8
+ when String
9
+ io[0...4] == "%PDF" || File.open(io).read[0...4] == "%PDF"
10
+ end
11
+ end
12
+
13
+ describe Shrimp::Phantom do
14
+ let(:testfile) { File.expand_path('../test_file.html', __FILE__) }
15
+
16
+ before do
17
+ Shrimp.configure { |config| config.rendering_time = 1000 }
18
+ end
19
+
20
+ # describe ".quote_arg" do
21
+ # subject { described_class }
22
+
23
+ # let(:arg) { "test" }
24
+
25
+ # it "wraps the argument with single quotes" do
26
+ # subject.quote_arg(arg).should eq "'test'"
27
+ # end
28
+
29
+ # context "when the argument contains single quotes" do
30
+ # let(:arg) { "'te''st'" }
31
+
32
+ # it "escapes them" do
33
+ # %x(echo #{subject.quote_arg(arg)}).strip.should eq arg
34
+ # end
35
+ # end
36
+ # end
37
+
38
+ it "should initialize attributes" do
39
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { }, "#{Dir.tmpdir}/test.pdf")
40
+ phantom.source.to_s.should eq "file://#{testfile}"
41
+ phantom.options[:margin].should eq "2cm"
42
+ phantom.outfile.should eq "#{Dir.tmpdir}/test.pdf"
43
+ end
44
+
45
+ it "should render a pdf file" do
46
+ #phantom = Shrimp::Phantom.new("file://#{@path}")
47
+ #phantom.to_pdf("#{Dir.tmpdir}/test.pdf").first should eq "#{Dir.tmpdir}/test.pdf"
48
+ end
49
+
50
+ it "should accept a local file url" do
51
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
52
+ phantom.source.should be_url
53
+ end
54
+
55
+ it "should accept a URL as source" do
56
+ phantom = Shrimp::Phantom.new("http://google.com")
57
+ phantom.source.should be_url
58
+ end
59
+
60
+ it "should parse options into a cmd line" do
61
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm", :max_redirect_count => 10 }, { }, "#{Dir.tmpdir}/test.pdf")
62
+ phantom.cmd.should include "test.pdf A4 1 2cm portrait"
63
+ phantom.cmd.should include "file://#{testfile}"
64
+ phantom.cmd.should include "lib/shrimp/rasterize.js"
65
+ phantom.cmd.should end_with " 10"
66
+ end
67
+
68
+ it "should properly escape arguments" do
69
+ malicious_uri = "file:///hello';shutdown"
70
+ bogus_phantom = Shrimp::Phantom.new(malicious_uri)
71
+
72
+ bogus_phantom.cmd.should_not include malicious_uri
73
+
74
+ Shrimp.configuration.stub(:phantomjs).and_return "echo"
75
+ %x(#{bogus_phantom.cmd}).strip.should include malicious_uri
76
+ end
77
+
78
+ context "rendering to a file" do
79
+ before do
80
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { }, "#{Dir.tmpdir}/test.pdf")
81
+ @result = phantom.to_file
82
+ end
83
+
84
+ it "should return a File" do
85
+ @result.should be_a File
86
+ end
87
+
88
+ it "should be a valid pdf" do
89
+ valid_pdf(@result)
90
+ end
91
+ end
92
+
93
+ context "rendering to a pdf" do
94
+ before do
95
+ @phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { })
96
+ @result = @phantom.to_pdf("#{Dir.tmpdir}/test.pdf")
97
+ end
98
+
99
+ it "should return a path to pdf" do
100
+ @result.should be_a String
101
+ @result.should eq "#{Dir.tmpdir}/test.pdf"
102
+ end
103
+
104
+ it "should be a valid pdf" do
105
+ valid_pdf(@result)
106
+ end
107
+ end
108
+
109
+ context "rendering to a String" do
110
+ before do
111
+ phantom = Shrimp::Phantom.new("file://#{testfile}", { :margin => "2cm" }, { })
112
+ @result = phantom.to_string("#{Dir.tmpdir}/test.pdf")
113
+ end
114
+
115
+ it "should return the File IO String" do
116
+ @result.should be_a String
117
+ end
118
+
119
+ it "should be a valid pdf" do
120
+ valid_pdf(@result)
121
+ end
122
+ end
123
+
124
+ context "Error" do
125
+ it "should return result nil" do
126
+ phantom = Shrimp::Phantom.new("file://foo/bar")
127
+ @result = phantom.run
128
+ @result.should be_nil
129
+ end
130
+
131
+ it "should be unable to load the address" do
132
+ phantom = Shrimp::Phantom.new("file:///foo/bar")
133
+ phantom.run
134
+ phantom.error.should include "Error opening /foo/bar: No such file or directory (URL: file:///foo/bar)"
135
+ end
136
+
137
+ it "should be unable to copy file" do
138
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
139
+ phantom.to_pdf("/foo/bar/")
140
+ phantom.error.should include "Unable to copy file "
141
+ end
142
+ end
143
+
144
+ context "Error Bang!" do
145
+ it "should be unable to load the address" do
146
+ phantom = Shrimp::Phantom.new("file:///foo/bar")
147
+ expect { phantom.run! }.to raise_error Shrimp::RenderingError
148
+ end
149
+
150
+ it "should be unable to copy file" do
151
+ phantom = Shrimp::Phantom.new("file://#{testfile}")
152
+ expect { phantom.to_pdf!("/foo/bar/") }.to raise_error Shrimp::RenderingError
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,16 @@
1
+ #encoding: UTF-8
2
+ require 'spec_helper'
3
+
4
+ describe Shrimp::Source do
5
+ context "url" do
6
+ it "should match file urls" do
7
+ source = Shrimp::Source.new("file:///test/test.html")
8
+ source.should be_url
9
+ end
10
+
11
+ it "should match http urls" do
12
+ source = Shrimp::Source.new("http://test/test.html")
13
+ source.should be_url
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,6 @@
1
+ <html>
2
+ <head></head>
3
+ <body>
4
+ <h1>Hello World!</h1>
5
+ </body>
6
+ </html>
@@ -0,0 +1,7 @@
1
+ require 'rack/test'
2
+ require 'shrimp'
3
+
4
+ RSpec.configure do |config|
5
+ include Rack::Test::Methods
6
+ end
7
+
metadata ADDED
@@ -0,0 +1,140 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shrimp-orangejulius
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.5
5
+ platform: ruby
6
+ authors:
7
+ - Manuel Kniep
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-18 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.2.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 2.2.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: rack-test
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.5.6
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.5.6
69
+ - !ruby/object:Gem::Dependency
70
+ name: rack
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 1.4.1
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 1.4.1
83
+ description: html to pdf with phantomjs
84
+ email:
85
+ - manuel@adeven.com
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - ".travis.yml"
92
+ - ChangeLog.md
93
+ - Gemfile
94
+ - LICENSE.txt
95
+ - README.md
96
+ - Rakefile
97
+ - lib/shrimp.rb
98
+ - lib/shrimp/config.json
99
+ - lib/shrimp/configuration.rb
100
+ - lib/shrimp/middleware.rb
101
+ - lib/shrimp/phantom.rb
102
+ - lib/shrimp/rasterize.js
103
+ - lib/shrimp/source.rb
104
+ - lib/shrimp/version.rb
105
+ - shrimp.gemspec
106
+ - spec/shrimp/middleware_spec.rb
107
+ - spec/shrimp/phantom_spec.rb
108
+ - spec/shrimp/source_spec.rb
109
+ - spec/shrimp/test_file.html
110
+ - spec/spec_helper.rb
111
+ homepage: http://github.com/adeven/shrimp
112
+ licenses: []
113
+ metadata: {}
114
+ post_install_message:
115
+ rdoc_options: []
116
+ require_paths:
117
+ - lib
118
+ required_ruby_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ required_rubygems_version: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ">="
126
+ - !ruby/object:Gem::Version
127
+ version: '0'
128
+ requirements:
129
+ - phantomjs, v1.6 or greater
130
+ rubyforge_project:
131
+ rubygems_version: 2.2.2
132
+ signing_key:
133
+ specification_version: 4
134
+ summary: a phantomjs based pdf renderer
135
+ test_files:
136
+ - spec/shrimp/middleware_spec.rb
137
+ - spec/shrimp/phantom_spec.rb
138
+ - spec/shrimp/source_spec.rb
139
+ - spec/shrimp/test_file.html
140
+ - spec/spec_helper.rb