shrimple 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
+ # Adds a pleasant API on top of Shrimple::Process
2
+
3
+ require 'shrimple/process'
4
+ require 'stringio'
5
+
6
class Shrimple
  # Raised by Phantom#wait when the process exits unsuccessfully. The message
  # carries the exit status and whatever was captured on stderr.
  class PhantomError < StandardError; end

  # Raised by Phantom#wait when the process failed and timed_out? is set.
  class TimedOut < StandardError; end

  # Adds a pleasant API on top of Shrimple::Process: writes the optional
  # --config file, wires up the stdin/stdout/stderr streams, and fires the
  # onSuccess / onError callback once the process has been cleaned up.
  class Phantom < Process
    attr_reader :options, :config

    # Launches the process described by +options+ (a Hash). Keys read here:
    #   :executable - String path, or an Array holding the complete command line
    #   :renderer   - script appended to the command (String :executable only)
    #   :config     - object serialized as JSON into the --config temp file
    #   :stdin      - path or IO to feed the child; defaults to options as JSON
    #   :output     - path or IO receiving stdout; defaults to a StringIO
    #   :stderr     - path or IO receiving stderr; defaults to a StringIO
    #   :timeout    - passed through to Shrimple::Process
    #   :debug      - when truthy, echoes the command and stdin to $stderr
    #   :onSuccess, :onError - callables invoked with self from _cleanup
    #
    # Note that :onSuccess and :onError are deleted from the Hash passed in,
    # which also keeps them out of the JSON sent to the child.
    def initialize options
      @options = options
      @onSuccess = options.delete(:onSuccess)
      @onError = options.delete(:onError)

      # write the file required by phantom's --config option
      if options[:config]
        @config = Tempfile.new(File.basename(options[:output] || 'shrimple') + '.config')
        @config.write(options[:config].to_json)
        @config.close
      end

      begin
        # create the ios to supply input and read output
        @stdin = new_io(options[:stdin] || StringIO.new(options.to_json))
        @stdout = new_io(options[:output], 'wb')
        @stderr = new_io(options[:stderr], 'wt')

        if options[:debug]
          # hm, should this be replaced with methods? or maybe a superclass?
          $stderr.puts "COMMAND: #{command_line}"
          $stderr.puts "STDIN: #{options.to_json}"
        end

        super(command_line, @stdin, @stdout, @stderr, options[:timeout])
      rescue StandardError
        # _cleanup never runs for a process that failed to launch, so the
        # config file has to be removed here or it would be left behind.
        @config.unlink if @config
        raise
      end
    end

    # Blocks until the PhantomJS process is finished.
    # Raises Shrimple::TimedOut if it failed and timed_out? is set, otherwise
    # Shrimple::PhantomError if it exited unsuccessfully.
    def wait
      stop
      return if @child.value.success?

      raise Shrimple::TimedOut.new if timed_out?
      raise Shrimple::PhantomError.new("PhantomJS returned #{@child.value.exitstatus}: #{stderr}")
    end

    # Returns everything captured from the child's stdout so far.
    def stdout
      read_io @stdout
    end

    # Returns everything captured from the child's stderr so far.
    def stderr
      read_io @stderr
    end

    # cleans up after the process. synchronized so it's guaranteed to only be called once.
    # process is removed from the process table after this call returns
    def _cleanup
      super

      callback = success? ? @onSuccess : @onError
      callback.call(self) if callback
    ensure
      # runs even when the callback raises, so the config file is never leaked
      @config.unlink if @config
    end

    private

    # Builds the argv Array handed to the superclass.
    # Raises RuntimeError when no :executable option was supplied.
    def command_line
      if @options[:executable].nil?
        raise "PhantomJS not found. Specify its executable with 'executable' option."
      end

      # if executable is an array then we assume it contains all necessary args (so :renderer is ignored)
      return @options[:executable] if @options[:executable].kind_of? Array

      command = [@options[:executable]]
      command << "--config=#{@config.path}" if @config
      command << @options[:renderer]
    end

    # pass a filepath, an IO object or equivalent, or nil to create an empty StringIO ready for data.
    # A String is opened as a file, with any extra arguments forwarded to File.open.
    def new_io name, *opt
      return StringIO.new unless name
      return File.open(name, *opt) if name.kind_of? String

      name
    end

    # Returns the data held by +io+.
    #
    # Files created by new_io are opened write-only and are closed once the
    # child's output has been drained, so rewind/read raises IOError on them.
    # In that case the file is read back by path instead (as binary data).
    def read_io io
      # can't rewind a StringIO because then writes go to wrong place
      return io.string if io.kind_of?(StringIO)

      unless io.respond_to?(:closed?) && io.closed?
        begin
          io.rewind
          return io.read
        rescue IOError
          # handle is not readable: put the write position back at the end so
          # output still being written lands in the right place, then fall
          # through to reading the file by path
          io.seek(0, IO::SEEK_END)
          io.flush
        end
      end

      unless io.respond_to?(:path) && io.path
        raise IOError, "stream cannot be read and has no path to re-open"
      end
      File.open(io.path, 'rb') { |f| f.read }
    end
  end
end
108
+
@@ -0,0 +1,131 @@
1
+ # Fires off a child process, feeds it, and keeps track of the results.
2
+
3
+ require 'open3'
4
+ require 'json'
5
+ require 'tempfile'
6
+ require 'shrimple/process_monitor'
7
+
8
+
9
class Shrimple
  # Fires off a child process, feeds it, and keeps track of the results.
  #
  # Three threads shuttle data between the caller's IO objects and the child's
  # pipes, and a fourth waits for the child to exit (enforcing the optional
  # timeout) and then calls stop.
  class Process
    attr_reader :start_time, :stop_time   # start and finish times of Phantom process

    # runs cmd, passes inio on its stdin, and fills outio and
    # errio with the command's output.
    #
    # cmd     - Array splatted into Open3.popen3
    # timeout - seconds to wait before the child is killed; nil waits forever
    #
    # Re-raises whatever Shrimple.processes._add raises, after making sure the
    # child that was already spawned does not outlive the failed launch.
    def initialize cmd, inio, outio, errio, timeout=nil
      @start_time = Time.now
      @killed = false
      @timed_out = false
      @chin, @chout, @cherr, @child = Open3.popen3(*cmd)

      begin
        Shrimple.processes._add(self)
      rescue StandardError
        abandon_child
        raise
      end
      @chout.binmode

      @thrin = Thread.new { drain(inio, @chin) }
      @throut = Thread.new { drain(@chout, outio) }
      @threrr = Thread.new { drain(@cherr, errio) }

      # ensure cleanup is called when the child exits. (strange it requires a whole new thread...?)
      @thrchild = Thread.new {
        if timeout
          outatime unless @child.join(timeout)
        else
          @child.join
        end
        stop
      }
    end

    # true once _cleanup has recorded a stop time
    def finished?
      @stop_time != nil
    end

    # returns false if the process hasn't finished yet
    def success?
      # Process::Status#success? can be nil, so coerce to a real boolean
      finished? && @child.value.success? ? true : false
    end

    # true once kill has been called on this process
    def killed?
      @killed
    end

    # true once the timeout expired and the process was killed for it
    def timed_out?
      @timed_out
    end

    # kill-o-zaps the phantom process now (using -9 if needed), then waits until it's truly gone
    #
    # seconds_until_panic - how long to wait after TERM before sending KILL
    def kill seconds_until_panic=2
      @killed = true
      signal_child("TERM") if @child.alive?
      if !@child.join(seconds_until_panic)
        signal_child("KILL") if @child.alive?
      end
      # ensure kill doesn't return until process is truly gone
      # (there may be a chance of this deadlocking with a blocking callback... not sure)
      @thrchild.join unless Thread.current == @thrchild
    end

    # waits patiently until phantom process terminates, then cleans up
    def stop
      wait_for_the_end   # do all our waiting outside the sync loop
      Shrimple.processes._remove(self) do
        _cleanup
      end
    end

    # only meant to be used by the ProcessMonitor
    def _child_thread
      @child
    end

    # may only be called once, synchronized by stop()
    # Raises RuntimeError if a stop time has already been recorded.
    def _cleanup
      raise "Someone else already stopped this process??!!" if @stop_time
      @stop_time = Time.now
    end

    # returns true if process was previously active. must be externally synchronized.
    def _deactivate
      was_inactive = @inactive
      @inactive = true
      !was_inactive
    end

    private

    # Joins the three drain threads and the child's wait thread, then the
    # watcher thread unless this is being run from the watcher itself.
    def wait_for_the_end
      [@thrin, @throut, @threrr, @child].each(&:join)
      @thrchild.join unless Thread.current == @thrchild
    end

    # Called by the watcher thread when the timeout expires.
    def outatime
      @timed_out = true
      kill
    end

    # Sends +signal+ to the child, ignoring only the "no such process" error
    # raised when the child died between the alive? check and the signal.
    def signal_child signal
      ::Process.kill(signal, @child.pid)
    rescue Errno::ESRCH
      # already gone, which is what we wanted
    end

    # Tears down a child that was spawned but could not be registered.
    def abandon_child
      [@chin, @chout, @cherr].each(&:close)
      signal_child("KILL")
      @child.join
    end

    # reads every last drop, then closes both files. must be threadsafe.
    def drain reader, writer
      # randomly chosen buffer size
      loop { writer.write(reader.readpartial(256*1024)) }
    rescue EOFError
      # not an error
    rescue Errno::EPIPE
      # child was killed, no problem
    ensure
      reader.close
      begin
        writer.close
      rescue Errno::EPIPE, IOError
        # the other end is already gone; nothing left to flush
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
+ # keeps track of running Shrimple processes
2
+
3
+ require 'thwait'
4
+
5
+
6
class Shrimple
  # Raised by ProcessMonitor#_add when the process table is already full.
  class TooManyProcessesError < StandardError; end

  # keeps track of running Shrimple processes
  #
  # The process table is guarded by a mutex. Cleanup callbacks are always run
  # with the mutex released because they might block.
  class ProcessMonitor
    attr_accessor :max_processes

    # max_processes - the most processes allowed in the table at once.
    #                 Pass a negative number to disable the limit; 0 refuses
    #                 every launch.
    def initialize(max_processes=20)
      @mutex ||= Mutex.new
      @processes ||= []   # TODO: convert this to a hash by child thread?
      @max_processes = max_processes
    end

    # the oldest process still in the table, or nil if the table is empty
    def first
      @mutex.synchronize { @processes.first }
    end

    # number of processes currently in the table
    def count
      @mutex.synchronize { @processes.count }
    end

    # kills processes one at a time until the table is empty
    def kill_all
      while f = first
        f.kill
      end
    end

    # blocks until any child process returns (unless nonblock is true)
    # raises an exception if no processes are running, or if called nonblocking
    # and no processes have finished (see ThreadsWait#next_wait for details).
    # returns the process that finished, after stopping it.
    def wait_next nonblock=nil
      # we wait on child threads since calling waitpid would produce a race condition.

      # snapshot the table under the mutex so a concurrent _add/_remove
      # can't change the array while it is being walked
      running = @mutex.synchronize { @processes.dup }
      by_thread = running.each_with_object({}) { |p, table| table[p._child_thread] = p }

      thread = ThreadsWait.new(by_thread.keys).next_wait(nonblock)
      process = by_thread[thread]
      process.stop   # otherwise process will be in an indeterminate state
      process
    end

    # adds process to the process table.
    # raises Shrimple::TooManyProcessesError if the table is already full.
    def _add process
      @mutex.synchronize do
        if @max_processes >= 0 && @processes.count >= @max_processes
          raise Shrimple::TooManyProcessesError.new("launched process #{@processes.count+1} of #{@max_processes} maximum")
        end
        @processes.push process
      end
    end

    # removes process from process table. pass a block that cleans up after the process.
    # _remove may be called lots of times but block will only be called once
    def _remove process
      cleanup = @mutex.synchronize do
        active = process._deactivate
        raise "process not in process table??" if active && !@processes.include?(process)
        active
      end
      return unless cleanup

      # don't want to hold mutex when calling callback because it might block
      begin
        yield
      ensure
        # always drop the entry, even when the callback raises; the process
        # is already deactivated so nothing else would ever remove it
        removed = @mutex.synchronize { @processes.delete(process) }
      end
      raise "someone else deleted process??" unless removed
    end
  end
end
data/shrimple.gemspec ADDED
@@ -0,0 +1,21 @@
1
# Gem metadata for shrimple. The file list comes from whatever git tracks.
Gem::Specification.new do |spec|
  spec.name        = 'shrimple'
  spec.version     = '0.8.0'
  spec.license     = 'MIT'
  spec.authors     = ['Scott Bronson']
  spec.email       = ['brons_shrimple@rinspin.com']
  spec.homepage    = 'http://github.com/bronson/shrimple'
  spec.summary     = 'A simple Ruby interface to PhantomJS'
  spec.description = 'Use PhantomJS to generate PDFs, PNGs, text files, etc.'

  # NUL-separated listing so file names containing whitespace survive
  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'hashie'

  %w[rake rspec dimensions].each do |gem_name|
    spec.add_development_dependency gem_name
  end
end
@@ -0,0 +1,10 @@
1
// ensures config correctly arrives via stdin
//
// reads json from stdin, adds "processed: true", and writes it to stdout.

var system = require('system')

// declared with var so the fixture does not create an implicit global
var config = JSON.parse(system.stdin.read())
config.processed = true
console.log(JSON.stringify(config))
phantom.exit(0)
@@ -0,0 +1,93 @@
1
+
2
+ require 'spec_helper'
3
+
4
# Exercises Shrimple::Phantom using ordinary commands (sleep, cat) in place
# of PhantomJS, so only the process plumbing is under test.
describe Shrimple::Phantom do
  it "doesn't create a config file if no options are set" do
    s = Shrimple.new(executable: ['sleep', '1'], background: true)

    phantom = s.render('/dev/null')
    expect(phantom.config).to eq nil
    phantom.kill

    expect(phantom.config).to eq nil
    expect(phantom.stdout).to eq ""
    expect(phantom.stderr).to eq ""
  end

  it "creates a config file when there are config options and cleans on kill" do
    s = Shrimple.new(executable: ['sleep', '1'], background: true)
    s.config.ignoreSslErrors = true

    phantom = s.render('infile')
    expect(phantom.config).to be_a Tempfile
    path = phantom.config.path
    expect(File).to exist(path)
    # read the config before killing: the file is removed during cleanup
    config = File.read(path)
    phantom.kill

    expect(File).not_to exist(path)
    expect(JSON.parse(config)).to eq('ignoreSslErrors' => true)
    expect(phantom.stdout).to eq ""
    expect(phantom.stderr).to eq ""
  end

  it "cleans up the config file when exiting normally" do
    s = Shrimple.new(executable: ['/bin/cat'], background: true)
    s.config.ignoreSslErrors = true

    rd, wr = IO.pipe
    phantom = s.render(stdin: rd)

    expect(phantom.config).to be_a Tempfile
    path = phantom.config.path
    expect(File).to exist(path)
    wr.write("done.\n")
    wr.close
    phantom.stop

    expect(File).not_to exist(path)
    expect(phantom.stdout).to eq "done.\n"
  end

  it "times out when running in the foreground" do
    s = Shrimple.new(executable: ['sleep', '10'], timeout: 0)
    expect {
      s.render('/dev/null')
    }.to raise_exception(Shrimple::TimedOut)
  end

  it "times out when running in the background" do
    s = Shrimple.new(executable: ['sleep', '10'], background: true, timeout: 0)
    phantom = s.render('/dev/null')
    Shrimple.processes.wait_next
    expect(phantom.timed_out?).to eq true
    expect(phantom.killed?).to eq true
    expect(phantom.success?).to eq false
  end

  it "can call multiple callbacks from the same renderer" do
    success = 0
    failure = 0
    s = Shrimple.new(executable: ['cat'])
    s.onSuccess = Proc.new { |result| success += 1 }
    s.onError = Proc.new { |result| failure += 1 }
    4.times { s.render('/dev/null') }
    expect(success).to eq 4
    expect(failure).to eq 0
  end

  # The two examples below have no body on purpose: RSpec reports them as
  # pending instead of letting an empty block pass vacuously.

  # ensure writes still go on the end of the buffer after reading
  it "can read partial string contents while writing"

  # ensure writes still go on the end of the buffer after reading
  it "can read partial file contents while writing"
end
93
+