forkify 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +3 -1
- data/Rakefile +1 -1
- data/TODO.txt +2 -0
- data/examples/d.rb +31 -0
- data/lib/forkify.rb +162 -5
- data/spec/forkify_spec.rb +54 -0
- metadata +50 -13
- data/test/test_forkify.rb +0 -31
data/Manifest.txt
CHANGED
data/Rakefile
CHANGED
data/TODO.txt
ADDED
data/examples/d.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set ts=2 sw=2 filetype=Ruby
|
3
|
+
#
|
4
|
+
# This example shows how pool forking can be faster than serial forking in
|
5
|
+
# some cases.
|
6
|
+
|
7
|
+
require 'forkify'
|
8
|
+
|
9
|
+
#FORKIFY_DEBUG = true
|
10
|
+
|
11
|
+
puts "Forkifying with a pool..."
|
12
|
+
pool_start_time = Time.now
|
13
|
+
pool_result = [1, 2, 3, 4, 5].forkify(:procs => 3, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
|
14
|
+
#puts "..."
|
15
|
+
pool_result = [1, 2, 3, 4, 5].forkify(:procs => 3, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
|
16
|
+
#pool_result = [1, 1, 1, 1, 5, 1, 3, 2].forkify(:procs => 5, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
|
17
|
+
pool_stop_time = Time.now
|
18
|
+
|
19
|
+
#puts "Forkifying serially..."
|
20
|
+
serial_start_time = Time.now
|
21
|
+
#serial_result = [1, 1, 1, 1, 5, 1, 3, 2].forkify(:procs => 5, :method => :serial) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
|
22
|
+
serial_stop_time = Time.now
|
23
|
+
|
24
|
+
pool_time = pool_stop_time - pool_start_time
|
25
|
+
serial_time = serial_stop_time - serial_start_time
|
26
|
+
|
27
|
+
puts "Time with pool forking #{pool_time} seconds."
|
28
|
+
puts "Time with serial forking #{serial_time} seconds."
|
29
|
+
|
30
|
+
#puts "#{pool_result.inspect} - #{serial_result.inspect}"
|
31
|
+
|
data/lib/forkify.rb
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
FORKIFY_DEBUG = false
|
2
|
-
|
2
|
+
|
3
|
+
require 'pp'
|
4
|
+
require 'rinda/tuplespace'
|
5
|
+
require 'timeout'
|
3
6
|
|
4
7
|
module Enumerable
|
5
8
|
|
@@ -19,7 +22,161 @@ module Enumerable
|
|
19
22
|
#
|
20
23
|
# 10.times.forkify(10) { sleep(1) } => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] (runs for less than 2 seconds)
|
21
24
|
#
|
22
|
-
def forkify
|
25
|
+
def forkify(opts = {}, &block)
|
26
|
+
puts opts.inspect if FORKIFY_DEBUG
|
27
|
+
|
28
|
+
if opts.class == Fixnum # it's the number of processes
|
29
|
+
procs = opts
|
30
|
+
method = :serial
|
31
|
+
elsif opts.class == Hash
|
32
|
+
procs = opts[:procs] || 5
|
33
|
+
method = opts[:method] || :serial
|
34
|
+
end
|
35
|
+
|
36
|
+
puts "procs: #{procs}, method: #{method.inspect}" if FORKIFY_DEBUG
|
37
|
+
|
38
|
+
if method == :serial
|
39
|
+
forkify_serial(procs, &block)
|
40
|
+
elsif method == :pool
|
41
|
+
if RUBY_VERSION < "1.9.1"
|
42
|
+
raise "Pool forking is only supported on Ruby 1.9.1+"
|
43
|
+
end
|
44
|
+
forkify_pool(procs, &block)
|
45
|
+
else
|
46
|
+
raise "I don't know that method of forking: #{method}"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
private # should I keep these private? not sure.
|
51
|
+
|
52
|
+
def forkify_pool procs = 5, &block
|
53
|
+
puts "Forkify Class: #{self.class}" if FORKIFY_DEBUG
|
54
|
+
if self === Array
|
55
|
+
items = self
|
56
|
+
else
|
57
|
+
begin
|
58
|
+
items = self.to_a
|
59
|
+
rescue NoMethodError => e
|
60
|
+
raise NoMethodError, "Unable to coerce #{self.inspect} to an Array type."
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
result_tuples = []
|
65
|
+
results = []
|
66
|
+
pids = []
|
67
|
+
items_remaining = items.size
|
68
|
+
|
69
|
+
num_procs = procs
|
70
|
+
num_procs = items_remaining if items_remaining < procs
|
71
|
+
|
72
|
+
num_procs.times do
|
73
|
+
|
74
|
+
pid = fork
|
75
|
+
unless pid
|
76
|
+
|
77
|
+
DRb.start_service
|
78
|
+
|
79
|
+
ts = Rinda::TupleSpaceProxy.new(DRbObject.new_with_uri('druby://127.0.0.1:53421'))
|
80
|
+
|
81
|
+
conn_attempts = 10
|
82
|
+
done_work = false
|
83
|
+
|
84
|
+
loop do
|
85
|
+
|
86
|
+
# break if no more items in the queue
|
87
|
+
break if done_work and ts.read_all([:enum, nil, nil]).empty?
|
88
|
+
|
89
|
+
puts "#{$$} Taking..." if FORKIFY_DEBUG
|
90
|
+
|
91
|
+
begin
|
92
|
+
item = ts.take([:enum, nil, nil])
|
93
|
+
rescue DRb::DRbConnError
|
94
|
+
conn_attempts -= 1
|
95
|
+
sleep(0.2)
|
96
|
+
retry if conn_attempts > 0
|
97
|
+
exit(-1)
|
98
|
+
end
|
99
|
+
pp "Got => #{item}" if FORKIFY_DEBUG
|
100
|
+
|
101
|
+
# our termination tuple
|
102
|
+
result =
|
103
|
+
begin
|
104
|
+
block.call(item[2])
|
105
|
+
rescue Object => e
|
106
|
+
e
|
107
|
+
end
|
108
|
+
|
109
|
+
# return result
|
110
|
+
puts "writing result: #{result.inspect}" if FORKIFY_DEBUG
|
111
|
+
ts.write([:result, item[1], result])
|
112
|
+
done_work ||= true
|
113
|
+
|
114
|
+
end
|
115
|
+
DRb.stop_service
|
116
|
+
|
117
|
+
puts "child #{$$} dying" if FORKIFY_DEBUG
|
118
|
+
exit!
|
119
|
+
end
|
120
|
+
|
121
|
+
pids << pid
|
122
|
+
end
|
123
|
+
|
124
|
+
pts = Rinda::TupleSpace.new
|
125
|
+
|
126
|
+
# write termination tuples
|
127
|
+
#num_procs.times do
|
128
|
+
#puts "pushing terminator" if FORKIFY_DEBUG
|
129
|
+
#pts.write([:enum, -1, nil])
|
130
|
+
#end
|
131
|
+
|
132
|
+
items.each_with_index { |item, index|
|
133
|
+
puts "pushing data" if FORKIFY_DEBUG
|
134
|
+
pts.write([:enum, index, item])
|
135
|
+
}
|
136
|
+
|
137
|
+
provider = nil
|
138
|
+
conn_attempts = 100
|
139
|
+
loop do
|
140
|
+
begin
|
141
|
+
provider = DRb.start_service('druby://127.0.0.1:53421', pts)
|
142
|
+
rescue Exception => e
|
143
|
+
conn_attempts -= 1
|
144
|
+
#print "."
|
145
|
+
retry if conn_attempts > 0
|
146
|
+
raise "bleh, I couldn't start DRb"
|
147
|
+
else
|
148
|
+
break
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
pp "Waiting for pids: #{pids.inspect}" if FORKIFY_DEBUG
|
153
|
+
pids.reverse.each { |p|
|
154
|
+
puts "Waiting for #{p}" if FORKIFY_DEBUG
|
155
|
+
Process.waitpid(p)
|
156
|
+
}
|
157
|
+
|
158
|
+
# Grab results
|
159
|
+
items.size.times do
|
160
|
+
puts "grabbing a result..." if FORKIFY_DEBUG
|
161
|
+
result_tuples << pts.take([:result, nil, nil])
|
162
|
+
end
|
163
|
+
|
164
|
+
provider.stop_service
|
165
|
+
# wait for death
|
166
|
+
while provider.alive? do
|
167
|
+
#print ":"
|
168
|
+
end
|
169
|
+
|
170
|
+
# gather results and sort them
|
171
|
+
result_tuples.map { |t|
|
172
|
+
puts "results[#{t[1]}] = #{t[2]}" if FORKIFY_DEBUG
|
173
|
+
results[t[1]] = t[2]
|
174
|
+
}
|
175
|
+
|
176
|
+
return results
|
177
|
+
end
|
178
|
+
|
179
|
+
def forkify_serial procs = 5, &block
|
23
180
|
puts "Forkify Class: #{self.class}" if FORKIFY_DEBUG
|
24
181
|
if self === Array
|
25
182
|
items = self
|
@@ -77,7 +234,7 @@ module Enumerable
|
|
77
234
|
#datawaiting_pipes = Kernel.select(rpipes, wpipes, nil, 2)
|
78
235
|
#readwaiting_pipes = datawaiting_pipes[0]
|
79
236
|
#writewaiting_pipes = datawaiting_pipes[1]
|
80
|
-
|
237
|
+
|
81
238
|
# Switch to 2 selects instead of 1
|
82
239
|
#readwaiting_pipes = Kernel.select(rpipes, nil, nil, 2)[0]
|
83
240
|
#writewaiting_pipes = Kernel.select(nil, wpipes, nil, 2)[1]
|
@@ -88,8 +245,8 @@ module Enumerable
|
|
88
245
|
r = rpipes[i]
|
89
246
|
w = wpipes[i]
|
90
247
|
|
91
|
-
pp "read: #{
|
92
|
-
pp "write: #{
|
248
|
+
pp "read: #{r}" if FORKIFY_DEBUG
|
249
|
+
pp "write: #{w}" if FORKIFY_DEBUG
|
93
250
|
|
94
251
|
w.close
|
95
252
|
data = ''
|
data/spec/forkify_spec.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
$: << File.dirname(__FILE__) + "/../lib"
|
2
|
+
|
3
|
+
require 'spec'
|
4
|
+
require 'forkify'
|
5
|
+
|
6
|
+
describe 'forkify' do
|
7
|
+
it 'should fork serially and take less time' do
|
8
|
+
time1 = Time.now
|
9
|
+
r = [1, 2, 3].forkify(3) { |n| sleep(1) }
|
10
|
+
time2 = Time.now
|
11
|
+
# Assert that it took less than 3 seconds
|
12
|
+
(time2 - time1).should < 3
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'should fork with a pool and take less time' do
|
16
|
+
time1 = Time.now
|
17
|
+
r = [1, 2, 3].forkify(:procs => 3, :method => :pool) { |n| sleep(1) }
|
18
|
+
time2 = Time.now
|
19
|
+
# Assert that it took less than 3 seconds
|
20
|
+
(time2 - time1).should < 3
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'should return an array of results from a serial fork' do
|
24
|
+
[1, 2, 3].forkify { |n| n * 2 }.should == [2, 4, 6]
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'should return an array of results from a pool fork' do
|
28
|
+
[1, 2, 3].forkify(:method => :pool) { |n| n * 2 }.should == [2, 4, 6]
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'should return a hash of results from a serial fork' do
|
32
|
+
r = {:a => 1, :b => 2, :c => 3}.forkify { |k, v| [k, v*2] }
|
33
|
+
r.should include([:a, 2])
|
34
|
+
r.should include([:b, 4])
|
35
|
+
r.should include([:c, 6])
|
36
|
+
r.size.should == 3
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'should return a hash of results from a pool fork' do
|
40
|
+
r = {:a => 1, :b => 2, :c => 3}.forkify(:method => :pool) { |k, v| [k, v*2] }
|
41
|
+
r.should include([:a, 2])
|
42
|
+
r.should include([:b, 4])
|
43
|
+
r.should include([:c, 6])
|
44
|
+
r.size.should == 3
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'should return an array of nils from a serial nil fork' do
|
48
|
+
[nil, nil].forkify { |n| n }.should == [nil, nil]
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'should return an array of nils from a pool nil fork' do
|
52
|
+
[nil, nil].forkify(:method => :pool) { |n| n }.should == [nil, nil]
|
53
|
+
end
|
54
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: forkify
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Lee Hinman
|
@@ -9,19 +15,41 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date:
|
18
|
+
date: 2010-07-07 00:00:00 -06:00
|
13
19
|
default_executable:
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
22
|
+
name: rubyforge
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 7
|
30
|
+
segments:
|
31
|
+
- 2
|
32
|
+
- 0
|
33
|
+
- 4
|
34
|
+
version: 2.0.4
|
17
35
|
type: :development
|
18
|
-
|
19
|
-
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: hoe
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
20
42
|
requirements:
|
21
43
|
- - ">="
|
22
44
|
- !ruby/object:Gem::Version
|
23
|
-
|
24
|
-
|
45
|
+
hash: 21
|
46
|
+
segments:
|
47
|
+
- 2
|
48
|
+
- 6
|
49
|
+
- 1
|
50
|
+
version: 2.6.1
|
51
|
+
type: :development
|
52
|
+
version_requirements: *id002
|
25
53
|
description: |-
|
26
54
|
forkify.rb makes it easy to process a bunch of data using 'n'
|
27
55
|
worker processes. It is based off of forkoff and threadify by Ara Howard.
|
@@ -36,16 +64,19 @@ extra_rdoc_files:
|
|
36
64
|
- History.txt
|
37
65
|
- Manifest.txt
|
38
66
|
- README.txt
|
67
|
+
- TODO.txt
|
39
68
|
files:
|
40
69
|
- History.txt
|
41
70
|
- Manifest.txt
|
42
71
|
- README.txt
|
72
|
+
- TODO.txt
|
43
73
|
- Rakefile
|
44
74
|
- lib/forkify.rb
|
45
|
-
-
|
75
|
+
- spec/forkify_spec.rb
|
46
76
|
- examples/a.rb
|
47
77
|
- examples/b.rb
|
48
78
|
- examples/c.rb
|
79
|
+
- examples/d.rb
|
49
80
|
has_rdoc: true
|
50
81
|
homepage: http://github.com/dakrone/forkify
|
51
82
|
licenses: []
|
@@ -57,23 +88,29 @@ rdoc_options:
|
|
57
88
|
require_paths:
|
58
89
|
- lib
|
59
90
|
required_ruby_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
60
92
|
requirements:
|
61
93
|
- - ">="
|
62
94
|
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
63
98
|
version: "0"
|
64
|
-
version:
|
65
99
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
none: false
|
66
101
|
requirements:
|
67
102
|
- - ">="
|
68
103
|
- !ruby/object:Gem::Version
|
104
|
+
hash: 3
|
105
|
+
segments:
|
106
|
+
- 0
|
69
107
|
version: "0"
|
70
|
-
version:
|
71
108
|
requirements: []
|
72
109
|
|
73
110
|
rubyforge_project: forkify
|
74
|
-
rubygems_version: 1.3.
|
111
|
+
rubygems_version: 1.3.7
|
75
112
|
signing_key:
|
76
113
|
specification_version: 3
|
77
114
|
summary: forkify.rb makes it easy to process a bunch of data using 'n' worker processes
|
78
|
-
test_files:
|
79
|
-
|
115
|
+
test_files: []
|
116
|
+
|
data/test/test_forkify.rb
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
require "testy"
|
2
|
-
require "forkify"
|
3
|
-
|
4
|
-
Testy.testing 'forkify' do
|
5
|
-
test 'timings' do |t|
|
6
|
-
time1 = Time.now
|
7
|
-
r = [1, 2, 3].forkify(3) { |n| sleep(1) }
|
8
|
-
time2 = Time.now
|
9
|
-
# Assert that it took less than 3 seconds
|
10
|
-
less_than_3 = ((time2 - time1) < 3)
|
11
|
-
t.check :timing, :expect => true, :actual => less_than_3
|
12
|
-
end
|
13
|
-
|
14
|
-
test 'array results' do |t|
|
15
|
-
r = [1, 2, 3].forkify { |n| n * 2 }
|
16
|
-
t.check :array_results, :expect => [2, 4, 6], :actual => r
|
17
|
-
end
|
18
|
-
|
19
|
-
test 'hash results' do |t|
|
20
|
-
r = {:a => 1, :b => 2, :c => 3}.forkify { |k, v| [k, v*2] }
|
21
|
-
t.check :hash_contains_a, :expect => true, :actual => r.include?([:a, 2])
|
22
|
-
t.check :hash_contains_b, :expect => true, :actual => r.include?([:b, 4])
|
23
|
-
t.check :hash_contains_c, :expect => true, :actual => r.include?([:c, 6])
|
24
|
-
t.check :hash_length, :expect => 3, :actual => r.size
|
25
|
-
end
|
26
|
-
|
27
|
-
test 'array of nils' do |t|
|
28
|
-
r = [nil, nil].forkify { |n| n }
|
29
|
-
t.check :nil_array, :expect => [nil, nil], :actual => r
|
30
|
-
end
|
31
|
-
end
|