forkify 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,9 +1,11 @@
1
1
  History.txt
2
2
  Manifest.txt
3
3
  README.txt
4
+ TODO.txt
4
5
  Rakefile
5
6
  lib/forkify.rb
6
- test/test_forkify.rb
7
+ spec/forkify_spec.rb
7
8
  examples/a.rb
8
9
  examples/b.rb
9
10
  examples/c.rb
11
+ examples/d.rb
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rubygems'
4
4
  require 'hoe'
5
5
  require './lib/forkify.rb'
6
6
 
7
- FORKIFY_VERSION = "0.0.2"
7
+ FORKIFY_VERSION = "0.0.3"
8
8
 
9
9
  Hoe.spec('forkify') do
10
10
  version = FORKIFY_VERSION
@@ -0,0 +1,2 @@
1
+ - Fix the race condition/deadlock bug that occurs when a pool forkify is run IMMEDIATELY after a previous one
2
+ - Have the pool method pick a random port that's not in use instead of a hardcoded one
@@ -0,0 +1,31 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set ts=2 sw=2 filetype=Ruby
3
+ #
4
+ # This example shows how pool forking can be faster than serial forking in
5
+ # some cases.
6
+
7
+ require 'forkify'
8
+
9
+ #FORKIFY_DEBUG = true
10
+
11
+ puts "Forkifying with a pool..."
12
+ pool_start_time = Time.now
13
+ pool_result = [1, 2, 3, 4, 5].forkify(:procs => 3, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
14
+ #puts "..."
15
+ pool_result = [1, 2, 3, 4, 5].forkify(:procs => 3, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
16
+ #pool_result = [1, 1, 1, 1, 5, 1, 3, 2].forkify(:procs => 5, :method => :pool) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
17
+ pool_stop_time = Time.now
18
+
19
+ #puts "Forkifying serially..."
20
+ serial_start_time = Time.now
21
+ #serial_result = [1, 1, 1, 1, 5, 1, 3, 2].forkify(:procs => 5, :method => :serial) { |n| puts "#{$$} sleeping for #{n}"; sleep(n); n }
22
+ serial_stop_time = Time.now
23
+
24
+ pool_time = pool_stop_time - pool_start_time
25
+ serial_time = serial_stop_time - serial_start_time
26
+
27
+ puts "Time with pool forking #{pool_time} seconds."
28
+ puts "Time with serial forking #{serial_time} seconds."
29
+
30
+ #puts "#{pool_result.inspect} - #{serial_result.inspect}"
31
+
@@ -1,5 +1,8 @@
1
1
  FORKIFY_DEBUG = false
2
- require 'pp' if FORKIFY_DEBUG
2
+
3
+ require 'pp'
4
+ require 'rinda/tuplespace'
5
+ require 'timeout'
3
6
 
4
7
  module Enumerable
5
8
 
@@ -19,7 +22,161 @@ module Enumerable
19
22
  #
20
23
  # 10.times.forkify(10) { sleep(1) } => [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] (runs for less than 2 seconds)
21
24
  #
22
- def forkify procs = 5, &block
25
+ def forkify(opts = {}, &block)
26
+ puts opts.inspect if FORKIFY_DEBUG
27
+
28
+ if opts.class == Fixnum # it's the number of processes
29
+ procs = opts
30
+ method = :serial
31
+ elsif opts.class == Hash
32
+ procs = opts[:procs] || 5
33
+ method = opts[:method] || :serial
34
+ end
35
+
36
+ puts "procs: #{procs}, method: #{method.inspect}" if FORKIFY_DEBUG
37
+
38
+ if method == :serial
39
+ forkify_serial(procs, &block)
40
+ elsif method == :pool
41
+ if RUBY_VERSION < "1.9.1"
42
+ raise "Pool forking is only supported on Ruby 1.9.1+"
43
+ end
44
+ forkify_pool(procs, &block)
45
+ else
46
+ raise "I don't know that method of forking: #{method}"
47
+ end
48
+ end
49
+
50
+ private # should I keep these private? not sure.
51
+
52
+ def forkify_pool procs = 5, &block
53
+ puts "Forkify Class: #{self.class}" if FORKIFY_DEBUG
54
+ if self === Array
55
+ items = self
56
+ else
57
+ begin
58
+ items = self.to_a
59
+ rescue NoMethodError => e
60
+ raise NoMethodError, "Unable to coerce #{self.inspect} to an Array type."
61
+ end
62
+ end
63
+
64
+ result_tuples = []
65
+ results = []
66
+ pids = []
67
+ items_remaining = items.size
68
+
69
+ num_procs = procs
70
+ num_procs = items_remaining if items_remaining < procs
71
+
72
+ num_procs.times do
73
+
74
+ pid = fork
75
+ unless pid
76
+
77
+ DRb.start_service
78
+
79
+ ts = Rinda::TupleSpaceProxy.new(DRbObject.new_with_uri('druby://127.0.0.1:53421'))
80
+
81
+ conn_attempts = 10
82
+ done_work = false
83
+
84
+ loop do
85
+
86
+ # stop once this worker has processed at least one item and no :enum tuples remain in the queue
87
+ break if done_work and ts.read_all([:enum, nil, nil]).empty?
88
+
89
+ puts "#{$$} Taking..." if FORKIFY_DEBUG
90
+
91
+ begin
92
+ item = ts.take([:enum, nil, nil])
93
+ rescue DRb::DRbConnError
94
+ conn_attempts -= 1
95
+ sleep(0.2)
96
+ retry if conn_attempts > 0
97
+ exit(-1)
98
+ end
99
+ pp "Got => #{item}" if FORKIFY_DEBUG
100
+
101
+ # run the caller's block on the item's value; if the block raises, the exception object becomes the result
102
+ result =
103
+ begin
104
+ block.call(item[2])
105
+ rescue Object => e
106
+ e
107
+ end
108
+
109
+ # return result
110
+ puts "writing result: #{result.inspect}" if FORKIFY_DEBUG
111
+ ts.write([:result, item[1], result])
112
+ done_work ||= true
113
+
114
+ end
115
+ DRb.stop_service
116
+
117
+ puts "child #{$$} dying" if FORKIFY_DEBUG
118
+ exit!
119
+ end
120
+
121
+ pids << pid
122
+ end
123
+
124
+ pts = Rinda::TupleSpace.new
125
+
126
+ # write termination tuples
127
+ #num_procs.times do
128
+ #puts "pushing terminator" if FORKIFY_DEBUG
129
+ #pts.write([:enum, -1, nil])
130
+ #end
131
+
132
+ items.each_with_index { |item, index|
133
+ puts "pushing data" if FORKIFY_DEBUG
134
+ pts.write([:enum, index, item])
135
+ }
136
+
137
+ provider = nil
138
+ conn_attempts = 100
139
+ loop do
140
+ begin
141
+ provider = DRb.start_service('druby://127.0.0.1:53421', pts)
142
+ rescue Exception => e
143
+ conn_attempts -= 1
144
+ #print "."
145
+ retry if conn_attempts > 0
146
+ raise "bleh, I couldn't start DRb"
147
+ else
148
+ break
149
+ end
150
+ end
151
+
152
+ pp "Waiting for pids: #{pids.inspect}" if FORKIFY_DEBUG
153
+ pids.reverse.each { |p|
154
+ puts "Waiting for #{p}" if FORKIFY_DEBUG
155
+ Process.waitpid(p)
156
+ }
157
+
158
+ # Grab results
159
+ items.size.times do
160
+ puts "grabbing a result..." if FORKIFY_DEBUG
161
+ result_tuples << pts.take([:result, nil, nil])
162
+ end
163
+
164
+ provider.stop_service
165
+ # busy-wait until the DRb service reports that it is no longer alive
166
+ while provider.alive? do
167
+ #print ":"
168
+ end
169
+
170
+ # store each result at its original index so the output order matches the input order
171
+ result_tuples.map { |t|
172
+ puts "results[#{t[1]}] = #{t[2]}" if FORKIFY_DEBUG
173
+ results[t[1]] = t[2]
174
+ }
175
+
176
+ return results
177
+ end
178
+
179
+ def forkify_serial procs = 5, &block
23
180
  puts "Forkify Class: #{self.class}" if FORKIFY_DEBUG
24
181
  if self === Array
25
182
  items = self
@@ -77,7 +234,7 @@ module Enumerable
77
234
  #datawaiting_pipes = Kernel.select(rpipes, wpipes, nil, 2)
78
235
  #readwaiting_pipes = datawaiting_pipes[0]
79
236
  #writewaiting_pipes = datawaiting_pipes[1]
80
-
237
+
81
238
  # Switch to 2 selects instead of 1
82
239
  #readwaiting_pipes = Kernel.select(rpipes, nil, nil, 2)[0]
83
240
  #writewaiting_pipes = Kernel.select(nil, wpipes, nil, 2)[1]
@@ -88,8 +245,8 @@ module Enumerable
88
245
  r = rpipes[i]
89
246
  w = wpipes[i]
90
247
 
91
- pp "read: #{readwaiting_pipes}" if FORKIFY_DEBUG
92
- pp "write: #{writewaiting_pipes}" if FORKIFY_DEBUG
248
+ pp "read: #{r}" if FORKIFY_DEBUG
249
+ pp "write: #{w}" if FORKIFY_DEBUG
93
250
 
94
251
  w.close
95
252
  data = ''
@@ -0,0 +1,54 @@
1
+ $: << File.dirname(__FILE__) + "/../lib"
2
+
3
+ require 'spec'
4
+ require 'forkify'
5
+
6
+ describe 'forkify' do
7
+ it 'should fork serially and take less time' do
8
+ time1 = Time.now
9
+ r = [1, 2, 3].forkify(3) { |n| sleep(1) }
10
+ time2 = Time.now
11
+ # Assert that it took less than 3 seconds
12
+ (time2 - time1).should < 3
13
+ end
14
+
15
+ it 'should fork with a pool and take less time' do
16
+ time1 = Time.now
17
+ r = [1, 2, 3].forkify(:procs => 3, :method => :pool) { |n| sleep(1) }
18
+ time2 = Time.now
19
+ # Assert that it took less than 3 seconds
20
+ (time2 - time1).should < 3
21
+ end
22
+
23
+ it 'should return an array of results from a serial fork' do
24
+ [1, 2, 3].forkify { |n| n * 2 }.should == [2, 4, 6]
25
+ end
26
+
27
+ it 'should return an array of results from a pool fork' do
28
+ [1, 2, 3].forkify(:method => :pool) { |n| n * 2 }.should == [2, 4, 6]
29
+ end
30
+
31
+ it 'should return a hash of results from a serial fork' do
32
+ r = {:a => 1, :b => 2, :c => 3}.forkify { |k, v| [k, v*2] }
33
+ r.should include([:a, 2])
34
+ r.should include([:b, 4])
35
+ r.should include([:c, 6])
36
+ r.size.should == 3
37
+ end
38
+
39
+ it 'should return a hash of results from a pool fork' do
40
+ r = {:a => 1, :b => 2, :c => 3}.forkify(:method => :pool) { |k, v| [k, v*2] }
41
+ r.should include([:a, 2])
42
+ r.should include([:b, 4])
43
+ r.should include([:c, 6])
44
+ r.size.should == 3
45
+ end
46
+
47
+ it 'should return an array of nils from a serial nil fork' do
48
+ [nil, nil].forkify { |n| n }.should == [nil, nil]
49
+ end
50
+
51
+ it 'should return an array of nils from a pool nil fork' do
52
+ [nil, nil].forkify(:method => :pool) { |n| n }.should == [nil, nil]
53
+ end
54
+ end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: forkify
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ hash: 25
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 3
10
+ version: 0.0.3
5
11
  platform: ruby
6
12
  authors:
7
13
  - Lee Hinman
@@ -9,19 +15,41 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2009-06-29 00:00:00 -06:00
18
+ date: 2010-07-07 00:00:00 -06:00
13
19
  default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
- name: hoe
22
+ name: rubyforge
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 7
30
+ segments:
31
+ - 2
32
+ - 0
33
+ - 4
34
+ version: 2.0.4
17
35
  type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
36
+ version_requirements: *id001
37
+ - !ruby/object:Gem::Dependency
38
+ name: hoe
39
+ prerelease: false
40
+ requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
20
42
  requirements:
21
43
  - - ">="
22
44
  - !ruby/object:Gem::Version
23
- version: 2.3.1
24
- version:
45
+ hash: 21
46
+ segments:
47
+ - 2
48
+ - 6
49
+ - 1
50
+ version: 2.6.1
51
+ type: :development
52
+ version_requirements: *id002
25
53
  description: |-
26
54
  forkify.rb makes it easy to process a bunch of data using 'n'
27
55
  worker processes. It is based off of forkoff and threadify by Ara Howard.
@@ -36,16 +64,19 @@ extra_rdoc_files:
36
64
  - History.txt
37
65
  - Manifest.txt
38
66
  - README.txt
67
+ - TODO.txt
39
68
  files:
40
69
  - History.txt
41
70
  - Manifest.txt
42
71
  - README.txt
72
+ - TODO.txt
43
73
  - Rakefile
44
74
  - lib/forkify.rb
45
- - test/test_forkify.rb
75
+ - spec/forkify_spec.rb
46
76
  - examples/a.rb
47
77
  - examples/b.rb
48
78
  - examples/c.rb
79
+ - examples/d.rb
49
80
  has_rdoc: true
50
81
  homepage: http://github.com/dakrone/forkify
51
82
  licenses: []
@@ -57,23 +88,29 @@ rdoc_options:
57
88
  require_paths:
58
89
  - lib
59
90
  required_ruby_version: !ruby/object:Gem::Requirement
91
+ none: false
60
92
  requirements:
61
93
  - - ">="
62
94
  - !ruby/object:Gem::Version
95
+ hash: 3
96
+ segments:
97
+ - 0
63
98
  version: "0"
64
- version:
65
99
  required_rubygems_version: !ruby/object:Gem::Requirement
100
+ none: false
66
101
  requirements:
67
102
  - - ">="
68
103
  - !ruby/object:Gem::Version
104
+ hash: 3
105
+ segments:
106
+ - 0
69
107
  version: "0"
70
- version:
71
108
  requirements: []
72
109
 
73
110
  rubyforge_project: forkify
74
- rubygems_version: 1.3.4
111
+ rubygems_version: 1.3.7
75
112
  signing_key:
76
113
  specification_version: 3
77
114
  summary: forkify.rb makes it easy to process a bunch of data using 'n' worker processes
78
- test_files:
79
- - test/test_forkify.rb
115
+ test_files: []
116
+
@@ -1,31 +0,0 @@
1
- require "testy"
2
- require "forkify"
3
-
4
- Testy.testing 'forkify' do
5
- test 'timings' do |t|
6
- time1 = Time.now
7
- r = [1, 2, 3].forkify(3) { |n| sleep(1) }
8
- time2 = Time.now
9
- # Assert that it took less than 3 seconds
10
- less_than_3 = ((time2 - time1) < 3)
11
- t.check :timing, :expect => true, :actual => less_than_3
12
- end
13
-
14
- test 'array results' do |t|
15
- r = [1, 2, 3].forkify { |n| n * 2 }
16
- t.check :array_results, :expect => [2, 4, 6], :actual => r
17
- end
18
-
19
- test 'hash results' do |t|
20
- r = {:a => 1, :b => 2, :c => 3}.forkify { |k, v| [k, v*2] }
21
- t.check :hash_contains_a, :expect => true, :actual => r.include?([:a, 2])
22
- t.check :hash_contains_b, :expect => true, :actual => r.include?([:b, 4])
23
- t.check :hash_contains_c, :expect => true, :actual => r.include?([:c, 6])
24
- t.check :hash_length, :expect => 3, :actual => r.size
25
- end
26
-
27
- test 'array of nils' do |t|
28
- r = [nil, nil].forkify { |n| n }
29
- t.check :nil_array, :expect => [nil, nil], :actual => r
30
- end
31
- end