ruby-spark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176)
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +185 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +7 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/custom_marshal.rb +94 -0
  12. data/benchmark/digest.rb +150 -0
  13. data/benchmark/enumerator.rb +88 -0
  14. data/benchmark/performance/prepare.sh +18 -0
  15. data/benchmark/performance/python.py +156 -0
  16. data/benchmark/performance/r.r +69 -0
  17. data/benchmark/performance/ruby.rb +167 -0
  18. data/benchmark/performance/run-all.sh +160 -0
  19. data/benchmark/performance/scala.scala +181 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/ext/ruby_c/extconf.rb +3 -0
  27. data/ext/ruby_c/murmur.c +158 -0
  28. data/ext/ruby_c/murmur.h +9 -0
  29. data/ext/ruby_c/ruby-spark.c +18 -0
  30. data/ext/ruby_java/Digest.java +36 -0
  31. data/ext/ruby_java/Murmur2.java +98 -0
  32. data/ext/ruby_java/RubySparkExtService.java +28 -0
  33. data/ext/ruby_java/extconf.rb +3 -0
  34. data/ext/spark/build.sbt +73 -0
  35. data/ext/spark/project/plugins.sbt +9 -0
  36. data/ext/spark/sbt/sbt +34 -0
  37. data/ext/spark/src/main/scala/Exec.scala +91 -0
  38. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  39. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  40. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  41. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  42. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  43. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  44. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  46. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  47. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  48. data/ext/spark/src/main/scala/RubyRDD.scala +364 -0
  49. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  50. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  51. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  52. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  53. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  54. data/lib/ruby-spark.rb +1 -0
  55. data/lib/spark.rb +198 -0
  56. data/lib/spark/accumulator.rb +260 -0
  57. data/lib/spark/broadcast.rb +98 -0
  58. data/lib/spark/build.rb +43 -0
  59. data/lib/spark/cli.rb +169 -0
  60. data/lib/spark/command.rb +86 -0
  61. data/lib/spark/command/base.rb +154 -0
  62. data/lib/spark/command/basic.rb +345 -0
  63. data/lib/spark/command/pair.rb +124 -0
  64. data/lib/spark/command/sort.rb +51 -0
  65. data/lib/spark/command/statistic.rb +144 -0
  66. data/lib/spark/command_builder.rb +141 -0
  67. data/lib/spark/command_validator.rb +34 -0
  68. data/lib/spark/config.rb +244 -0
  69. data/lib/spark/constant.rb +14 -0
  70. data/lib/spark/context.rb +304 -0
  71. data/lib/spark/error.rb +50 -0
  72. data/lib/spark/ext/hash.rb +41 -0
  73. data/lib/spark/ext/integer.rb +25 -0
  74. data/lib/spark/ext/io.rb +57 -0
  75. data/lib/spark/ext/ip_socket.rb +29 -0
  76. data/lib/spark/ext/module.rb +58 -0
  77. data/lib/spark/ext/object.rb +24 -0
  78. data/lib/spark/ext/string.rb +24 -0
  79. data/lib/spark/helper.rb +10 -0
  80. data/lib/spark/helper/logger.rb +40 -0
  81. data/lib/spark/helper/parser.rb +85 -0
  82. data/lib/spark/helper/serialize.rb +71 -0
  83. data/lib/spark/helper/statistic.rb +93 -0
  84. data/lib/spark/helper/system.rb +42 -0
  85. data/lib/spark/java_bridge.rb +19 -0
  86. data/lib/spark/java_bridge/base.rb +203 -0
  87. data/lib/spark/java_bridge/jruby.rb +23 -0
  88. data/lib/spark/java_bridge/rjb.rb +41 -0
  89. data/lib/spark/logger.rb +76 -0
  90. data/lib/spark/mllib.rb +100 -0
  91. data/lib/spark/mllib/classification/common.rb +31 -0
  92. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  93. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  94. data/lib/spark/mllib/classification/svm.rb +135 -0
  95. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  96. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  97. data/lib/spark/mllib/matrix.rb +120 -0
  98. data/lib/spark/mllib/regression/common.rb +73 -0
  99. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  100. data/lib/spark/mllib/regression/lasso.rb +100 -0
  101. data/lib/spark/mllib/regression/linear.rb +124 -0
  102. data/lib/spark/mllib/regression/ridge.rb +97 -0
  103. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  104. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  105. data/lib/spark/mllib/stat/distribution.rb +12 -0
  106. data/lib/spark/mllib/vector.rb +185 -0
  107. data/lib/spark/rdd.rb +1328 -0
  108. data/lib/spark/sampler.rb +92 -0
  109. data/lib/spark/serializer.rb +24 -0
  110. data/lib/spark/serializer/base.rb +170 -0
  111. data/lib/spark/serializer/cartesian.rb +37 -0
  112. data/lib/spark/serializer/marshal.rb +19 -0
  113. data/lib/spark/serializer/message_pack.rb +25 -0
  114. data/lib/spark/serializer/oj.rb +25 -0
  115. data/lib/spark/serializer/pair.rb +27 -0
  116. data/lib/spark/serializer/utf8.rb +25 -0
  117. data/lib/spark/sort.rb +189 -0
  118. data/lib/spark/stat_counter.rb +125 -0
  119. data/lib/spark/storage_level.rb +39 -0
  120. data/lib/spark/version.rb +3 -0
  121. data/lib/spark/worker/master.rb +144 -0
  122. data/lib/spark/worker/spark_files.rb +15 -0
  123. data/lib/spark/worker/worker.rb +197 -0
  124. data/ruby-spark.gemspec +36 -0
  125. data/spec/generator.rb +37 -0
  126. data/spec/inputs/lorem_300.txt +316 -0
  127. data/spec/inputs/numbers/1.txt +50 -0
  128. data/spec/inputs/numbers/10.txt +50 -0
  129. data/spec/inputs/numbers/11.txt +50 -0
  130. data/spec/inputs/numbers/12.txt +50 -0
  131. data/spec/inputs/numbers/13.txt +50 -0
  132. data/spec/inputs/numbers/14.txt +50 -0
  133. data/spec/inputs/numbers/15.txt +50 -0
  134. data/spec/inputs/numbers/16.txt +50 -0
  135. data/spec/inputs/numbers/17.txt +50 -0
  136. data/spec/inputs/numbers/18.txt +50 -0
  137. data/spec/inputs/numbers/19.txt +50 -0
  138. data/spec/inputs/numbers/2.txt +50 -0
  139. data/spec/inputs/numbers/20.txt +50 -0
  140. data/spec/inputs/numbers/3.txt +50 -0
  141. data/spec/inputs/numbers/4.txt +50 -0
  142. data/spec/inputs/numbers/5.txt +50 -0
  143. data/spec/inputs/numbers/6.txt +50 -0
  144. data/spec/inputs/numbers/7.txt +50 -0
  145. data/spec/inputs/numbers/8.txt +50 -0
  146. data/spec/inputs/numbers/9.txt +50 -0
  147. data/spec/inputs/numbers_0_100.txt +101 -0
  148. data/spec/inputs/numbers_1_100.txt +100 -0
  149. data/spec/lib/collect_spec.rb +42 -0
  150. data/spec/lib/command_spec.rb +68 -0
  151. data/spec/lib/config_spec.rb +64 -0
  152. data/spec/lib/context_spec.rb +163 -0
  153. data/spec/lib/ext_spec.rb +72 -0
  154. data/spec/lib/external_apps_spec.rb +45 -0
  155. data/spec/lib/filter_spec.rb +80 -0
  156. data/spec/lib/flat_map_spec.rb +100 -0
  157. data/spec/lib/group_spec.rb +109 -0
  158. data/spec/lib/helper_spec.rb +19 -0
  159. data/spec/lib/key_spec.rb +41 -0
  160. data/spec/lib/manipulation_spec.rb +114 -0
  161. data/spec/lib/map_partitions_spec.rb +87 -0
  162. data/spec/lib/map_spec.rb +91 -0
  163. data/spec/lib/mllib/classification_spec.rb +54 -0
  164. data/spec/lib/mllib/clustering_spec.rb +35 -0
  165. data/spec/lib/mllib/matrix_spec.rb +32 -0
  166. data/spec/lib/mllib/regression_spec.rb +116 -0
  167. data/spec/lib/mllib/vector_spec.rb +77 -0
  168. data/spec/lib/reduce_by_key_spec.rb +118 -0
  169. data/spec/lib/reduce_spec.rb +131 -0
  170. data/spec/lib/sample_spec.rb +46 -0
  171. data/spec/lib/serializer_spec.rb +13 -0
  172. data/spec/lib/sort_spec.rb +58 -0
  173. data/spec/lib/statistic_spec.rb +168 -0
  174. data/spec/lib/whole_text_files_spec.rb +33 -0
  175. data/spec/spec_helper.rb +39 -0
  176. metadata +301 -0
data/lib/spark/sort.rb ADDED
@@ -0,0 +1,189 @@
+ module Spark
+   module InternalSorter
+     class Base
+       def initialize(key_function)
+         @key_function = key_function
+       end
+     end
+
+     class Ascending < Base
+       def sort(data)
+         data.sort_by!(&@key_function)
+       end
+     end
+
+     class Descending < Ascending
+       def sort(data)
+         super
+         data.reverse!
+       end
+     end
+
+     def self.get(ascending, key_function)
+       if ascending
+         type = Ascending
+       else
+         type = Descending
+       end
+
+       type.new(key_function)
+     end
+   end
+ end
+
+
+ module Spark
+   class ExternalSorter
+
+     include Spark::Helper::System
+
+     # Memory freed by the GC is not reclaimed immediately, so #make_parts needs some reserve
+     MEMORY_RESERVE = 50 # %
+
+     # Size of the chunk by which the memory limit grows, because the GC does not
+     # immediately clean up unreferenced variables
+     MEMORY_FREE_CHUNK = 10 # %
+
+     # How many items are evaluated from the iterator at the start
+     START_SLICE_SIZE = 10
+
+     # Maximum slice size. A large value can bypass memory control.
+     MAX_SLICE_SIZE = 10_000
+
+     # How many values are taken from each enumerator at a time
+     EVAL_N_VALUES = 10
+
+     # Default key function
+     KEY_FUNCTION = lambda{|item| item}
+
+     attr_reader :total_memory, :memory_limit, :memory_chunk, :serializer
+
+     def initialize(total_memory, serializer)
+       @total_memory = total_memory
+       @memory_limit = total_memory * (100-MEMORY_RESERVE) / 100
+       @memory_chunk = total_memory * (100-MEMORY_FREE_CHUNK) / 100
+       @serializer   = serializer
+     end
+
+     def add_memory!
+       @memory_limit += memory_chunk
+     end
+
+     def sort_by(iterator, ascending=true, key_function=KEY_FUNCTION)
+       return to_enum(__callee__, iterator, ascending, key_function) unless block_given?
+
+       create_temp_folder
+       internal_sorter = Spark::InternalSorter.get(ascending, key_function)
+
+       # Make N sorted enumerators
+       parts = make_parts(iterator, internal_sorter)
+
+       return [] if parts.empty?
+
+       # A new key function is needed because items have a new structure,
+       # from [1,2,3] to [[1, Enumerator], [2, Enumerator], [3, Enumerator]]
+       key_function_with_enum = lambda{|(key, _)| key_function[key]}
+       internal_sorter = Spark::InternalSorter.get(ascending, key_function_with_enum)
+
+       heap  = []
+       enums = []
+
+       # Load the first items into the heap
+       parts.each do |part|
+         EVAL_N_VALUES.times {
+           begin
+             heap << [part.next, part]
+           rescue StopIteration
+             break
+           end
+         }
+       end
+
+       # Parts can be exhausted while the heap still holds items
+       while parts.any? || heap.any?
+         internal_sorter.sort(heap)
+
+         # Since every part is sorted and the heap holds EVAL_N_VALUES items
+         # from each part, EVAL_N_VALUES items can be moved to the result
+         EVAL_N_VALUES.times {
+           break if heap.empty?
+
+           item, enum = heap.shift
+           enums << enum
+
+           yield item
+         }
+
+         # Refill the heap from the parts whose items were just yielded
+         while (enum = enums.shift)
+           begin
+             heap << [enum.next, enum]
+           rescue StopIteration
+             parts.delete(enum)
+             enums.delete(enum)
+           end
+         end
+       end
+
+     ensure
+       destroy_temp_folder
+     end
+
+     private
+
+     def create_temp_folder
+       @dir = Dir.mktmpdir
+     end
+
+     def destroy_temp_folder
+       FileUtils.remove_entry_secure(@dir) if @dir
+     end
+
+     # A new part is created when the current part exceeds the (variable) memory limit.
+     # Every new part gets more memory because of the Ruby GC.
+     def make_parts(iterator, internal_sorter)
+       slice = START_SLICE_SIZE
+
+       parts = []
+       part  = []
+
+       loop do
+         begin
+           # Enumerator does not have a slice method
+           slice.times { part << iterator.next }
+         rescue StopIteration
+           break
+         end
+
+         # Careful: memory_limit is variable
+         if memory_usage > memory_limit
+           # Sort the current part with the original key_function
+           internal_sorter.sort(part)
+           # Tempfile for the current part;
+           # it will be destroyed in #destroy_temp_folder
+           file = Tempfile.new("part", @dir)
+           serializer.dump(part, file)
+           # The file pointer is at the end of the file
+           file.seek(0)
+           parts << serializer.load(file)
+
+           # Some memory will be released, but not immediately;
+           # a fresh start needs some new memory
+           part.clear
+           add_memory!
+         else
+           slice = [slice*2, MAX_SLICE_SIZE].min
+         end
+       end
+
+       # Last part, which is not in a file
+       if part.any?
+         internal_sorter.sort(part)
+         parts << part.each
+       end
+
+       parts
+     end
+
+   end # ExternalSorter
+ end # Spark
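For context, a hypothetical driver for the sorter above. The 64 MB budget and the `serializer` object are illustrative assumptions; `memory_usage` is supplied by Spark::Helper::System, and the serializer must respond to dump(data, io) and to load(io) returning an enumerator, as the calls in #make_parts imply:

    require 'tmpdir'
    require 'tempfile'
    require 'fileutils'

    # 'serializer' is any object with dump/load as described above
    sorter = Spark::ExternalSorter.new(64 * 1024 * 1024, serializer)

    data = (1..1_000_000).to_a.shuffle.each   # any Enumerator

    # With a block, items are yielded lazily in sorted order;
    # without one, an Enumerator is returned instead.
    sorter.sort_by(data, true) do |item|
      # consume items in ascending order
    end

Parts that exceed the memory limit are spilled to tempfiles, then merged back with a k-way merge over the per-part enumerators.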
data/lib/spark/stat_counter.rb ADDED
@@ -0,0 +1,125 @@
+ module Spark
+   class StatCounter
+
+     attr_reader :n   # count of our values
+     attr_reader :mu  # mean of our values
+     attr_reader :m2  # variance numerator (sum of (x - mean)^2)
+     attr_reader :max # max of our values
+     attr_reader :min # min of our values
+
+     def initialize(iterator)
+       @n  = 0
+       @mu = 0.0
+       @m2 = 0.0
+       @max = -Float::INFINITY
+       @min = Float::INFINITY
+
+       merge(iterator)
+     end
+
+     def merge(other)
+       if other.is_a?(Spark::StatCounter)
+         merge_stat_counter(other)
+       elsif other.respond_to?(:each)
+         merge_array(other)
+       else
+         merge_value(other)
+       end
+
+       self
+     end
+
+     def sum
+       @n * @mu
+     end
+
+     # Return the variance of the values.
+     def variance
+       if @n == 0
+         Float::NAN
+       else
+         @m2 / @n
+       end
+     end
+
+     # Return the sample variance, which corrects for bias in estimating the variance
+     # by dividing by N-1 instead of N.
+     def sample_variance
+       if @n <= 1
+         Float::NAN
+       else
+         @m2 / (@n - 1)
+       end
+     end
+
+     # Return the standard deviation of the values.
+     def stdev
+       Math.sqrt(variance)
+     end
+
+     # Return the sample standard deviation of the values, which corrects for bias in
+     # estimating the variance by dividing by N-1 instead of N.
+     def sample_stdev
+       Math.sqrt(sample_variance)
+     end
+
+     def to_s
+       "(count: #{count}, mean: #{mean}, stdev: #{stdev}, max: #{max}, min: #{min})"
+     end
+
+     alias_method :count, :n
+     alias_method :mean, :mu
+     alias_method :max_value, :max
+     alias_method :min_value, :min
+     alias_method :sampleStdev, :sample_stdev
+     alias_method :sampleVariance, :sample_variance
+
+     private
+
+     def merge_stat_counter(other)
+       if other == self
+         other = self.deep_copy
+       end
+
+       if @n == 0
+         @n  = other.n
+         @mu = other.mu
+         @m2 = other.m2
+         @max = other.max
+         @min = other.min
+       elsif other.n != 0
+         delta = other.mu - @mu
+
+         if other.n * 10 < @n
+           @mu = @mu + (delta * other.n) / (@n + other.n)
+         elsif @n * 10 < other.n
+           @mu = other.mu - (delta * @n) / (@n + other.n)
+         else
+           @mu = (@mu * @n + other.mu * other.n) / (@n + other.n)
+         end
+
+         @max = [@max, other.max].max
+         @min = [@min, other.min].min
+
+         @m2 += other.m2 + (delta * delta * @n * other.n) / (@n + other.n)
+         @n  += other.n
+       end
+     end
+
+     def merge_array(array)
+       array.each do |item|
+         merge_value(item)
+       end
+     end
+
+     def merge_value(value)
+       delta = value - @mu
+       @n  += 1
+       @mu += delta / @n
+       @m2 += delta * (value - @mu)
+       @max = [@max, value].max
+       @min = [@min, value].min
+     end
+
+   end
+ end
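The per-value update in merge_value is Welford's online algorithm, and merge_stat_counter combines two partial counters without revisiting the data, so statistics can be computed per partition and merged. A quick illustration of the expected behavior (the numbers are exact for this input):

    a = Spark::StatCounter.new([1, 2, 3])
    b = Spark::StatCounter.new([4, 5, 6])
    a.merge(b)

    a.count           # => 6
    a.mean            # => 3.5
    a.variance        # => 2.9166...  (17.5 / 6)
    a.sample_variance # => 3.5        (17.5 / 5)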
data/lib/spark/storage_level.rb ADDED
@@ -0,0 +1,39 @@
+ # Necessary libraries
+ Spark.load_lib
+
+ module Spark
+   class StorageLevel
+
+     def self.reload
+       return if @reloaded
+       reload!
+       @reloaded = true
+     end
+
+     def self.reload!
+       self.const_set(:NONE, JStorageLevel.NONE)
+       self.const_set(:DISK_ONLY, JStorageLevel.DISK_ONLY)
+       self.const_set(:DISK_ONLY_2, JStorageLevel.DISK_ONLY_2)
+       self.const_set(:MEMORY_ONLY, JStorageLevel.MEMORY_ONLY)
+       self.const_set(:MEMORY_ONLY_SER, JStorageLevel.MEMORY_ONLY_SER)
+       self.const_set(:MEMORY_ONLY_2, JStorageLevel.MEMORY_ONLY_2)
+       self.const_set(:MEMORY_ONLY_SER_2, JStorageLevel.MEMORY_ONLY_SER_2)
+       self.const_set(:MEMORY_AND_DISK, JStorageLevel.MEMORY_AND_DISK)
+       self.const_set(:MEMORY_AND_DISK_2, JStorageLevel.MEMORY_AND_DISK_2)
+       self.const_set(:MEMORY_AND_DISK_SER, JStorageLevel.MEMORY_AND_DISK_SER)
+       self.const_set(:MEMORY_AND_DISK_SER_2, JStorageLevel.MEMORY_AND_DISK_SER_2)
+       self.const_set(:OFF_HEAP, JStorageLevel.OFF_HEAP)
+     end
+
+     def self.java_get(arg)
+       reload
+
+       if arg.is_a?(String)
+         const_get(arg.upcase)
+       else
+         arg
+       end
+     end
+
+   end
+ end
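java_get lazily materializes the Java constants on first use and accepts either an already-resolved level or a case-insensitive name. A hypothetical call site (the rdd object and its persist method are assumptions about this gem's RDD API, not shown in this hunk):

    level = Spark::StorageLevel.java_get('memory_and_disk')  # case-insensitive lookup
    rdd.persist(level)  # assuming the RDD API exposes persist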
data/lib/spark/version.rb ADDED
@@ -0,0 +1,3 @@
+ module Spark
+   VERSION = '1.0.0'
+ end
data/lib/spark/worker/master.rb ADDED
@@ -0,0 +1,144 @@
+ #!/usr/bin/env ruby
+
+ $PROGRAM_NAME = 'RubySparkMaster'
+
+ require 'socket'
+ require 'io/wait'
+ require 'nio'
+
+ require_relative 'worker'
+
+ # New process group;
+ # otherwise the master could be killed from a pry console
+ Process.setsid
+
+ # =================================================================================================
+ # Master
+ #
+ module Master
+
+   def self.create
+     case ARGV[0].to_s.strip
+     when 'thread'
+       Master::Thread.new
+     else
+       Master::Process.new
+     end
+   end
+
+   class Base
+     include Spark::Constant
+
+     def initialize
+       @port = ARGV[1].to_s.strip.to_i
+       @socket = TCPSocket.open('localhost', @port)
+       @worker_arguments = @socket.read_string
+     end
+
+     def run
+       selector = NIO::Selector.new
+       monitor = selector.register(@socket, :r)
+       monitor.value = Proc.new { receive_message }
+       loop {
+         selector.select {|mon| mon.value.call}
+       }
+     end
+
+     def receive_message
+       command = @socket.read_int
+
+       case command
+       when CREATE_WORKER
+         create_worker
+       when KILL_WORKER
+         kill_worker
+       when KILL_WORKER_AND_WAIT
+         kill_worker_and_wait
+       end
+     end
+
+     def kill_worker_and_wait
+       if kill_worker
+         @socket.write_int(SUCCESSFULLY_KILLED)
+       else
+         @socket.write_int(UNSUCCESSFUL_KILLING)
+       end
+     end
+   end
+
+   # ===============================================================================================
+   # Master::Process
+   #
+   class Process < Base
+
+     def create_worker
+       if fork?
+         pid = ::Process.fork do
+           Worker::Process.new(@port).run
+         end
+       else
+         pid = ::Process.spawn("ruby #{@worker_arguments} worker.rb #{@port}")
+       end
+
+       # Detach the child from the master to avoid a zombie process
+       ::Process.detach(pid)
+     end
+
+     def kill_worker
+       worker_id = @socket.read_long
+       ::Process.kill('TERM', worker_id)
+     rescue
+       nil
+     end
+
+     def fork?
+       @can_fork ||= _fork?
+     end
+
+     def _fork?
+       return false if !::Process.respond_to?(:fork)
+
+       pid = ::Process.fork
+       exit unless pid # exit the child immediately
+       true
+     rescue NotImplementedError
+       false
+     end
+
+   end
+
+   # ===============================================================================================
+   # Master::Thread
+   #
+   class Thread < Base
+
+     def initialize
+       ::Thread.abort_on_exception = true
+
+       # For synchronous access to socket IO
+       $mutex_for_command  = Mutex.new
+       $mutex_for_iterator = Mutex.new
+
+       super
+     end
+
+     def create_worker
+       ::Thread.new do
+         Worker::Thread.new(@port).run
+       end
+     end
+
+     def kill_worker
+       worker_id = @socket.read_long
+
+       thread = ObjectSpace._id2ref(worker_id)
+       thread.kill
+     rescue
+       nil
+     end
+
+   end
+ end
+
+ # Create the proper master according to the worker type
+ Master.create.run
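The master relies on read_int, read_long, read_string, and write_int, which are not defined in this file; they come from this gem's IO extensions (lib/spark/ext/io.rb and lib/spark/ext/ip_socket.rb in the file list above). A rough sketch of the framing they presumably implement, assuming big-endian (network order) integers and length-prefixed strings in the style of Spark's other language workers; the real definitions may differ:

    # Hypothetical re-implementation, for illustration only
    class TCPSocket
      def read_int
        read(4).unpack1('l>')   # signed 32-bit, big-endian
      end

      def read_long
        read(8).unpack1('q>')   # signed 64-bit, big-endian
      end

      def read_string
        read(read_int)          # length-prefixed payload
      end

      def write_int(value)
        write([value].pack('l>'))
      end
    end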