ruby-spark 1.1.0.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (180)
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +252 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +6 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/comparison/prepare.sh +18 -0
  12. data/benchmark/comparison/python.py +156 -0
  13. data/benchmark/comparison/r.r +69 -0
  14. data/benchmark/comparison/ruby.rb +167 -0
  15. data/benchmark/comparison/run-all.sh +160 -0
  16. data/benchmark/comparison/scala.scala +181 -0
  17. data/benchmark/custom_marshal.rb +94 -0
  18. data/benchmark/digest.rb +150 -0
  19. data/benchmark/enumerator.rb +88 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/example/website_search.rb +83 -0
  27. data/ext/ruby_c/extconf.rb +3 -0
  28. data/ext/ruby_c/murmur.c +158 -0
  29. data/ext/ruby_c/murmur.h +9 -0
  30. data/ext/ruby_c/ruby-spark.c +18 -0
  31. data/ext/ruby_java/Digest.java +36 -0
  32. data/ext/ruby_java/Murmur2.java +98 -0
  33. data/ext/ruby_java/RubySparkExtService.java +28 -0
  34. data/ext/ruby_java/extconf.rb +3 -0
  35. data/ext/spark/build.sbt +73 -0
  36. data/ext/spark/project/plugins.sbt +9 -0
  37. data/ext/spark/sbt/sbt +34 -0
  38. data/ext/spark/src/main/scala/Exec.scala +91 -0
  39. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  40. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  41. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  42. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  43. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  44. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  46. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  47. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  48. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  49. data/ext/spark/src/main/scala/RubyRDD.scala +392 -0
  50. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  51. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  52. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  53. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  54. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  55. data/lib/ruby-spark.rb +1 -0
  56. data/lib/spark.rb +198 -0
  57. data/lib/spark/accumulator.rb +260 -0
  58. data/lib/spark/broadcast.rb +98 -0
  59. data/lib/spark/build.rb +43 -0
  60. data/lib/spark/cli.rb +169 -0
  61. data/lib/spark/command.rb +86 -0
  62. data/lib/spark/command/base.rb +158 -0
  63. data/lib/spark/command/basic.rb +345 -0
  64. data/lib/spark/command/pair.rb +124 -0
  65. data/lib/spark/command/sort.rb +51 -0
  66. data/lib/spark/command/statistic.rb +144 -0
  67. data/lib/spark/command_builder.rb +141 -0
  68. data/lib/spark/command_validator.rb +34 -0
  69. data/lib/spark/config.rb +238 -0
  70. data/lib/spark/constant.rb +14 -0
  71. data/lib/spark/context.rb +322 -0
  72. data/lib/spark/error.rb +50 -0
  73. data/lib/spark/ext/hash.rb +41 -0
  74. data/lib/spark/ext/integer.rb +25 -0
  75. data/lib/spark/ext/io.rb +67 -0
  76. data/lib/spark/ext/ip_socket.rb +29 -0
  77. data/lib/spark/ext/module.rb +58 -0
  78. data/lib/spark/ext/object.rb +24 -0
  79. data/lib/spark/ext/string.rb +24 -0
  80. data/lib/spark/helper.rb +10 -0
  81. data/lib/spark/helper/logger.rb +40 -0
  82. data/lib/spark/helper/parser.rb +85 -0
  83. data/lib/spark/helper/serialize.rb +71 -0
  84. data/lib/spark/helper/statistic.rb +93 -0
  85. data/lib/spark/helper/system.rb +42 -0
  86. data/lib/spark/java_bridge.rb +19 -0
  87. data/lib/spark/java_bridge/base.rb +203 -0
  88. data/lib/spark/java_bridge/jruby.rb +23 -0
  89. data/lib/spark/java_bridge/rjb.rb +41 -0
  90. data/lib/spark/logger.rb +76 -0
  91. data/lib/spark/mllib.rb +100 -0
  92. data/lib/spark/mllib/classification/common.rb +31 -0
  93. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  94. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  95. data/lib/spark/mllib/classification/svm.rb +135 -0
  96. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  97. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  98. data/lib/spark/mllib/matrix.rb +120 -0
  99. data/lib/spark/mllib/regression/common.rb +73 -0
  100. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  101. data/lib/spark/mllib/regression/lasso.rb +100 -0
  102. data/lib/spark/mllib/regression/linear.rb +124 -0
  103. data/lib/spark/mllib/regression/ridge.rb +97 -0
  104. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  105. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  106. data/lib/spark/mllib/stat/distribution.rb +12 -0
  107. data/lib/spark/mllib/vector.rb +185 -0
  108. data/lib/spark/rdd.rb +1377 -0
  109. data/lib/spark/sampler.rb +92 -0
  110. data/lib/spark/serializer.rb +79 -0
  111. data/lib/spark/serializer/auto_batched.rb +59 -0
  112. data/lib/spark/serializer/base.rb +63 -0
  113. data/lib/spark/serializer/batched.rb +84 -0
  114. data/lib/spark/serializer/cartesian.rb +13 -0
  115. data/lib/spark/serializer/compressed.rb +27 -0
  116. data/lib/spark/serializer/marshal.rb +17 -0
  117. data/lib/spark/serializer/message_pack.rb +23 -0
  118. data/lib/spark/serializer/oj.rb +23 -0
  119. data/lib/spark/serializer/pair.rb +41 -0
  120. data/lib/spark/serializer/text.rb +25 -0
  121. data/lib/spark/sort.rb +189 -0
  122. data/lib/spark/stat_counter.rb +125 -0
  123. data/lib/spark/storage_level.rb +39 -0
  124. data/lib/spark/version.rb +3 -0
  125. data/lib/spark/worker/master.rb +144 -0
  126. data/lib/spark/worker/spark_files.rb +15 -0
  127. data/lib/spark/worker/worker.rb +200 -0
  128. data/ruby-spark.gemspec +47 -0
  129. data/spec/generator.rb +37 -0
  130. data/spec/inputs/lorem_300.txt +316 -0
  131. data/spec/inputs/numbers/1.txt +50 -0
  132. data/spec/inputs/numbers/10.txt +50 -0
  133. data/spec/inputs/numbers/11.txt +50 -0
  134. data/spec/inputs/numbers/12.txt +50 -0
  135. data/spec/inputs/numbers/13.txt +50 -0
  136. data/spec/inputs/numbers/14.txt +50 -0
  137. data/spec/inputs/numbers/15.txt +50 -0
  138. data/spec/inputs/numbers/16.txt +50 -0
  139. data/spec/inputs/numbers/17.txt +50 -0
  140. data/spec/inputs/numbers/18.txt +50 -0
  141. data/spec/inputs/numbers/19.txt +50 -0
  142. data/spec/inputs/numbers/2.txt +50 -0
  143. data/spec/inputs/numbers/20.txt +50 -0
  144. data/spec/inputs/numbers/3.txt +50 -0
  145. data/spec/inputs/numbers/4.txt +50 -0
  146. data/spec/inputs/numbers/5.txt +50 -0
  147. data/spec/inputs/numbers/6.txt +50 -0
  148. data/spec/inputs/numbers/7.txt +50 -0
  149. data/spec/inputs/numbers/8.txt +50 -0
  150. data/spec/inputs/numbers/9.txt +50 -0
  151. data/spec/inputs/numbers_0_100.txt +101 -0
  152. data/spec/inputs/numbers_1_100.txt +100 -0
  153. data/spec/lib/collect_spec.rb +42 -0
  154. data/spec/lib/command_spec.rb +68 -0
  155. data/spec/lib/config_spec.rb +64 -0
  156. data/spec/lib/context_spec.rb +165 -0
  157. data/spec/lib/ext_spec.rb +72 -0
  158. data/spec/lib/external_apps_spec.rb +45 -0
  159. data/spec/lib/filter_spec.rb +80 -0
  160. data/spec/lib/flat_map_spec.rb +100 -0
  161. data/spec/lib/group_spec.rb +109 -0
  162. data/spec/lib/helper_spec.rb +19 -0
  163. data/spec/lib/key_spec.rb +41 -0
  164. data/spec/lib/manipulation_spec.rb +122 -0
  165. data/spec/lib/map_partitions_spec.rb +87 -0
  166. data/spec/lib/map_spec.rb +91 -0
  167. data/spec/lib/mllib/classification_spec.rb +54 -0
  168. data/spec/lib/mllib/clustering_spec.rb +35 -0
  169. data/spec/lib/mllib/matrix_spec.rb +32 -0
  170. data/spec/lib/mllib/regression_spec.rb +116 -0
  171. data/spec/lib/mllib/vector_spec.rb +77 -0
  172. data/spec/lib/reduce_by_key_spec.rb +118 -0
  173. data/spec/lib/reduce_spec.rb +131 -0
  174. data/spec/lib/sample_spec.rb +46 -0
  175. data/spec/lib/serializer_spec.rb +88 -0
  176. data/spec/lib/sort_spec.rb +58 -0
  177. data/spec/lib/statistic_spec.rb +170 -0
  178. data/spec/lib/whole_text_files_spec.rb +33 -0
  179. data/spec/spec_helper.rb +38 -0
  180. metadata +389 -0
data/benchmark/enumerator.rb
@@ -0,0 +1,88 @@
+ require "benchmark"
+
+ class Enumerator
+   def defer(&blk)
+     self.class.new do |y|
+       each do |*input|
+         blk.call(y, *input)
+       end
+     end
+   end
+ end
+
+ ARRAY_SIZE = 50_000_000
+
+ def type_yield
+   return to_enum(__callee__) unless block_given?
+
+   ARRAY_SIZE.times { |i|
+     yield i
+   }
+ end
+
+ def yield_map_x2(enum)
+   return to_enum(__callee__, enum) unless block_given?
+
+   enum.each do |item|
+     yield item*2
+   end
+ end
+
+ def type_enumerator_new
+   Enumerator.new do |e|
+     ARRAY_SIZE.times { |i|
+       e << i
+     }
+   end
+ end
+
+ def enumerator_new_map_x2(enum)
+   Enumerator.new do |e|
+     enum.each do |item|
+       e << item*2
+     end
+   end
+ end
+
+ def enumerator_defer_x2(enum)
+   enum.defer do |out, inp|
+     out << inp*2
+   end
+ end
+
+ Benchmark.bm(26) do |x|
+   x.report("yield max") do
+     type_yield.max
+   end
+
+   x.report("yield sum") do
+     type_yield.reduce(:+)
+   end
+
+   x.report("yield map x*2 sum") do
+     yield_map_x2(type_yield).reduce(:+)
+   end
+
+   x.report("yield defer map x*2 sum") do
+     enumerator_defer_x2(type_yield).reduce(:+)
+   end
+
+   x.report("-----"){}
+
+   x.report("Enum.new max") do
+     type_enumerator_new.max
+   end
+
+   x.report("Enum.new sum") do
+     type_enumerator_new.reduce(:+)
+   end
+
+   x.report("Enum.new map x*2 sum") do
+     enumerator_new_map_x2(type_enumerator_new).reduce(:+)
+   end
+
+   x.report("Enum.new defer map x*2 sum") do
+     enumerator_defer_x2(type_enumerator_new).reduce(:+)
+   end
+
+ end
data/benchmark/serializer.rb
@@ -0,0 +1,82 @@
+ require "benchmark"
+ require "yaml"
+ require "msgpack"
+ require "oj"
+ # require "thrift"
+
+ puts "Simple"
+
+ data = (0..100000).to_a
+
+ Benchmark.bmbm do |x|
+   x.report("YAML") do
+     serialized = YAML.dump(data)
+     deserialized = YAML.load(serialized)
+     puts "Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   x.report("Marshal") do
+     serialized = Marshal.dump(data)
+     deserialized = Marshal.load(serialized)
+     puts "Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   x.report("MessagePack") do
+     serialized = MessagePack.dump(data)
+     deserialized = MessagePack.load(serialized)
+     puts "Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   x.report("Oj") do
+     serialized = Oj.dump(data)
+     deserialized = Oj.load(serialized)
+     puts "Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   # x.report("Thrift") do
+   #   serializer = Thrift::Serializer.new
+   #   deserializer = Thrift::Deserializer.new
+
+   #   serialized = serializer.serialize(data)
+   # end
+ end
+
+ puts ""
+ puts "More complex"
+
+ data = Array.new(10000000) {
+   [rand(97..122).chr, rand(10000000)]
+ }
+
+ Benchmark.bm do |x|
+   # Takes too long
+   # x.report("YAML") do
+   #   serialized = YAML.dump(data)
+   #   YAML.load(serialized)
+   # end
+
+   x.report("Marshal") do
+     serialized = Marshal.dump(data)
+     deserialized = Marshal.load(serialized)
+     puts " Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   x.report("MessagePack") do
+     serialized = MessagePack.dump(data)
+     deserialized = MessagePack.load(serialized)
+     puts " Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   x.report("Oj") do
+     serialized = Oj.dump(data)
+     deserialized = Oj.load(serialized)
+     puts " Size: #{serialized.size}, Equal: #{deserialized == data}"
+   end
+
+   # x.report("Thrift") do
+   #   serializer = Thrift::Serializer.new
+   #   deserializer = Thrift::Deserializer.new
+
+   #   serialized = serializer.serialize(data)
+   # end
+ end
data/benchmark/sort.rb
@@ -0,0 +1,43 @@
+ require "benchmark"
+
+ array = []
+ 1000.times {
+   array << {:bar => rand(1000)}
+ }
+
+ n = 500
+ Benchmark.bm(20) do |x|
+   x.report("sort")               { n.times { array.sort{ |a,b| b[:bar] <=> a[:bar] } } }
+   x.report("sort reverse")       { n.times { array.sort{ |a,b| a[:bar] <=> b[:bar] }.reverse } }
+   x.report("sort_by -a[:bar]")   { n.times { array.sort_by{ |a| -a[:bar] } } }
+   x.report("sort_by a[:bar]*-1") { n.times { array.sort_by{ |a| a[:bar]*-1 } } }
+   x.report("sort_by.reverse")    { n.times { array.sort_by{ |a| a[:bar] }.reverse } }
+ end
+
+
+ array = Array.new(10000) { Array.new(rand(1..10)){(97+rand(26)).chr}.join }
+
+ Benchmark.bm(20) do |x|
+   x.report("sort asc")         { n.times { array.sort } }
+   x.report("sort asc block")   { n.times { array.sort{|a,b| a <=> b} } }
+   x.report("sort desc")        { n.times { array.sort{|a,b| b <=> a} } }
+   x.report("sort asc reverse") { n.times { array.sort.reverse } }
+ end
+
+
+ key_value = Struct.new(:key, :value) do
+   def <=>(other)
+     key <=> other.key
+   end
+ end
+
+ count = 10000
+ item_range = 1000000
+ array1 = Array.new(count) { [rand(item_range), rand(item_range)] }
+ array2 = Array.new(count) { key_value.new rand(item_range), rand(item_range) }
+
+ Benchmark.bm(20) do |x|
+   x.report("sort_by")     { n.times { array1.sort_by {|a| a[0]} } }
+   x.report("sort struct") { n.times { array2.sort } }
+ end
+
data/benchmark/sort2.rb
@@ -0,0 +1,164 @@
+ require "benchmark"
+ require "algorithms"
+
+ NUMBER_OF_SORTING = 1
+ NUMBER_OF_ARRAY = 10
+ WORDS_IN_ARRAY = 100000
+ MAX_WORD_SIZE = 10
+ EVAL_N_VALUES = 10
+
+ puts "NUMBER_OF_SORTING: #{NUMBER_OF_SORTING}"
+ puts "NUMBER_OF_ARRAY: #{NUMBER_OF_ARRAY}"
+ puts "WORDS_IN_ARRAY: #{WORDS_IN_ARRAY}"
+ puts "MAX_WORD_SIZE: #{MAX_WORD_SIZE}"
+ puts "EVAL_N_VALUES: #{EVAL_N_VALUES}"
+
+ def words
+   Array.new(WORDS_IN_ARRAY) { word }
+ end
+
+ def word
+   Array.new(rand(1..MAX_WORD_SIZE)){(97+rand(26)).chr}.join
+ end
+
+ @array = Array.new(NUMBER_OF_ARRAY) { words.sort }
+
+
+ # =================================================================================================
+ # Sort1
+
+ # Returns a new (not yet evaluated) enumerator
+ def sort1(data)
+   return to_enum(__callee__, data) unless block_given?
+
+   heap = []
+
+   # Initialize the heap with the first items
+   # and keep the enumerators themselves so .next can be called
+   data.each do |a|
+     heap << [a.next, a]
+   end
+
+   while data.any?
+     begin
+       # Sort the array by value
+       heap.sort_by!{|(item,_)| item}
+       # Take the value and its enumerator
+       item, enum = heap.shift
+       # The value goes to the result
+       yield item
+       # Replace the removed item with the next one from the same list
+       heap << [enum.next, enum]
+     rescue StopIteration
+       # The enumerator is exhausted
+       data.delete(enum)
+     end
+   end
+ end
+
+
+ # =================================================================================================
+ # Sort1_2
+
+ # Returns a new (not yet evaluated) enumerator
+ def sort1_2(data)
+   return to_enum(__callee__, data) unless block_given?
+
+   heap = []
+   enums = []
+
+   # Initialize the heap with the first items
+   # and keep the enumerators themselves so .next can be called
+   data.each do |a|
+     EVAL_N_VALUES.times {
+       begin
+         heap << [a.next, a]
+       rescue StopIteration
+       end
+     }
+   end
+
+   while data.any? || heap.any?
+     # Sort the array by value
+     heap.sort_by!{|(item,_)| item}
+
+     # At least EVAL_N_VALUES items can safely be taken
+     EVAL_N_VALUES.times {
+       break if heap.empty?
+
+       # Take the value and its enumerator
+       item, enum = heap.shift
+       # The value goes to the result
+       yield item
+
+       enums << enum
+     }
+
+     while (enum = enums.shift)
+       begin
+         heap << [enum.next, enum]
+       rescue StopIteration
+         data.delete(enum)
+         enums.delete(enum)
+       end
+     end
+
+   end
+ end
+
+
+ # =================================================================================================
+ # Sort 2
+
+ def sort2(data)
+   return to_enum(__callee__, data) unless block_given?
+
+   heap = Containers::Heap.new
+
+   data.each do |enum|
+     item = enum.next
+     heap.push(item, [item, enum])
+   end
+
+   while data.any?
+     begin
+       item, enum = heap.pop
+       yield item
+
+       item = enum.next
+       heap.push(item, [item, enum])
+     rescue StopIteration
+       data.delete(enum)
+     end
+   end
+ end
+
+
+ # =================================================================================================
+ # Benchmark
+
+ Benchmark.bm(10) do |x|
+   x.report("sort") do
+     NUMBER_OF_SORTING.times {
+       @result = @array.flatten.sort
+     }
+   end
+
+   x.report("sort 1") do
+     NUMBER_OF_SORTING.times {
+       raise "Bad sorting" if @result != sort1(@array.map(&:each)).to_a
+     }
+   end
+
+   x.report("sort 1_2") do
+     NUMBER_OF_SORTING.times {
+       raise "Bad sorting" if @result != sort1_2(@array.map(&:each)).to_a
+     }
+   end
+
+   # x.report("sort 2") do
+   #   NUMBER_OF_SORTING.times {
+   #     raise "Bad sorting" if @result != sort2(@array.map(&:each)).to_a
+   #   }
+   # end
+ end
data/benchmark/take.rb
@@ -0,0 +1,28 @@
+ require "benchmark"
+
+ SIZE = 100_000_000
+
+ @array1 = (0..SIZE).to_a;
+ @array2 = (0..SIZE).to_a;
+ @array3 = (0..SIZE).to_a;
+
+ TAKE = 100_000
+
+ Benchmark.bm(15) do |x|
+   # Fastest
+   x.report("take"){
+     a = @array1.take(TAKE)
+   }
+
+   # Slowest and takes the most memory
+   x.report("reverse drop"){
+     @array2.reverse!
+     @array2.drop(@array2.size - TAKE)
+     @array2.reverse!
+   }
+
+   # Least memory
+   x.report("slice!"){
+     a = @array3.slice!(0, TAKE)
+   }
+ end
data/bin/ruby-spark
@@ -0,0 +1,8 @@
+ #!/usr/bin/env ruby
+
+ lib = File.expand_path(File.dirname(__FILE__) + '/../lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib) && !$LOAD_PATH.include?(lib)
+
+ require 'ruby-spark'
+
+ Spark::CLI.new.run
data/example/pi.rb
@@ -0,0 +1,28 @@
+ #!/usr/bin/env ruby
+
+ lib = File.expand_path(File.dirname(__FILE__) + '/../lib')
+ $LOAD_PATH.unshift(lib) if File.directory?(lib) && !$LOAD_PATH.include?(lib)
+
+ require 'ruby-spark'
+
+ Spark.logger.disable
+ Spark.start
+
+ slices = 3
+ n = 100000 * slices
+
+ def map(_)
+   x = rand * 2 - 1
+   y = rand * 2 - 1
+
+   if x**2 + y**2 < 1
+     return 1
+   else
+     return 0
+   end
+ end
+
+ rdd = Spark.context.parallelize(1..n, slices)
+ rdd = rdd.map(method(:map))
+
+ puts 'Pi is roughly %f' % (4.0 * rdd.sum / n)