ruby-spark 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (176) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +37 -0
  3. data/Gemfile +47 -0
  4. data/Guardfile +5 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +185 -0
  7. data/Rakefile +35 -0
  8. data/TODO.md +7 -0
  9. data/benchmark/aggregate.rb +33 -0
  10. data/benchmark/bisect.rb +88 -0
  11. data/benchmark/custom_marshal.rb +94 -0
  12. data/benchmark/digest.rb +150 -0
  13. data/benchmark/enumerator.rb +88 -0
  14. data/benchmark/performance/prepare.sh +18 -0
  15. data/benchmark/performance/python.py +156 -0
  16. data/benchmark/performance/r.r +69 -0
  17. data/benchmark/performance/ruby.rb +167 -0
  18. data/benchmark/performance/run-all.sh +160 -0
  19. data/benchmark/performance/scala.scala +181 -0
  20. data/benchmark/serializer.rb +82 -0
  21. data/benchmark/sort.rb +43 -0
  22. data/benchmark/sort2.rb +164 -0
  23. data/benchmark/take.rb +28 -0
  24. data/bin/ruby-spark +8 -0
  25. data/example/pi.rb +28 -0
  26. data/ext/ruby_c/extconf.rb +3 -0
  27. data/ext/ruby_c/murmur.c +158 -0
  28. data/ext/ruby_c/murmur.h +9 -0
  29. data/ext/ruby_c/ruby-spark.c +18 -0
  30. data/ext/ruby_java/Digest.java +36 -0
  31. data/ext/ruby_java/Murmur2.java +98 -0
  32. data/ext/ruby_java/RubySparkExtService.java +28 -0
  33. data/ext/ruby_java/extconf.rb +3 -0
  34. data/ext/spark/build.sbt +73 -0
  35. data/ext/spark/project/plugins.sbt +9 -0
  36. data/ext/spark/sbt/sbt +34 -0
  37. data/ext/spark/src/main/scala/Exec.scala +91 -0
  38. data/ext/spark/src/main/scala/MLLibAPI.scala +4 -0
  39. data/ext/spark/src/main/scala/Marshal.scala +52 -0
  40. data/ext/spark/src/main/scala/MarshalDump.scala +113 -0
  41. data/ext/spark/src/main/scala/MarshalLoad.scala +220 -0
  42. data/ext/spark/src/main/scala/RubyAccumulatorParam.scala +69 -0
  43. data/ext/spark/src/main/scala/RubyBroadcast.scala +13 -0
  44. data/ext/spark/src/main/scala/RubyConstant.scala +13 -0
  45. data/ext/spark/src/main/scala/RubyMLLibAPI.scala +55 -0
  46. data/ext/spark/src/main/scala/RubyMLLibUtilAPI.scala +21 -0
  47. data/ext/spark/src/main/scala/RubyPage.scala +34 -0
  48. data/ext/spark/src/main/scala/RubyRDD.scala +364 -0
  49. data/ext/spark/src/main/scala/RubySerializer.scala +14 -0
  50. data/ext/spark/src/main/scala/RubyTab.scala +11 -0
  51. data/ext/spark/src/main/scala/RubyUtils.scala +15 -0
  52. data/ext/spark/src/main/scala/RubyWorker.scala +257 -0
  53. data/ext/spark/src/test/scala/MarshalSpec.scala +84 -0
  54. data/lib/ruby-spark.rb +1 -0
  55. data/lib/spark.rb +198 -0
  56. data/lib/spark/accumulator.rb +260 -0
  57. data/lib/spark/broadcast.rb +98 -0
  58. data/lib/spark/build.rb +43 -0
  59. data/lib/spark/cli.rb +169 -0
  60. data/lib/spark/command.rb +86 -0
  61. data/lib/spark/command/base.rb +154 -0
  62. data/lib/spark/command/basic.rb +345 -0
  63. data/lib/spark/command/pair.rb +124 -0
  64. data/lib/spark/command/sort.rb +51 -0
  65. data/lib/spark/command/statistic.rb +144 -0
  66. data/lib/spark/command_builder.rb +141 -0
  67. data/lib/spark/command_validator.rb +34 -0
  68. data/lib/spark/config.rb +244 -0
  69. data/lib/spark/constant.rb +14 -0
  70. data/lib/spark/context.rb +304 -0
  71. data/lib/spark/error.rb +50 -0
  72. data/lib/spark/ext/hash.rb +41 -0
  73. data/lib/spark/ext/integer.rb +25 -0
  74. data/lib/spark/ext/io.rb +57 -0
  75. data/lib/spark/ext/ip_socket.rb +29 -0
  76. data/lib/spark/ext/module.rb +58 -0
  77. data/lib/spark/ext/object.rb +24 -0
  78. data/lib/spark/ext/string.rb +24 -0
  79. data/lib/spark/helper.rb +10 -0
  80. data/lib/spark/helper/logger.rb +40 -0
  81. data/lib/spark/helper/parser.rb +85 -0
  82. data/lib/spark/helper/serialize.rb +71 -0
  83. data/lib/spark/helper/statistic.rb +93 -0
  84. data/lib/spark/helper/system.rb +42 -0
  85. data/lib/spark/java_bridge.rb +19 -0
  86. data/lib/spark/java_bridge/base.rb +203 -0
  87. data/lib/spark/java_bridge/jruby.rb +23 -0
  88. data/lib/spark/java_bridge/rjb.rb +41 -0
  89. data/lib/spark/logger.rb +76 -0
  90. data/lib/spark/mllib.rb +100 -0
  91. data/lib/spark/mllib/classification/common.rb +31 -0
  92. data/lib/spark/mllib/classification/logistic_regression.rb +223 -0
  93. data/lib/spark/mllib/classification/naive_bayes.rb +97 -0
  94. data/lib/spark/mllib/classification/svm.rb +135 -0
  95. data/lib/spark/mllib/clustering/gaussian_mixture.rb +82 -0
  96. data/lib/spark/mllib/clustering/kmeans.rb +118 -0
  97. data/lib/spark/mllib/matrix.rb +120 -0
  98. data/lib/spark/mllib/regression/common.rb +73 -0
  99. data/lib/spark/mllib/regression/labeled_point.rb +41 -0
  100. data/lib/spark/mllib/regression/lasso.rb +100 -0
  101. data/lib/spark/mllib/regression/linear.rb +124 -0
  102. data/lib/spark/mllib/regression/ridge.rb +97 -0
  103. data/lib/spark/mllib/ruby_matrix/matrix_adapter.rb +53 -0
  104. data/lib/spark/mllib/ruby_matrix/vector_adapter.rb +57 -0
  105. data/lib/spark/mllib/stat/distribution.rb +12 -0
  106. data/lib/spark/mllib/vector.rb +185 -0
  107. data/lib/spark/rdd.rb +1328 -0
  108. data/lib/spark/sampler.rb +92 -0
  109. data/lib/spark/serializer.rb +24 -0
  110. data/lib/spark/serializer/base.rb +170 -0
  111. data/lib/spark/serializer/cartesian.rb +37 -0
  112. data/lib/spark/serializer/marshal.rb +19 -0
  113. data/lib/spark/serializer/message_pack.rb +25 -0
  114. data/lib/spark/serializer/oj.rb +25 -0
  115. data/lib/spark/serializer/pair.rb +27 -0
  116. data/lib/spark/serializer/utf8.rb +25 -0
  117. data/lib/spark/sort.rb +189 -0
  118. data/lib/spark/stat_counter.rb +125 -0
  119. data/lib/spark/storage_level.rb +39 -0
  120. data/lib/spark/version.rb +3 -0
  121. data/lib/spark/worker/master.rb +144 -0
  122. data/lib/spark/worker/spark_files.rb +15 -0
  123. data/lib/spark/worker/worker.rb +197 -0
  124. data/ruby-spark.gemspec +36 -0
  125. data/spec/generator.rb +37 -0
  126. data/spec/inputs/lorem_300.txt +316 -0
  127. data/spec/inputs/numbers/1.txt +50 -0
  128. data/spec/inputs/numbers/10.txt +50 -0
  129. data/spec/inputs/numbers/11.txt +50 -0
  130. data/spec/inputs/numbers/12.txt +50 -0
  131. data/spec/inputs/numbers/13.txt +50 -0
  132. data/spec/inputs/numbers/14.txt +50 -0
  133. data/spec/inputs/numbers/15.txt +50 -0
  134. data/spec/inputs/numbers/16.txt +50 -0
  135. data/spec/inputs/numbers/17.txt +50 -0
  136. data/spec/inputs/numbers/18.txt +50 -0
  137. data/spec/inputs/numbers/19.txt +50 -0
  138. data/spec/inputs/numbers/2.txt +50 -0
  139. data/spec/inputs/numbers/20.txt +50 -0
  140. data/spec/inputs/numbers/3.txt +50 -0
  141. data/spec/inputs/numbers/4.txt +50 -0
  142. data/spec/inputs/numbers/5.txt +50 -0
  143. data/spec/inputs/numbers/6.txt +50 -0
  144. data/spec/inputs/numbers/7.txt +50 -0
  145. data/spec/inputs/numbers/8.txt +50 -0
  146. data/spec/inputs/numbers/9.txt +50 -0
  147. data/spec/inputs/numbers_0_100.txt +101 -0
  148. data/spec/inputs/numbers_1_100.txt +100 -0
  149. data/spec/lib/collect_spec.rb +42 -0
  150. data/spec/lib/command_spec.rb +68 -0
  151. data/spec/lib/config_spec.rb +64 -0
  152. data/spec/lib/context_spec.rb +163 -0
  153. data/spec/lib/ext_spec.rb +72 -0
  154. data/spec/lib/external_apps_spec.rb +45 -0
  155. data/spec/lib/filter_spec.rb +80 -0
  156. data/spec/lib/flat_map_spec.rb +100 -0
  157. data/spec/lib/group_spec.rb +109 -0
  158. data/spec/lib/helper_spec.rb +19 -0
  159. data/spec/lib/key_spec.rb +41 -0
  160. data/spec/lib/manipulation_spec.rb +114 -0
  161. data/spec/lib/map_partitions_spec.rb +87 -0
  162. data/spec/lib/map_spec.rb +91 -0
  163. data/spec/lib/mllib/classification_spec.rb +54 -0
  164. data/spec/lib/mllib/clustering_spec.rb +35 -0
  165. data/spec/lib/mllib/matrix_spec.rb +32 -0
  166. data/spec/lib/mllib/regression_spec.rb +116 -0
  167. data/spec/lib/mllib/vector_spec.rb +77 -0
  168. data/spec/lib/reduce_by_key_spec.rb +118 -0
  169. data/spec/lib/reduce_spec.rb +131 -0
  170. data/spec/lib/sample_spec.rb +46 -0
  171. data/spec/lib/serializer_spec.rb +13 -0
  172. data/spec/lib/sort_spec.rb +58 -0
  173. data/spec/lib/statistic_spec.rb +168 -0
  174. data/spec/lib/whole_text_files_spec.rb +33 -0
  175. data/spec/spec_helper.rb +39 -0
  176. metadata +301 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 13c074c360edf1875745bf728b274f5bfb6e0d0a
4
+ data.tar.gz: a59ba09fac91e2e4c0a58aab99dd60ffb2ec5e3f
5
+ SHA512:
6
+ metadata.gz: 2e667dabd55b05100831cf3d0e58044941ce965965de1d6dce9a4e8fa5be843bfac1d57d5b6674ea056b9e9395650b84e3e77a86f0a256a1b1c4bfd8ca257340
7
+ data.tar.gz: 690b88857fa4f841c8c0a5940af75d926285f42a81ff6753337ccc926473a4a3ce510d1c7e9b07ce2ebfd2559c05dd77432cf1dba132f15629669fbf8dfc51b6
data/.gitignore ADDED
@@ -0,0 +1,37 @@
1
+ /.gemtags
2
+ /.tags
3
+ /java/spark.jar
4
+ .jbundler
5
+ target/*
6
+ *.class
7
+ *.jar
8
+ pom.xml
9
+ vendor/*
10
+ *.gem
11
+ *.rbc
12
+ .bundle
13
+ .config
14
+ .yardoc
15
+ Gemfile.lock
16
+ InstalledFiles
17
+ _yardoc
18
+ coverage
19
+ doc/
20
+ lib/bundler/man
21
+ pkg
22
+ rdoc
23
+ spec/reports
24
+ test/tmp
25
+ test/version_tmp
26
+ tmp
27
+ *.bundle
28
+ *.so
29
+ *.o
30
+ *.a
31
+ mkmf.log
32
+ ext/spark/target/*
33
+ ext/spark/project/target/*
34
+ ext/spark/project/project/target/*
35
+ wiki/*
36
+ /benchmark/performance/spark/*
37
+ /benchmark/performance/rspark/*
data/Gemfile ADDED
@@ -0,0 +1,47 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem 'sourcify', '0.6.0.rc4'
6
+ gem 'method_source'
7
+ gem 'commander'
8
+ gem 'pry'
9
+ gem 'nio4r'
10
+ gem 'distribution'
11
+
12
+ platform :mri do
13
+ gem 'rjb'
14
+ gem 'msgpack'
15
+ gem 'oj'
16
+ gem 'narray'
17
+ end
18
+
19
+ platform :jruby do
20
+ gem 'msgpack-jruby', require: 'msgpack'
21
+
22
+ # NameError: no constructorfor arguments (org.jruby.RubyFixnum,org.jruby.RubyFixnum,org.jruby.RubyFixnum,org.jruby.RubyFixnum,org.jruby.RubyFixnum,org.jruby.RubyFixnum,org.joda.time.chrono.GJChronology) on Java::OrgJodaTime::DateTime
23
+ # gem 'mdarray'
24
+ end
25
+
26
+ group :stats do
27
+ # gem 'nmatrix'
28
+ # gem 'statsample'
29
+ # gem 'statsample-glm'
30
+ # gem 'statsample-timeseries'
31
+ # gem 'statistics2'
32
+ # gem 'statsample-optimization' # libgsl0-dev
33
+ # gem 'narray'
34
+ # gem 'gsl-nmatrix'
35
+ end
36
+
37
+ group :development do
38
+ gem 'benchmark-ips'
39
+ gem 'rspec'
40
+ gem 'rake-compiler'
41
+ gem 'guard'
42
+ gem 'guard-rspec'
43
+ end
44
+
45
+ group :test do
46
+ gem 'simplecov', require: false
47
+ end
data/Guardfile ADDED
@@ -0,0 +1,5 @@
1
+ guard :rspec, cmd: 'rspec' do
2
+ watch(%r{^spec/.+_spec\.rb$})
3
+ watch(%r{^lib/(.+)\.rb$}) { |m| "spec/lib/#{m[1]}_spec.rb" }
4
+ watch('spec/spec_helper.rb') { "spec" }
5
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Ondřej Moravčík
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,185 @@
1
+ # Ruby-Spark
2
+
3
+ Apache Spark™ is a fast and general engine for large-scale data processing.
4
+
5
+ This Gem allows you to use Spark functionality in Ruby.
6
+
7
+ > Word count in Spark's Ruby API
8
+
9
+ ```ruby
10
+ file = spark.text_file("hdfs://...")
11
+
12
+ file.flat_map(:split)
13
+ .map(lambda{|word| [word, 1]})
14
+ .reduce_by_key(lambda{|a, b| a+b})
15
+ ```
16
+
17
+ - [Apache Spark](http://spark.apache.org)
18
+ - [Wiki](https://github.com/ondra-m/ruby-spark/wiki)
19
+ - [Ruby-doc](http://www.rubydoc.info/github/ondra-m/ruby-spark)
20
+
21
+ ## Installation
22
+
23
+ Add this line to your application's Gemfile:
24
+
25
+ ```ruby
26
+ gem 'ruby-spark'
27
+ ```
28
+
29
+ And then execute:
30
+
31
+ ```
32
+ $ bundle
33
+ ```
34
+
35
+ Or install it yourself as:
36
+
37
+ ```
38
+ $ gem install ruby-spark
39
+ ```
40
+
41
+ ### Install Apache Spark
42
+
43
+ To install the latest supported Spark, first compile the native extension:
44
+
45
+ ```
46
+ $ rake compile
47
+ ```
48
+ Then build Spark, [SBT](ext/spark/build.sbt) is used for compiling.
49
+ ```
50
+ $ ruby-spark build
51
+ ```
52
+
53
+ ## Usage
54
+
55
+ You can use Ruby Spark via interactive shell
56
+
57
+ ```
58
+ $ ruby-spark pry
59
+ ```
60
+
61
+ Or on existing project
62
+
63
+ ```ruby
64
+ require 'ruby-spark'
65
+ Spark.start
66
+
67
+ Spark.sc # => context
68
+ ```
69
+
70
+ If you want to configure Spark first, see [configuration](https://github.com/ondra-m/ruby-spark/wiki/Configuration) for more details.
71
+
72
+ ```ruby
73
+ require 'ruby-spark'
74
+
75
+ Spark.load_lib(spark_home)
76
+ Spark.config do
77
+ set_app_name "RubySpark"
78
+ set 'spark.ruby.batch_size', 100
79
+ set 'spark.ruby.serializer', 'oj'
80
+ end
81
+ Spark.start
82
+
83
+ Spark.sc # => context
84
+ ```
85
+
86
+ ## Uploading data
87
+
88
+ Single file
89
+
90
+ ```ruby
91
+ $sc.text_file(FILE, workers_num, custom_options)
92
+ ```
93
+
94
+ All files in a directory
95
+
96
+ ```ruby
97
+ $sc.whole_text_files(DIRECTORY, workers_num, custom_options)
98
+ ```
99
+
100
+ Direct
101
+
102
+ ```ruby
103
+ $sc.parallelize([1,2,3,4,5], workers_num, custom_options)
104
+ $sc.parallelize(1..5, workers_num, custom_options)
105
+ ```
106
+
107
+ ### Options
108
+
109
+ <dl>
110
+ <dt>workers_num</dt>
111
+ <dd>
112
+ Minimum count of workers computing this task.<br>
113
+ <i>(This value can be overwritten by Spark)</i>
114
+ </dd>
115
+
116
+ <dt>custom_options</dt>
117
+ <dd>
118
+ <b>serializer</b>: name of the serializer used for this RDD<br>
119
+ <b>batch_size</b>: see configuration<br>
120
+ <br>
121
+ <i>(Available only for parallelize)</i><br>
122
+ <b>use</b>: <i>direct (upload directly to Java)</i>, <i>file (upload through a file)</i>
123
+ </dd>
124
+ </dl>
125
+
126
+
127
+ ## Examples
128
+
129
+ Sum of numbers
130
+
131
+ ```ruby
132
+ $sc.parallelize(0..10).sum
133
+ # => 55
134
+ ```
135
+
136
+ Word count using methods
137
+
138
+ ```ruby
139
+ rdd = $sc.text_file(PATH)
140
+
141
+ rdd = rdd.flat_map(lambda{|line| line.split})
142
+ .map(lambda{|word| [word, 1]})
143
+ .reduce_by_key(lambda{|a, b| a+b})
144
+
145
+ rdd.collect_as_hash
146
+ ```
147
+
148
+ Estimating pi with a custom serializer
149
+
150
+ ```ruby
151
+ slices = 3
152
+ n = 100000 * slices
153
+
154
+ def map(_)
155
+ x = rand * 2 - 1
156
+ y = rand * 2 - 1
157
+
158
+ if x**2 + y**2 < 1
159
+ return 1
160
+ else
161
+ return 0
162
+ end
163
+ end
164
+
165
+ rdd = Spark.context.parallelize(1..n, slices, serializer: 'oj')
166
+ rdd = rdd.map(method(:map))
167
+
168
+ puts 'Pi is roughly %f' % (4.0 * rdd.sum / n)
169
+ ```
170
+
171
+ Linear regression
172
+
173
+ ```ruby
174
+ Spark::Mllib.import
175
+
176
+ data = [
177
+ LabeledPoint.new(0.0, [0.0]),
178
+ LabeledPoint.new(1.0, [1.0]),
179
+ LabeledPoint.new(3.0, [2.0]),
180
+ LabeledPoint.new(2.0, [3.0])
181
+ ]
182
+ lrm = LinearRegressionWithSGD.train($sc.parallelize(data), initial_weights: [1.0])
183
+
184
+ lrm.predict([0.0])
185
+ ```
data/Rakefile ADDED
@@ -0,0 +1,35 @@
1
+ #-*- mode: ruby -*-
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new
7
+
8
+ task default: :spec
9
+ task test: :spec
10
+
11
+ def java?
12
+ RUBY_PLATFORM =~ /java/
13
+ end
14
+
15
+ if java?
16
+ require "rake/javaextensiontask"
17
+ Rake::JavaExtensionTask.new("ruby_java") do |ext|
18
+ ext.name = "ruby_spark_ext"
19
+ end
20
+ else
21
+ require "rake/extensiontask"
22
+ Rake::ExtensionTask.new("ruby_c") do |ext|
23
+ ext.name = "ruby_spark_ext"
24
+ end
25
+ end
26
+
27
+
28
+ task :clean do
29
+ Dir['lib/*.{jar,o,so}'].each do |path|
30
+ puts "Deleting #{path} ..."
31
+ File.delete(path)
32
+ end
33
+ FileUtils.rm_rf('./pkg')
34
+ FileUtils.rm_rf('./tmp')
35
+ end
data/TODO.md ADDED
@@ -0,0 +1,7 @@
1
+ - add compress
2
+ - refactor JavaBridge: to_java, from_java
3
+ - add Streaming
4
+ - add SQL
5
+ - autobatch serializer
6
+ - worker information (time, memory, ...)
7
+ - encoding parameter to context.text_file
@@ -0,0 +1,33 @@
1
+ require 'benchmark'
2
+ require 'benchmark/ips'
3
+
4
+ data = 0..1_000_000
5
+ zero_value = rand(100_000)
6
+ function = Proc.new{|sum, n| sum+n}
7
+
8
+ Benchmark.ips do |r|
9
+ r.report('each') do
10
+ sum = zero_value
11
+ data.each do |n|
12
+ sum += n
13
+ end
14
+ end
15
+
16
+ r.report('reduce') do
17
+ data.reduce(zero_value){|sum, n| sum+n}
18
+ end
19
+
20
+ r.report('each with function') do
21
+ sum = zero_value
22
+ data.each do |n|
23
+ sum = function.call(sum, n)
24
+ end
25
+ end
26
+
27
+ r.report('reduce with function') do
28
+ data.reduce(zero_value, &function)
29
+ end
30
+
31
+ r.compare!
32
+ end
33
+
@@ -0,0 +1,88 @@
1
+ require "benchmark"
2
+
3
+ def bisect_left1(a, x, opts={})
4
+ return nil if a.nil?
5
+ return 0 if a.empty?
6
+
7
+ lo = (opts[:lo] || opts[:low]).to_i
8
+ hi = opts[:hi] || opts[:high] || a.length
9
+
10
+ while lo < hi
11
+ mid = (lo + hi) / 2
12
+ v = a[mid]
13
+ if v < x
14
+ lo = mid + 1
15
+ else
16
+ hi = mid
17
+ end
18
+ end
19
+ return lo
20
+ end
21
+
22
+ def bisect_left2(list, item)
23
+ count = 0
24
+ list.each{|i|
25
+ return count if i >= item
26
+ count += 1
27
+ }
28
+ nil
29
+ end
30
+
31
+ def bisect_left3(list, item, lo = 0, hi = list.size)
32
+ while lo < hi
33
+ i = (lo + hi - 1) >> 1
34
+
35
+ if 0 <= (list[i] <=> item)
36
+ hi = i
37
+ else
38
+ lo = i + 1
39
+ end
40
+ end
41
+ return hi
42
+ end
43
+
44
+ array = Array.new(1000000) { rand(0..1000000) };
45
+ to_find = Array.new(500) { rand(0..10000) };
46
+
47
+ Benchmark.bm(20) do |x|
48
+ x.report("bisect_left1") do
49
+ to_find.each do |item|
50
+ bisect_left1(array, item)
51
+ end
52
+ end
53
+
54
+ x.report("bisect_left2") do
55
+ to_find.each do |item|
56
+ bisect_left2(array, item)
57
+ end
58
+ end
59
+
60
+ x.report("bisect_left3") do
61
+ to_find.each do |item|
62
+ bisect_left3(array, item)
63
+ end
64
+ end
65
+ end
66
+
67
+ array = Array.new(100000) { Array.new(rand(1..10)){(97+rand(26)).chr}.join };
68
+ to_find = Array.new(500) { (97+rand(26)).chr };
69
+
70
+ Benchmark.bm(20) do |x|
71
+ x.report("bisect_left1") do
72
+ to_find.each do |item|
73
+ bisect_left1(array, item)
74
+ end
75
+ end
76
+
77
+ x.report("bisect_left2") do
78
+ to_find.each do |item|
79
+ bisect_left2(array, item)
80
+ end
81
+ end
82
+
83
+ x.report("bisect_left3") do
84
+ to_find.each do |item|
85
+ bisect_left3(array, item)
86
+ end
87
+ end
88
+ end