ruby-spark 1.1.0.1 → 1.2.0
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +15 -0
- data/CHANGELOG.md +8 -0
- data/README.md +184 -57
- data/TODO.md +3 -1
- data/ext/spark/build.sbt +5 -5
- data/ext/spark/src/main/scala/RubyWorker.scala +7 -16
- data/lib/spark.rb +69 -10
- data/lib/spark/accumulator.rb +8 -0
- data/lib/spark/broadcast.rb +7 -0
- data/lib/spark/build.rb +10 -10
- data/lib/spark/cli.rb +68 -76
- data/lib/spark/config.rb +13 -17
- data/lib/spark/context.rb +10 -7
- data/lib/spark/error.rb +4 -0
- data/lib/spark/helper/statistic.rb +5 -1
- data/lib/spark/java_bridge.rb +5 -3
- data/lib/spark/java_bridge/base.rb +15 -15
- data/lib/spark/java_bridge/jruby.rb +3 -1
- data/lib/spark/java_bridge/rjb.rb +2 -0
- data/lib/spark/mllib/classification/logistic_regression.rb +10 -2
- data/lib/spark/mllib/classification/svm.rb +10 -2
- data/lib/spark/mllib/clustering/kmeans.rb +6 -2
- data/lib/spark/mllib/regression/lasso.rb +18 -2
- data/lib/spark/mllib/regression/linear.rb +11 -3
- data/lib/spark/mllib/regression/ridge.rb +18 -2
- data/lib/spark/rdd.rb +11 -2
- data/lib/spark/serializer.rb +1 -1
- data/lib/spark/serializer/auto_batched.rb +7 -0
- data/lib/spark/version.rb +1 -1
- data/ruby-spark.gemspec +4 -5
- data/spec/generator.rb +1 -1
- data/spec/lib/collect_spec.rb +10 -10
- data/spec/lib/config_spec.rb +10 -10
- data/spec/lib/context_spec.rb +116 -115
- data/spec/lib/ext_spec.rb +17 -17
- data/spec/lib/external_apps_spec.rb +1 -1
- data/spec/lib/filter_spec.rb +17 -17
- data/spec/lib/flat_map_spec.rb +22 -19
- data/spec/lib/group_spec.rb +22 -19
- data/spec/lib/helper_spec.rb +60 -12
- data/spec/lib/key_spec.rb +9 -8
- data/spec/lib/manipulation_spec.rb +15 -15
- data/spec/lib/map_partitions_spec.rb +6 -4
- data/spec/lib/map_spec.rb +22 -19
- data/spec/lib/reduce_by_key_spec.rb +19 -19
- data/spec/lib/reduce_spec.rb +22 -20
- data/spec/lib/sample_spec.rb +13 -12
- data/spec/lib/serializer_spec.rb +27 -0
- data/spec/lib/sort_spec.rb +16 -14
- data/spec/lib/statistic_spec.rb +4 -2
- data/spec/lib/whole_text_files_spec.rb +9 -8
- data/spec/spec_helper.rb +3 -3
- metadata +19 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cd863f728212557da03e76f6e98eeed05695ea5d
+  data.tar.gz: 214b2022187727a50badcd1910313550e59aefdf
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 23c0c7b6ab63a2f9c191cddc4836c73cde61722b9e6f3c7e25b090afed7cda2eaff0d8718074ae3337ff5c4bd57e1223dab76f6cf7772b4c7dda3e7ed69d98c6
+  data.tar.gz: 234897b1851614ae1371b3a33417c8d036b00a4551185829b99ef398a110a614ffe1eaeac556c00859a45598bed4219e59eb98ddbffbe0fe2c25c024408b8628
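If you want to verify a downloaded copy of the gem against these digests, here is a minimal sketch using Ruby's standard Digest library. The file names metadata.gz and data.tar.gz are the ones listed above; the assumption that they sit in the current directory (extracted from the .gem archive) is mine.

```ruby
require 'digest'

# Hypothetical: files extracted from the downloaded .gem archive
%w[metadata.gz data.tar.gz].each do |name|
  content = File.binread(name)
  puts "#{name} SHA1:   #{Digest::SHA1.hexdigest(content)}"
  puts "#{name} SHA512: #{Digest::SHA512.hexdigest(content)}"
end
```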
data/.gitignore
CHANGED
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
-# Ruby-Spark
+# Ruby-Spark [![Build Status](https://travis-ci.org/ondra-m/ruby-spark.svg?branch=master)](https://travis-ci.org/ondra-m/ruby-spark)
 
 Apache Spark™ is a fast and general engine for large-scale data processing.
 
-This Gem allows
+This Gem allows you to use Spark functionality from Ruby.
 
 > Word count in Spark's Ruby API
 
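The word-count snippet teased here sits in README lines this diff does not display (only `file.flat_map(:split)` survives as hunk context below). As a rough, non-authoritative sketch of what such a count looks like with this gem, using methods that appear elsewhere in this README (`flat_map`, `map`, `reduce_by_key`, `collect_as_hash`); the `text_file` loader name and the file path are assumptions:

```ruby
require 'ruby-spark'
Spark.start

# Hypothetical input file
file = Spark.sc.text_file('sample.txt')

counts = file.flat_map(:split)
             .map(lambda{|word| [word, 1]})
             .reduce_by_key(lambda{|a, b| a + b})

counts.collect_as_hash
# => {"some" => 12, "word" => 3, ...}
```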
@@ -16,7 +16,7 @@ file.flat_map(:split)
 
 - [Apache Spark](http://spark.apache.org)
 - [Wiki](https://github.com/ondra-m/ruby-spark/wiki)
-- [
+- [Rubydoc](http://www.rubydoc.info/gems/ruby-spark)
 
 ## Installation
 
@@ -24,6 +24,7 @@ file.flat_map(:split)
 
 - Java 7+
 - Ruby 2+
+- wget or curl
 - MRI or JRuby
 
 Add this line to your application's Gemfile:
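The Gemfile line itself falls outside this hunk; for completeness, a minimal sketch of what it typically looks like (the version pin is an assumption based on the release this diff describes):

```ruby
# Gemfile
gem 'ruby-spark', '~> 1.2.0'
```

Then run `bundle install`, followed by the `ruby-spark build` step shown in the next hunk.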
@@ -48,12 +49,13 @@ Run `rake compile` if you are using gem from local filesystem.
 
 ### Build Apache Spark
 
-This command will download Spark and build extensions for this gem ([SBT](ext/spark/build.sbt) is used for compiling). For more informations check [wiki](https://github.com/ondra-m/ruby-spark/wiki/Installation).
+This command will download Spark and build extensions for this gem ([SBT](ext/spark/build.sbt) is used for compiling). For more information, check the [wiki](https://github.com/ondra-m/ruby-spark/wiki/Installation). Jars will be stored in your HOME directory.
 
 ```
 $ ruby-spark build
 ```
 
+
 ## Usage
 
 You can use Ruby Spark via interactive shell (Pry is used)
@@ -62,26 +64,13 @@ You can use Ruby Spark via interactive shell (Pry is used)
 $ ruby-spark shell
 ```
 
-Or on existing project
-
-```ruby
-require 'ruby-spark'
-
-# Create a SparkContext
-Spark.start
-
-# Context reference
-Spark.sc
-```
+Or in an existing project.
 
 If you want, configure Spark first. See [configurations](https://github.com/ondra-m/ruby-spark/wiki/Configuration) for more details.
 
 ```ruby
 require 'ruby-spark'
 
-# Use if you have custom SPARK_HOME
-Spark.load_lib(spark_home)
-
 # Configuration
 Spark.config do
   set_app_name "RubySpark"
@@ -91,17 +80,21 @@ end
 
 # Start Apache Spark
 Spark.start
+
+# Context reference
+Spark.sc
 ```
 
-Finally, to stop the cluster. On the shell is Spark stopped automatically when
+Finally, stop the cluster. In the shell, Spark is stopped automatically when the environment exits.
 
 ```ruby
 Spark.stop
 ```
+
+After the first use, a global configuration file is created at **~/.ruby-spark.conf**. Properties for Spark and RubySpark can be specified there.
 
 
 
-## Creating RDD (
+## Creating RDD (a new collection)
 
 Single text file:
 
@@ -115,28 +108,18 @@ All files on directory:
 rdd = sc.whole_text_files(DIRECTORY, workers_num, serializer=nil)
 ```
 
-Direct uploading structures from ruby
+Uploading structures directly from Ruby:
 
 ```ruby
 rdd = sc.parallelize([1,2,3,4,5], workers_num, serializer=nil)
 rdd = sc.parallelize(1..5, workers_num, serializer=nil)
 ```
 
-
+There are 2 conditions:
+1. the chosen serializer must be able to serialize the data
+2. the data must be iterable
 
-
-
-<dt>workers_num</dt>
-<dd>
-  Min count of works computing this task.<br>
-  <i>(This value can be overwriten by spark)</i>
-</dd>
-
-<dt>serializer</dt>
-<dd>
-  Custom serializer.<br>
-  <i>(default: by <b>spark.ruby.serializer</b> options)</i>
-</dd>
-</dl>
+If you do not specify a serializer, the default one is used (defined by the spark.ruby.serializer.* options). [Check this](https://github.com/ondra-m/ruby-spark/wiki/Loading-data#custom-serializer) if you want to create a custom serializer.
 
 ## Operations
 
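To tie together the serializer and `parallelize` arguments discussed above, a small sketch using the serializer string format that also appears in the Examples section further down (the batch size of 100 is an arbitrary choice):

```ruby
# Build an explicit serializer: Marshal, batches of 100 items
ser = Spark::Serializer.build('batched(marshal, 100)')

# Iterable data (a Range), 2 workers, custom serializer
rdd = Spark.sc.parallelize(1..10, 2, ser)
rdd.collect
# => [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Omitting the serializer falls back to the spark.ruby.serializer.* defaults
rdd_default = Spark.sc.parallelize([1, 2, 3, 4, 5], 2)
rdd_default.collect
# => [1, 2, 3, 4, 5]
```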
@@ -145,39 +128,150 @@ All operations can be divided into 2 groups:
 - **Transformations:** append new operation to current RDD and return new
 - **Actions:** add operation and start calculations
 
-
+More information:
+
+- [Wiki page](https://github.com/ondra-m/ruby-spark/wiki/RDD)
+- [Rubydoc](http://www.rubydoc.info/github/ondra-m/ruby-spark/master/Spark/RDD)
+- [rdd.rb](https://github.com/ondra-m/ruby-spark/blob/master/lib/spark/rdd.rb)
+
+You can also check the official Spark documentation; first make sure that the method is implemented here.
+
+- [Transformations](http://spark.apache.org/docs/latest/programming-guide.html#transformations)
+- [Actions](http://spark.apache.org/docs/latest/programming-guide.html#actions)
 
 #### Transformations
 
-
-rdd.map(
-
-rdd.
-
-
+<dl>
+<dt><code>rdd.map(function)</code></dt>
+<dd>Return a new RDD by applying a function to all elements of this RDD.</dd>
+
+<dt><code>rdd.flat_map(function)</code></dt>
+<dd>Return a new RDD by first applying a function to all elements of this RDD, and then flattening the results.</dd>
+
+<dt><code>rdd.map_partitions(function)</code></dt>
+<dd>Return a new RDD by applying a function to each partition of this RDD.</dd>
+
+<dt><code>rdd.filter(function)</code></dt>
+<dd>Return a new RDD containing only the elements that satisfy a predicate.</dd>
+
+<dt><code>rdd.cartesian(other)</code></dt>
+<dd>Return the Cartesian product of this RDD and another one, that is, the RDD of all pairs of elements `(a, b)` where `a` is in `self` and `b` is in `other`.</dd>
+
+<dt><code>rdd.intersection(other)</code></dt>
+<dd>Return the intersection of this RDD and another one. The output will not contain any duplicate elements, even if the input RDDs did.</dd>
+
+<dt><code>rdd.sample(with_replacement, fraction, seed)</code></dt>
+<dd>Return a sampled subset of this RDD. Operations are based on Poisson and Uniform distributions.</dd>
+
+<dt><code>rdd.group_by_key(num_partitions)</code></dt>
+<dd>Group the values for each key in the RDD into a single sequence.</dd>
+
+<dt><a href="http://www.rubydoc.info/gems/ruby-spark/Spark/RDD" target="_blank"><code>...many more...</code></a></dt>
+<dd></dd>
+</dl>
+
 
 #### Actions
 
-
-rdd.count
-
-
-
+<dl>
+<dt><code>rdd.take(count)</code></dt>
+<dd>Take the first `count` elements of the RDD.</dd>
+
+<dt><code>rdd.reduce(function)</code></dt>
+<dd>Reduces the elements of this RDD using the specified lambda or method.</dd>
+
+<dt><code>rdd.aggregate(zero_value, seq_op, comb_op)</code></dt>
+<dd>Aggregate the elements of each partition, and then the results for all the partitions, using the given combine functions and a neutral “zero value”.</dd>
+
+<dt><code>rdd.histogram(buckets)</code></dt>
+<dd>Compute a histogram using the provided buckets.</dd>
+
+<dt><code>rdd.collect</code></dt>
+<dd>Return an array that contains all of the elements in this RDD.</dd>
+
+<dt><a href="http://www.rubydoc.info/gems/ruby-spark/Spark/RDD" target="_blank"><code>...many more...</code></a></dt>
+<dd></dd>
+</dl>
 
 
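A tiny illustration of the transformation/action split listed above: transformations only describe the computation and return new RDDs immediately, and nothing runs on the workers until an action is called. Method names are the ones documented in this README.

```ruby
rdd = Spark.sc.parallelize(1..1_000, 2)

# Transformations: build up the pipeline, no computation yet
doubled = rdd.map(lambda{|x| x * 2})
evens   = doubled.filter(lambda{|x| x % 4 == 0})

# Action: triggers the actual work
evens.count
# => 500
```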
 ## Examples
 
-
+##### Basic methods
 
 ```ruby
-
-
+# Every batch will be serialized by Marshal and will have size 10
+ser = Spark::Serializer.build('batched(marshal, 10)')
+
+# Range 0..100, 2 workers, custom serializer
+rdd = Spark.sc.parallelize(0..100, 2, ser)
+
+
+# Take the first 5 items
+rdd.take(5)
+# => [0, 1, 2, 3, 4]
+
+
+# Reducing numbers
+rdd.reduce(lambda{|sum, x| sum+x})
+rdd.reduce(:+)
+rdd.sum
+# => 5050
+
+
+# Reducing with a zero value
+seq = lambda{|x,y| x+y}
+com = lambda{|x,y| x*y}
+rdd.aggregate(1, seq, com)
+# 1. Every worker adds its numbers
+# => [1226, 3826]
+# 2. The results are multiplied
+# => 4690676
+
+
+# Statistics
+rdd.stats
+# => StatCounter: (count, mean, max, min, variance,
+#                  sample_variance, stdev, sample_stdev)
+
+
+# Compute a histogram using the provided buckets
+rdd.histogram(2)
+# => [[0.0, 50.0, 100], [50, 51]]
+
+
+# Mapping
+rdd.map(lambda {|x| x*2}).collect
+# => [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, ...]
+rdd.map(:to_f).collect
+# => [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, ...]
+
+
+# Mapping whole partitions
+rdd.map_partitions(lambda{|part| part.reduce(:+)}).collect
+# => [1225, 3825]
+
+
+# Selecting
+rdd.filter(lambda{|x| x.even?}).collect
+# => [0, 2, 4, 6, 8, 10, 12, 14, 16, ...]
+
+
+# Sampling
+rdd.sample(true, 10).collect
+# => [3, 36, 40, 54, 58, 82, 86, 95, 98]
+
+
+# Sampling X items
+rdd.take_sample(true, 10)
+# => [53, 87, 71, 74, 18, 75, 55, 94, 46, 32]
+
+
+# Using an external process
+rdd.pipe('cat', "awk '{print $1*10}'")
+# => ["0", "10", "20", "30", "40", "50", ...]
 ```
 
-Words count using methods
+##### Word count using methods
 
 ```ruby
 # Content:
@@ -198,7 +292,7 @@ rdd = rdd.reduce_by_key(lambda{|a, b| a+b})
 rdd.collect_as_hash
 ```
 
-Estimating PI with a custom serializer
+##### Estimating PI with a custom serializer
 
 ```ruby
 slices = 3
@@ -221,7 +315,7 @@ rdd = rdd.map(method(:map))
 puts 'Pi is roughly %f' % (4.0 * rdd.sum / n)
 ```
 
-Estimating PI
+##### Estimating PI
 
 ```ruby
 rdd = sc.parallelize([10_000], 1)
@@ -230,7 +324,16 @@ rdd = rdd.map(lambda{|x| BigMath.PI(x)})
 rdd.collect # => #<BigDecimal, '0.31415926...'>
 ```
 
-
+### Mllib (Machine Learning Library)
+
+Mllib functions use Spark's Machine Learning Library. Ruby objects are serialized and deserialized in Java, so you cannot use custom classes; only primitive types such as strings and integers are supported.
+
+All supported methods/models:
+
+- [Rubydoc / Mllib](http://www.rubydoc.info/github/ondra-m/ruby-spark/Spark/Mllib)
+- [Github / Mllib](https://github.com/ondra-m/ruby-spark/tree/master/lib/spark/mllib)
+
+##### Linear regression
 
 ```ruby
 # Import Mllib classes into Object
@@ -250,3 +353,27 @@ lrm = LinearRegressionWithSGD.train(sc.parallelize(data), initial_weights: [1.0]
 
 lrm.predict([0.0])
 ```
+
+##### K-Means
+
+```ruby
+Spark::Mllib.import
+
+# Dense vectors
+data = [
+    DenseVector.new([0.0,0.0]),
+    DenseVector.new([1.0,1.0]),
+    DenseVector.new([9.0,8.0]),
+    DenseVector.new([8.0,9.0])
+]
+
+model = KMeans.train(sc.parallelize(data), 2)
+
+model.predict([0.0, 0.0]) == model.predict([1.0, 1.0])
+# => true
+model.predict([8.0, 9.0]) == model.predict([9.0, 8.0])
+# => true
+```
+
+## Benchmarks
+
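The `data` used by the linear-regression hunk above (the `LinearRegressionWithSGD.train` context lines) is not shown in this diff. A plausible construction, assuming the gem mirrors Spark Mllib's `LabeledPoint(label, features)` API as the other Mllib examples suggest; the sample values are invented for illustration:

```ruby
Spark::Mllib.import

# Label followed by a feature vector
data = [
  LabeledPoint.new(0.0, [0.0]),
  LabeledPoint.new(1.0, [1.0]),
  LabeledPoint.new(2.0, [2.0]),
  LabeledPoint.new(3.0, [3.0])
]

lrm = LinearRegressionWithSGD.train(sc.parallelize(data), initial_weights: [1.0])
lrm.predict([0.0])
```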
data/TODO.md
CHANGED
@@ -3,4 +3,6 @@
 - add SQL
 - worker informations (time, memory, ...)
 - killing zombie workers
-
+- add_rb, add_inline_rb to Spark::{Context, RDD}
+- fix broadcast for cluster
+- dump to disk if there is memory limit
data/ext/spark/build.sbt
CHANGED
@@ -6,15 +6,15 @@ assemblySettings
 val defaultScalaVersion = "2.10.4"
 val defaultSparkVersion = "1.3.0"
 val defaultSparkCoreVersion = "2.10"
-val
+val defaultTargetDir = "target"
 val defaultHadoopVersion = "1.0.4"
 
 // Values
+val _hadoopVersion = scala.util.Properties.envOrElse("HADOOP_VERSION", defaultHadoopVersion)
 val _scalaVersion = scala.util.Properties.envOrElse("SCALA_VERSION", defaultScalaVersion)
 val _sparkVersion = scala.util.Properties.envOrElse("SPARK_VERSION", defaultSparkVersion)
 val _sparkCoreVersion = scala.util.Properties.envOrElse("SPARK_CORE_VERSION", defaultSparkCoreVersion)
-val
-val _hadoopVersion = scala.util.Properties.envOrElse("HADOOP_VERSION", defaultHadoopVersion)
+val _targetDir = scala.util.Properties.envOrElse("TARGET_DIR", defaultTargetDir)
 
 // Project settings
 name := "ruby-spark"
@@ -26,8 +26,8 @@ scalaVersion := _scalaVersion
 javacOptions ++= Seq("-source", "1.7", "-target", "1.7")
 
 // Jar target folder
-artifactPath in Compile in packageBin := file(s"${
-outputPath in packageDependency := file(s"${
+artifactPath in Compile in packageBin := file(s"${_targetDir}/ruby-spark.jar")
+outputPath in packageDependency := file(s"${_targetDir}/ruby-spark-deps.jar")
 
 // Protocol buffer support
 seq(sbtprotobuf.ProtobufPlugin.protobufSettings: _*)
data/ext/spark/src/main/scala/RubyWorker.scala
CHANGED
@@ -123,22 +123,13 @@ object RubyWorker extends Logging {
       executorLocation = env.conf.get("spark.ruby.driver_home")
     }
     else{
-      //
-
-
-
-        executorLocation = homeCommand.run.readLine
-      } catch {
-        case e: java.io.IOException =>
-          throw new SparkException("Ruby-spark gem is not installed.", e)
-      }
-    }
-    else{
-      // Prepare and use gem from uri
+      // Use gem installed on the system
+      try {
+        val homeCommand = (new FileCommand(commandTemplate, "ruby-spark home", env, getEnvVars(env))).run
+        executorLocation = homeCommand.readLine
+      } catch {
+        case e: Exception =>
+          throw new SparkException("Ruby-spark gem is not installed.", e)
       }
     }
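The Scala side above locates the executor code by running `ruby-spark home` and reading a single line. A sketch of what that command plausibly resolves to on the Ruby side; the actual CLI implementation lives in lib/spark/cli.rb and is not shown in this diff, so treat this as an assumption:

```ruby
require 'rubygems'

# Hypothetical equivalent of `ruby-spark home`:
# print the installed gem's root path on a single line
spec = Gem::Specification.find_by_name('ruby-spark')
puts spec.gem_dir
```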