ruby-spark 1.1.0.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +15 -0
- data/CHANGELOG.md +8 -0
- data/README.md +184 -57
- data/TODO.md +3 -1
- data/ext/spark/build.sbt +5 -5
- data/ext/spark/src/main/scala/RubyWorker.scala +7 -16
- data/lib/spark.rb +69 -10
- data/lib/spark/accumulator.rb +8 -0
- data/lib/spark/broadcast.rb +7 -0
- data/lib/spark/build.rb +10 -10
- data/lib/spark/cli.rb +68 -76
- data/lib/spark/config.rb +13 -17
- data/lib/spark/context.rb +10 -7
- data/lib/spark/error.rb +4 -0
- data/lib/spark/helper/statistic.rb +5 -1
- data/lib/spark/java_bridge.rb +5 -3
- data/lib/spark/java_bridge/base.rb +15 -15
- data/lib/spark/java_bridge/jruby.rb +3 -1
- data/lib/spark/java_bridge/rjb.rb +2 -0
- data/lib/spark/mllib/classification/logistic_regression.rb +10 -2
- data/lib/spark/mllib/classification/svm.rb +10 -2
- data/lib/spark/mllib/clustering/kmeans.rb +6 -2
- data/lib/spark/mllib/regression/lasso.rb +18 -2
- data/lib/spark/mllib/regression/linear.rb +11 -3
- data/lib/spark/mllib/regression/ridge.rb +18 -2
- data/lib/spark/rdd.rb +11 -2
- data/lib/spark/serializer.rb +1 -1
- data/lib/spark/serializer/auto_batched.rb +7 -0
- data/lib/spark/version.rb +1 -1
- data/ruby-spark.gemspec +4 -5
- data/spec/generator.rb +1 -1
- data/spec/lib/collect_spec.rb +10 -10
- data/spec/lib/config_spec.rb +10 -10
- data/spec/lib/context_spec.rb +116 -115
- data/spec/lib/ext_spec.rb +17 -17
- data/spec/lib/external_apps_spec.rb +1 -1
- data/spec/lib/filter_spec.rb +17 -17
- data/spec/lib/flat_map_spec.rb +22 -19
- data/spec/lib/group_spec.rb +22 -19
- data/spec/lib/helper_spec.rb +60 -12
- data/spec/lib/key_spec.rb +9 -8
- data/spec/lib/manipulation_spec.rb +15 -15
- data/spec/lib/map_partitions_spec.rb +6 -4
- data/spec/lib/map_spec.rb +22 -19
- data/spec/lib/reduce_by_key_spec.rb +19 -19
- data/spec/lib/reduce_spec.rb +22 -20
- data/spec/lib/sample_spec.rb +13 -12
- data/spec/lib/serializer_spec.rb +27 -0
- data/spec/lib/sort_spec.rb +16 -14
- data/spec/lib/statistic_spec.rb +4 -2
- data/spec/lib/whole_text_files_spec.rb +9 -8
- data/spec/spec_helper.rb +3 -3
- metadata +19 -18
data/spec/lib/flat_map_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.shared_examples 'a flat mapping' do |workers|
|
4
4
|
it "with #{workers || 'default'} worker" do
|
5
5
|
rdd2 = rdd(workers).map(func1)
|
6
6
|
result = numbers.flat_map(&func1)
|
@@ -24,7 +24,7 @@ RSpec::shared_examples "a flat mapping" do |workers|
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
RSpec
|
27
|
+
RSpec.shared_examples 'a flat mapping values' do |workers|
|
28
28
|
it "with #{workers || 'default'} worker" do
|
29
29
|
rdd2 = rdd(workers).flat_map_values(func1)
|
30
30
|
result = []
|
@@ -50,25 +50,26 @@ RSpec::shared_examples "a flat mapping values" do |workers|
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
|
53
|
-
RSpec
|
53
|
+
RSpec.describe 'Spark::RDD' do
|
54
54
|
let(:func1) { lambda{|x| x*2} }
|
55
55
|
let(:func2) { lambda{|x| [x*3, 1, 1]} }
|
56
56
|
let(:func3) { lambda{|x| [x*4, 2, 2]} }
|
57
57
|
|
58
|
-
context
|
59
|
-
context
|
58
|
+
context 'throught parallelize' do
|
59
|
+
context '.flat_map' do
|
60
60
|
let(:numbers) { Generator.numbers_with_zero }
|
61
61
|
|
62
62
|
def rdd(workers)
|
63
63
|
$sc.parallelize(numbers, workers)
|
64
64
|
end
|
65
65
|
|
66
|
-
it_behaves_like
|
67
|
-
it_behaves_like
|
68
|
-
it_behaves_like
|
66
|
+
it_behaves_like 'a flat mapping', 1
|
67
|
+
it_behaves_like 'a flat mapping', 2
|
68
|
+
# it_behaves_like 'a flat mapping', nil
|
69
|
+
# it_behaves_like 'a flat mapping', rand(2..10)
|
69
70
|
end
|
70
71
|
|
71
|
-
context
|
72
|
+
context '.flat_map_values' do
|
72
73
|
let(:func1) { lambda{|x| x*2} }
|
73
74
|
let(:func2) { lambda{|x| [x.first]} }
|
74
75
|
let(:hash_with_values) { Generator.hash_with_values }
|
@@ -77,24 +78,26 @@ RSpec::describe "Spark::RDD" do
|
|
77
78
|
$sc.parallelize(hash_with_values, workers)
|
78
79
|
end
|
79
80
|
|
80
|
-
it_behaves_like
|
81
|
-
it_behaves_like
|
82
|
-
it_behaves_like
|
81
|
+
it_behaves_like 'a flat mapping values', 1
|
82
|
+
it_behaves_like 'a flat mapping values', 2
|
83
|
+
# it_behaves_like 'a flat mapping values', nil
|
84
|
+
# it_behaves_like 'a flat mapping values', rand(2..10)
|
83
85
|
end
|
84
86
|
end
|
85
87
|
|
86
|
-
context
|
87
|
-
context
|
88
|
-
let(:file) { File.join(
|
88
|
+
context 'throught text_file' do
|
89
|
+
context '.flat_map' do
|
90
|
+
let(:file) { File.join('spec', 'inputs', 'numbers_0_100.txt') }
|
89
91
|
let(:numbers) { File.readlines(file).map(&:strip) }
|
90
92
|
|
91
93
|
def rdd(workers)
|
92
94
|
$sc.text_file(file, workers)
|
93
95
|
end
|
94
96
|
|
95
|
-
it_behaves_like
|
96
|
-
it_behaves_like
|
97
|
-
it_behaves_like
|
97
|
+
it_behaves_like 'a flat mapping', 1
|
98
|
+
it_behaves_like 'a flat mapping', 2
|
99
|
+
# it_behaves_like 'a flat mapping', nil
|
100
|
+
# it_behaves_like 'a flat mapping', rand(2..10)
|
98
101
|
end
|
99
102
|
end
|
100
103
|
end
|
data/spec/lib/group_spec.rb
CHANGED
@@ -1,26 +1,26 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.shared_examples 'a groupping by key' do |workers|
|
4
4
|
it "with #{workers || 'default'} worker" do
|
5
5
|
expect(rdd_result(workers)).to eql(result)
|
6
6
|
end
|
7
7
|
end
|
8
8
|
|
9
|
-
RSpec
|
9
|
+
RSpec.shared_examples 'a cogroupping by key' do |workers|
|
10
10
|
context "with #{workers || 'default'} worker" do
|
11
|
-
it
|
11
|
+
it '.group_with' do
|
12
12
|
rdd = rdd_1(workers).group_with(rdd_2(workers))
|
13
13
|
expect(rdd.collect_as_hash).to eql(result_12)
|
14
14
|
end
|
15
15
|
|
16
|
-
it
|
16
|
+
it '.cogroup' do
|
17
17
|
rdd = rdd_1(workers).cogroup(rdd_2(workers), rdd_3(workers))
|
18
18
|
expect(rdd.collect_as_hash).to eql(result_123)
|
19
19
|
end
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
RSpec
|
23
|
+
RSpec.shared_examples 'a groupping by' do |workers|
|
24
24
|
it "with #{workers || 'default'} worker" do
|
25
25
|
rdd = rdd_numbers(workers)
|
26
26
|
rdd = rdd.group_by(key_function1)
|
@@ -34,7 +34,7 @@ RSpec::shared_examples "a groupping by" do |workers|
|
|
34
34
|
end
|
35
35
|
end
|
36
36
|
|
37
|
-
RSpec
|
37
|
+
RSpec.describe 'Spark::RDD' do
|
38
38
|
|
39
39
|
def make_result(*hashes)
|
40
40
|
_result = {}
|
@@ -47,7 +47,7 @@ RSpec::describe "Spark::RDD" do
|
|
47
47
|
_result
|
48
48
|
end
|
49
49
|
|
50
|
-
context
|
50
|
+
context '.group_by_key' do
|
51
51
|
let(:hash) { Generator.hash }
|
52
52
|
let(:result) { make_result(hash) }
|
53
53
|
|
@@ -56,12 +56,13 @@ RSpec::describe "Spark::RDD" do
|
|
56
56
|
rdd.group_by_key.collect_as_hash
|
57
57
|
end
|
58
58
|
|
59
|
-
it_behaves_like
|
60
|
-
it_behaves_like
|
61
|
-
it_behaves_like
|
59
|
+
it_behaves_like 'a groupping by key', 1
|
60
|
+
it_behaves_like 'a groupping by key', 2
|
61
|
+
# it_behaves_like 'a groupping by key', nil
|
62
|
+
# it_behaves_like 'a groupping by key', rand(2..10)
|
62
63
|
end
|
63
64
|
|
64
|
-
context
|
65
|
+
context 'cogroup' do
|
65
66
|
let(:hash1) { Generator.hash }
|
66
67
|
let(:hash2) { Generator.hash }
|
67
68
|
let(:hash3) { Generator.hash }
|
@@ -81,12 +82,13 @@ RSpec::describe "Spark::RDD" do
|
|
81
82
|
$sc.parallelize(hash3)
|
82
83
|
end
|
83
84
|
|
84
|
-
it_behaves_like
|
85
|
-
it_behaves_like
|
86
|
-
it_behaves_like
|
85
|
+
it_behaves_like 'a cogroupping by key', 1
|
86
|
+
it_behaves_like 'a cogroupping by key', 2
|
87
|
+
# it_behaves_like 'a cogroupping by key', nil
|
88
|
+
# it_behaves_like 'a cogroupping by key', rand(2..10)
|
87
89
|
end
|
88
90
|
|
89
|
-
context
|
91
|
+
context 'group_by' do
|
90
92
|
let(:key_function1) { lambda{|x| x%2} }
|
91
93
|
let(:key_function2) { lambda{|x| x.size} }
|
92
94
|
|
@@ -101,9 +103,10 @@ RSpec::describe "Spark::RDD" do
|
|
101
103
|
$sc.parallelize(words)
|
102
104
|
end
|
103
105
|
|
104
|
-
it_behaves_like
|
105
|
-
it_behaves_like
|
106
|
-
it_behaves_like
|
106
|
+
it_behaves_like 'a groupping by', 1
|
107
|
+
it_behaves_like 'a groupping by', 2
|
108
|
+
# it_behaves_like 'a groupping by', nil
|
109
|
+
# it_behaves_like 'a groupping by', rand(2..10)
|
107
110
|
end
|
108
111
|
|
109
112
|
end
|
data/spec/lib/helper_spec.rb
CHANGED
@@ -1,19 +1,67 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.configure do |c|
|
4
4
|
c.include Spark::Helper::Parser
|
5
|
+
c.include Spark::Helper::Statistic
|
5
6
|
end
|
6
7
|
|
7
|
-
RSpec
|
8
|
-
|
9
|
-
it
|
10
|
-
expect(to_memory_size(
|
11
|
-
expect(to_memory_size(
|
12
|
-
expect(to_memory_size(
|
13
|
-
expect(to_memory_size(
|
14
|
-
expect(to_memory_size(
|
15
|
-
expect(to_memory_size(
|
16
|
-
expect(to_memory_size(
|
8
|
+
RSpec.describe Spark::Helper do
|
9
|
+
|
10
|
+
it 'memory size' do
|
11
|
+
expect(to_memory_size('512mb')).to eql(524288.0)
|
12
|
+
expect(to_memory_size('1586 mb')).to eql(1624064.0)
|
13
|
+
expect(to_memory_size('3 MB')).to eql(3072.0)
|
14
|
+
expect(to_memory_size('9gb')).to eql(9437184.0)
|
15
|
+
expect(to_memory_size('9gb', 'mb')).to eql(9216.0)
|
16
|
+
expect(to_memory_size('9mb', 'gb')).to eql(0.01)
|
17
|
+
expect(to_memory_size('6652548796kb', 'mb')).to eql(6496629.68)
|
18
|
+
end
|
19
|
+
|
20
|
+
context 'statistic' do
|
21
|
+
it 'compute_fraction' do
|
22
|
+
expect(compute_fraction(1, 1000, true)).to be_within(0.001).of(0.013)
|
23
|
+
expect(compute_fraction(2, 1000, true)).to be_within(0.001).of(0.018)
|
24
|
+
expect(compute_fraction(3, 1000, true)).to be_within(0.001).of(0.023)
|
25
|
+
expect(compute_fraction(4, 1000, true)).to be_within(0.001).of(0.028)
|
26
|
+
expect(compute_fraction(5, 1000, true)).to be_within(0.001).of(0.031)
|
27
|
+
|
28
|
+
expect(compute_fraction(1, 1000, false)).to be_within(0.001).of(0.0249)
|
29
|
+
expect(compute_fraction(2, 1000, false)).to be_within(0.001).of(0.0268)
|
30
|
+
expect(compute_fraction(3, 1000, false)).to be_within(0.001).of(0.0287)
|
31
|
+
expect(compute_fraction(4, 1000, false)).to be_within(0.001).of(0.0305)
|
32
|
+
expect(compute_fraction(5, 1000, false)).to be_within(0.001).of(0.0322)
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'bisect_right' do
|
36
|
+
data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
|
37
|
+
|
38
|
+
expect(bisect_right(data, 0)).to eq(0)
|
39
|
+
expect(bisect_right(data, 1)).to eq(0)
|
40
|
+
expect(bisect_right(data, 1, 2)).to eq(2)
|
41
|
+
expect(bisect_right(data, 1, 3)).to eq(3)
|
42
|
+
expect(bisect_right(data, 1, 4)).to eq(4)
|
43
|
+
expect(bisect_right(data, 9)).to eq(0)
|
44
|
+
expect(bisect_right(data, 10)).to eq(1)
|
45
|
+
expect(bisect_right(data, 40)).to eq(4)
|
46
|
+
expect(bisect_right(data, 42)).to eq(4)
|
47
|
+
expect(bisect_right(data, 72)).to eq(7)
|
48
|
+
expect(bisect_right(data, 80, 4)).to eq(8)
|
49
|
+
expect(bisect_right(data, 80, 5)).to eq(8)
|
50
|
+
expect(bisect_right(data, 80, 8)).to eq(8)
|
51
|
+
expect(bisect_right(data, 80, 9)).to eq(9)
|
52
|
+
expect(bisect_right(data, 200)).to eq(9)
|
53
|
+
end
|
54
|
+
|
55
|
+
it 'determine_bounds' do
|
56
|
+
data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
|
57
|
+
|
58
|
+
expect(determine_bounds(data, 0)).to eq([])
|
59
|
+
expect(determine_bounds(data, 1)).to eq([])
|
60
|
+
expect(determine_bounds(data, 2)).to eq([50])
|
61
|
+
expect(determine_bounds(data, 3)).to eq([40, 70])
|
62
|
+
expect(determine_bounds(data, 4)).to eq([30, 50, 70])
|
63
|
+
expect(determine_bounds(data, 20)).to eq(data)
|
64
|
+
end
|
17
65
|
end
|
18
66
|
|
19
67
|
end
|
data/spec/lib/key_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.shared_examples 'a keying by' do |workers|
|
4
4
|
it "with #{workers || 'default'} worker" do
|
5
5
|
rdd = rdd_numbers(workers)
|
6
6
|
rdd = rdd.key_by(key_function1)
|
@@ -16,11 +16,11 @@ RSpec::shared_examples "a keying by" do |workers|
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
-
RSpec
|
19
|
+
RSpec.describe 'Spark::RDD' do
|
20
20
|
|
21
|
-
context
|
21
|
+
context 'key_by' do
|
22
22
|
let(:key_function1) { lambda{|x| x.even?} }
|
23
|
-
let(:key_function2) { lambda{|x| x.include?(
|
23
|
+
let(:key_function2) { lambda{|x| x.include?('a')} }
|
24
24
|
|
25
25
|
let(:numbers) { Generator.numbers }
|
26
26
|
let(:words) { Generator.words }
|
@@ -33,9 +33,10 @@ RSpec::describe "Spark::RDD" do
|
|
33
33
|
$sc.parallelize(words)
|
34
34
|
end
|
35
35
|
|
36
|
-
it_behaves_like
|
37
|
-
it_behaves_like
|
38
|
-
it_behaves_like
|
36
|
+
it_behaves_like 'a keying by', 1
|
37
|
+
it_behaves_like 'a keying by', 2
|
38
|
+
# it_behaves_like 'a keying by', nil
|
39
|
+
# it_behaves_like 'a keying by', rand(2..10)
|
39
40
|
end
|
40
41
|
|
41
42
|
end
|
data/spec/lib/manipulation_spec.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.describe 'Spark::RDD' do
|
4
4
|
let(:numbers) { 1..100 }
|
5
5
|
let(:rand_numbers) { Generator.numbers }
|
6
6
|
|
7
|
-
it
|
7
|
+
it '.glom' do
|
8
8
|
rdd = $sc.parallelize(numbers, 1).glom
|
9
9
|
expect(rdd.collect).to eql([numbers.to_a])
|
10
10
|
|
@@ -14,7 +14,7 @@ RSpec::describe "Spark::RDD" do
|
|
14
14
|
expect(rdd.collect).to eql(numbers.each_slice(20).to_a)
|
15
15
|
end
|
16
16
|
|
17
|
-
it
|
17
|
+
it '.coalesce' do
|
18
18
|
rdd = $sc.parallelize(numbers, 5)
|
19
19
|
|
20
20
|
rdd2 = rdd.glom
|
@@ -24,7 +24,7 @@ RSpec::describe "Spark::RDD" do
|
|
24
24
|
expect(rdd3.collect.size).to eql(4)
|
25
25
|
end
|
26
26
|
|
27
|
-
it
|
27
|
+
it '.distinct' do
|
28
28
|
rdd = $sc.parallelize(rand_numbers, 5)
|
29
29
|
rdd = rdd.distinct
|
30
30
|
expect(rdd.collect.sort).to eql(rand_numbers.uniq.sort)
|
@@ -35,22 +35,22 @@ RSpec::describe "Spark::RDD" do
|
|
35
35
|
expect(rdd.collect).to eql([1])
|
36
36
|
end
|
37
37
|
|
38
|
-
context
|
39
|
-
it
|
38
|
+
context '.union' do
|
39
|
+
it 'classic method' do
|
40
40
|
rdd = $sc.parallelize(numbers, 5)
|
41
41
|
rdd = rdd.union(rdd).collect
|
42
42
|
|
43
43
|
expect(rdd.collect.sort).to eql((numbers.to_a+numbers.to_a).sort)
|
44
44
|
end
|
45
45
|
|
46
|
-
it
|
46
|
+
it 'with a different serializer' do
|
47
47
|
rdd1 = $sc.parallelize(numbers, 1, Spark::Serializer.build{ __batched__(__marshal__) })
|
48
48
|
rdd2 = $sc.parallelize(numbers, 1, Spark::Serializer.build{ __batched__(__oj__) })
|
49
49
|
|
50
50
|
expect { rdd1.union(rdd2).collect }.to_not raise_error
|
51
51
|
end
|
52
52
|
|
53
|
-
it
|
53
|
+
it 'as operator' do
|
54
54
|
rdd1 = $sc.parallelize(numbers)
|
55
55
|
rdd2 = $sc.parallelize(rand_numbers)
|
56
56
|
|
@@ -58,7 +58,7 @@ RSpec::describe "Spark::RDD" do
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
it
|
61
|
+
it '.compact' do
|
62
62
|
data = [nil, nil , 0, 0, 1, 2, nil, 6]
|
63
63
|
result = data.compact
|
64
64
|
ser = Spark::Serializer.build { __batched__(__marshal__, 1) }
|
@@ -73,7 +73,7 @@ RSpec::describe "Spark::RDD" do
|
|
73
73
|
expect(rdd.collect).to eql(result)
|
74
74
|
end
|
75
75
|
|
76
|
-
it
|
76
|
+
it '.intersection' do
|
77
77
|
data1 = [0,1,2,3,4,5,6,7,8,9,10]
|
78
78
|
data2 = [5,6,7,8,9,10,11,12,13,14,15]
|
79
79
|
|
@@ -83,19 +83,19 @@ RSpec::describe "Spark::RDD" do
|
|
83
83
|
expect(rdd1.intersection(rdd2).collect.sort).to eql(data1 & data2)
|
84
84
|
end
|
85
85
|
|
86
|
-
it
|
86
|
+
it '.shuffle' do
|
87
87
|
data = Generator.numbers
|
88
88
|
rdd = $sc.parallelize(data)
|
89
89
|
|
90
90
|
expect(rdd.shuffle.collect).to_not eql(data)
|
91
91
|
end
|
92
92
|
|
93
|
-
context
|
93
|
+
context '.cartesian' do
|
94
94
|
let(:data1) { Generator.numbers(100) }
|
95
95
|
let(:data2) { Generator.numbers(100) }
|
96
96
|
let(:result) { data1.product(data2).map(&:to_s).sort }
|
97
97
|
|
98
|
-
it
|
98
|
+
it 'unbatched' do
|
99
99
|
ser = Spark::Serializer.build { __batched__(__marshal__, 1) }
|
100
100
|
|
101
101
|
rdd1 = $sc.parallelize(data1, 2, ser)
|
@@ -106,7 +106,7 @@ RSpec::describe "Spark::RDD" do
|
|
106
106
|
expect(rdd.collect.sort).to eql(result)
|
107
107
|
end
|
108
108
|
|
109
|
-
it
|
109
|
+
it 'batched' do
|
110
110
|
ser1 = Spark::Serializer.build { __batched__(__marshal__, rand(4..10)) }
|
111
111
|
ser2 = Spark::Serializer.build { __batched__(__marshal__, rand(4..10)) }
|
112
112
|
|
data/spec/lib/map_partitions_spec.rb
CHANGED
@@ -67,9 +67,10 @@ RSpec::describe 'Spark::RDD.map_partitions(_with_index)' do
|
|
67
67
|
$sc.parallelize(numbers, workers)
|
68
68
|
end
|
69
69
|
|
70
|
-
it_behaves_like 'a map partitions', nil
|
71
70
|
it_behaves_like 'a map partitions', 1
|
72
|
-
it_behaves_like 'a map partitions',
|
71
|
+
it_behaves_like 'a map partitions', 2
|
72
|
+
# it_behaves_like 'a map partitions', nil
|
73
|
+
# it_behaves_like 'a map partitions', rand(2..10)
|
73
74
|
end
|
74
75
|
|
75
76
|
context 'throught text_file' do
|
@@ -80,8 +81,9 @@ RSpec::describe 'Spark::RDD.map_partitions(_with_index)' do
|
|
80
81
|
$sc.text_file(file, workers)
|
81
82
|
end
|
82
83
|
|
83
|
-
it_behaves_like 'a map partitions', nil
|
84
84
|
it_behaves_like 'a map partitions', 1
|
85
|
-
it_behaves_like 'a map partitions',
|
85
|
+
it_behaves_like 'a map partitions', 2
|
86
|
+
# it_behaves_like 'a map partitions', nil
|
87
|
+
# it_behaves_like 'a map partitions', rand(2..10)
|
86
88
|
end
|
87
89
|
end
|
data/spec/lib/map_spec.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
RSpec
|
3
|
+
RSpec.shared_examples 'a mapping' do |workers|
|
4
4
|
it "with #{workers || 'default'} worker" do
|
5
5
|
rdd2 = rdd(workers).map(func1)
|
6
6
|
result = numbers.map(&func1)
|
@@ -24,7 +24,7 @@ RSpec::shared_examples "a mapping" do |workers|
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
-
RSpec
|
27
|
+
RSpec.shared_examples 'a mapping values' do |workers|
|
28
28
|
it "with #{workers || 'default'} worker" do
|
29
29
|
rdd2 = rdd(workers).map_values(func1)
|
30
30
|
result = hash.map{|key, value| [key, func1.call(value)]}
|
@@ -43,49 +43,52 @@ RSpec::shared_examples "a mapping values" do |workers|
|
|
43
43
|
end
|
44
44
|
end
|
45
45
|
|
46
|
-
RSpec
|
46
|
+
RSpec.describe 'Spark::RDD' do
|
47
47
|
let(:func1) { lambda{|x| x*2} }
|
48
48
|
let(:func2) { lambda{|x| x*3} }
|
49
49
|
let(:func3) { lambda{|x| x*4} }
|
50
50
|
|
51
|
-
context
|
52
|
-
context
|
51
|
+
context 'throught parallelize' do
|
52
|
+
context '.map' do
|
53
53
|
let(:numbers) { Generator.numbers }
|
54
54
|
|
55
55
|
def rdd(workers)
|
56
56
|
$sc.parallelize(numbers, workers)
|
57
57
|
end
|
58
58
|
|
59
|
-
it_behaves_like
|
60
|
-
it_behaves_like
|
61
|
-
it_behaves_like
|
59
|
+
it_behaves_like 'a mapping', 1
|
60
|
+
it_behaves_like 'a mapping', 2
|
61
|
+
# it_behaves_like 'a mapping', nil
|
62
|
+
# it_behaves_like 'a mapping', rand(2..10)
|
62
63
|
end
|
63
64
|
|
64
|
-
context
|
65
|
+
context '.map_values' do
|
65
66
|
let!(:hash) { Generator.hash }
|
66
67
|
|
67
68
|
def rdd(workers)
|
68
69
|
$sc.parallelize(hash, workers)
|
69
70
|
end
|
70
71
|
|
71
|
-
it_behaves_like
|
72
|
-
it_behaves_like
|
73
|
-
it_behaves_like
|
72
|
+
it_behaves_like 'a mapping values', 1
|
73
|
+
it_behaves_like 'a mapping values', 2
|
74
|
+
# it_behaves_like 'a mapping values', nil
|
75
|
+
# it_behaves_like 'a mapping values', rand(2..10)
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
77
|
-
context
|
78
|
-
context
|
79
|
-
let(:file) { File.join(
|
79
|
+
context 'throught text_file' do
|
80
|
+
context '.map' do
|
81
|
+
let(:file) { File.join('spec', 'inputs', 'numbers_0_100.txt') }
|
80
82
|
let(:numbers) { File.readlines(file).map(&:strip) }
|
81
83
|
|
82
84
|
def rdd(workers)
|
83
85
|
$sc.text_file(file, workers)
|
84
86
|
end
|
85
87
|
|
86
|
-
it_behaves_like
|
87
|
-
it_behaves_like
|
88
|
-
it_behaves_like
|
88
|
+
it_behaves_like 'a mapping', 1
|
89
|
+
it_behaves_like 'a mapping', 2
|
90
|
+
# it_behaves_like 'a mapping', nil
|
91
|
+
# it_behaves_like 'a mapping', rand(2..10)
|
89
92
|
end
|
90
93
|
end
|
91
94
|
end
|