ruby-spark 1.1.0.1 → 1.2.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.travis.yml +15 -0
  4. data/CHANGELOG.md +8 -0
  5. data/README.md +184 -57
  6. data/TODO.md +3 -1
  7. data/ext/spark/build.sbt +5 -5
  8. data/ext/spark/src/main/scala/RubyWorker.scala +7 -16
  9. data/lib/spark.rb +69 -10
  10. data/lib/spark/accumulator.rb +8 -0
  11. data/lib/spark/broadcast.rb +7 -0
  12. data/lib/spark/build.rb +10 -10
  13. data/lib/spark/cli.rb +68 -76
  14. data/lib/spark/config.rb +13 -17
  15. data/lib/spark/context.rb +10 -7
  16. data/lib/spark/error.rb +4 -0
  17. data/lib/spark/helper/statistic.rb +5 -1
  18. data/lib/spark/java_bridge.rb +5 -3
  19. data/lib/spark/java_bridge/base.rb +15 -15
  20. data/lib/spark/java_bridge/jruby.rb +3 -1
  21. data/lib/spark/java_bridge/rjb.rb +2 -0
  22. data/lib/spark/mllib/classification/logistic_regression.rb +10 -2
  23. data/lib/spark/mllib/classification/svm.rb +10 -2
  24. data/lib/spark/mllib/clustering/kmeans.rb +6 -2
  25. data/lib/spark/mllib/regression/lasso.rb +18 -2
  26. data/lib/spark/mllib/regression/linear.rb +11 -3
  27. data/lib/spark/mllib/regression/ridge.rb +18 -2
  28. data/lib/spark/rdd.rb +11 -2
  29. data/lib/spark/serializer.rb +1 -1
  30. data/lib/spark/serializer/auto_batched.rb +7 -0
  31. data/lib/spark/version.rb +1 -1
  32. data/ruby-spark.gemspec +4 -5
  33. data/spec/generator.rb +1 -1
  34. data/spec/lib/collect_spec.rb +10 -10
  35. data/spec/lib/config_spec.rb +10 -10
  36. data/spec/lib/context_spec.rb +116 -115
  37. data/spec/lib/ext_spec.rb +17 -17
  38. data/spec/lib/external_apps_spec.rb +1 -1
  39. data/spec/lib/filter_spec.rb +17 -17
  40. data/spec/lib/flat_map_spec.rb +22 -19
  41. data/spec/lib/group_spec.rb +22 -19
  42. data/spec/lib/helper_spec.rb +60 -12
  43. data/spec/lib/key_spec.rb +9 -8
  44. data/spec/lib/manipulation_spec.rb +15 -15
  45. data/spec/lib/map_partitions_spec.rb +6 -4
  46. data/spec/lib/map_spec.rb +22 -19
  47. data/spec/lib/reduce_by_key_spec.rb +19 -19
  48. data/spec/lib/reduce_spec.rb +22 -20
  49. data/spec/lib/sample_spec.rb +13 -12
  50. data/spec/lib/serializer_spec.rb +27 -0
  51. data/spec/lib/sort_spec.rb +16 -14
  52. data/spec/lib/statistic_spec.rb +4 -2
  53. data/spec/lib/whole_text_files_spec.rb +9 -8
  54. data/spec/spec_helper.rb +3 -3
  55. metadata +19 -18
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::shared_examples "a flat mapping" do |workers|
3
+ RSpec.shared_examples 'a flat mapping' do |workers|
4
4
  it "with #{workers || 'default'} worker" do
5
5
  rdd2 = rdd(workers).map(func1)
6
6
  result = numbers.flat_map(&func1)
@@ -24,7 +24,7 @@ RSpec::shared_examples "a flat mapping" do |workers|
24
24
  end
25
25
  end
26
26
 
27
- RSpec::shared_examples "a flat mapping values" do |workers|
27
+ RSpec.shared_examples 'a flat mapping values' do |workers|
28
28
  it "with #{workers || 'default'} worker" do
29
29
  rdd2 = rdd(workers).flat_map_values(func1)
30
30
  result = []
@@ -50,25 +50,26 @@ RSpec::shared_examples "a flat mapping values" do |workers|
50
50
  end
51
51
  end
52
52
 
53
- RSpec::describe "Spark::RDD" do
53
+ RSpec.describe 'Spark::RDD' do
54
54
  let(:func1) { lambda{|x| x*2} }
55
55
  let(:func2) { lambda{|x| [x*3, 1, 1]} }
56
56
  let(:func3) { lambda{|x| [x*4, 2, 2]} }
57
57
 
58
- context "throught parallelize" do
59
- context ".flat_map" do
58
+ context 'throught parallelize' do
59
+ context '.flat_map' do
60
60
  let(:numbers) { Generator.numbers_with_zero }
61
61
 
62
62
  def rdd(workers)
63
63
  $sc.parallelize(numbers, workers)
64
64
  end
65
65
 
66
- it_behaves_like "a flat mapping", nil
67
- it_behaves_like "a flat mapping", 1
68
- it_behaves_like "a flat mapping", rand(2..10)
66
+ it_behaves_like 'a flat mapping', 1
67
+ it_behaves_like 'a flat mapping', 2
68
+ # it_behaves_like 'a flat mapping', nil
69
+ # it_behaves_like 'a flat mapping', rand(2..10)
69
70
  end
70
71
 
71
- context ".flat_map_values" do
72
+ context '.flat_map_values' do
72
73
  let(:func1) { lambda{|x| x*2} }
73
74
  let(:func2) { lambda{|x| [x.first]} }
74
75
  let(:hash_with_values) { Generator.hash_with_values }
@@ -77,24 +78,26 @@ RSpec::describe "Spark::RDD" do
77
78
  $sc.parallelize(hash_with_values, workers)
78
79
  end
79
80
 
80
- it_behaves_like "a flat mapping values", nil
81
- it_behaves_like "a flat mapping values", 1
82
- it_behaves_like "a flat mapping values", rand(2..10)
81
+ it_behaves_like 'a flat mapping values', 1
82
+ it_behaves_like 'a flat mapping values', 2
83
+ # it_behaves_like 'a flat mapping values', nil
84
+ # it_behaves_like 'a flat mapping values', rand(2..10)
83
85
  end
84
86
  end
85
87
 
86
- context "throught text_file" do
87
- context ".flat_map" do
88
- let(:file) { File.join("spec", "inputs", "numbers_0_100.txt") }
88
+ context 'throught text_file' do
89
+ context '.flat_map' do
90
+ let(:file) { File.join('spec', 'inputs', 'numbers_0_100.txt') }
89
91
  let(:numbers) { File.readlines(file).map(&:strip) }
90
92
 
91
93
  def rdd(workers)
92
94
  $sc.text_file(file, workers)
93
95
  end
94
96
 
95
- it_behaves_like "a flat mapping", nil
96
- it_behaves_like "a flat mapping", 1
97
- it_behaves_like "a flat mapping", rand(2..10)
97
+ it_behaves_like 'a flat mapping', 1
98
+ it_behaves_like 'a flat mapping', 2
99
+ # it_behaves_like 'a flat mapping', nil
100
+ # it_behaves_like 'a flat mapping', rand(2..10)
98
101
  end
99
102
  end
100
103
  end
@@ -1,26 +1,26 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::shared_examples "a groupping by key" do |workers|
3
+ RSpec.shared_examples 'a groupping by key' do |workers|
4
4
  it "with #{workers || 'default'} worker" do
5
5
  expect(rdd_result(workers)).to eql(result)
6
6
  end
7
7
  end
8
8
 
9
- RSpec::shared_examples "a cogroupping by key" do |workers|
9
+ RSpec.shared_examples 'a cogroupping by key' do |workers|
10
10
  context "with #{workers || 'default'} worker" do
11
- it ".group_with" do
11
+ it '.group_with' do
12
12
  rdd = rdd_1(workers).group_with(rdd_2(workers))
13
13
  expect(rdd.collect_as_hash).to eql(result_12)
14
14
  end
15
15
 
16
- it ".cogroup" do
16
+ it '.cogroup' do
17
17
  rdd = rdd_1(workers).cogroup(rdd_2(workers), rdd_3(workers))
18
18
  expect(rdd.collect_as_hash).to eql(result_123)
19
19
  end
20
20
  end
21
21
  end
22
22
 
23
- RSpec::shared_examples "a groupping by" do |workers|
23
+ RSpec.shared_examples 'a groupping by' do |workers|
24
24
  it "with #{workers || 'default'} worker" do
25
25
  rdd = rdd_numbers(workers)
26
26
  rdd = rdd.group_by(key_function1)
@@ -34,7 +34,7 @@ RSpec::shared_examples "a groupping by" do |workers|
34
34
  end
35
35
  end
36
36
 
37
- RSpec::describe "Spark::RDD" do
37
+ RSpec.describe 'Spark::RDD' do
38
38
 
39
39
  def make_result(*hashes)
40
40
  _result = {}
@@ -47,7 +47,7 @@ RSpec::describe "Spark::RDD" do
47
47
  _result
48
48
  end
49
49
 
50
- context ".group_by_key" do
50
+ context '.group_by_key' do
51
51
  let(:hash) { Generator.hash }
52
52
  let(:result) { make_result(hash) }
53
53
 
@@ -56,12 +56,13 @@ RSpec::describe "Spark::RDD" do
56
56
  rdd.group_by_key.collect_as_hash
57
57
  end
58
58
 
59
- it_behaves_like "a groupping by key", nil
60
- it_behaves_like "a groupping by key", 1
61
- it_behaves_like "a groupping by key", rand(2..10)
59
+ it_behaves_like 'a groupping by key', 1
60
+ it_behaves_like 'a groupping by key', 2
61
+ # it_behaves_like 'a groupping by key', nil
62
+ # it_behaves_like 'a groupping by key', rand(2..10)
62
63
  end
63
64
 
64
- context "cogroup" do
65
+ context 'cogroup' do
65
66
  let(:hash1) { Generator.hash }
66
67
  let(:hash2) { Generator.hash }
67
68
  let(:hash3) { Generator.hash }
@@ -81,12 +82,13 @@ RSpec::describe "Spark::RDD" do
81
82
  $sc.parallelize(hash3)
82
83
  end
83
84
 
84
- it_behaves_like "a cogroupping by key", nil
85
- it_behaves_like "a cogroupping by key", 1
86
- it_behaves_like "a cogroupping by key", rand(2..10)
85
+ it_behaves_like 'a cogroupping by key', 1
86
+ it_behaves_like 'a cogroupping by key', 2
87
+ # it_behaves_like 'a cogroupping by key', nil
88
+ # it_behaves_like 'a cogroupping by key', rand(2..10)
87
89
  end
88
90
 
89
- context "group_by" do
91
+ context 'group_by' do
90
92
  let(:key_function1) { lambda{|x| x%2} }
91
93
  let(:key_function2) { lambda{|x| x.size} }
92
94
 
@@ -101,9 +103,10 @@ RSpec::describe "Spark::RDD" do
101
103
  $sc.parallelize(words)
102
104
  end
103
105
 
104
- it_behaves_like "a groupping by", nil
105
- it_behaves_like "a groupping by", 1
106
- it_behaves_like "a groupping by", rand(2..10)
106
+ it_behaves_like 'a groupping by', 1
107
+ it_behaves_like 'a groupping by', 2
108
+ # it_behaves_like 'a groupping by', nil
109
+ # it_behaves_like 'a groupping by', rand(2..10)
107
110
  end
108
111
 
109
112
  end
@@ -1,19 +1,67 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::configure do |c|
3
+ RSpec.configure do |c|
4
4
  c.include Spark::Helper::Parser
5
+ c.include Spark::Helper::Statistic
5
6
  end
6
7
 
7
- RSpec::describe Spark::Helper do
8
-
9
- it "memory size" do
10
- expect(to_memory_size("512mb")).to eql(524288.0)
11
- expect(to_memory_size("1586 mb")).to eql(1624064.0)
12
- expect(to_memory_size("3 MB")).to eql(3072.0)
13
- expect(to_memory_size("9gb")).to eql(9437184.0)
14
- expect(to_memory_size("9gb", "mb")).to eql(9216.0)
15
- expect(to_memory_size("9mb", "gb")).to eql(0.01)
16
- expect(to_memory_size("6652548796kb", "mb")).to eql(6496629.68)
8
+ RSpec.describe Spark::Helper do
9
+
10
+ it 'memory size' do
11
+ expect(to_memory_size('512mb')).to eql(524288.0)
12
+ expect(to_memory_size('1586 mb')).to eql(1624064.0)
13
+ expect(to_memory_size('3 MB')).to eql(3072.0)
14
+ expect(to_memory_size('9gb')).to eql(9437184.0)
15
+ expect(to_memory_size('9gb', 'mb')).to eql(9216.0)
16
+ expect(to_memory_size('9mb', 'gb')).to eql(0.01)
17
+ expect(to_memory_size('6652548796kb', 'mb')).to eql(6496629.68)
18
+ end
19
+
20
+ context 'statistic' do
21
+ it 'compute_fraction' do
22
+ expect(compute_fraction(1, 1000, true)).to be_within(0.001).of(0.013)
23
+ expect(compute_fraction(2, 1000, true)).to be_within(0.001).of(0.018)
24
+ expect(compute_fraction(3, 1000, true)).to be_within(0.001).of(0.023)
25
+ expect(compute_fraction(4, 1000, true)).to be_within(0.001).of(0.028)
26
+ expect(compute_fraction(5, 1000, true)).to be_within(0.001).of(0.031)
27
+
28
+ expect(compute_fraction(1, 1000, false)).to be_within(0.001).of(0.0249)
29
+ expect(compute_fraction(2, 1000, false)).to be_within(0.001).of(0.0268)
30
+ expect(compute_fraction(3, 1000, false)).to be_within(0.001).of(0.0287)
31
+ expect(compute_fraction(4, 1000, false)).to be_within(0.001).of(0.0305)
32
+ expect(compute_fraction(5, 1000, false)).to be_within(0.001).of(0.0322)
33
+ end
34
+
35
+ it 'bisect_right' do
36
+ data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
37
+
38
+ expect(bisect_right(data, 0)).to eq(0)
39
+ expect(bisect_right(data, 1)).to eq(0)
40
+ expect(bisect_right(data, 1, 2)).to eq(2)
41
+ expect(bisect_right(data, 1, 3)).to eq(3)
42
+ expect(bisect_right(data, 1, 4)).to eq(4)
43
+ expect(bisect_right(data, 9)).to eq(0)
44
+ expect(bisect_right(data, 10)).to eq(1)
45
+ expect(bisect_right(data, 40)).to eq(4)
46
+ expect(bisect_right(data, 42)).to eq(4)
47
+ expect(bisect_right(data, 72)).to eq(7)
48
+ expect(bisect_right(data, 80, 4)).to eq(8)
49
+ expect(bisect_right(data, 80, 5)).to eq(8)
50
+ expect(bisect_right(data, 80, 8)).to eq(8)
51
+ expect(bisect_right(data, 80, 9)).to eq(9)
52
+ expect(bisect_right(data, 200)).to eq(9)
53
+ end
54
+
55
+ it 'determine_bounds' do
56
+ data = [10, 20, 30, 40, 50, 60, 70, 80, 90]
57
+
58
+ expect(determine_bounds(data, 0)).to eq([])
59
+ expect(determine_bounds(data, 1)).to eq([])
60
+ expect(determine_bounds(data, 2)).to eq([50])
61
+ expect(determine_bounds(data, 3)).to eq([40, 70])
62
+ expect(determine_bounds(data, 4)).to eq([30, 50, 70])
63
+ expect(determine_bounds(data, 20)).to eq(data)
64
+ end
17
65
  end
18
66
 
19
67
  end
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::shared_examples "a keying by" do |workers|
3
+ RSpec.shared_examples 'a keying by' do |workers|
4
4
  it "with #{workers || 'default'} worker" do
5
5
  rdd = rdd_numbers(workers)
6
6
  rdd = rdd.key_by(key_function1)
@@ -16,11 +16,11 @@ RSpec::shared_examples "a keying by" do |workers|
16
16
  end
17
17
  end
18
18
 
19
- RSpec::describe "Spark::RDD" do
19
+ RSpec.describe 'Spark::RDD' do
20
20
 
21
- context "key_by" do
21
+ context 'key_by' do
22
22
  let(:key_function1) { lambda{|x| x.even?} }
23
- let(:key_function2) { lambda{|x| x.include?("a")} }
23
+ let(:key_function2) { lambda{|x| x.include?('a')} }
24
24
 
25
25
  let(:numbers) { Generator.numbers }
26
26
  let(:words) { Generator.words }
@@ -33,9 +33,10 @@ RSpec::describe "Spark::RDD" do
33
33
  $sc.parallelize(words)
34
34
  end
35
35
 
36
- it_behaves_like "a keying by", nil
37
- it_behaves_like "a keying by", 1
38
- it_behaves_like "a keying by", rand(2..10)
36
+ it_behaves_like 'a keying by', 1
37
+ it_behaves_like 'a keying by', 2
38
+ # it_behaves_like 'a keying by', nil
39
+ # it_behaves_like 'a keying by', rand(2..10)
39
40
  end
40
41
 
41
42
  end
@@ -1,10 +1,10 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::describe "Spark::RDD" do
3
+ RSpec.describe 'Spark::RDD' do
4
4
  let(:numbers) { 1..100 }
5
5
  let(:rand_numbers) { Generator.numbers }
6
6
 
7
- it ".glom" do
7
+ it '.glom' do
8
8
  rdd = $sc.parallelize(numbers, 1).glom
9
9
  expect(rdd.collect).to eql([numbers.to_a])
10
10
 
@@ -14,7 +14,7 @@ RSpec::describe "Spark::RDD" do
14
14
  expect(rdd.collect).to eql(numbers.each_slice(20).to_a)
15
15
  end
16
16
 
17
- it ".coalesce" do
17
+ it '.coalesce' do
18
18
  rdd = $sc.parallelize(numbers, 5)
19
19
 
20
20
  rdd2 = rdd.glom
@@ -24,7 +24,7 @@ RSpec::describe "Spark::RDD" do
24
24
  expect(rdd3.collect.size).to eql(4)
25
25
  end
26
26
 
27
- it ".distinct" do
27
+ it '.distinct' do
28
28
  rdd = $sc.parallelize(rand_numbers, 5)
29
29
  rdd = rdd.distinct
30
30
  expect(rdd.collect.sort).to eql(rand_numbers.uniq.sort)
@@ -35,22 +35,22 @@ RSpec::describe "Spark::RDD" do
35
35
  expect(rdd.collect).to eql([1])
36
36
  end
37
37
 
38
- context ".union" do
39
- it "classic method" do
38
+ context '.union' do
39
+ it 'classic method' do
40
40
  rdd = $sc.parallelize(numbers, 5)
41
41
  rdd = rdd.union(rdd).collect
42
42
 
43
43
  expect(rdd.collect.sort).to eql((numbers.to_a+numbers.to_a).sort)
44
44
  end
45
45
 
46
- it "with a different serializer" do
46
+ it 'with a different serializer' do
47
47
  rdd1 = $sc.parallelize(numbers, 1, Spark::Serializer.build{ __batched__(__marshal__) })
48
48
  rdd2 = $sc.parallelize(numbers, 1, Spark::Serializer.build{ __batched__(__oj__) })
49
49
 
50
50
  expect { rdd1.union(rdd2).collect }.to_not raise_error
51
51
  end
52
52
 
53
- it "as operator" do
53
+ it 'as operator' do
54
54
  rdd1 = $sc.parallelize(numbers)
55
55
  rdd2 = $sc.parallelize(rand_numbers)
56
56
 
@@ -58,7 +58,7 @@ RSpec::describe "Spark::RDD" do
58
58
  end
59
59
  end
60
60
 
61
- it ".compact" do
61
+ it '.compact' do
62
62
  data = [nil, nil , 0, 0, 1, 2, nil, 6]
63
63
  result = data.compact
64
64
  ser = Spark::Serializer.build { __batched__(__marshal__, 1) }
@@ -73,7 +73,7 @@ RSpec::describe "Spark::RDD" do
73
73
  expect(rdd.collect).to eql(result)
74
74
  end
75
75
 
76
- it ".intersection" do
76
+ it '.intersection' do
77
77
  data1 = [0,1,2,3,4,5,6,7,8,9,10]
78
78
  data2 = [5,6,7,8,9,10,11,12,13,14,15]
79
79
 
@@ -83,19 +83,19 @@ RSpec::describe "Spark::RDD" do
83
83
  expect(rdd1.intersection(rdd2).collect.sort).to eql(data1 & data2)
84
84
  end
85
85
 
86
- it ".shuffle" do
86
+ it '.shuffle' do
87
87
  data = Generator.numbers
88
88
  rdd = $sc.parallelize(data)
89
89
 
90
90
  expect(rdd.shuffle.collect).to_not eql(data)
91
91
  end
92
92
 
93
- context ".cartesian" do
93
+ context '.cartesian' do
94
94
  let(:data1) { Generator.numbers(100) }
95
95
  let(:data2) { Generator.numbers(100) }
96
96
  let(:result) { data1.product(data2).map(&:to_s).sort }
97
97
 
98
- it "unbatched" do
98
+ it 'unbatched' do
99
99
  ser = Spark::Serializer.build { __batched__(__marshal__, 1) }
100
100
 
101
101
  rdd1 = $sc.parallelize(data1, 2, ser)
@@ -106,7 +106,7 @@ RSpec::describe "Spark::RDD" do
106
106
  expect(rdd.collect.sort).to eql(result)
107
107
  end
108
108
 
109
- it "batched" do
109
+ it 'batched' do
110
110
  ser1 = Spark::Serializer.build { __batched__(__marshal__, rand(4..10)) }
111
111
  ser2 = Spark::Serializer.build { __batched__(__marshal__, rand(4..10)) }
112
112
 
@@ -67,9 +67,10 @@ RSpec::describe 'Spark::RDD.map_partitions(_with_index)' do
67
67
  $sc.parallelize(numbers, workers)
68
68
  end
69
69
 
70
- it_behaves_like 'a map partitions', nil
71
70
  it_behaves_like 'a map partitions', 1
72
- it_behaves_like 'a map partitions', rand(2..10)
71
+ it_behaves_like 'a map partitions', 2
72
+ # it_behaves_like 'a map partitions', nil
73
+ # it_behaves_like 'a map partitions', rand(2..10)
73
74
  end
74
75
 
75
76
  context 'throught text_file' do
@@ -80,8 +81,9 @@ RSpec::describe 'Spark::RDD.map_partitions(_with_index)' do
80
81
  $sc.text_file(file, workers)
81
82
  end
82
83
 
83
- it_behaves_like 'a map partitions', nil
84
84
  it_behaves_like 'a map partitions', 1
85
- it_behaves_like 'a map partitions', rand(2..10)
85
+ it_behaves_like 'a map partitions', 2
86
+ # it_behaves_like 'a map partitions', nil
87
+ # it_behaves_like 'a map partitions', rand(2..10)
86
88
  end
87
89
  end
@@ -1,6 +1,6 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- RSpec::shared_examples "a mapping" do |workers|
3
+ RSpec.shared_examples 'a mapping' do |workers|
4
4
  it "with #{workers || 'default'} worker" do
5
5
  rdd2 = rdd(workers).map(func1)
6
6
  result = numbers.map(&func1)
@@ -24,7 +24,7 @@ RSpec::shared_examples "a mapping" do |workers|
24
24
  end
25
25
  end
26
26
 
27
- RSpec::shared_examples "a mapping values" do |workers|
27
+ RSpec.shared_examples 'a mapping values' do |workers|
28
28
  it "with #{workers || 'default'} worker" do
29
29
  rdd2 = rdd(workers).map_values(func1)
30
30
  result = hash.map{|key, value| [key, func1.call(value)]}
@@ -43,49 +43,52 @@ RSpec::shared_examples "a mapping values" do |workers|
43
43
  end
44
44
  end
45
45
 
46
- RSpec::describe "Spark::RDD" do
46
+ RSpec.describe 'Spark::RDD' do
47
47
  let(:func1) { lambda{|x| x*2} }
48
48
  let(:func2) { lambda{|x| x*3} }
49
49
  let(:func3) { lambda{|x| x*4} }
50
50
 
51
- context "throught parallelize" do
52
- context ".map" do
51
+ context 'throught parallelize' do
52
+ context '.map' do
53
53
  let(:numbers) { Generator.numbers }
54
54
 
55
55
  def rdd(workers)
56
56
  $sc.parallelize(numbers, workers)
57
57
  end
58
58
 
59
- it_behaves_like "a mapping", nil
60
- it_behaves_like "a mapping", 1
61
- it_behaves_like "a mapping", rand(2..10)
59
+ it_behaves_like 'a mapping', 1
60
+ it_behaves_like 'a mapping', 2
61
+ # it_behaves_like 'a mapping', nil
62
+ # it_behaves_like 'a mapping', rand(2..10)
62
63
  end
63
64
 
64
- context ".map_values" do
65
+ context '.map_values' do
65
66
  let!(:hash) { Generator.hash }
66
67
 
67
68
  def rdd(workers)
68
69
  $sc.parallelize(hash, workers)
69
70
  end
70
71
 
71
- it_behaves_like "a mapping values", nil
72
- it_behaves_like "a mapping values", 1
73
- it_behaves_like "a mapping values", rand(2..10)
72
+ it_behaves_like 'a mapping values', 1
73
+ it_behaves_like 'a mapping values', 2
74
+ # it_behaves_like 'a mapping values', nil
75
+ # it_behaves_like 'a mapping values', rand(2..10)
74
76
  end
75
77
  end
76
78
 
77
- context "throught text_file" do
78
- context ".map" do
79
- let(:file) { File.join("spec", "inputs", "numbers_0_100.txt") }
79
+ context 'throught text_file' do
80
+ context '.map' do
81
+ let(:file) { File.join('spec', 'inputs', 'numbers_0_100.txt') }
80
82
  let(:numbers) { File.readlines(file).map(&:strip) }
81
83
 
82
84
  def rdd(workers)
83
85
  $sc.text_file(file, workers)
84
86
  end
85
87
 
86
- it_behaves_like "a mapping", nil
87
- it_behaves_like "a mapping", 1
88
- it_behaves_like "a mapping", rand(2..10)
88
+ it_behaves_like 'a mapping', 1
89
+ it_behaves_like 'a mapping', 2
90
+ # it_behaves_like 'a mapping', nil
91
+ # it_behaves_like 'a mapping', rand(2..10)
89
92
  end
90
93
  end
91
94
  end