rubadoop 0.7.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +7 -0
- data/Gemfile.lock +43 -0
- data/README.rdoc +7 -0
- data/Rakefile +25 -0
- data/lib/rubadoop.rb +26 -0
- data/lib/rubadoop/base_dsl.rb +31 -0
- data/lib/rubadoop/emr.rb +23 -0
- data/lib/rubadoop/emr/jobflow_builder.rb +36 -0
- data/lib/rubadoop/emr/jobflow_builder/bootstrap_action.rb +27 -0
- data/lib/rubadoop/emr/jobflow_builder/job_spec.rb +77 -0
- data/lib/rubadoop/emr/jobflow_builder/step.rb +41 -0
- data/lib/rubadoop/map_reduce.rb +23 -0
- data/lib/rubadoop/map_reduce/call_java.rb +112 -0
- data/lib/rubadoop/map_reduce/call_streaming.rb +55 -0
- data/lib/rubadoop/map_reduce/identity.rb +30 -0
- data/lib/rubadoop/map_reduce/io.rb +128 -0
- data/lib/rubadoop/map_reduce/job_conf_environment.rb +9 -0
- data/lib/rubadoop/map_reduce/mappable.rb +59 -0
- data/lib/rubadoop/map_reduce/mapper.rb +15 -0
- data/lib/rubadoop/map_reduce/reducable.rb +74 -0
- data/lib/rubadoop/map_reduce/reducer.rb +12 -0
- data/lib/rubadoop/map_reduce/test_assist.rb +65 -0
- data/lib/rubadoop/map_reduce/utils.rb +29 -0
- data/lib/rubadoop/oozie/workflow_builder.rb +42 -0
- data/lib/rubadoop/oozie/workflow_builder/job_properties.rb +19 -0
- data/lib/rubadoop/version.rb +3 -0
- data/test/rubadoop/base_dsl_test.rb +27 -0
- data/test/rubadoop/emr/jobflow_builder_test.rb +184 -0
- data/test/rubadoop/map_reduce/call_java_test.rb +122 -0
- data/test/rubadoop/map_reduce/call_streaming_test.rb +81 -0
- data/test/rubadoop/map_reduce/identity_test.rb +40 -0
- data/test/rubadoop/map_reduce/io_test.rb +51 -0
- data/test/rubadoop/map_reduce/job_conf_environment_test.rb +28 -0
- data/test/rubadoop/map_reduce/mappable_test.rb +62 -0
- data/test/rubadoop/map_reduce/mapper_test.rb +76 -0
- data/test/rubadoop/map_reduce/reducable_test.rb +12 -0
- data/test/rubadoop/map_reduce/reducer_test.rb +137 -0
- data/test/rubadoop/map_reduce/test_assist_test.rb +76 -0
- data/test/rubadoop/oozie/workflow_builder_test.rb +21 -0
- data/test/test_helper.rb +10 -0
- metadata +140 -0
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
|
6
|
+
# Checks that the Identity mapper and reducer copy their input through to
# MapReduce.io_out. Each test only constructs the object and then inspects
# what was written.
class IdentityTest < MiniTest::Spec
  def test_mapper
    written = pipe_through("abcd\nefgh") { Identity::Mapper.new }
    assert_equal "abcd\nefgh\n", written
  end

  # With input_ignore_key the leading "key\t" of each line is expected to be dropped.
  def test_mapper_ignorekey
    written = pipe_through("1\tabcd\n2\tefgh") { Identity::Mapper.new(input_ignore_key: true) }
    assert_equal "abcd\nefgh\n", written
  end

  def test_reducer
    written = pipe_through("k1\tv1\nk2\tv2\n") { Identity::Reducer.new }
    assert_equal "k1\tv1\nk2\tv2\n", written
  end

  # Lines without a tab (key only) are expected to pass through unchanged.
  def test_reducer_dummy
    written = pipe_through("abcd\nefgh") { Identity::Reducer.new }
    assert_equal "abcd\nefgh\n", written
  end

  private

  # Points MapReduce.io_in at +text+ and MapReduce.io_out at a fresh StringIO,
  # runs the given block, and returns everything written to the output.
  def pipe_through(text)
    MapReduce.io_in = StringIO.new(text)
    sink = StringIO.new
    MapReduce.io_out = sink
    yield
    sink.string
  end
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
# Drives the MapReduce output sinks through the Utils helpers
# (out_entry, out_map_entry, log_counter).
class IoTest < MiniTest::Spec
  include ::Rubadoop::MapReduce::Utils

  # Smoke test: emitting after selecting the silent output must not raise.
  def test_silent_output
    ::Rubadoop::MapReduce::Io.set_silent_output

    out_entry('silent')
  end

  # Smoke test: emitting after selecting the standard output must not raise.
  def test_standard_output
    ::Rubadoop::MapReduce::Io.set_standard_output

    out_entry('standard')
  end

  # TestOut is expected to record both the counters and the emitted entries.
  def test_test_out
    ::Rubadoop::MapReduce.out = TestOut.new

    emit_sample_records

    assert_equal 6, active_out.counters['Block']['Rock']
    assert_equal 0, active_out.counters['Block']['Stock']
    assert_equal ['poop', "k1\tv1"], active_out.entries
  end

  # Counter totals are expected to accumulate the same way for every output class.
  def test_counter_collection
    [StandardOut, EmptyOut].each do |output_class|
      ::Rubadoop::MapReduce.out = output_class.new

      emit_sample_records

      assert_equal 6, active_out.counters['Block']['Rock']
      assert_equal 0, active_out.counters['Block']['Stock']
    end
  end

  private

  # The output object currently installed on the MapReduce module.
  def active_out
    ::Rubadoop::MapReduce.out
  end

  # Emits two entries and three counter updates (Rock: 1 + 5, Stock: 0).
  def emit_sample_records
    out_entry('poop')
    log_counter 'Block', 'Rock', 1
    log_counter 'Block', 'Stock', 0
    log_counter 'Block', 'Rock', 5
    out_map_entry('k1', 'v1')
  end
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
# Tests job_conf_environment, which looks up a job configuration property
# in the process environment.
class JobConfEnvironmentTest < MiniTest::Spec
  include ::Rubadoop::MapReduce::JobConfEnvironment

  # A dotted property name is expected to resolve to the environment
  # variable with the dots replaced by underscores.
  def test_simple
    ENV['map_input_file'] = 'goop'

    assert_equal 'goop', job_conf_environment('map.input.file')
  ensure
    # Always clean up, even when the assertion fails, so the variable
    # cannot leak into later tests.
    ENV.delete 'map_input_file'
  end

  # A variable stored under the literal dotted name must not be found.
  def test_missing
    ENV['map.input.file'] = 'poop'

    # assert_nil instead of assert_equal(nil, ...): the latter is deprecated
    # in Minitest 5 and is slated to fail in Minitest 6.
    assert_nil job_conf_environment('map.input.file')
  ensure
    ENV.delete 'map.input.file'
  end
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
# Tests the Mappable mixin (mapper / mapper_batched) through the
# TestAssist#run_test_mapper harness.
class MappableTest < MiniTest::Spec
  include TestAssist
  include Mappable

  # Only every 100th line is emitted; entries come back as strings.
  def test_mapper
    result = run_test_mapper((1..500).to_a) do
      mapper do |line|
        out_entry line if (line.to_i % 100).zero?
      end
    end
    assert_entries %w[100 200 300 400 500], result
  end

  # 501 lines in batches of 100: five full batches plus one trailing single-line batch.
  def test_mapper_batched
    result = run_test_mapper((1..501).to_a) do
      mapper_batched(100) do |batch|
        out_entry batch
      end
    end
    assert_entries (1..501).map(&:to_s).each_slice(100).to_a, result
  end

  # Input size equal to the batch size must yield exactly one batch.
  def test_mapper_batched_edge
    result = run_test_mapper((1..10).to_a) do
      mapper_batched(10) do |batch|
        out_entry batch
      end
    end
    assert_entries [(1..10).map(&:to_s)], result
  end

  # No input at all must yield no batches.
  def test_mapper_batched_empty
    result = run_test_mapper(nil) do
      mapper_batched(10) do |batch|
        out_entry batch
      end
    end
    assert_entries [], result
  end

  private

  # Asserts the number of recorded entries first, then each entry by index.
  def assert_entries(expected, result)
    assert_equal expected.size, result.entries.size
    expected.each_with_index do |entry, index|
      assert_equal entry, result.entries[index]
    end
  end
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
|
6
|
+
# Empty Mapper subclass used as the subject of MapperTest.
class TestMapper < MapReduce::Mapper; end
|
8
|
+
|
9
|
+
# Tests how Mapper splits MapReduce.io_in into lines, both when a block is
# given (lines are yielded) and when it is not (lines are returned as an array).
class MapperTest < MiniTest::Spec
  # Block form: each input line is yielded in order, exactly once.
  def test_split_block
    mapper = TestMapper.new

    MapReduce.io_in = StringIO.new("abcd\nefgh")
    expected = %w[abcd efgh]
    seen = 0
    mapper.process do |line|
      # Interpolate the yielded line. The previous message referenced an
      # undefined `key`, which would have raised NameError and hidden the
      # real failure.
      fail "unexpected line: #{seen}=#{line}" if seen >= expected.size
      assert_equal expected[seen], line
      seen += 1
    end
    assert_equal 2, seen
  end

  # Array form across assorted inputs: trailing newline, blank lines,
  # whitespace-only last line, embedded tab, and empty input.
  def test_split_various
    mapper = TestMapper.new

    [
      ["abcd\nefgh", %w[abcd efgh]],
      ["abcd\n\n\n", ['abcd', '', '']],
      ["0\n1",       %w[0 1]],
      ["0\n1\n",     %w[0 1]],
      ["0\n1\n ",    ['0', '1', ' ']],
      ["0\t1\n",     ["0\t1"]],
      ['',           []]
    ].each do |text, lines|
      MapReduce.io_in = StringIO.new(text)
      assert_equal lines, mapper.process
    end
  end

  # With input_ignore_key the text up to the first tab is dropped; lines
  # without a tab are returned whole.
  def test_mapper_ignorekey
    mapper = TestMapper.new(input_ignore_key: true)

    [
      ["0\t1\n2\t3",    %w[1 3]],
      ["0\n1\n2\n3",    %w[0 1 2 3]],
      ["\t1\n2\t3\n",   %w[1 3]]
    ].each do |text, lines|
      MapReduce.io_in = StringIO.new(text)
      assert_equal lines, mapper.process
    end
  end

  # #mapper and #process are expected to return the same lines.
  def test_mapper_function_name
    mapper = TestMapper.new

    MapReduce.io_in = StringIO.new("0\n1\n2")
    assert_equal %w[0 1 2], mapper.mapper

    MapReduce.io_in = StringIO.new("0\n1\n2")
    assert_equal %w[0 1 2], mapper.process
  end
end
|
75
|
+
end
|
76
|
+
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
|
5
|
+
# Empty Reducer subclass used as the subject of ReducerTest.
class TestReducer < MapReduce::Reducer; end
|
7
|
+
|
8
|
+
# Tests how Reducer groups tab-separated "key\tvalue" lines from
# MapReduce.io_in by key, in block form (key plus a values enumerator) and
# in array form (hashes with :key and :values).
class ReducerTest < MiniTest::Spec
  def test_splitting
    reducer = TestReducer.new

    # Block form: one yield per distinct key, values fully drained.
    MapReduce.io_in = StringIO.new("1\t1\n1\t2\n2\t2")
    streamed = [['1', %w[1 2]], ['2', %w[2]]]
    group_index = 0
    reducer.process do |key, values|
      fail "unexpected line: #{group_index}=#{key}" unless group_index < streamed.size
      want_key, want_values = streamed[group_index]
      assert_equal want_key, key
      assert_equal want_values, values.to_a
      group_index += 1
    end
    assert_equal 2, group_index

    # Array form: input text paired with the expected [key, values] groups.
    # Lines without a tab become a key with a single nil value.
    [
      ["1\t1\n1\t2\n2\t2", [['1', %w[1 2]], ['2', %w[2]]]],
      ["1\t1",             [['1', %w[1]]]],
      ['',                 []],
      ["abcd\nefgh",       [['abcd', [nil]], ['efgh', [nil]]]],
      ["1\t1\n1\t2\n2\t2", [['1', %w[1 2]], ['2', %w[2]]]],
      ["1\t1\n1\t2\n2\t2\n3\t3\n3\t1\n3\t9\n4\t1",
       [['1', %w[1 2]], ['2', %w[2]], ['3', %w[3 1 9]], ['4', %w[1]]]]
    ].each do |text, groups|
      MapReduce.io_in = StringIO.new(text)
      r = reducer.process
      assert_equal groups.size, r.size
      groups.each_with_index do |(want_key, want_values), i|
        assert_equal want_key, r[i][:key]
        assert_equal want_values, r[i][:values]
      end
    end
  end

  # The block may leave a group's values partly or wholly unread; the
  # following groups must still arrive with the right key and values.
  def test_skipping
    reducer = TestReducer.new
    text = "1\t1\n1\t2\n2\t2\n3\t3\n3\t1\n3\t9\n4\t1"

    # Per-group actions on the values enumerator.
    drain  = ->(want) { ->(values) { assert_equal want, values.to_a } }
    peek   = ->(want) { ->(values) { assert_equal want, values.next } }
    ignore = ->(_values) {}

    scenarios = [
      # Skip all values of the third group.
      [['1', drain.call(%w[1 2])], ['2', drain.call(%w[2])], ['3', ignore], ['4', drain.call(%w[1])]],
      # Read only the first value of the third group.
      [['1', drain.call(%w[1 2])], ['2', drain.call(%w[2])], ['3', peek.call('3')], ['4', drain.call(%w[1])]],
      # Skip the first and third groups.
      [['1', ignore], ['2', drain.call(%w[2])], ['3', ignore], ['4', drain.call(%w[1])]],
      # Skip the last two groups.
      [['1', drain.call(%w[1 2])], ['2', drain.call(%w[2])], ['3', ignore], ['4', ignore]],
      # Never touch the values at all.
      [['1', ignore], ['2', ignore], ['3', ignore], ['4', ignore]]
    ]

    scenarios.each do |groups|
      MapReduce.io_in = StringIO.new(text)
      group_index = 0
      reducer.process do |key, values|
        fail "unexpected line: #{group_index}=#{key}" unless group_index < groups.size
        want_key, action = groups[group_index]
        assert_equal want_key, key
        action.call(values)
        group_index += 1
      end
      assert_equal 4, group_index
    end
  end
end
|
136
|
+
end
|
137
|
+
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
module Rubadoop
|
4
|
+
module MapReduce
|
5
|
+
# Tests the TestAssist harness (run_test_mapper / run_test_reducer) using
# the SimpleWordCount and SummingReducer sample jobs.
class TestAssistTest < MiniTest::Spec
  include ::Rubadoop::MapReduce::TestAssist

  # Eight distinct lower-cased words; "the" appears twice and is emitted first.
  def test_wordcount_mapper
    lines = ['The quick brown fox', 'jumped over', 'the lazy dog']
    result = run_test_mapper(lines) { SimpleWordCount.new.execute }

    assert_equal 8, result.entries.size
    { 0 => "the\t2", 7 => "dog\t1" }.each do |index, entry|
      assert_equal entry, result.entries[index]
    end
  end

  # Reducer input is given as key => list of values; each key's values are summed.
  def test_summing_reducer
    grouped = { 'the' => [3, 4, 5], 'fox' => [5] }
    result = run_test_reducer(grouped) { SummingReducer.new.execute }

    assert_equal 2, result.entries.size
    ["the\t12", "fox\t5"].each_with_index do |entry, index|
      assert_equal entry, result.entries[index]
    end
  end

  # With nil input the block can still emit directly to the current output.
  def test_no_input
    result = run_test_mapper(nil) { ::Rubadoop::MapReduce.out.entry('cookoo') }

    assert_equal 1, result.entries.size
    assert_equal "cookoo", result.entries[0]
  end
end
|
37
|
+
|
38
|
+
# Sample mapper job: counts case-insensitive word occurrences over all
# input lines and emits one "word<TAB>count" pair per distinct word.
class SimpleWordCount
  include ::Rubadoop::MapReduce::Utils
  include ::Rubadoop::MapReduce::Mappable

  # Reads every line via #mapper, tallies the lower-cased words, then emits
  # the totals in first-seen order.
  def execute
    tally = Hash.new(0)

    mapper do |line|
      line.split(/\s+/).each { |token| tally[token.downcase] += 1 }
    end

    tally.each { |word, count| out_map_entry(word, count) }
  end
end
|
61
|
+
|
62
|
+
# Sample reducer job: emits, for every key, the integer sum of its values.
class SummingReducer
  include ::Rubadoop::MapReduce::Utils
  include ::Rubadoop::MapReduce::Reducable

  # Walks each key's values, converting each with to_i, and emits key/total.
  def execute
    reducer do |key, counts|
      total = 0
      counts.each do |count|
        total += count.to_i
      end
      out_map_entry(key, total)
    end
  end
end
|
74
|
+
|
75
|
+
end
|
76
|
+
end
|