map_reduce 0.0.1.alpha4 → 0.0.1.alpha5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +176 -2
- data/lib/map_reduce.rb +7 -1
- data/lib/map_reduce/exceptions.rb +4 -0
- data/lib/map_reduce/map_log.rb +45 -0
- data/lib/map_reduce/mapper.rb +72 -0
- data/lib/map_reduce/master.rb +87 -145
- data/lib/map_reduce/master_old.rb +182 -0
- data/lib/map_reduce/reduce_log.rb +60 -0
- data/lib/map_reduce/reducer.rb +65 -0
- data/lib/map_reduce/socket/master.rb +2 -92
- data/lib/map_reduce/socket/{worker_sync.rb → req_fiber.rb} +3 -4
- data/lib/map_reduce/version.rb +1 -1
- data/spec/map_reduce/map_reduce_spec.rb +80 -67
- metadata +28 -23
- data/lib/map_reduce/socket/worker_em.rb +0 -4
@@ -0,0 +1,182 @@
|
|
1
|
+
require File.expand_path("../socket/master", __FILE__)
|
2
|
+
|
3
|
+
module MapReduce
  # Legacy master implementation.
  #
  # Mapped "key<delimiter>value" lines are buffered in memory, periodically
  # appended to a log file, sorted with the system `sort` utility when the
  # reduce phase starts, and then grouped by key into @data, from which
  # reducers are served one group per request.
  #
  # NOTE(review): this class still calls MapReduce::Socket::Master.new with
  # two arguments and does not define `recieve_msg`; confirm whether it is
  # meant to remain usable with the current socket class.
  class Master
    # How often (in seconds) buffered data is flushed to disk
    FLUSH_TIMEOUT = 1
    # How many lines are parsed by one iteration of grouping
    GROUP_LINES = 100
    # How many seconds to sleep if grouping is going faster than reducing
    GROUP_TIMEOUT = 1
    # How many grouped keys may be queued before grouping pauses
    GROUP_MAX = 10_000

    # Valid options:
    # * socket     - socket address to bind;
    #                defaults to ::MapReduce::DEFAULT_SOCKET
    # * log_folder - folder to store received MAP data;
    #                default is '/tmp/mapreduce/'
    # * workers    - count of workers that will emit data; default is 1.
    #                The value is passed to the socket unchanged.
    #                NOTE(review): earlier documentation named :auto as the
    #                default and warned that with :auto, in small jobs, one
    #                worker may finish before the others start, which makes
    #                the master begin reducing too early. Confirm which
    #                default is intended.
    # * delimiter  - the log stores data as "key{delimiter}value"; specify
    #                your own unique delimiter if keys may contain the
    #                default one. Default is a pipe "|".
    #
    def initialize(opts = {})
      # Socket addr to bind
      @socket_addr = opts[:socket] || ::MapReduce::DEFAULT_SOCKET
      # Folder to write logs
      @log_folder = opts[:log_folder] || "/tmp/mapreduce/"
      # How many MapReduce workers will emit data
      @workers = opts[:workers] || 1
      # Delimiter to store key/value pairs in log
      @delimiter = opts[:delimiter] || "|"

      @log = []
      @data = []
      @workers_envelopes = {}
      @reduce_stop = false
      @log_filename = File.join(@log_folder, "master-#{Process.pid}.log")
      @sorted_log_filename = File.join(@log_folder, "master-#{Process.pid}_sorted.log")

      FileUtils.mkdir_p(@log_folder)
      FileUtils.touch(@log_filename)
    end

    # Start the event loop: bind the socket and start the periodic flush.
    #
    def run
      EM.run do
        # Init socket
        master_socket

        # Init flushing timer
        flush
      end
    end

    # Stop the event loop.
    #
    def stop
      EM.stop
    end

    # Buffer one key/value pair in memory until the next flush.
    #
    def map(key, message)
      @log << "#{key}#{@delimiter}#{message}"
    end

    # Send one group ([key, value, value, ...]) back to a worker.
    # The last item in @data may still be receiving values while the sorted
    # log is being read, so it is only handed out once reading has finished
    # (@reduce_stop). When nothing is left, nil is sent. If no complete
    # group is available yet, the request is retried in one second.
    #
    def reduce(envelope)
      if @data.size >= 2 || @reduce_stop
        data = @data.shift
        data = data.flatten if data
        master_socket.send_reply(data, envelope)
      else
        EM.add_timer(1) { reduce(envelope) }
      end
    end

    # Log file handle opened for read/write (created on first use).
    #
    def log_file
      @log_file ||= File.open(@log_filename, "w+")
    end

    # Sorted log handle opened for reading.
    #
    def sorted_log_file
      @sorted_log_file ||= File.open(@sorted_log_filename, "r")
    end

    # Write buffered data to disk and schedule the next flush in
    # FLUSH_TIMEOUT seconds. Every call arms a new timer, so this should be
    # called once to start the periodic chain.
    #
    def flush
      write_log
      EM.add_timer(FLUSH_TIMEOUT) { flush }
    end

    # Sort the log into the sorted log file using the system `sort`
    # utility, then delete the original log.
    #
    # Raises RuntimeError if `sort` cannot be run or exits with an error;
    # the unsorted log is left in place in that case.
    #
    def sort
      # Opening on demand guarantees the file exists even if nothing was
      # mapped in this cycle; closing releases the descriptor before removal.
      log_file.close
      @log_file = nil

      # Arguments are passed as a list (no shell), and LC_ALL=C forces
      # byte-wise ordering so lines sharing a "key<delimiter>" prefix are
      # always adjacent, which is what grouping relies on.
      sorted = system({ "LC_ALL" => "C" }, "sort", "-o", @sorted_log_filename, @log_filename)
      raise "sort failed for #{@log_filename}" unless sorted

      FileUtils.rm(@log_filename)
    end

    # Start the reducing part:
    # write the rest of the buffered log to disk, sort it,
    # then start reading and grouping the sorted data.
    #
    def reduce!
      # Deliberately not `flush`: that would arm one more perpetual flush
      # timer on every reduce cycle.
      write_log
      sort

      # A previous cycle leaves the flag set; clear it so the unfinished
      # last group is not handed out while grouping is still running.
      @reduce_stop = false
      group sorted_log_file.each_line
    end

    # Read sorted data and group it by key, GROUP_LINES lines per event
    # loop tick. If the queue (@data) grows faster than workers grab data,
    # reading pauses for GROUP_TIMEOUT seconds. When the iterator is
    # exhausted the sorted log is closed and removed and @reduce_stop is set.
    #
    def group(iter)
      if @data.size >= GROUP_MAX
        EM.add_timer(GROUP_TIMEOUT) { group(iter) }
      else
        GROUP_LINES.times do
          line = iter.next.chomp
          # Limit 2 keeps delimiters that occur inside the value.
          key, msg = line.split(@delimiter, 2)

          last = @data.last
          if last && last[0] == key
            last[1] << msg
          else
            @data << [key, [msg]]
          end
        end

        EM.next_tick { group(iter) }
      end
    rescue StopIteration
      @sorted_log_file.close if @sorted_log_file
      FileUtils.rm(@sorted_log_filename)
      @sorted_log_file = nil
      @reduce_stop = true
    end

    # Create and bind the socket on first use.
    #
    def master_socket
      @master_socket ||= begin
        sock = MapReduce::Socket::Master.new self, @workers
        sock.bind @socket_addr
        sock
      end
    end

    private

    # Append buffered lines to the log file and empty the buffer.
    def write_log
      return if @log.empty?

      log_file << @log * "\n" << "\n"
      log_file.flush
      @log.clear
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module MapReduce
  # Reads a map log back as groups of values sharing one key.
  #
  # The log file name is obtained from `map_log.reset`; the file is sorted
  # in place with the system `sort` utility, read group by group through
  # #get_data, and deleted once it has been read to the end.
  class ReduceLog
    # map_log   - object responding to `reset`, which returns the path of
    #             the log file to reduce (or nil when there is none)
    # delimiter - string separating key and value on each log line
    def initialize(map_log, delimiter)
      @map_log = map_log
      @delimiter = delimiter
    end

    # Returns the next group as [key, value, value, ...].
    #
    # Returns nil when no log is open (before #force, or after the log was
    # exhausted). When the log is open but has no lines left, returns [nil].
    # Reaching the end of the file closes and deletes it.
    def get_data
      return unless @lines

      current_key = nil
      current_values = []

      while (line = peek_line)
        # Limit 2 keeps delimiters that occur inside the value.
        key, value = line.split(@delimiter, 2)
        current_key ||= key
        break if current_key != key

        current_values << value
        @lines.next
      end

      # line is nil only when the file has been read to the end.
      discard_log if line.nil?

      [current_key, *current_values]
    end

    # Opens the sorted log for reading unless it is already open.
    # Does nothing when the map log has no file to offer.
    #
    # NOTE(review): the file name is memoized by #log_file, so calling this
    # again after the log was exhausted and deleted will try to reopen the
    # removed file; confirm callers create a new ReduceLog per cycle.
    def force
      return if @lines

      fn = log_file
      return unless fn

      @file = File.open(fn)
      @lines = @file.each_line
    end

    # Asks the map log for its file name (once), sorts that file in place
    # and returns the name; returns nil when there is no file.
    def log_file
      @log_file ||= begin
        fn = @map_log.reset
        if fn
          @more = true
          sort(fn)
          fn
        end
      end
    end

    # Sorts the file in place with the system `sort` utility.
    #
    # Arguments are passed as a list (no shell), and LC_ALL=C forces
    # byte-wise ordering so lines sharing a "key<delimiter>" prefix are
    # always adjacent, which is what #get_data relies on.
    #
    # Raises RuntimeError if `sort` cannot be run or exits with an error.
    def sort(fn)
      sorted = system({ "LC_ALL" => "C" }, "sort", "-o", fn, fn)
      raise "sort failed for #{fn}" unless sorted
    end

    private

    # Next line without its line terminator, or nil at end of file.
    # The line is not consumed.
    def peek_line
      @lines.peek.chomp
    rescue StopIteration
      nil
    end

    # Closes and deletes the exhausted log file.
    def discard_log
      @file.close
      FileUtils.rm(@file.path)
      @lines = nil
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module MapReduce
  # Pulls grouped results for one task from a set of masters and yields
  # them to the caller's block.
  class Reducer
    # Options:
    # * masters - list of master socket addresses;
    #             defaults to [::MapReduce::DEFAULT_SOCKET]
    # * type    - connection type; :em (default) uses callbacks, any other
    #             value makes #reduce use the blocking #sync_reduce
    # * task    - task name sent along with every "reduce" request
    def initialize(opts = {})
      @task = opts[:task]
      @connection_type = opts[:type] || :em
      @masters = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
    end

    # Dispatches to the callback-based or the blocking implementation,
    # depending on the connection type.
    def reduce(&blk)
      return em_reduce(&blk) if @connection_type == :em

      sync_reduce(&blk)
    end

    # Blocking variant: keeps asking a randomly picked master for the next
    # group until every master has answered with a nil key, calling the
    # block with (key, values) for each group received.
    def sync_reduce(&blk)
      pending = sockets.dup
      while (master = pending.sample)
        reply = master.send_request(["reduce", @task])
        key, *values = reply
        if key.nil?
          # This master has nothing left for the task.
          pending.delete master
        else
          blk.call(key, values)
        end
      end
    end

    # Callback variant: asks one randomly picked master, handles its reply
    # in the callback and then re-enters itself with the remaining masters.
    # Once no master is left the block is called a final time with [nil].
    def em_reduce(all = nil, &blk)
      all ||= sockets.dup
      master = all.sample
      return blk.call([nil]) unless master

      master.send_request(["reduce", @task]) do |message|
        key, *values = message
        key.nil? ? all.delete(master) : blk.call(key, values)

        em_reduce(all, &blk)
      end
    end
    private

    # One connected request socket per master address, created on first use.
    # ReqFiber is used for the :sync connection type, ReqCb otherwise.
    def sockets
      return @sockets if @sockets

      klass = @connection_type == :sync ? EM::Protocols::Zmq2::ReqFiber : EM::Protocols::Zmq2::ReqCb
      @sockets = @masters.map do |addr|
        klass.new.tap { |conn| conn.connect(addr) }
      end
    end
  end
end
|
@@ -1,102 +1,12 @@
|
|
1
|
-
# Reply socket.
|
2
|
-
# Master accepts "map", "map_finished", and "reduce" messages.
|
3
|
-
# For "map" messages it replies only with an acknowledgement,
|
4
|
-
# but for "reduce" requests it returns key with grouped values.
|
5
|
-
#
|
6
1
|
module MapReduce::Socket
|
7
2
|
class Master < EM::Protocols::Zmq2::Rep
|
8
|
-
|
9
|
-
# we will sleep for REDUCE_WAIT seconds till state is not REDUCE
|
10
|
-
REDUCE_WAIT = 1
|
11
|
-
|
12
|
-
def initialize(master, workers)
|
3
|
+
def initialize(master)
|
13
4
|
@master = master
|
14
|
-
@workers = workers
|
15
|
-
|
16
|
-
@connections = {}
|
17
|
-
@state = :map
|
18
|
-
|
19
5
|
super()
|
20
6
|
end
|
21
7
|
|
22
8
|
def receive_request(message, envelope)
|
23
|
-
@
|
24
|
-
|
25
|
-
type, key, msg = message
|
26
|
-
case type
|
27
|
-
when "map"
|
28
|
-
map(envelope, key, msg)
|
29
|
-
when "map_finished"
|
30
|
-
map_finished(envelope)
|
31
|
-
when "reduce"
|
32
|
-
reduce(envelope)
|
33
|
-
else
|
34
|
-
MapReduce.logger.error("Wrong message type: #{type}")
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
# Send data to log
|
39
|
-
# Someone should never MAP data when master already in REDUCE state
|
40
|
-
#
|
41
|
-
def map(envelope, key, msg)
|
42
|
-
if @state == :map
|
43
|
-
@master.map(key, msg)
|
44
|
-
ok(envelope)
|
45
|
-
else
|
46
|
-
MapReduce.logger.error("Someone tries to MAP data while state is REDUCE")
|
47
|
-
not_ok(envelope, "You can't MAP while we are reducing")
|
48
|
-
end
|
49
|
-
end
|
50
|
-
|
51
|
-
# When worker stops mapping data, it sends "map_finished" message.
|
52
|
-
# When all workers will send "map_finished" message reduce will begin.
|
53
|
-
#
|
54
|
-
def map_finished(envelope)
|
55
|
-
ok(envelope)
|
56
|
-
|
57
|
-
@connections[envelope.first] ||= true
|
58
|
-
@workers = @connections.size if @workers == :auto
|
59
|
-
|
60
|
-
return unless @connections.all?{ |k,v| v }
|
61
|
-
return unless @connections.size == @workers
|
62
|
-
|
63
|
-
@state = :reduce
|
64
|
-
@master.reduce!
|
65
|
-
end
|
66
|
-
|
67
|
-
# Wait till all workers stops sending MAP.
|
68
|
-
# After all workers stopped we start REDUCE part of job.
|
69
|
-
#
|
70
|
-
def reduce(envelope)
|
71
|
-
if @state == :reduce
|
72
|
-
@master.reduce(envelope)
|
73
|
-
else
|
74
|
-
EM.add_timer(REDUCE_WAIT) do
|
75
|
-
reduce(envelope)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
# Simple OK reply
|
81
|
-
#
|
82
|
-
def ok(envelope)
|
83
|
-
send_reply(["ok"], envelope)
|
84
|
-
end
|
85
|
-
|
86
|
-
# Simple NOT OK reply
|
87
|
-
#
|
88
|
-
def not_ok(envelope, error)
|
89
|
-
send_reply(["error", error], envelope)
|
90
|
-
end
|
91
|
-
|
92
|
-
# Switch back to :map state if reduce finished
|
93
|
-
#
|
94
|
-
def send_reply(data, envelope)
|
95
|
-
unless data
|
96
|
-
@state = :map
|
97
|
-
@connections = {}
|
98
|
-
end
|
99
|
-
super
|
9
|
+
@master.recieve_msg(message, envelope)
|
100
10
|
end
|
101
11
|
end
|
102
12
|
end
|
@@ -1,9 +1,8 @@
|
|
1
|
-
module
|
2
|
-
class
|
3
|
-
alias_method :async_send_request, :send_request
|
1
|
+
module EM::Protocols::Zmq2
|
2
|
+
class ReqFiber < EM::Protocols::Zmq2::ReqCb
|
4
3
|
def send_request(data, &blk)
|
5
4
|
fib = Fiber.current
|
6
|
-
|
5
|
+
super(data) do |message|
|
7
6
|
fib.resume(message)
|
8
7
|
end
|
9
8
|
if block_given?
|
data/lib/map_reduce/version.rb
CHANGED
@@ -3,32 +3,48 @@ require 'spec_helper'
|
|
3
3
|
describe "MapReduce stack" do
|
4
4
|
describe "single master" do
|
5
5
|
before do
|
6
|
-
@
|
7
|
-
master = MapReduce::Master.new
|
6
|
+
@pid1 = fork do
|
7
|
+
master = MapReduce::Master.new socket: "tcp://127.0.0.1:15555"
|
8
|
+
trap("SIGINT") do
|
9
|
+
master.stop
|
10
|
+
exit
|
11
|
+
end
|
12
|
+
master.run
|
13
|
+
end
|
14
|
+
@pid2 = fork do
|
15
|
+
master = MapReduce::Master.new socket: "tcp://127.0.0.1:15556"
|
16
|
+
trap("SIGINT") do
|
17
|
+
master.stop
|
18
|
+
exit
|
19
|
+
end
|
8
20
|
master.run
|
9
21
|
end
|
10
22
|
end
|
11
23
|
|
12
24
|
after do
|
13
|
-
Process.kill "
|
25
|
+
Process.kill "INT", @pid1
|
26
|
+
Process.kill "INT", @pid2
|
14
27
|
end
|
15
28
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
29
|
+
describe ":em" do
|
30
|
+
it "should map/reduce with multiple masters" do
|
31
|
+
EM.run do
|
32
|
+
@mapper = MapReduce::Mapper.new task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
33
|
+
@reducer = MapReduce::Reducer.new task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
34
|
+
i = 0
|
35
|
+
[["Peter", "Apple"], ["Andrew", "Peach"], ["Mary", "Plum"], ["Peter", "Lemon"], ["Andrew", "Orange"]].each do |a|
|
36
|
+
@mapper.map(*a) do |res|
|
37
|
+
res.must_equal ["ok"]
|
38
|
+
if (i+=1) == 5
|
39
|
+
data = {}
|
40
|
+
@reducer.reduce do |key, values|
|
25
41
|
if key
|
26
42
|
data[key] = values
|
27
43
|
else
|
28
|
-
data.size.must_equal
|
29
|
-
data["
|
30
|
-
data["
|
31
|
-
|
44
|
+
data.size.must_equal 3
|
45
|
+
data["Peter"].sort.must_equal ["Apple", "Lemon"].sort
|
46
|
+
data["Andrew"].sort.must_equal ["Peach", "Orange"].sort
|
47
|
+
data["Mary"].must_equal ["Plum"]
|
32
48
|
EM.stop
|
33
49
|
end
|
34
50
|
end
|
@@ -39,64 +55,61 @@ describe "MapReduce stack" do
|
|
39
55
|
end
|
40
56
|
end
|
41
57
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
data
|
58
|
+
describe ":sync" do
|
59
|
+
it "should map/reduce with multiple masters" do
|
60
|
+
EM.synchrony do
|
61
|
+
@mapper = MapReduce::Mapper.new type: :sync, task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
62
|
+
@reducer = MapReduce::Reducer.new type: :sync, task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
63
|
+
[["Peter", "Apple"], ["Andrew", "Peach"], ["Mary", "Plum"], ["Peter", "Lemon"], ["Andrew", "Orange"]].each do |a|
|
64
|
+
res = @mapper.map(*a)
|
65
|
+
res.must_equal ["ok"]
|
66
|
+
end
|
67
|
+
data = {}
|
68
|
+
@reducer.reduce do |k, values|
|
69
|
+
data[k] = values
|
70
|
+
end
|
71
|
+
data.size.must_equal 3
|
72
|
+
data["Peter"].sort.must_equal ["Apple", "Lemon"].sort
|
73
|
+
data["Andrew"].sort.must_equal ["Peach", "Orange"].sort
|
74
|
+
data["Mary"].must_equal ["Plum"]
|
75
|
+
EM.stop
|
52
76
|
end
|
53
|
-
data.size.must_equal 2
|
54
|
-
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
55
|
-
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
56
|
-
|
57
|
-
EM.stop
|
58
77
|
end
|
59
|
-
end
|
60
|
-
end
|
61
78
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
@pid2 = fork do
|
69
|
-
master = MapReduce::Master.new socket: "ipc:///dev/shm/sock2.sock"
|
70
|
-
master.run
|
71
|
-
end
|
72
|
-
end
|
79
|
+
it "should map/reduce-map/reduce with multiple masters" do
|
80
|
+
EM.synchrony do
|
81
|
+
@mapper1 = MapReduce::Mapper.new type: :sync, task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
82
|
+
@reducer1 = MapReduce::Reducer.new type: :sync, task: "Fruits", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
83
|
+
@mapper2 = MapReduce::Mapper.new type: :sync, task: "Related", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
84
|
+
@reducer2 = MapReduce::Reducer.new type: :sync, task: "Related", masters: ["tcp://127.0.0.1:15555", "tcp://127.0.0.1:15556"]
|
73
85
|
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
86
|
+
[["Peter", "Apple"], ["Andrew", "Peach"], ["Mary", "Plum"], ["Peter", "Lemon"], ["Andrew", "Orange"], ["Peter", "Peach"], ["Yura", "Peach"], ["Yura", "Apricot"], ["Yura", "Apple"]].each do |a|
|
87
|
+
res = @mapper1.map(*a)
|
88
|
+
res.must_equal ["ok"]
|
89
|
+
end
|
78
90
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
worker.map("Michael", ["Blur"] * ',')
|
88
|
-
worker.map("Gosha", ["DDT", "Splin"] * ',')
|
89
|
-
worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
|
90
|
-
worker.map_finished
|
91
|
-
worker.reduce do |key, values|
|
92
|
-
data[key] = values if key
|
91
|
+
@reducer1.reduce do |k, values|
|
92
|
+
values.each do |fruit|
|
93
|
+
related = values.dup
|
94
|
+
related.delete fruit
|
95
|
+
related.each do |r|
|
96
|
+
@mapper2.map(fruit, r)
|
97
|
+
end
|
98
|
+
end
|
93
99
|
end
|
94
|
-
data.size.must_equal 5
|
95
|
-
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
96
|
-
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
97
|
-
end
|
98
100
|
|
99
|
-
|
101
|
+
fruits = {}
|
102
|
+
@reducer2.reduce do |fruit, related|
|
103
|
+
fruits[fruit] ||= []
|
104
|
+
fruits[fruit].push(*related)
|
105
|
+
end
|
106
|
+
|
107
|
+
fruits["Apple"].must_equal ["Apricot", "Lemon", "Peach", "Peach"]
|
108
|
+
fruits["Orange"].must_equal ["Peach"]
|
109
|
+
fruits["Plum"].must_equal nil
|
110
|
+
|
111
|
+
EM.stop
|
112
|
+
end
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|