map_reduce 0.0.1.alpha2 → 0.0.1.alpha3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/map_reduce/master.rb +2 -0
- data/lib/map_reduce/socket/master.rb +2 -3
- data/lib/map_reduce/version.rb +1 -1
- data/lib/map_reduce/worker.rb +50 -24
- data/spec/map_reduce/map_reduce_spec.rb +16 -14
- metadata +2 -2
data/lib/map_reduce/master.rb
CHANGED
@@ -126,6 +126,7 @@ module MapReduce
|
|
126
126
|
def sort
|
127
127
|
`sort #{@log_filename} -o #{@sorted_log_filename}`
|
128
128
|
FileUtils.rm(@log_filename)
|
129
|
+
@log_file = nil
|
129
130
|
end
|
130
131
|
|
131
132
|
# Start reducing part.
|
@@ -164,6 +165,7 @@ module MapReduce
|
|
164
165
|
end
|
165
166
|
rescue StopIteration => e
|
166
167
|
FileUtils.rm(@sorted_log_filename)
|
168
|
+
@sorted_log_file = nil
|
167
169
|
@reduce_stop = true
|
168
170
|
end
|
169
171
|
|
@@ -64,13 +64,12 @@ module MapReduce::Socket
|
|
64
64
|
@master.reduce!
|
65
65
|
end
|
66
66
|
|
67
|
-
# Wait till all workers
|
67
|
+
# Wait until all workers stop sending MAP.
|
68
68
|
# After all workers stopped we start REDUCE part of job.
|
69
69
|
#
|
70
70
|
def reduce(envelope)
|
71
|
-
@connections[envelope] ||= true
|
72
71
|
if @state == :reduce
|
73
|
-
@
|
72
|
+
@master.reduce(envelope)
|
74
73
|
else
|
75
74
|
EM.add_timer(REDUCE_WAIT) do
|
76
75
|
reduce(envelope)
|
data/lib/map_reduce/version.rb
CHANGED
data/lib/map_reduce/worker.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# MapReduce Worker does two jobs:
|
2
2
|
# First, it maps (emits) all data to masters;
|
3
3
|
# Second, it reduces data returned from master;
|
4
|
+
# After reducing, it is ready to map data again.
|
4
5
|
#
|
5
6
|
module MapReduce
|
6
7
|
class Worker
|
@@ -13,10 +14,10 @@ module MapReduce
|
|
13
14
|
# ** :sync - Synchronous type on Fibers
|
14
15
|
#
|
15
16
|
def initialize(opts = {})
|
16
|
-
@
|
17
|
+
@socket_addrs = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
|
17
18
|
|
18
|
-
opts[:type] ||= :em
|
19
|
-
@socket_class = case
|
19
|
+
@type = opts[:type] ||= :em
|
20
|
+
@socket_class = case @type
|
20
21
|
when :em
|
21
22
|
require File.expand_path("../socket/worker_em", __FILE__)
|
22
23
|
MapReduce::Socket::WorkerEm
|
@@ -34,7 +35,7 @@ module MapReduce
|
|
34
35
|
def emit(key, value, &blk)
|
35
36
|
fail "Key can't be nil" if key.nil?
|
36
37
|
|
37
|
-
sock =
|
38
|
+
sock = pick_map_socket(key)
|
38
39
|
sock.send_request(["map", key, value], &blk)
|
39
40
|
end
|
40
41
|
alias :map :emit
|
@@ -43,12 +44,13 @@ module MapReduce
|
|
43
44
|
# Master will wait until all workers have sent the "map_finished" message.
|
44
45
|
#
|
45
46
|
def map_finished(&blk)
|
46
|
-
all =
|
47
|
+
all = master_sockets.size
|
47
48
|
resp = 0
|
48
49
|
|
49
|
-
|
50
|
+
master_sockets.each do |sock, h|
|
50
51
|
sock.send_request(["map_finished"]) do |msg|
|
51
|
-
|
52
|
+
socket_state(sock, :reduce)
|
53
|
+
blk.call(["ok"]) if block_given? && (resp+=1) == all
|
52
54
|
end
|
53
55
|
end
|
54
56
|
["ok"]
|
@@ -62,16 +64,36 @@ module MapReduce
|
|
62
64
|
# If nothing to reduce, we return nil to client.
|
63
65
|
#
|
64
66
|
def reduce(&blk)
|
65
|
-
|
67
|
+
if @type == :em
|
68
|
+
em_reduce(&blk)
|
69
|
+
else
|
70
|
+
sync_reduce(&blk)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def sync_reduce(&blk)
|
75
|
+
while sock = random_reduce_socket
|
76
|
+
key, *values = sock.send_request(["reduce"])
|
77
|
+
if key.nil?
|
78
|
+
socket_state(sock, :map)
|
79
|
+
else
|
80
|
+
blk.call(key, values)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def em_reduce(&blk)
|
86
|
+
sock = random_reduce_socket
|
66
87
|
if sock
|
67
88
|
sock.send_request(["reduce"]) do |message|
|
68
89
|
key, *values = message
|
69
90
|
if key.nil?
|
70
|
-
|
91
|
+
socket_state(sock, :map)
|
71
92
|
else
|
72
93
|
blk.call(key, values)
|
73
94
|
end
|
74
|
-
|
95
|
+
|
96
|
+
em_reduce(&blk)
|
75
97
|
end
|
76
98
|
else
|
77
99
|
blk.call([nil])
|
@@ -82,37 +104,41 @@ module MapReduce
|
|
82
104
|
|
83
105
|
# Connect to each master.
|
84
106
|
#
|
85
|
-
def
|
86
|
-
@
|
87
|
-
|
107
|
+
def master_sockets
|
108
|
+
@master_sockets ||= begin
|
109
|
+
socks = {}
|
110
|
+
@socket_addrs.each_with_index do |addr, i|
|
88
111
|
sock = @socket_class.new
|
89
112
|
sock.connect addr
|
90
|
-
sock
|
113
|
+
socks[sock] = { state: :map, ind: i }
|
91
114
|
end
|
115
|
+
socks
|
92
116
|
end
|
93
117
|
end
|
94
118
|
|
95
119
|
# Kind of sharding
|
96
120
|
#
|
97
|
-
def
|
98
|
-
shard = if
|
99
|
-
Digest::MD5.hexdigest(key.to_s).to_i(16) %
|
121
|
+
def pick_map_socket(key)
|
122
|
+
shard = if master_sockets.size > 1
|
123
|
+
Digest::MD5.hexdigest(key.to_s).to_i(16) % master_sockets.size
|
100
124
|
else
|
101
125
|
0
|
102
126
|
end
|
103
|
-
|
127
|
+
master_sockets.keys[shard]
|
104
128
|
end
|
105
129
|
|
106
|
-
# Take random socket to get reduce message
|
130
|
+
# Take random socket to get reduce message.
|
131
|
+
# Socket should be in :reduce state.
|
107
132
|
#
|
108
|
-
def
|
109
|
-
|
133
|
+
def random_reduce_socket
|
134
|
+
master_sockets.select{ |k,v| v[:state] == :reduce }.keys.sample
|
110
135
|
end
|
111
136
|
|
112
|
-
#
|
137
|
+
# Change socket's state to :map when it is empty
|
138
|
+
# and to :reduce when mapping is finished
|
113
139
|
#
|
114
|
-
def
|
115
|
-
|
140
|
+
def socket_state(sock, state)
|
141
|
+
master_sockets[sock][:state] = state
|
116
142
|
end
|
117
143
|
end
|
118
144
|
end
|
@@ -76,23 +76,25 @@ describe "MapReduce stack" do
|
|
76
76
|
Process.kill "TERM", @pid2
|
77
77
|
end
|
78
78
|
|
79
|
-
it "should map and reduce some data in SYNC mode" do
|
79
|
+
it "should map and reduce some data in SYNC mode twice" do
|
80
80
|
EM.synchrony do
|
81
|
-
data = {}
|
82
81
|
worker = MapReduce::Worker.new type: :sync, masters: ["ipc:///dev/shm/sock1.sock", "ipc:///dev/shm/sock2.sock"]
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
82
|
+
2.times do
|
83
|
+
data = {}
|
84
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
|
85
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
|
86
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
|
87
|
+
worker.map("Michael", ["Blur"] * ',')
|
88
|
+
worker.map("Gosha", ["DDT", "Splin"] * ',')
|
89
|
+
worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
|
90
|
+
worker.map_finished
|
91
|
+
worker.reduce do |key, values|
|
92
|
+
data[key] = values if key
|
93
|
+
end
|
94
|
+
data.size.must_equal 5
|
95
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
96
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
92
97
|
end
|
93
|
-
data.size.must_equal 5
|
94
|
-
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
95
|
-
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
96
98
|
|
97
99
|
EM.stop
|
98
100
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: map_reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.
|
4
|
+
version: 0.0.1.alpha3
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -110,7 +110,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
110
110
|
requirements:
|
111
111
|
- - ! '>='
|
112
112
|
- !ruby/object:Gem::Version
|
113
|
-
hash:
|
113
|
+
hash: 1980425592247416328
|
114
114
|
version: '0'
|
115
115
|
segments:
|
116
116
|
- 0
|