map_reduce 0.0.1.alpha2 → 0.0.1.alpha3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/map_reduce/master.rb +2 -0
- data/lib/map_reduce/socket/master.rb +2 -3
- data/lib/map_reduce/version.rb +1 -1
- data/lib/map_reduce/worker.rb +50 -24
- data/spec/map_reduce/map_reduce_spec.rb +16 -14
- metadata +2 -2
data/lib/map_reduce/master.rb
CHANGED
@@ -126,6 +126,7 @@ module MapReduce
|
|
126
126
|
def sort
|
127
127
|
`sort #{@log_filename} -o #{@sorted_log_filename}`
|
128
128
|
FileUtils.rm(@log_filename)
|
129
|
+
@log_file = nil
|
129
130
|
end
|
130
131
|
|
131
132
|
# Start reducing part.
|
@@ -164,6 +165,7 @@ module MapReduce
|
|
164
165
|
end
|
165
166
|
rescue StopIteration => e
|
166
167
|
FileUtils.rm(@sorted_log_filename)
|
168
|
+
@sorted_log_file = nil
|
167
169
|
@reduce_stop = true
|
168
170
|
end
|
169
171
|
|
@@ -64,13 +64,12 @@ module MapReduce::Socket
|
|
64
64
|
@master.reduce!
|
65
65
|
end
|
66
66
|
|
67
|
-
# Wait till all workers
|
67
|
+
# Wait till all workers stop sending MAP.
|
68
68
|
# After all workers stopped we start REDUCE part of job.
|
69
69
|
#
|
70
70
|
def reduce(envelope)
|
71
|
-
@connections[envelope] ||= true
|
72
71
|
if @state == :reduce
|
73
|
-
@
|
72
|
+
@master.reduce(envelope)
|
74
73
|
else
|
75
74
|
EM.add_timer(REDUCE_WAIT) do
|
76
75
|
reduce(envelope)
|
data/lib/map_reduce/version.rb
CHANGED
data/lib/map_reduce/worker.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# MapReduce Worker does two jobs:
|
2
2
|
# First, it maps (emits) all data to masters;
|
3
3
|
# Second, it reduces data returned from master;
|
4
|
+
# After reducing, it is ready to map data again.
|
4
5
|
#
|
5
6
|
module MapReduce
|
6
7
|
class Worker
|
@@ -13,10 +14,10 @@ module MapReduce
|
|
13
14
|
# ** :sync - Synchronous type on Fibers
|
14
15
|
#
|
15
16
|
def initialize(opts = {})
|
16
|
-
@
|
17
|
+
@socket_addrs = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
|
17
18
|
|
18
|
-
opts[:type] ||= :em
|
19
|
-
@socket_class = case
|
19
|
+
@type = opts[:type] ||= :em
|
20
|
+
@socket_class = case @type
|
20
21
|
when :em
|
21
22
|
require File.expand_path("../socket/worker_em", __FILE__)
|
22
23
|
MapReduce::Socket::WorkerEm
|
@@ -34,7 +35,7 @@ module MapReduce
|
|
34
35
|
def emit(key, value, &blk)
|
35
36
|
fail "Key can't be nil" if key.nil?
|
36
37
|
|
37
|
-
sock =
|
38
|
+
sock = pick_map_socket(key)
|
38
39
|
sock.send_request(["map", key, value], &blk)
|
39
40
|
end
|
40
41
|
alias :map :emit
|
@@ -43,12 +44,13 @@ module MapReduce
|
|
43
44
|
# Master will wait until all workers have sent the "map_finished" message.
|
44
45
|
#
|
45
46
|
def map_finished(&blk)
|
46
|
-
all =
|
47
|
+
all = master_sockets.size
|
47
48
|
resp = 0
|
48
49
|
|
49
|
-
|
50
|
+
master_sockets.each do |sock, h|
|
50
51
|
sock.send_request(["map_finished"]) do |msg|
|
51
|
-
|
52
|
+
socket_state(sock, :reduce)
|
53
|
+
blk.call(["ok"]) if block_given? && (resp+=1) == all
|
52
54
|
end
|
53
55
|
end
|
54
56
|
["ok"]
|
@@ -62,16 +64,36 @@ module MapReduce
|
|
62
64
|
# If nothing to reduce, we return nil to client.
|
63
65
|
#
|
64
66
|
def reduce(&blk)
|
65
|
-
|
67
|
+
if @type == :em
|
68
|
+
em_reduce(&blk)
|
69
|
+
else
|
70
|
+
sync_reduce(&blk)
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
74
|
+
def sync_reduce(&blk)
|
75
|
+
while sock = random_reduce_socket
|
76
|
+
key, *values = sock.send_request(["reduce"])
|
77
|
+
if key.nil?
|
78
|
+
socket_state(sock, :map)
|
79
|
+
else
|
80
|
+
blk.call(key, values)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def em_reduce(&blk)
|
86
|
+
sock = random_reduce_socket
|
66
87
|
if sock
|
67
88
|
sock.send_request(["reduce"]) do |message|
|
68
89
|
key, *values = message
|
69
90
|
if key.nil?
|
70
|
-
|
91
|
+
socket_state(sock, :map)
|
71
92
|
else
|
72
93
|
blk.call(key, values)
|
73
94
|
end
|
74
|
-
|
95
|
+
|
96
|
+
em_reduce(&blk)
|
75
97
|
end
|
76
98
|
else
|
77
99
|
blk.call([nil])
|
@@ -82,37 +104,41 @@ module MapReduce
|
|
82
104
|
|
83
105
|
# Connect to each master.
|
84
106
|
#
|
85
|
-
def
|
86
|
-
@
|
87
|
-
|
107
|
+
def master_sockets
|
108
|
+
@master_sockets ||= begin
|
109
|
+
socks = {}
|
110
|
+
@socket_addrs.each_with_index do |addr, i|
|
88
111
|
sock = @socket_class.new
|
89
112
|
sock.connect addr
|
90
|
-
sock
|
113
|
+
socks[sock] = { state: :map, ind: i }
|
91
114
|
end
|
115
|
+
socks
|
92
116
|
end
|
93
117
|
end
|
94
118
|
|
95
119
|
# Kind of sharding
|
96
120
|
#
|
97
|
-
def
|
98
|
-
shard = if
|
99
|
-
Digest::MD5.hexdigest(key.to_s).to_i(16) %
|
121
|
+
def pick_map_socket(key)
|
122
|
+
shard = if master_sockets.size > 1
|
123
|
+
Digest::MD5.hexdigest(key.to_s).to_i(16) % master_sockets.size
|
100
124
|
else
|
101
125
|
0
|
102
126
|
end
|
103
|
-
|
127
|
+
master_sockets.keys[shard]
|
104
128
|
end
|
105
129
|
|
106
|
-
# Take random socket to get reduce message
|
130
|
+
# Take random socket to get reduce message.
|
131
|
+
# Socket should be in :reduce state.
|
107
132
|
#
|
108
|
-
def
|
109
|
-
|
133
|
+
def random_reduce_socket
|
134
|
+
master_sockets.select{ |k,v| v[:state] == :reduce }.keys.sample
|
110
135
|
end
|
111
136
|
|
112
|
-
#
|
137
|
+
# Change socket's state to :map when it is empty
|
138
|
+
# and to :reduce when mapping is finished
|
113
139
|
#
|
114
|
-
def
|
115
|
-
|
140
|
+
def socket_state(sock, state)
|
141
|
+
master_sockets[sock][:state] = state
|
116
142
|
end
|
117
143
|
end
|
118
144
|
end
|
@@ -76,23 +76,25 @@ describe "MapReduce stack" do
|
|
76
76
|
Process.kill "TERM", @pid2
|
77
77
|
end
|
78
78
|
|
79
|
-
it "should map and reduce some data in SYNC mode" do
|
79
|
+
it "should map and reduce some data in SYNC mode twice" do
|
80
80
|
EM.synchrony do
|
81
|
-
data = {}
|
82
81
|
worker = MapReduce::Worker.new type: :sync, masters: ["ipc:///dev/shm/sock1.sock", "ipc:///dev/shm/sock2.sock"]
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
82
|
+
2.times do
|
83
|
+
data = {}
|
84
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
|
85
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
|
86
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
|
87
|
+
worker.map("Michael", ["Blur"] * ',')
|
88
|
+
worker.map("Gosha", ["DDT", "Splin"] * ',')
|
89
|
+
worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
|
90
|
+
worker.map_finished
|
91
|
+
worker.reduce do |key, values|
|
92
|
+
data[key] = values if key
|
93
|
+
end
|
94
|
+
data.size.must_equal 5
|
95
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
96
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
92
97
|
end
|
93
|
-
data.size.must_equal 5
|
94
|
-
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
95
|
-
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
96
98
|
|
97
99
|
EM.stop
|
98
100
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: map_reduce
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.
|
4
|
+
version: 0.0.1.alpha3
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -110,7 +110,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
110
110
|
requirements:
|
111
111
|
- - ! '>='
|
112
112
|
- !ruby/object:Gem::Version
|
113
|
-
hash:
|
113
|
+
hash: 1980425592247416328
|
114
114
|
version: '0'
|
115
115
|
segments:
|
116
116
|
- 0
|