map_reduce 0.0.1.alpha2 → 0.0.1.alpha3

Sign up to get free protection for your applications and to get access to all the features.
@@ -126,6 +126,7 @@ module MapReduce
126
126
  def sort
127
127
  `sort #{@log_filename} -o #{@sorted_log_filename}`
128
128
  FileUtils.rm(@log_filename)
129
+ @log_file = nil
129
130
  end
130
131
 
131
132
  # Start reducing part.
@@ -164,6 +165,7 @@ module MapReduce
164
165
  end
165
166
  rescue StopIteration => e
166
167
  FileUtils.rm(@sorted_log_filename)
168
+ @sorted_log_file = nil
167
169
  @reduce_stop = true
168
170
  end
169
171
 
@@ -64,13 +64,12 @@ module MapReduce::Socket
64
64
  @master.reduce!
65
65
  end
66
66
 
67
- # Wait till all workers stopps sending MAP.
67
+ # Wait till all workers stop sending MAP.
68
68
  # After all workers stopped we start REDUCE part of job.
69
69
  #
70
70
  def reduce(envelope)
71
- @connections[envelope] ||= true
72
71
  if @state == :reduce
73
- @state == :map unless @master.reduce(envelope)
72
+ @master.reduce(envelope)
74
73
  else
75
74
  EM.add_timer(REDUCE_WAIT) do
76
75
  reduce(envelope)
@@ -1,3 +1,3 @@
1
1
  module MapReduce
2
- VERSION = "0.0.1.alpha2"
2
+ VERSION = "0.0.1.alpha3"
3
3
  end
@@ -1,6 +1,7 @@
1
1
  # MapReduce Worker does two jobs:
2
2
  # First, it maps (emits) all data to masters;
3
3
  # Second, it reduces data returned from master;
4
+ # After reducing, it is ready to map data again.
4
5
  #
5
6
  module MapReduce
6
7
  class Worker
@@ -13,10 +14,10 @@ module MapReduce
13
14
  # ** :sync - Synchronous type on Fibers
14
15
  #
15
16
  def initialize(opts = {})
16
- @master_sockets = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
17
+ @socket_addrs = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
17
18
 
18
- opts[:type] ||= :em
19
- @socket_class = case opts[:type]
19
+ @type = opts[:type] ||= :em
20
+ @socket_class = case @type
20
21
  when :em
21
22
  require File.expand_path("../socket/worker_em", __FILE__)
22
23
  MapReduce::Socket::WorkerEm
@@ -34,7 +35,7 @@ module MapReduce
34
35
  def emit(key, value, &blk)
35
36
  fail "Key can't be nil" if key.nil?
36
37
 
37
- sock = pick_socket(key)
38
+ sock = pick_map_socket(key)
38
39
  sock.send_request(["map", key, value], &blk)
39
40
  end
40
41
  alias :map :emit
@@ -43,12 +44,13 @@ module MapReduce
43
44
  # Master will wait until all workers have sent the "map_finished" message.
44
45
  #
45
46
  def map_finished(&blk)
46
- all = worker_sockets.size
47
+ all = master_sockets.size
47
48
  resp = 0
48
49
 
49
- worker_sockets.each do |sock|
50
+ master_sockets.each do |sock, h|
50
51
  sock.send_request(["map_finished"]) do |msg|
51
- blk.call if block_given? && (resp+=1) == all
52
+ socket_state(sock, :reduce)
53
+ blk.call(["ok"]) if block_given? && (resp+=1) == all
52
54
  end
53
55
  end
54
56
  ["ok"]
@@ -62,16 +64,36 @@ module MapReduce
62
64
  # If nothing to reduce, we return nil to client.
63
65
  #
64
66
  def reduce(&blk)
65
- sock = random_socket
67
+ if @type == :em
68
+ em_reduce(&blk)
69
+ else
70
+ sync_reduce(&blk)
71
+ end
72
+ end
73
+
74
+ def sync_reduce(&blk)
75
+ while sock = random_reduce_socket
76
+ key, *values = sock.send_request(["reduce"])
77
+ if key.nil?
78
+ socket_state(sock, :map)
79
+ else
80
+ blk.call(key, values)
81
+ end
82
+ end
83
+ end
84
+
85
+ def em_reduce(&blk)
86
+ sock = random_reduce_socket
66
87
  if sock
67
88
  sock.send_request(["reduce"]) do |message|
68
89
  key, *values = message
69
90
  if key.nil?
70
- remove_socket(sock)
91
+ socket_state(sock, :map)
71
92
  else
72
93
  blk.call(key, values)
73
94
  end
74
- reduce(&blk)
95
+
96
+ em_reduce(&blk)
75
97
  end
76
98
  else
77
99
  blk.call([nil])
@@ -82,37 +104,41 @@ module MapReduce
82
104
 
83
105
  # Connect to each master.
84
106
  #
85
- def worker_sockets
86
- @worker_sockets ||= begin
87
- @master_sockets.map do |addr|
107
+ def master_sockets
108
+ @master_sockets ||= begin
109
+ socks = {}
110
+ @socket_addrs.each_with_index do |addr, i|
88
111
  sock = @socket_class.new
89
112
  sock.connect addr
90
- sock
113
+ socks[sock] = { state: :map, ind: i }
91
114
  end
115
+ socks
92
116
  end
93
117
  end
94
118
 
95
119
  # Kind of sharding
96
120
  #
97
- def pick_socket(key)
98
- shard = if worker_sockets.size > 1
99
- Digest::MD5.hexdigest(key.to_s).to_i(16) % worker_sockets.size
121
+ def pick_map_socket(key)
122
+ shard = if master_sockets.size > 1
123
+ Digest::MD5.hexdigest(key.to_s).to_i(16) % master_sockets.size
100
124
  else
101
125
  0
102
126
  end
103
- worker_sockets[shard]
127
+ master_sockets.keys[shard]
104
128
  end
105
129
 
106
- # Take random socket to get reduce message
130
+ # Take random socket to get reduce message.
131
+ # Socket should be in :reduce state.
107
132
  #
108
- def random_socket
109
- worker_sockets.sample
133
+ def random_reduce_socket
134
+ master_sockets.select{ |k,v| v[:state] == :reduce }.keys.sample
110
135
  end
111
136
 
112
- # Remove socket when it is empty
137
+ # Change socket's state to :map when it is empty
138
+ # and to :reduce when mapping is finished
113
139
  #
114
- def remove_socket(sock)
115
- worker_sockets.delete sock
140
+ def socket_state(sock, state)
141
+ master_sockets[sock][:state] = state
116
142
  end
117
143
  end
118
144
  end
@@ -76,23 +76,25 @@ describe "MapReduce stack" do
76
76
  Process.kill "TERM", @pid2
77
77
  end
78
78
 
79
- it "should map and reduce some data in SYNC mode" do
79
+ it "should map and reduce some data in SYNC mode twice" do
80
80
  EM.synchrony do
81
- data = {}
82
81
  worker = MapReduce::Worker.new type: :sync, masters: ["ipc:///dev/shm/sock1.sock", "ipc:///dev/shm/sock2.sock"]
83
- worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
84
- worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
85
- worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
86
- worker.map("Michael", ["Blur"] * ',')
87
- worker.map("Gosha", ["DDT", "Splin"] * ',')
88
- worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
89
- worker.map_finished
90
- worker.reduce do |key, values|
91
- data[key] = values if key
82
+ 2.times do
83
+ data = {}
84
+ worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
85
+ worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
86
+ worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
87
+ worker.map("Michael", ["Blur"] * ',')
88
+ worker.map("Gosha", ["DDT", "Splin"] * ',')
89
+ worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
90
+ worker.map_finished
91
+ worker.reduce do |key, values|
92
+ data[key] = values if key
93
+ end
94
+ data.size.must_equal 5
95
+ data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
96
+ data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
92
97
  end
93
- data.size.must_equal 5
94
- data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
95
- data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
96
98
 
97
99
  EM.stop
98
100
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: map_reduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.alpha2
4
+ version: 0.0.1.alpha3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -110,7 +110,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
110
110
  requirements:
111
111
  - - ! '>='
112
112
  - !ruby/object:Gem::Version
113
- hash: 2663364217523840830
113
+ hash: 1980425592247416328
114
114
  version: '0'
115
115
  segments:
116
116
  - 0