map_reduce 0.0.1.alpha2 → 0.0.1.alpha3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -126,6 +126,7 @@ module MapReduce
126
126
  def sort
127
127
  `sort #{@log_filename} -o #{@sorted_log_filename}`
128
128
  FileUtils.rm(@log_filename)
129
+ @log_file = nil
129
130
  end
130
131
 
131
132
  # Start reducing part.
@@ -164,6 +165,7 @@ module MapReduce
164
165
  end
165
166
  rescue StopIteration => e
166
167
  FileUtils.rm(@sorted_log_filename)
168
+ @sorted_log_file = nil
167
169
  @reduce_stop = true
168
170
  end
169
171
 
@@ -64,13 +64,12 @@ module MapReduce::Socket
64
64
  @master.reduce!
65
65
  end
66
66
 
67
- # Wait till all workers stopps sending MAP.
67
+ # Wait till all workers stop sending MAP.
68
68
  # After all workers stopped we start REDUCE part of job.
69
69
  #
70
70
  def reduce(envelope)
71
- @connections[envelope] ||= true
72
71
  if @state == :reduce
73
- @state == :map unless @master.reduce(envelope)
72
+ @master.reduce(envelope)
74
73
  else
75
74
  EM.add_timer(REDUCE_WAIT) do
76
75
  reduce(envelope)
@@ -1,3 +1,3 @@
1
1
  module MapReduce
2
- VERSION = "0.0.1.alpha2"
2
+ VERSION = "0.0.1.alpha3"
3
3
  end
@@ -1,6 +1,7 @@
1
1
  # MapReduce Worker does two jobs:
2
2
  # First, it maps (emits) all data to masters;
3
3
  # Second, it reduces data returned from master;
4
+ # After reducing, it is ready to map data again.
4
5
  #
5
6
  module MapReduce
6
7
  class Worker
@@ -13,10 +14,10 @@ module MapReduce
13
14
  # ** :sync - Synchronous type on Fibers
14
15
  #
15
16
  def initialize(opts = {})
16
- @master_sockets = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
17
+ @socket_addrs = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
17
18
 
18
- opts[:type] ||= :em
19
- @socket_class = case opts[:type]
19
+ @type = opts[:type] ||= :em
20
+ @socket_class = case @type
20
21
  when :em
21
22
  require File.expand_path("../socket/worker_em", __FILE__)
22
23
  MapReduce::Socket::WorkerEm
@@ -34,7 +35,7 @@ module MapReduce
34
35
  def emit(key, value, &blk)
35
36
  fail "Key can't be nil" if key.nil?
36
37
 
37
- sock = pick_socket(key)
38
+ sock = pick_map_socket(key)
38
39
  sock.send_request(["map", key, value], &blk)
39
40
  end
40
41
  alias :map :emit
@@ -43,12 +44,13 @@ module MapReduce
43
44
  # Master waits until all workers have sent the "map_finished" message.
44
45
  #
45
46
  def map_finished(&blk)
46
- all = worker_sockets.size
47
+ all = master_sockets.size
47
48
  resp = 0
48
49
 
49
- worker_sockets.each do |sock|
50
+ master_sockets.each do |sock, h|
50
51
  sock.send_request(["map_finished"]) do |msg|
51
- blk.call if block_given? && (resp+=1) == all
52
+ socket_state(sock, :reduce)
53
+ blk.call(["ok"]) if block_given? && (resp+=1) == all
52
54
  end
53
55
  end
54
56
  ["ok"]
@@ -62,16 +64,36 @@ module MapReduce
62
64
  # If nothing to reduce, we return nil to client.
63
65
  #
64
66
  def reduce(&blk)
65
- sock = random_socket
67
+ if @type == :em
68
+ em_reduce(&blk)
69
+ else
70
+ sync_reduce(&blk)
71
+ end
72
+ end
73
+
74
+ def sync_reduce(&blk)
75
+ while sock = random_reduce_socket
76
+ key, *values = sock.send_request(["reduce"])
77
+ if key.nil?
78
+ socket_state(sock, :map)
79
+ else
80
+ blk.call(key, values)
81
+ end
82
+ end
83
+ end
84
+
85
+ def em_reduce(&blk)
86
+ sock = random_reduce_socket
66
87
  if sock
67
88
  sock.send_request(["reduce"]) do |message|
68
89
  key, *values = message
69
90
  if key.nil?
70
- remove_socket(sock)
91
+ socket_state(sock, :map)
71
92
  else
72
93
  blk.call(key, values)
73
94
  end
74
- reduce(&blk)
95
+
96
+ em_reduce(&blk)
75
97
  end
76
98
  else
77
99
  blk.call([nil])
@@ -82,37 +104,41 @@ module MapReduce
82
104
 
83
105
  # Connect to each master.
84
106
  #
85
- def worker_sockets
86
- @worker_sockets ||= begin
87
- @master_sockets.map do |addr|
107
+ def master_sockets
108
+ @master_sockets ||= begin
109
+ socks = {}
110
+ @socket_addrs.each_with_index do |addr, i|
88
111
  sock = @socket_class.new
89
112
  sock.connect addr
90
- sock
113
+ socks[sock] = { state: :map, ind: i }
91
114
  end
115
+ socks
92
116
  end
93
117
  end
94
118
 
95
119
  # Kind of sharding
96
120
  #
97
- def pick_socket(key)
98
- shard = if worker_sockets.size > 1
99
- Digest::MD5.hexdigest(key.to_s).to_i(16) % worker_sockets.size
121
+ def pick_map_socket(key)
122
+ shard = if master_sockets.size > 1
123
+ Digest::MD5.hexdigest(key.to_s).to_i(16) % master_sockets.size
100
124
  else
101
125
  0
102
126
  end
103
- worker_sockets[shard]
127
+ master_sockets.keys[shard]
104
128
  end
105
129
 
106
- # Take random socket to get reduce message
130
+ # Take random socket to get reduce message.
131
+ # Socket should be in :reduce state.
107
132
  #
108
- def random_socket
109
- worker_sockets.sample
133
+ def random_reduce_socket
134
+ master_sockets.select{ |k,v| v[:state] == :reduce }.keys.sample
110
135
  end
111
136
 
112
- # Remove socket when it is empty
137
+ # Change socket's state to :map when it is empty
138
+ # and to :reduce when mapping is finished
113
139
  #
114
- def remove_socket(sock)
115
- worker_sockets.delete sock
140
+ def socket_state(sock, state)
141
+ master_sockets[sock][:state] = state
116
142
  end
117
143
  end
118
144
  end
@@ -76,23 +76,25 @@ describe "MapReduce stack" do
76
76
  Process.kill "TERM", @pid2
77
77
  end
78
78
 
79
- it "should map and reduce some data in SYNC mode" do
79
+ it "should map and reduce some data in SYNC mode twice" do
80
80
  EM.synchrony do
81
- data = {}
82
81
  worker = MapReduce::Worker.new type: :sync, masters: ["ipc:///dev/shm/sock1.sock", "ipc:///dev/shm/sock2.sock"]
83
- worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
84
- worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
85
- worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
86
- worker.map("Michael", ["Blur"] * ',')
87
- worker.map("Gosha", ["DDT", "Splin"] * ',')
88
- worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
89
- worker.map_finished
90
- worker.reduce do |key, values|
91
- data[key] = values if key
82
+ 2.times do
83
+ data = {}
84
+ worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
85
+ worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
86
+ worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
87
+ worker.map("Michael", ["Blur"] * ',')
88
+ worker.map("Gosha", ["DDT", "Splin"] * ',')
89
+ worker.map("Obama", ["Adele", "Rolling Stones"] * ',')
90
+ worker.map_finished
91
+ worker.reduce do |key, values|
92
+ data[key] = values if key
93
+ end
94
+ data.size.must_equal 5
95
+ data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
96
+ data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
92
97
  end
93
- data.size.must_equal 5
94
- data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
95
- data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
96
98
 
97
99
  EM.stop
98
100
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: map_reduce
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.alpha2
4
+ version: 0.0.1.alpha3
5
5
  prerelease: 6
6
6
  platform: ruby
7
7
  authors:
@@ -110,7 +110,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
110
110
  requirements:
111
111
  - - ! '>='
112
112
  - !ruby/object:Gem::Version
113
- hash: 2663364217523840830
113
+ hash: 1980425592247416328
114
114
  version: '0'
115
115
  segments:
116
116
  - 0