map_reduce 0.0.1.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +11 -0
- data/lib/map_reduce.rb +21 -0
- data/lib/map_reduce/master.rb +180 -0
- data/lib/map_reduce/socket/master.rb +100 -0
- data/lib/map_reduce/socket/worker_em.rb +4 -0
- data/lib/map_reduce/socket/worker_sync.rb +16 -0
- data/lib/map_reduce/version.rb +3 -0
- data/lib/map_reduce/worker.rb +118 -0
- data/map_reduce.gemspec +26 -0
- data/spec/map_reduce/map_reduce_spec.rb +59 -0
- data/spec/map_reduce/master_spec.rb +0 -0
- data/spec/map_reduce/worker_spec.rb +0 -0
- data/spec/spec_helper.rb +7 -0
- metadata +134 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Petr Yanovich
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# MapReduce
|
2
|
+
|
3
|
+
MapReduce is a simple distributed MapReduce framework on Ruby.
|
4
|
+
|
5
|
+
Internally it uses ZMQ transport and EventMachine.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'map_reduce'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install map_reduce
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/lib/map_reduce.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "map_reduce/version"
|
2
|
+
require "digest/sha1"
|
3
|
+
require "em-zmq-tp10"
|
4
|
+
require "logger"
|
5
|
+
|
6
|
+
module MapReduce
|
7
|
+
DEFAULT_SOCKET = "ipc:///dev/shm/master.sock"
|
8
|
+
|
9
|
+
extend self
|
10
|
+
|
11
|
+
def logger
|
12
|
+
@logger ||= begin
|
13
|
+
log = Logger.new(STDOUT)
|
14
|
+
log.formatter = Logger::Formatter.new
|
15
|
+
log
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
require File.expand_path("../map_reduce/master", __FILE__)
|
21
|
+
require File.expand_path("../map_reduce/worker", __FILE__)
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require File.expand_path("../socket/master", __FILE__)
|
2
|
+
|
3
|
+
module MapReduce
|
4
|
+
class Master
|
5
|
+
# How often data will be flushed to disk
|
6
|
+
FLUSH_TIMEOUT = 1
|
7
|
+
# How many lines should be parsed by one iteration of grouping
|
8
|
+
GROUP_LINES = 100
|
9
|
+
# How many seconds should we sleep if grouping is going faster than reducing
|
10
|
+
GROUP_TIMEOUT = 1
|
11
|
+
# How many keys should be stored before the timeout happens
|
12
|
+
GROUP_MAX = 10_000
|
13
|
+
|
14
|
+
# Valid options:
|
15
|
+
# * socket - socket address to bind
|
16
|
+
# default is 'ipc:///dev/shm/master.sock'
|
17
|
+
# * log_folder - folder to store received MAP data
|
18
|
+
# default is '/tmp/mapreduce/'
|
19
|
+
# * workers - count of workers that will emit data.
|
20
|
+
# default is 1; :auto counts the workers that have connected,
|
21
|
+
# but in small jobs it is better to define it explicitly,
|
22
|
+
# because if one worker will stop before others start
|
23
|
+
# master will decide that map job is done and will start reducing
|
24
|
+
# * delimiter - master log stores data like "key{delimiter}values"
|
25
|
+
# so to prevent collisions you can specify your own unique delimiter
|
26
|
+
# default is a pipe "|"
|
27
|
+
#
|
28
|
+
def initialize(opts = {})
|
29
|
+
# Socket addr to bind
|
30
|
+
@socket_addr = opts[:socket] || ::MapReduce::DEFAULT_SOCKET
|
31
|
+
# Folder to write logs
|
32
|
+
@log_folder = opts[:log_folder] || "/tmp/mapreduce/"
|
33
|
+
# How many MapReduce workers will emit data
|
34
|
+
@workers = opts[:workers] || 1
|
35
|
+
# Delimiter to store key/value pairs in log
|
36
|
+
@delimiter = opts[:delimiter] || "|"
|
37
|
+
|
38
|
+
@log = []
|
39
|
+
@data = []
|
40
|
+
@workers_envelopes = {}
|
41
|
+
@log_filename = File.join(@log_folder, "master-#{Process.pid}.log")
|
42
|
+
@sorted_log_filename = File.join(@log_folder, "master-#{Process.pid}_sorted.log")
|
43
|
+
|
44
|
+
FileUtils.mkdir_p(@log_folder)
|
45
|
+
FileUtils.touch(@log_filename)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Start Eventloop
|
49
|
+
#
|
50
|
+
def run
|
51
|
+
EM.run do
|
52
|
+
# Init socket
|
53
|
+
master_socket
|
54
|
+
|
55
|
+
# Init flushing timer
|
56
|
+
flush
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Stop Eventloop
|
61
|
+
#
|
62
|
+
def stop
|
63
|
+
EM.stop
|
64
|
+
end
|
65
|
+
|
66
|
+
# Store data in log array till flush
|
67
|
+
#
|
68
|
+
def map(key, message)
|
69
|
+
@log << "#{key}#{@delimiter}#{message}"
|
70
|
+
end
|
71
|
+
|
72
|
+
# Send data back to worker.
|
73
|
+
# Last item in data is last unfinished session,
|
74
|
+
# so till the end of file reading we don't send it
|
75
|
+
#
|
76
|
+
def reduce(envelope)
|
77
|
+
if @data.size >= 2
|
78
|
+
data = @data.shift
|
79
|
+
data = data.flatten
|
80
|
+
master_socket.send_reply(data, envelope)
|
81
|
+
elsif @reduce_stop
|
82
|
+
data = @data.shift
|
83
|
+
data = data.flatten if data
|
84
|
+
master_socket.send_reply(data, envelope)
|
85
|
+
else
|
86
|
+
EM.add_timer(1) do
|
87
|
+
reduce(envelope)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Opening log file for read/write
|
93
|
+
#
|
94
|
+
def log_file
|
95
|
+
@log_file ||= begin
|
96
|
+
File.open(@log_filename, "w+")
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
# Opening sorted log for reading
|
101
|
+
#
|
102
|
+
def sorted_log_file
|
103
|
+
@sorted_log_file ||= begin
|
104
|
+
File.open(@sorted_log_filename, "r")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Flushing data to disk once per FLUSH_TIMEOUT seconds
|
109
|
+
#
|
110
|
+
# Append the buffered MAP records to the log file, then re-arm the flush timer.
#
# Each batch is written newline-terminated. Without the trailing "\n" the last
# record of one batch and the first record of the next batch end up on the same
# line, which corrupts both records once the log is sorted and grouped.
def flush
  if @log.any?
    log_file << @log.join("\n") << "\n"
    log_file.flush
    @log.clear
  end

  # Schedule the next flush FLUSH_TIMEOUT seconds from now.
  EM.add_timer(FLUSH_TIMEOUT) do
    flush
  end
end
|
121
|
+
|
122
|
+
# Sorting log.
|
123
|
+
# Linux sort is the fastest way to sort big file.
|
124
|
+
# Deleting original log after sort.
|
125
|
+
#
|
126
|
+
# Sort the raw log into the sorted log with the system `sort` utility,
# then delete the raw log.
#
# The command is run with an argument vector (no shell), so file names that
# contain spaces or shell metacharacters are passed through untouched.
# Raises RuntimeError if `sort` cannot be run or exits with a failure status;
# in that case the raw log is kept so the mapped data is not lost.
def sort
  sorted = system("sort", @log_filename, "-o", @sorted_log_filename)
  fail "Can't sort #{@log_filename}" unless sorted

  FileUtils.rm(@log_filename)
end
|
130
|
+
|
131
|
+
# Start reducing part.
|
132
|
+
# First, flushing rest of log to disk.
|
133
|
+
# Then sort data.
|
134
|
+
# Then start to read/group data
|
135
|
+
#
|
136
|
+
def reduce!
|
137
|
+
flush
|
138
|
+
sort
|
139
|
+
|
140
|
+
iter = sorted_log_file.each_line
|
141
|
+
group iter
|
142
|
+
end
|
143
|
+
|
144
|
+
# Reading sorted data and grouping by key.
|
145
|
+
# If queue (@data) is growing faster than workers grab data we pause reading file.
|
146
|
+
#
|
147
|
+
# Read up to GROUP_LINES lines from the sorted log and group them by key
# into @data as [key, [value, ...]] pairs.
#
# iter - an Enumerator over the lines of the sorted log.
#
# While @data holds GROUP_MAX or more entries, reading is postponed for
# GROUP_TIMEOUT seconds; otherwise the next batch is scheduled on the next
# reactor tick. When the enumerator is exhausted the sorted log is removed
# and @reduce_stop is set.
def group(iter)
  if @data.size >= GROUP_MAX
    EM.add_timer(GROUP_TIMEOUT) { group(iter) }
  else
    GROUP_LINES.times do
      line = iter.next.chomp
      # Split on the first delimiter only, so a value that itself contains
      # the delimiter is kept whole instead of being cut at its first part.
      key, msg = line.split(@delimiter, 2)

      last = @data.last
      if last && last[0] == key
        last[1] << msg
      else
        @data << [key, [msg]]
      end
    end

    EM.next_tick { group(iter) }
  end
rescue StopIteration
  # iter.next raised: the whole sorted log has been consumed.
  FileUtils.rm(@sorted_log_filename)
  @reduce_stop = true
end
|
169
|
+
|
170
|
+
# Initializing and binding socket
|
171
|
+
#
|
172
|
+
def master_socket
|
173
|
+
@master_socket ||= begin
|
174
|
+
sock = MapReduce::Socket::Master.new self, @workers
|
175
|
+
sock.bind @socket_addr
|
176
|
+
sock
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Reply socket.
|
2
|
+
# Master accepts "map", "map_finished", and "reduce" messages.
|
3
|
+
# For "map" messages it replies with a plain "ok",
|
4
|
+
# but for "reduce" requests it returns key with grouped values.
|
5
|
+
#
|
6
|
+
module MapReduce::Socket
|
7
|
+
class Master < EM::Protocols::Zmq2::Rep
|
8
|
+
# If worker is ready to reduce data, but we are still in MAP state
|
9
|
+
# we will sleep for REDUCE_WAIT seconds till state is not REDUCE
|
10
|
+
REDUCE_WAIT = 1
|
11
|
+
|
12
|
+
def initialize(master, workers)
|
13
|
+
@master = master
|
14
|
+
@workers = workers
|
15
|
+
|
16
|
+
@connections = {}
|
17
|
+
@state = :map
|
18
|
+
|
19
|
+
super()
|
20
|
+
end
|
21
|
+
|
22
|
+
def receive_request(message, envelope)
|
23
|
+
@connections[envelope.first] = false
|
24
|
+
|
25
|
+
type, key, msg = message
|
26
|
+
case type
|
27
|
+
when "map"
|
28
|
+
map(envelope, key, msg)
|
29
|
+
when "map_finished"
|
30
|
+
map_finished(envelope)
|
31
|
+
when "reduce"
|
32
|
+
reduce(envelope)
|
33
|
+
else
|
34
|
+
MapReduce.logger.error("Wrong message type: #{type}")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Send data to log
|
39
|
+
# Someone should never MAP data when master already in REDUCE state
|
40
|
+
#
|
41
|
+
def map(envelope, key, msg)
|
42
|
+
if @state == :map
|
43
|
+
@master.map(key, msg)
|
44
|
+
ok(envelope)
|
45
|
+
else
|
46
|
+
MapReduce.logger.error("Someone tries to MAP data while state is REDUCE")
|
47
|
+
not_ok(envelope, "You can't MAP while we are reducing")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# When worker stops mapping data, it sends "map_finished" message.
|
52
|
+
# When all workers will send "map_finished" message reduce will begin.
|
53
|
+
#
|
54
|
+
def map_finished(envelope)
|
55
|
+
ok(envelope)
|
56
|
+
|
57
|
+
@connections[envelope.first] ||= true
|
58
|
+
@workers = @connections.size if @workers == :auto
|
59
|
+
|
60
|
+
return unless @connections.all?{ |k,v| v }
|
61
|
+
return unless @connections.size == @workers
|
62
|
+
|
63
|
+
@state = :reduce
|
64
|
+
@master.reduce!
|
65
|
+
end
|
66
|
+
|
67
|
+
# Wait till all workers stop sending MAP.
|
68
|
+
# After all workers stopped we start REDUCE part of job.
|
69
|
+
#
|
70
|
+
# Handle a "reduce" request.
#
# envelope - the request envelope; its first element is used as the
#            connection key, as in receive_request and map_finished.
#
# In :reduce state the request is handed to the master. Otherwise the
# request is retried after REDUCE_WAIT seconds.
def reduce(envelope)
  # Key by envelope.first like the rest of the class; keying by the whole
  # envelope added an extra entry and inflated @connections.size.
  @connections[envelope.first] ||= true

  if @state == :reduce
    # The former "@state == :map unless ..." was a comparison with no effect;
    # send_reply already switches the state back to :map when there is no data.
    @master.reduce(envelope)
  else
    EM.add_timer(REDUCE_WAIT) do
      reduce(envelope)
    end
  end
end
|
80
|
+
|
81
|
+
# Simple OK reply
|
82
|
+
#
|
83
|
+
def ok(envelope)
|
84
|
+
send_reply(["ok"], envelope)
|
85
|
+
end
|
86
|
+
|
87
|
+
# Simple NOT OK reply
|
88
|
+
#
|
89
|
+
def not_ok(envelope, error)
|
90
|
+
send_reply(["error", error], envelope)
|
91
|
+
end
|
92
|
+
|
93
|
+
# Switch back to :map state if reduce finished
|
94
|
+
#
|
95
|
+
def send_reply(data, envelope)
|
96
|
+
@state = :map unless data
|
97
|
+
super
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module MapReduce::Socket
|
2
|
+
class WorkerSync < EM::Protocols::Zmq2::ReqCb
|
3
|
+
alias_method :async_send_request, :send_request
|
4
|
+
def send_request(data, &blk)
|
5
|
+
fib = Fiber.current
|
6
|
+
async_send_request(data) do |message|
|
7
|
+
fib.resume(message)
|
8
|
+
end
|
9
|
+
if block_given?
|
10
|
+
blk.call Fiber.yield
|
11
|
+
else
|
12
|
+
Fiber.yield
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# MapReduce Worker does two jobs:
|
2
|
+
# First, it maps (emits) all data to masters;
|
3
|
+
# Second, it reduces data returned from master;
|
4
|
+
#
|
5
|
+
module MapReduce
|
6
|
+
class Worker
|
7
|
+
|
8
|
+
# Valid options:
|
9
|
+
# * masters - socket addresses of masters,
|
10
|
+
# default is 'ipc:///dev/shm/master.sock'
|
11
|
+
# * type - connection type:
|
12
|
+
# ** :em - Eventmachine with callbacks (default)
|
13
|
+
# ** :sync - Synchronous type on Fibers
|
14
|
+
#
|
15
|
+
def initialize(opts = {})
|
16
|
+
@master_sockets = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]
|
17
|
+
|
18
|
+
opts[:type] ||= :em
|
19
|
+
@socket_class = case opts[:type]
|
20
|
+
when :em
|
21
|
+
require File.expand_path("../socket/worker_em", __FILE__)
|
22
|
+
MapReduce::Socket::WorkerEm
|
23
|
+
when :sync
|
24
|
+
require File.expand_path("../socket/worker_sync", __FILE__)
|
25
|
+
MapReduce::Socket::WorkerSync
|
26
|
+
else
|
27
|
+
fail "Wrong Connection type. Choose :em or :sync, not #{opts[:type]}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Sends key and value to master through socket.
|
32
|
+
# Key can't be nil.
|
33
|
+
#
|
34
|
+
def emit(key, value, &blk)
|
35
|
+
fail "Key can't be nil" if key.nil?
|
36
|
+
|
37
|
+
sock = pick_socket(key)
|
38
|
+
sock.send_request(["map", key, value], &blk)
|
39
|
+
end
|
40
|
+
alias :map :emit
|
41
|
+
|
42
|
+
# Explicitly stop MAP phase.
|
43
|
+
# Master will wait till all workers will send "map_finished" message.
|
44
|
+
#
|
45
|
+
def map_finished(&blk)
|
46
|
+
all = worker_sockets.size
|
47
|
+
resp = 0
|
48
|
+
|
49
|
+
worker_sockets.each do |sock|
|
50
|
+
sock.send_request(["map_finished"]) do |msg|
|
51
|
+
blk.call if block_given? && (resp+=1) == all
|
52
|
+
end
|
53
|
+
end
|
54
|
+
["ok"]
|
55
|
+
end
|
56
|
+
|
57
|
+
# Reduce operation.
|
58
|
+
# Sends request to all masters.
|
59
|
+
# If master returns nil it means that he is already empty:
|
60
|
+
# nothing to reduce.
|
61
|
+
# Reducing till any socket returns data.
|
62
|
+
# If nothing to reduce, we return nil to client.
|
63
|
+
#
|
64
|
+
def reduce(&blk)
|
65
|
+
sock = random_socket
|
66
|
+
if sock
|
67
|
+
sock.send_request(["reduce"]) do |message|
|
68
|
+
key, *values = message
|
69
|
+
if key.nil?
|
70
|
+
remove_socket(sock)
|
71
|
+
else
|
72
|
+
blk.call(key, values)
|
73
|
+
end
|
74
|
+
reduce(&blk)
|
75
|
+
end
|
76
|
+
else
|
77
|
+
blk.call([nil])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Connect to each master.
|
84
|
+
#
|
85
|
+
def worker_sockets
|
86
|
+
@worker_sockets ||= begin
|
87
|
+
@master_sockets.map do |addr|
|
88
|
+
sock = @socket_class.new
|
89
|
+
sock.connect addr
|
90
|
+
sock
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# Kind of sharding
|
96
|
+
#
|
97
|
+
# Choose the master socket for a key (simple sharding).
#
# key - any object; its to_s form is hashed with MD5 and taken modulo the
#       number of sockets, so equal keys always map to the same socket.
#
# Returns the first socket when there is at most one
# (nil when there are none).
def pick_socket(key)
  sockets = worker_sockets
  return sockets.first if sockets.size <= 1

  # The original referenced an undefined `worker_socket` here, which raised
  # NameError whenever more than one master was configured.
  shard = Digest::MD5.hexdigest(key.to_s).to_i(16) % sockets.size
  sockets[shard]
end
|
105
|
+
|
106
|
+
# Take random socket to get reduce message
|
107
|
+
#
|
108
|
+
def random_socket
|
109
|
+
worker_sockets.sample
|
110
|
+
end
|
111
|
+
|
112
|
+
# Remove socket when it is empty
|
113
|
+
#
|
114
|
+
def remove_socket(sock)
|
115
|
+
worker_sockets.delete sock
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
data/map_reduce.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'map_reduce/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "map_reduce"
|
8
|
+
spec.version = MapReduce::VERSION
|
9
|
+
spec.authors = ["Petr Yanovich"]
|
10
|
+
spec.email = ["fl00r@yandex.ru"]
|
11
|
+
spec.description = %q{Simple distributed Map Reduce Framework on Ruby}
|
12
|
+
spec.summary = %q{Simple distributed Map Reduce Framework on Ruby}
|
13
|
+
spec.homepage = ""
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "em-synchrony"
|
24
|
+
|
25
|
+
spec.add_dependency "em-zmq-tp10"
|
26
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "MapReduce stack" do
|
4
|
+
before do
|
5
|
+
@pid = fork do
|
6
|
+
master = MapReduce::Master.new
|
7
|
+
master.run
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
after do
|
12
|
+
Process.kill "TERM", @pid
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should map and reduce some data in CB mode" do
|
16
|
+
EM.run do
|
17
|
+
data = {}
|
18
|
+
worker = MapReduce::Worker.new
|
19
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',') do
|
20
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',') do
|
21
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',') do
|
22
|
+
worker.map_finished do
|
23
|
+
worker.reduce do |key, values|
|
24
|
+
if key
|
25
|
+
data[key] = values
|
26
|
+
else
|
27
|
+
data.size.must_equal 2
|
28
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
29
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
30
|
+
|
31
|
+
EM.stop
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should map and reduce some data in SYNC mode" do
|
42
|
+
EM.synchrony do
|
43
|
+
data = {}
|
44
|
+
worker = MapReduce::Worker.new type: :sync
|
45
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
|
46
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
|
47
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
|
48
|
+
worker.map_finished
|
49
|
+
worker.reduce do |key, values|
|
50
|
+
data[key] = values if key
|
51
|
+
end
|
52
|
+
data.size.must_equal 2
|
53
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
54
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
55
|
+
|
56
|
+
EM.stop
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
File without changes
|
File without changes
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: map_reduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Petr Yanovich
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-28 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
none: false
|
21
|
+
type: :development
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
none: false
|
28
|
+
prerelease: false
|
29
|
+
name: bundler
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
none: false
|
37
|
+
type: :development
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
none: false
|
44
|
+
prerelease: false
|
45
|
+
name: rake
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
requirement: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
none: false
|
53
|
+
type: :development
|
54
|
+
version_requirements: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
none: false
|
60
|
+
prerelease: false
|
61
|
+
name: em-synchrony
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
none: false
|
69
|
+
type: :runtime
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
none: false
|
76
|
+
prerelease: false
|
77
|
+
name: em-zmq-tp10
|
78
|
+
description: Simple distributed Map Reduce Framework on Ruby
|
79
|
+
email:
|
80
|
+
- fl00r@yandex.ru
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- LICENSE.txt
|
88
|
+
- README.md
|
89
|
+
- Rakefile
|
90
|
+
- lib/map_reduce.rb
|
91
|
+
- lib/map_reduce/master.rb
|
92
|
+
- lib/map_reduce/socket/master.rb
|
93
|
+
- lib/map_reduce/socket/worker_em.rb
|
94
|
+
- lib/map_reduce/socket/worker_sync.rb
|
95
|
+
- lib/map_reduce/version.rb
|
96
|
+
- lib/map_reduce/worker.rb
|
97
|
+
- map_reduce.gemspec
|
98
|
+
- spec/map_reduce/map_reduce_spec.rb
|
99
|
+
- spec/map_reduce/master_spec.rb
|
100
|
+
- spec/map_reduce/worker_spec.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
homepage: ''
|
103
|
+
licenses:
|
104
|
+
- MIT
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
require_paths:
|
108
|
+
- lib
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 2247563880137465180
|
114
|
+
version: '0'
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
none: false
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ! '>'
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.3.1
|
123
|
+
none: false
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 1.8.25
|
127
|
+
signing_key:
|
128
|
+
specification_version: 3
|
129
|
+
summary: Simple distributed Map Reduce Framework on Ruby
|
130
|
+
test_files:
|
131
|
+
- spec/map_reduce/map_reduce_spec.rb
|
132
|
+
- spec/map_reduce/master_spec.rb
|
133
|
+
- spec/map_reduce/worker_spec.rb
|
134
|
+
- spec/spec_helper.rb
|