map_reduce 0.0.1.alpha
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +31 -0
- data/Rakefile +11 -0
- data/lib/map_reduce.rb +21 -0
- data/lib/map_reduce/master.rb +180 -0
- data/lib/map_reduce/socket/master.rb +100 -0
- data/lib/map_reduce/socket/worker_em.rb +4 -0
- data/lib/map_reduce/socket/worker_sync.rb +16 -0
- data/lib/map_reduce/version.rb +3 -0
- data/lib/map_reduce/worker.rb +118 -0
- data/map_reduce.gemspec +26 -0
- data/spec/map_reduce/map_reduce_spec.rb +59 -0
- data/spec/map_reduce/master_spec.rb +0 -0
- data/spec/map_reduce/worker_spec.rb +0 -0
- data/spec/spec_helper.rb +7 -0
- metadata +134 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Petr Yanovich
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# MapReduce
|
2
|
+
|
3
|
+
MapReduce is a simple distributed MapReduce framework on Ruby.
|
4
|
+
|
5
|
+
Internally it uses ZMQ transport and EventMachine.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
gem 'map_reduce'
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install map_reduce
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
TODO
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/lib/map_reduce.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require "map_reduce/version"
require "digest/sha1"
require "em-zmq-tp10"
# MapReduce::Master calls FileUtils.mkdir_p / touch / rm, so load it explicitly
# instead of relying on another library having required it first.
require "fileutils"
require "logger"
|
5
|
+
|
6
|
+
module MapReduce
|
7
|
+
DEFAULT_SOCKET = "ipc:///dev/shm/master.sock"
|
8
|
+
|
9
|
+
extend self
|
10
|
+
|
11
|
+
def logger
|
12
|
+
@logger ||= begin
|
13
|
+
log = Logger.new(STDOUT)
|
14
|
+
log.formatter = Logger::Formatter.new
|
15
|
+
log
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
require File.expand_path("../map_reduce/master", __FILE__)
|
21
|
+
require File.expand_path("../map_reduce/worker", __FILE__)
|
@@ -0,0 +1,180 @@
|
|
1
|
+
require File.expand_path("../socket/master", __FILE__)
|
2
|
+
|
3
|
+
module MapReduce
|
4
|
+
class Master
|
5
|
+
# How often data will be flushed to disk
|
6
|
+
FLUSH_TIMEOUT = 1
|
7
|
+
# How many lines should be parsed by one iteration of grouping
|
8
|
+
GROUP_LINES = 100
|
9
|
+
# How many seconds should we sleep if grouping is going faster than reducing
|
10
|
+
GROUP_TIMEOUT = 1
|
11
|
+
# How many grouped keys may be queued before grouping pauses for GROUP_TIMEOUT
|
12
|
+
GROUP_MAX = 10_000
|
13
|
+
|
14
|
+
# Valid options:
|
15
|
+
# * socket - socket address to bind
|
16
|
+
# default is 'ipc:///dev/shm/master.sock'
|
17
|
+
# * log_folder - folder to store received MAP data
|
18
|
+
# default is '/tmp/mapreduce/'
|
19
|
+
# * workers - count of workers that will emit data.
|
20
|
+
# default is 1 (pass :auto to use the number of connected workers),
|
21
|
+
# but in small jobs it is better to define it explicitly,
|
22
|
+
# because if one worker will stop before others start
|
23
|
+
# master will decide that map job is done and will start reducing
|
24
|
+
# * delimiter - master log stores data like "key{delimiter}values"
|
25
|
+
# so to prevent collisions you can specify your own unique delimiter
|
26
|
+
# default is a pipe "|"
|
27
|
+
#
|
28
|
+
def initialize(opts = {})
|
29
|
+
# Socket addr to bind
|
30
|
+
@socket_addr = opts[:socket] || ::MapReduce::DEFAULT_SOCKET
|
31
|
+
# Folder to write logs
|
32
|
+
@log_folder = opts[:log_folder] || "/tmp/mapreduce/"
|
33
|
+
# How many MapReduce workers will emit data
|
34
|
+
@workers = opts[:workers] || 1
|
35
|
+
# Delimiter to store key/value pairs in log
|
36
|
+
@delimiter = opts[:delimiter] || "|"
|
37
|
+
|
38
|
+
@log = []
|
39
|
+
@data = []
|
40
|
+
@workers_envelopes = {}
|
41
|
+
@log_filename = File.join(@log_folder, "master-#{Process.pid}.log")
|
42
|
+
@sorted_log_filename = File.join(@log_folder, "master-#{Process.pid}_sorted.log")
|
43
|
+
|
44
|
+
FileUtils.mkdir_p(@log_folder)
|
45
|
+
FileUtils.touch(@log_filename)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Start Eventloop
|
49
|
+
#
|
50
|
+
def run
|
51
|
+
EM.run do
|
52
|
+
# Init socket
|
53
|
+
master_socket
|
54
|
+
|
55
|
+
# Init flushing timer
|
56
|
+
flush
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
# Stop Eventloop
|
61
|
+
#
|
62
|
+
def stop
|
63
|
+
EM.stop
|
64
|
+
end
|
65
|
+
|
66
|
+
# Store data in log array till flush
|
67
|
+
#
|
68
|
+
def map(key, message)
|
69
|
+
@log << "#{key}#{@delimiter}#{message}"
|
70
|
+
end
|
71
|
+
|
72
|
+
# Send data back to worker.
|
73
|
+
# Last item in data is last unfinished session,
|
74
|
+
# so till the end of file reading we don't send it
|
75
|
+
#
|
76
|
+
def reduce(envelope)
|
77
|
+
if @data.size >= 2
|
78
|
+
data = @data.shift
|
79
|
+
data = data.flatten
|
80
|
+
master_socket.send_reply(data, envelope)
|
81
|
+
elsif @reduce_stop
|
82
|
+
data = @data.shift
|
83
|
+
data = data.flatten if data
|
84
|
+
master_socket.send_reply(data, envelope)
|
85
|
+
else
|
86
|
+
EM.add_timer(1) do
|
87
|
+
reduce(envelope)
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Opening log file for read/write
|
93
|
+
#
|
94
|
+
def log_file
|
95
|
+
@log_file ||= begin
|
96
|
+
File.open(@log_filename, "w+")
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
# Opening sorted log for reading
|
101
|
+
#
|
102
|
+
def sorted_log_file
|
103
|
+
@sorted_log_file ||= begin
|
104
|
+
File.open(@sorted_log_filename, "r")
|
105
|
+
end
|
106
|
+
end
|
107
|
+
|
108
|
+
# Flushes buffered MAP records to the log file and re-arms itself to run
# again in FLUSH_TIMEOUT seconds.
#
# Every record is written as its own newline-terminated line. The batch must
# end with "\n": otherwise the last record of this flush and the first record
# of the next flush would be glued into a single line in the log.
#
def flush
  if @log.any?
    log_file << @log.join("\n") << "\n"
    log_file.flush
    @log.clear
  end

  EM.add_timer(FLUSH_TIMEOUT) { flush }
end
|
121
|
+
|
122
|
+
# Sorts the raw log into the sorted log using the system `sort` utility
# (the fastest way to sort a big file), then deletes the raw log.
#
# * The write handle on the raw log (if one was opened) is closed first, so
#   it does not stay open on a file that is about to be deleted.
# * `sort` is invoked without a shell, so folder names containing spaces or
#   shell metacharacters are passed through untouched.
# * LC_ALL=C forces plain byte-order comparison, which keeps all lines that
#   start with the same "key<delimiter>" prefix adjacent; locale-aware
#   collation does not guarantee that.
# * The raw log is only removed when `sort` succeeded; otherwise an error is
#   raised and the data is kept on disk.
#
def sort
  if @log_file
    @log_file.close unless @log_file.closed?
    @log_file = nil
  end

  sorted = system({ "LC_ALL" => "C" }, "sort", @log_filename, "-o", @sorted_log_filename)
  fail "Failed to sort #{@log_filename}" unless sorted

  FileUtils.rm(@log_filename)
end
|
130
|
+
|
131
|
+
# Start reducing part.
|
132
|
+
# First, flushing rest of log to disk.
|
133
|
+
# Then sort data.
|
134
|
+
# Then start to read/group data
|
135
|
+
#
|
136
|
+
def reduce!
|
137
|
+
flush
|
138
|
+
sort
|
139
|
+
|
140
|
+
iter = sorted_log_file.each_line
|
141
|
+
group iter
|
142
|
+
end
|
143
|
+
|
144
|
+
# Reads the sorted log and groups consecutive lines by key into @data,
# where each entry has the form [key, [value, value, ...]].
#
# At most GROUP_LINES lines are consumed per call; the method then
# re-schedules itself on the next reactor tick. If the queue (@data) already
# holds GROUP_MAX entries (it grows faster than workers grab data), reading
# is paused for GROUP_TIMEOUT seconds.
#
# iter - an enumerator over the lines of the sorted log; `iter.next` raising
#        StopIteration marks the end of the file.
#
# When the end of the file is reached the sorted log handle is closed, the
# sorted log is deleted and @reduce_stop is set.
#
def group(iter)
  if @data.size >= GROUP_MAX
    EM.add_timer(GROUP_TIMEOUT) { group(iter) }
  else
    GROUP_LINES.times do
      line = iter.next.chomp
      # Split on the FIRST delimiter only, so a value that itself contains
      # the delimiter is kept intact instead of being truncated.
      key, msg = line.split(@delimiter, 2)

      last = @data.last
      if last && last[0] == key
        last[1] << msg
      else
        @data << [key, [msg]]
      end
    end

    EM.next_tick { group(iter) }
  end
rescue StopIteration
  # Release the read handle before removing the file it points to.
  if @sorted_log_file
    @sorted_log_file.close unless @sorted_log_file.closed?
    @sorted_log_file = nil
  end
  FileUtils.rm(@sorted_log_filename)
  @reduce_stop = true
end
|
169
|
+
|
170
|
+
# Initializing and binding socket
|
171
|
+
#
|
172
|
+
def master_socket
|
173
|
+
@master_socket ||= begin
|
174
|
+
sock = MapReduce::Socket::Master.new self, @workers
|
175
|
+
sock.bind @socket_addr
|
176
|
+
sock
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# Reply socket.
|
2
|
+
# Master accepts "map", "map_finished", and "reduce" messages.
|
3
|
+
# For "map" messages it only acknowledges with "ok",
|
4
|
+
# but for "reduce" requests it returns key with grouped values.
|
5
|
+
#
|
6
|
+
module MapReduce::Socket
|
7
|
+
class Master < EM::Protocols::Zmq2::Rep
|
8
|
+
# If worker is ready to reduce data, but we are still in MAP state
|
9
|
+
# we will sleep for REDUCE_WAIT seconds till state is not REDUCE
|
10
|
+
REDUCE_WAIT = 1
|
11
|
+
|
12
|
+
def initialize(master, workers)
|
13
|
+
@master = master
|
14
|
+
@workers = workers
|
15
|
+
|
16
|
+
@connections = {}
|
17
|
+
@state = :map
|
18
|
+
|
19
|
+
super()
|
20
|
+
end
|
21
|
+
|
22
|
+
def receive_request(message, envelope)
|
23
|
+
@connections[envelope.first] = false
|
24
|
+
|
25
|
+
type, key, msg = message
|
26
|
+
case type
|
27
|
+
when "map"
|
28
|
+
map(envelope, key, msg)
|
29
|
+
when "map_finished"
|
30
|
+
map_finished(envelope)
|
31
|
+
when "reduce"
|
32
|
+
reduce(envelope)
|
33
|
+
else
|
34
|
+
MapReduce.logger.error("Wrong message type: #{type}")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Send data to log
|
39
|
+
# Someone should never MAP data when master already in REDUCE state
|
40
|
+
#
|
41
|
+
def map(envelope, key, msg)
|
42
|
+
if @state == :map
|
43
|
+
@master.map(key, msg)
|
44
|
+
ok(envelope)
|
45
|
+
else
|
46
|
+
MapReduce.logger.error("Someone tries to MAP data while state is REDUCE")
|
47
|
+
not_ok(envelope, "You can't MAP while we are reducing")
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# When worker stops mapping data, it sends "map_finished" message.
|
52
|
+
# When all workers will send "map_finished" message reduce will begin.
|
53
|
+
#
|
54
|
+
def map_finished(envelope)
|
55
|
+
ok(envelope)
|
56
|
+
|
57
|
+
@connections[envelope.first] ||= true
|
58
|
+
@workers = @connections.size if @workers == :auto
|
59
|
+
|
60
|
+
return unless @connections.all?{ |k,v| v }
|
61
|
+
return unless @connections.size == @workers
|
62
|
+
|
63
|
+
@state = :reduce
|
64
|
+
@master.reduce!
|
65
|
+
end
|
66
|
+
|
67
|
+
# Handles a "reduce" request from a worker.
#
# While the socket is still in the :map state the request is retried every
# REDUCE_WAIT seconds; once the state is :reduce the request is handed to
# the master, which replies with the next key and its grouped values.
#
# The connection is tracked under envelope.first, the same key that
# receive_request and map_finished use, so a worker is not counted twice in
# @connections. Switching back to the :map state is done by send_reply when
# the master has no more data, so no state change is needed here.
#
def reduce(envelope)
  @connections[envelope.first] ||= true

  if @state == :reduce
    @master.reduce(envelope)
  else
    EM.add_timer(REDUCE_WAIT) { reduce(envelope) }
  end
end
|
80
|
+
|
81
|
+
# Simple OK reply
|
82
|
+
#
|
83
|
+
def ok(envelope)
|
84
|
+
send_reply(["ok"], envelope)
|
85
|
+
end
|
86
|
+
|
87
|
+
# Simple NOT OK reply
|
88
|
+
#
|
89
|
+
def not_ok(envelope, error)
|
90
|
+
send_reply(["error", error], envelope)
|
91
|
+
end
|
92
|
+
|
93
|
+
# Switch back to :map state if reduce finished
|
94
|
+
#
|
95
|
+
def send_reply(data, envelope)
|
96
|
+
@state = :map unless data
|
97
|
+
super
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module MapReduce::Socket
|
2
|
+
class WorkerSync < EM::Protocols::Zmq2::ReqCb
|
3
|
+
alias_method :async_send_request, :send_request
|
4
|
+
def send_request(data, &blk)
|
5
|
+
fib = Fiber.current
|
6
|
+
async_send_request(data) do |message|
|
7
|
+
fib.resume(message)
|
8
|
+
end
|
9
|
+
if block_given?
|
10
|
+
blk.call Fiber.yield
|
11
|
+
else
|
12
|
+
Fiber.yield
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
# MapReduce Worker does two jobs:
|
2
|
+
# First, it maps (emits) all data to masters;
|
3
|
+
# Second, it reduces data returned from master;
|
4
|
+
#
|
5
|
+
module MapReduce
|
6
|
+
class Worker
|
7
|
+
|
8
|
+
# Valid options:
# * masters - socket addresses of masters,
#   default is 'ipc:///dev/shm/master.sock'
# * type - connection type:
# ** :em - Eventmachine with callbacks (default)
# ** :sync - Synchronous type on Fibers
#
# The socket implementation is loaded lazily, depending on the chosen type.
# The options hash passed by the caller is only read, never modified.
#
# Raises RuntimeError when :type is neither :em nor :sync.
#
def initialize(opts = {})
  @master_sockets = opts[:masters] || [::MapReduce::DEFAULT_SOCKET]

  type = opts[:type] || :em
  @socket_class =
    case type
    when :em
      require File.expand_path("../socket/worker_em", __FILE__)
      MapReduce::Socket::WorkerEm
    when :sync
      require File.expand_path("../socket/worker_sync", __FILE__)
      MapReduce::Socket::WorkerSync
    else
      fail "Wrong Connection type. Choose :em or :sync, not #{type}"
    end
end
|
30
|
+
|
31
|
+
# Sends key and value to master through socket.
|
32
|
+
# Key can't be nil.
|
33
|
+
#
|
34
|
+
def emit(key, value, &blk)
|
35
|
+
fail "Key can't be nil" if key.nil?
|
36
|
+
|
37
|
+
sock = pick_socket(key)
|
38
|
+
sock.send_request(["map", key, value], &blk)
|
39
|
+
end
|
40
|
+
alias :map :emit
|
41
|
+
|
42
|
+
# Explicitly stop MAP phase.
|
43
|
+
# Master will wait till all workers will send "map_finished" message.
|
44
|
+
#
|
45
|
+
def map_finished(&blk)
|
46
|
+
all = worker_sockets.size
|
47
|
+
resp = 0
|
48
|
+
|
49
|
+
worker_sockets.each do |sock|
|
50
|
+
sock.send_request(["map_finished"]) do |msg|
|
51
|
+
blk.call if block_given? && (resp+=1) == all
|
52
|
+
end
|
53
|
+
end
|
54
|
+
["ok"]
|
55
|
+
end
|
56
|
+
|
57
|
+
# Reduce operation.
|
58
|
+
# Sends request to all masters.
|
59
|
+
# If master returns nil it means that it is already empty:
|
60
|
+
# nothing to reduce.
|
61
|
+
# Reducing till any socket returns data.
|
62
|
+
# If nothing to reduce, we return nil to client.
|
63
|
+
#
|
64
|
+
def reduce(&blk)
|
65
|
+
sock = random_socket
|
66
|
+
if sock
|
67
|
+
sock.send_request(["reduce"]) do |message|
|
68
|
+
key, *values = message
|
69
|
+
if key.nil?
|
70
|
+
remove_socket(sock)
|
71
|
+
else
|
72
|
+
blk.call(key, values)
|
73
|
+
end
|
74
|
+
reduce(&blk)
|
75
|
+
end
|
76
|
+
else
|
77
|
+
blk.call([nil])
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
# Connect to each master.
|
84
|
+
#
|
85
|
+
def worker_sockets
|
86
|
+
@worker_sockets ||= begin
|
87
|
+
@master_sockets.map do |addr|
|
88
|
+
sock = @socket_class.new
|
89
|
+
sock.connect addr
|
90
|
+
sock
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
# Kind of sharding: picks the master socket responsible for +key+.
#
# With several masters the socket index is the MD5 digest of key.to_s taken
# modulo the number of sockets, so the same key is always sent to the same
# master. With a single master that socket is always used.
#
def pick_socket(key)
  sockets = worker_sockets
  shard = if sockets.size > 1
    Digest::MD5.hexdigest(key.to_s).to_i(16) % sockets.size
  else
    0
  end
  sockets[shard]
end
|
105
|
+
|
106
|
+
# Take random socket to get reduce message
|
107
|
+
#
|
108
|
+
def random_socket
|
109
|
+
worker_sockets.sample
|
110
|
+
end
|
111
|
+
|
112
|
+
# Remove socket when it is empty
|
113
|
+
#
|
114
|
+
def remove_socket(sock)
|
115
|
+
worker_sockets.delete sock
|
116
|
+
end
|
117
|
+
end
|
118
|
+
end
|
data/map_reduce.gemspec
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'map_reduce/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "map_reduce"
|
8
|
+
spec.version = MapReduce::VERSION
|
9
|
+
spec.authors = ["Petr Yanovich"]
|
10
|
+
spec.email = ["fl00r@yandex.ru"]
|
11
|
+
spec.description = %q{Simple distributed Map Reduce Framework on Ruby}
|
12
|
+
spec.summary = %q{Simple distributed Map Reduce Framework on Ruby}
|
13
|
+
spec.homepage = ""
|
14
|
+
spec.license = "MIT"
|
15
|
+
|
16
|
+
spec.files = `git ls-files`.split($/)
|
17
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
18
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
|
+
spec.require_paths = ["lib"]
|
20
|
+
|
21
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
22
|
+
spec.add_development_dependency "rake"
|
23
|
+
spec.add_development_dependency "em-synchrony"
|
24
|
+
|
25
|
+
spec.add_dependency "em-zmq-tp10"
|
26
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe "MapReduce stack" do
|
4
|
+
before do
|
5
|
+
@pid = fork do
|
6
|
+
master = MapReduce::Master.new
|
7
|
+
master.run
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
after do
|
12
|
+
Process.kill "TERM", @pid
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should map and reduce some data in CB mode" do
|
16
|
+
EM.run do
|
17
|
+
data = {}
|
18
|
+
worker = MapReduce::Worker.new
|
19
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',') do
|
20
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',') do
|
21
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',') do
|
22
|
+
worker.map_finished do
|
23
|
+
worker.reduce do |key, values|
|
24
|
+
if key
|
25
|
+
data[key] = values
|
26
|
+
else
|
27
|
+
data.size.must_equal 2
|
28
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
29
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
30
|
+
|
31
|
+
EM.stop
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
it "should map and reduce some data in SYNC mode" do
|
42
|
+
EM.synchrony do
|
43
|
+
data = {}
|
44
|
+
worker = MapReduce::Worker.new type: :sync
|
45
|
+
worker.map("Petr", ["Radiohead", "Muse", "R.E.M."] * ',')
|
46
|
+
worker.map("Alex", ["Madonna", "Lady Gaga"] * ',')
|
47
|
+
worker.map("Petr", ["Radiohead", "The Beatles", "Aquarium"] * ',')
|
48
|
+
worker.map_finished
|
49
|
+
worker.reduce do |key, values|
|
50
|
+
data[key] = values if key
|
51
|
+
end
|
52
|
+
data.size.must_equal 2
|
53
|
+
data["Petr"].must_equal [["Radiohead", "Muse", "R.E.M."] * ',', ["Radiohead", "The Beatles", "Aquarium"] * ',']
|
54
|
+
data["Alex"].must_equal [["Madonna", "Lady Gaga"] * ',']
|
55
|
+
|
56
|
+
EM.stop
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
File without changes
|
File without changes
|
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: map_reduce
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1.alpha
|
5
|
+
prerelease: 6
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Petr Yanovich
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-05-28 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
none: false
|
21
|
+
type: :development
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
none: false
|
28
|
+
prerelease: false
|
29
|
+
name: bundler
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - ! '>='
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0'
|
36
|
+
none: false
|
37
|
+
type: :development
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0'
|
43
|
+
none: false
|
44
|
+
prerelease: false
|
45
|
+
name: rake
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
requirement: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ! '>='
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: '0'
|
52
|
+
none: false
|
53
|
+
type: :development
|
54
|
+
version_requirements: !ruby/object:Gem::Requirement
|
55
|
+
requirements:
|
56
|
+
- - ! '>='
|
57
|
+
- !ruby/object:Gem::Version
|
58
|
+
version: '0'
|
59
|
+
none: false
|
60
|
+
prerelease: false
|
61
|
+
name: em-synchrony
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
none: false
|
69
|
+
type: :runtime
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
none: false
|
76
|
+
prerelease: false
|
77
|
+
name: em-zmq-tp10
|
78
|
+
description: Simple distributed Map Reduce Framework on Ruby
|
79
|
+
email:
|
80
|
+
- fl00r@yandex.ru
|
81
|
+
executables: []
|
82
|
+
extensions: []
|
83
|
+
extra_rdoc_files: []
|
84
|
+
files:
|
85
|
+
- .gitignore
|
86
|
+
- Gemfile
|
87
|
+
- LICENSE.txt
|
88
|
+
- README.md
|
89
|
+
- Rakefile
|
90
|
+
- lib/map_reduce.rb
|
91
|
+
- lib/map_reduce/master.rb
|
92
|
+
- lib/map_reduce/socket/master.rb
|
93
|
+
- lib/map_reduce/socket/worker_em.rb
|
94
|
+
- lib/map_reduce/socket/worker_sync.rb
|
95
|
+
- lib/map_reduce/version.rb
|
96
|
+
- lib/map_reduce/worker.rb
|
97
|
+
- map_reduce.gemspec
|
98
|
+
- spec/map_reduce/map_reduce_spec.rb
|
99
|
+
- spec/map_reduce/master_spec.rb
|
100
|
+
- spec/map_reduce/worker_spec.rb
|
101
|
+
- spec/spec_helper.rb
|
102
|
+
homepage: ''
|
103
|
+
licenses:
|
104
|
+
- MIT
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
require_paths:
|
108
|
+
- lib
|
109
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
110
|
+
requirements:
|
111
|
+
- - ! '>='
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
hash: 2247563880137465180
|
114
|
+
version: '0'
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
none: false
|
118
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
119
|
+
requirements:
|
120
|
+
- - ! '>'
|
121
|
+
- !ruby/object:Gem::Version
|
122
|
+
version: 1.3.1
|
123
|
+
none: false
|
124
|
+
requirements: []
|
125
|
+
rubyforge_project:
|
126
|
+
rubygems_version: 1.8.25
|
127
|
+
signing_key:
|
128
|
+
specification_version: 3
|
129
|
+
summary: Simple distributed Map Reduce Framework on Ruby
|
130
|
+
test_files:
|
131
|
+
- spec/map_reduce/map_reduce_spec.rb
|
132
|
+
- spec/map_reduce/master_spec.rb
|
133
|
+
- spec/map_reduce/worker_spec.rb
|
134
|
+
- spec/spec_helper.rb
|