map-reduce-ruby 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/test.yml +23 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.rubocop.yml +51 -0
- data/CHANGELOG.md +1 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +58 -0
- data/LICENSE.txt +21 -0
- data/README.md +211 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/map-reduce-ruby.rb +1 -0
- data/lib/map_reduce.rb +16 -0
- data/lib/map_reduce/hash_partitioner.rb +32 -0
- data/lib/map_reduce/mapper.rb +113 -0
- data/lib/map_reduce/mergeable.rb +56 -0
- data/lib/map_reduce/priority_queue.rb +49 -0
- data/lib/map_reduce/reduceable.rb +38 -0
- data/lib/map_reduce/reducer.rb +107 -0
- data/lib/map_reduce/temp_path.rb +42 -0
- data/lib/map_reduce/version.rb +3 -0
- data/map-reduce-ruby.gemspec +38 -0
- metadata +126 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 19ac70d25d6622d30398997cae0a4d6730ebc13f8bbc31df896c5acf480923c4
|
4
|
+
data.tar.gz: ede2a8f736053a268175a7948324a15954475526d0589bf47416347b57a50740
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 650125350b5f166ccc0fb1346c77dfa9f17471ac9ae9290b4cbc4731a5baec872a810551e0b15af064b37f73c7e103ab6281d48cd839f26d4aab6780d5cd66d1
|
7
|
+
data.tar.gz: 70d9f8668143cb507537e369ad0ea79d9bb9f2c33469dffccd5952116f09758a7a880831d81aa4e3b67bc548b4b36aafe321a8d553946d8734fb78c11cd1686e
|
@@ -0,0 +1,23 @@
|
|
1
|
+
name: test
|
2
|
+
on: [push, pull_request]
|
3
|
+
jobs:
|
4
|
+
build:
|
5
|
+
runs-on: ubuntu-latest
|
6
|
+
strategy:
|
7
|
+
matrix:
|
8
|
+
ruby: ['2.6', '2.7', '3.0']
|
9
|
+
steps:
|
10
|
+
- uses: actions/checkout@v1
|
11
|
+
- uses: actions/setup-ruby@v1
|
12
|
+
with:
|
13
|
+
ruby-version: ${{ matrix.ruby }}
|
14
|
+
- uses: actions/cache@v1
|
15
|
+
id: cache
|
16
|
+
with:
|
17
|
+
path: vendor/bundler
|
18
|
+
key: ${{ hashFiles('Gemfile.lock') }}-${{ matrix.ruby }}
|
19
|
+
- run: |
|
20
|
+
gem install bundler
|
21
|
+
bundle install --path=vendor/bundler
|
22
|
+
bundle exec rspec
|
23
|
+
bundle exec rubocop
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
AllCops:
|
2
|
+
NewCops: enable
|
3
|
+
|
4
|
+
Naming/FileName:
|
5
|
+
Exclude:
|
6
|
+
- lib/map-reduce-ruby.rb
|
7
|
+
|
8
|
+
Style/StringConcatenation:
|
9
|
+
Exclude:
|
10
|
+
- spec/**/*.rb
|
11
|
+
|
12
|
+
Metrics/BlockLength:
|
13
|
+
Enabled: false
|
14
|
+
|
15
|
+
Gemspec/RequiredRubyVersion:
|
16
|
+
Enabled: false
|
17
|
+
|
18
|
+
Style/MutableConstant:
|
19
|
+
Enabled: false
|
20
|
+
|
21
|
+
Metrics/MethodLength:
|
22
|
+
Enabled: false
|
23
|
+
|
24
|
+
Style/Documentation:
|
25
|
+
Enabled: false
|
26
|
+
|
27
|
+
Style/NumericPredicate:
|
28
|
+
Enabled: false
|
29
|
+
|
30
|
+
Metrics/AbcSize:
|
31
|
+
Enabled: false
|
32
|
+
|
33
|
+
Metrics/CyclomaticComplexity:
|
34
|
+
Enabled: false
|
35
|
+
|
36
|
+
Metrics/PerceivedComplexity:
|
37
|
+
Enabled: false
|
38
|
+
|
39
|
+
Style/StringLiterals:
|
40
|
+
Enabled: true
|
41
|
+
EnforcedStyle: double_quotes
|
42
|
+
|
43
|
+
Style/StringLiteralsInInterpolation:
|
44
|
+
Enabled: true
|
45
|
+
EnforcedStyle: double_quotes
|
46
|
+
|
47
|
+
Layout/LineLength:
|
48
|
+
Max: 120
|
49
|
+
|
50
|
+
Style/FrozenStringLiteralComment:
|
51
|
+
EnforcedStyle: never
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# CHANGELOG
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
map-reduce-ruby (1.0.0)
|
5
|
+
json
|
6
|
+
lazy_priority_queue
|
7
|
+
|
8
|
+
GEM
|
9
|
+
remote: https://rubygems.org/
|
10
|
+
specs:
|
11
|
+
ast (2.4.2)
|
12
|
+
diff-lcs (1.4.4)
|
13
|
+
json (2.5.1)
|
14
|
+
lazy_priority_queue (0.1.1)
|
15
|
+
parallel (1.20.1)
|
16
|
+
parser (3.0.0.0)
|
17
|
+
ast (~> 2.4.1)
|
18
|
+
rainbow (3.0.0)
|
19
|
+
regexp_parser (2.0.3)
|
20
|
+
rexml (3.2.4)
|
21
|
+
rspec (3.10.0)
|
22
|
+
rspec-core (~> 3.10.0)
|
23
|
+
rspec-expectations (~> 3.10.0)
|
24
|
+
rspec-mocks (~> 3.10.0)
|
25
|
+
rspec-core (3.10.1)
|
26
|
+
rspec-support (~> 3.10.0)
|
27
|
+
rspec-expectations (3.10.1)
|
28
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
29
|
+
rspec-support (~> 3.10.0)
|
30
|
+
rspec-mocks (3.10.1)
|
31
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
32
|
+
rspec-support (~> 3.10.0)
|
33
|
+
rspec-support (3.10.1)
|
34
|
+
rubocop (0.93.1)
|
35
|
+
parallel (~> 1.10)
|
36
|
+
parser (>= 2.7.1.5)
|
37
|
+
rainbow (>= 2.2.2, < 4.0)
|
38
|
+
regexp_parser (>= 1.8)
|
39
|
+
rexml
|
40
|
+
rubocop-ast (>= 0.6.0)
|
41
|
+
ruby-progressbar (~> 1.7)
|
42
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
43
|
+
rubocop-ast (1.4.1)
|
44
|
+
parser (>= 2.7.1.5)
|
45
|
+
ruby-progressbar (1.11.0)
|
46
|
+
unicode-display_width (1.7.0)
|
47
|
+
|
48
|
+
PLATFORMS
|
49
|
+
ruby
|
50
|
+
x86_64-linux
|
51
|
+
|
52
|
+
DEPENDENCIES
|
53
|
+
map-reduce-ruby!
|
54
|
+
rspec
|
55
|
+
rubocop
|
56
|
+
|
57
|
+
BUNDLED WITH
|
58
|
+
2.2.2
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Benjamin Vetter
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,211 @@
|
|
1
|
+
# MapReduce
|
2
|
+
|
3
|
+
**The easiest way to write distributed, larger than memory map-reduce jobs**
|
4
|
+
|
5
|
+
The MapReduce gem provides the easiest way to write custom, distributed, larger
|
6
|
+
than memory map-reduce jobs by using your local disk and some arbitrary storage
|
7
|
+
layer like s3. You can specify how much memory you are willing to offer and
|
8
|
+
MapReduce will use its buffers accordingly. Finally, you can use your already
|
9
|
+
existing background job system like `sidekiq` or one of its various
|
10
|
+
alternatives. Finally, your keys and values can be everything that can be
|
11
|
+
serialized as json.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add this line to your application's Gemfile:
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
gem 'map-reduce-ruby'
|
19
|
+
```
|
20
|
+
|
21
|
+
And then execute:
|
22
|
+
|
23
|
+
$ bundle install
|
24
|
+
|
25
|
+
Or install it yourself as:
|
26
|
+
|
27
|
+
$ gem install map-reduce-ruby
|
28
|
+
|
29
|
+
## Usage
|
30
|
+
|
31
|
+
Any map-reduce job consists of an implementation of your `map` function, your
|
32
|
+
`reduce` function and worker code. So let's start with an implementation for a
|
33
|
+
word count map-reduce task which fetches txt documents from the web. Please
|
34
|
+
note that your keys and values can be everything that can be serialized as
|
35
|
+
json, but nothing else.
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
class WordCounter
|
39
|
+
def map(url)
|
40
|
+
HTTP.get(url).to_s.split.each do |word|
|
41
|
+
yield(word, 1)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def reduce(word, count1, count2)
|
46
|
+
count1 + count2
|
47
|
+
end
|
48
|
+
end
|
49
|
+
```
|
50
|
+
|
51
|
+
The `#map` method takes some key, e.g. a url, and yields an arbitrary amount of
|
52
|
+
key-value pairs. The `#reduce` method takes the key as well as two values and
|
53
|
+
should return a single reduced value.
|
54
|
+
|
55
|
+
Next, we need some worker code to run the mapping part:
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
class WordCountMapper
|
59
|
+
def perform(job_id, mapper_id, url)
|
60
|
+
mapper = MapReduce::Mapper.new(WordCounter.new, partitioner: MapReduce::HashPartitioner.new(16), memory_limit: 100.megabytes)
|
61
|
+
mapper.map(url)
|
62
|
+
|
63
|
+
mapper.shuffle do |partition, tempfile|
|
64
|
+
# store content of tempfile e.g. on s3:
|
65
|
+
bucket.object("map_reduce/jobs/#{job_id}/partitions/#{partition}/chunk.#{mapper_id}.json").put(body: tempfile)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
```
|
70
|
+
|
71
|
+
Please note that `MapReduce::HashPartitioner.new(16)` states that we want split
|
72
|
+
the dataset into 16 partitions (i.e. 0, 1, ... 15). Finally, we need some
|
73
|
+
worker code to run the reduce part:
|
74
|
+
|
75
|
+
```ruby
|
76
|
+
class WordCountReducer
|
77
|
+
def perform(job_id, partition)
|
78
|
+
reducer = MapReduce::Reducer.new(WordCounter.new)
|
79
|
+
|
80
|
+
# fetch all chunks of the partitions e.g. from s3:
|
81
|
+
bucket.list(prefix: "map_reduce/jobs/#{job_id}/partitions/#{partition}/").each do |object|
|
82
|
+
chunk_path = reducer.add_chunk # returns a path to a tempfile
|
83
|
+
|
84
|
+
object.download_file(temp_path)
|
85
|
+
end
|
86
|
+
|
87
|
+
reducer.reduce(chunk_limit: 32) do |word, count|
|
88
|
+
# each word with its final count
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
94
|
+
Please note that `MapReduce::Reducer#add_chunk` returns a path to a tempfile,
|
95
|
+
not a `Tempfile` object. This allows to limit the number of open file
|
96
|
+
descriptors.
|
97
|
+
|
98
|
+
To run your mappers, you can do:
|
99
|
+
|
100
|
+
```ruby
|
101
|
+
job_id = SecureRandom.hex
|
102
|
+
|
103
|
+
list_of_urls.each_with_index do |url, index|
|
104
|
+
WordCountMapper.perform_async(job_id, index, url)
|
105
|
+
end
|
106
|
+
```
|
107
|
+
|
108
|
+
And to run your reducers:
|
109
|
+
|
110
|
+
```ruby
|
111
|
+
(0..15).each do |partition|
|
112
|
+
WordCountReducer.perform_async(job_id, partition)
|
113
|
+
end
|
114
|
+
```
|
115
|
+
|
116
|
+
How to automate running the mappers and reducers in sequence, depends on your
|
117
|
+
background job system. The most simple approach is e.g. to track your mapper
|
118
|
+
state in redis and have a job to start your reducers which waits up until your
|
119
|
+
mappers are finished.
|
120
|
+
|
121
|
+
That's it.
|
122
|
+
|
123
|
+
## Internals
|
124
|
+
|
125
|
+
To fully understand the performance details, the following outlines the inner
|
126
|
+
workings of MapReduce. Of course, feel free to check the code as well.
|
127
|
+
|
128
|
+
`MapReduce::Mapper#map` calls your `map` implementation and adds each yielded
|
129
|
+
key-value pair to an internal buffer up until the memory limit is reached.
|
130
|
+
When the memory limit is reached, the buffer is sorted by key and fed through
|
131
|
+
your `reduce` implementation already, as this can greatly reduce the amount of
|
132
|
+
data already. The result is written to a tempfile. This proceeds up until all
|
133
|
+
key-value pairs are yielded. `MapReduce::Mapper#shuffle` then reads the first
|
134
|
+
key-value pair of all already sorted chunk tempfiles and adds them to a
|
135
|
+
priority queue using a binomial heap, such that with every `pop` operation on
|
136
|
+
that heap, we get items sorted by key. When the item returned by `pop` e.g.
|
137
|
+
belongs to the second chunk, then the next key-value pair of the second chunk
|
138
|
+
is subsequently read and added to the priority queue, up until no more pairs
|
139
|
+
are available. This guarantees that we sort all chunks without fully loading
|
140
|
+
them into memory and is called `k-way-merge`. With every `pop` operation, your
|
141
|
+
`reduce` implementation is continously called up until the key changes between
|
142
|
+
two calls to `pop`. When the key changes, the key is known to be fully reduced,
|
143
|
+
such that the key is hashed modulo the number of partitions and gets written to
|
144
|
+
the correct partition tempfile (when `MapReduce::HashPartitioner` is used).
|
145
|
+
|
146
|
+
The resulting partition tempfiles need to be stored in some global storage
|
147
|
+
system like s3, such that your mapper workers can upload them and the reducer
|
148
|
+
workers can download them.
|
149
|
+
|
150
|
+
`MapReduce::Reducer#add_chunk` adds and registers a new tempfile path such that
|
151
|
+
your reducer can download a mapper file for the particular partition and write
|
152
|
+
its contents to that tempfile path. `MapReduce::Reducer#reduce` finally again
|
153
|
+
builds up a priority queue and performs `k-way-merge`, feeds the key-value
|
154
|
+
pairs into your reduce implementation up until a key change between `pop`
|
155
|
+
operations occurs and yields the fully reduced key-value pair. At the end
|
156
|
+
`#reduce` removes all the tempfiles. You can pass a `chunk_limit` to
|
157
|
+
`MapReduce::Reducer#reduce`, which is most useful when you run on a system with
|
158
|
+
a limited number of open file descriptors allowed. The `chunk_limit` ensures
|
159
|
+
that only the specified amount of chunks are processed in a single run. A run
|
160
|
+
basically means: it takes up to `chunk_limit` chunks, reduces them and pushes
|
161
|
+
the result as a new chunk to the list of chunks to process. Thus, if your
|
162
|
+
number of file descriptors is unlimited, you want to set it to a higher number
|
163
|
+
to avoid the overhead of multiple runs.
|
164
|
+
|
165
|
+
## Partitioners
|
166
|
+
|
167
|
+
Partitioners are used to split the dataset into a specified amount of
|
168
|
+
partitions, which allows to parallelize the work to be done by reducers.
|
169
|
+
MapReduce comes with a `HashPartitioner`, which takes the number of partitions
|
170
|
+
as an argument and derives the partition number from the key as follows:
|
171
|
+
|
172
|
+
```ruby
|
173
|
+
class HashPartitioner
|
174
|
+
def initialize(num_partitions)
|
175
|
+
@num_partitions = num_partitions
|
176
|
+
end
|
177
|
+
|
178
|
+
def call(key)
|
179
|
+
Digest::SHA1.hexdigest(JSON.generate(key))[0..4].to_i(16) % @num_partitions
|
180
|
+
end
|
181
|
+
end
|
182
|
+
```
|
183
|
+
|
184
|
+
Thus, writing your own custom partitioner is really easy and, as it follows the
|
185
|
+
interface of callables, could even be expressed as a simple one-liner:
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
MyPartitioner = proc { |key| Digest::SHA1.hexdigest(JSON.generate(key))[0..4].to_i(16) % 8 }
|
189
|
+
```
|
190
|
+
|
191
|
+
## Development
|
192
|
+
|
193
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run
|
194
|
+
`rake spec` to run the tests. You can also run `bin/console` for an interactive
|
195
|
+
prompt that will allow you to experiment.
|
196
|
+
|
197
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To
|
198
|
+
release a new version, update the version number in `version.rb`, and then run
|
199
|
+
`bundle exec rake release`, which will create a git tag for the version, push
|
200
|
+
git commits and the created tag, and push the `.gem` file to
|
201
|
+
[rubygems.org](https://rubygems.org).
|
202
|
+
|
203
|
+
## Contributing
|
204
|
+
|
205
|
+
Bug reports and pull requests are welcome on GitHub at
|
206
|
+
https://github.com/mrkamel/map-reduce-ruby
|
207
|
+
|
208
|
+
## License
|
209
|
+
|
210
|
+
The gem is available as open source under the terms of the [MIT
|
211
|
+
License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "map_reduce"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "map_reduce"
|
data/lib/map_reduce.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
require "tempfile"
|
2
|
+
require "json"
|
3
|
+
require "digest"
|
4
|
+
require "fileutils"
|
5
|
+
require "tmpdir"
|
6
|
+
require "lazy_priority_queue"
|
7
|
+
require "map_reduce/version"
|
8
|
+
require "map_reduce/priority_queue"
|
9
|
+
require "map_reduce/temp_path"
|
10
|
+
require "map_reduce/mergeable"
|
11
|
+
require "map_reduce/reduceable"
|
12
|
+
require "map_reduce/hash_partitioner"
|
13
|
+
require "map_reduce/mapper"
|
14
|
+
require "map_reduce/reducer"
|
15
|
+
|
16
|
+
module MapReduce; end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::HashPartitioner calculates a partition for the passed keys
|
3
|
+
# using SHA1 modulo the desired number of partitions.
|
4
|
+
|
5
|
+
class HashPartitioner
|
6
|
+
# Initializes a HashPartitioner.
|
7
|
+
#
|
8
|
+
# @param num_partitions [Fixnum] The desired number of partitions.
|
9
|
+
# Typically 8, 16, 32, 64, etc. but can be everything according to your
|
10
|
+
# needs.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# MapReduce::HashPartitioner.new(16)
|
14
|
+
|
15
|
+
def initialize(num_partitions)
|
16
|
+
@num_partitions = num_partitions
|
17
|
+
end
|
18
|
+
|
19
|
+
# Calculates the partition for the specified key.
|
20
|
+
#
|
21
|
+
# @param key The key to calculate the partition for. Can be everything
|
22
|
+
# that can be serialized as json.
|
23
|
+
# @returns [Integer] The partition number.
|
24
|
+
#
|
25
|
+
# @example
|
26
|
+
# partitioner.call("some key")
|
27
|
+
|
28
|
+
def call(key)
|
29
|
+
Digest::SHA1.hexdigest(JSON.generate(key))[0..4].to_i(16) % @num_partitions
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::Mapper class runs the mapping part of your map-reduce job.
|
3
|
+
|
4
|
+
class Mapper
|
5
|
+
include Mergeable
|
6
|
+
include Reduceable
|
7
|
+
include MonitorMixin
|
8
|
+
|
9
|
+
attr_reader :partitions
|
10
|
+
|
11
|
+
# Initializes a new mapper.
|
12
|
+
#
|
13
|
+
# @param implementation Your map-reduce implementation, i.e. an object
|
14
|
+
# which responds to #map and #reduce.
|
15
|
+
# @param partitioner [#call] A partitioner, i.e. an object which responds
|
16
|
+
# to #call and calculates a partition for the passed key.
|
17
|
+
# @param memory_limit [#to_i] The memory limit, i.e. the buffer size in
|
18
|
+
# bytes.
|
19
|
+
#
|
20
|
+
# @example
|
21
|
+
# MapReduce::Mapper.new(MyImplementation.new, partitioner: HashPartitioner.new(16), memory_limit: 100.megabytes)
|
22
|
+
|
23
|
+
def initialize(implementation, partitioner: HashPartitioner.new(32), memory_limit: 100 * 1024 * 1024)
|
24
|
+
super()
|
25
|
+
|
26
|
+
@implementation = implementation
|
27
|
+
@partitioner = partitioner
|
28
|
+
@memory_limit = memory_limit.to_i
|
29
|
+
|
30
|
+
@buffer_size = 0
|
31
|
+
@buffer = []
|
32
|
+
@chunks = []
|
33
|
+
end
|
34
|
+
|
35
|
+
# Passes the received key to your map-reduce implementation and adds
|
36
|
+
# yielded key-value pair to a buffer. When the memory limit is reached, the
|
37
|
+
# chunk is sorted and written to a tempfile.
|
38
|
+
#
|
39
|
+
# @param key The key to pass to the map-reduce implementation.
|
40
|
+
#
|
41
|
+
# @example
|
42
|
+
# mapper.map("some_key")
|
43
|
+
# mapper.map("other_key")
|
44
|
+
|
45
|
+
def map(*args, **kwargs)
|
46
|
+
@implementation.map(*args, **kwargs) do |new_key, new_value|
|
47
|
+
synchronize do
|
48
|
+
@buffer.push([new_key, new_value])
|
49
|
+
|
50
|
+
@buffer_size += JSON.generate([new_key, new_value]).bytesize
|
51
|
+
|
52
|
+
write_chunk if @buffer_size >= @memory_limit
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# Performs a k-way-merge of the sorted chunks written to tempfiles while
|
58
|
+
# already reducing the result using your map-reduce implementation and
|
59
|
+
# splitting the dataset into partitions. Finally yields each partition with
|
60
|
+
# the tempfile containing the data of the partition.
|
61
|
+
#
|
62
|
+
# @example
|
63
|
+
# mapper.shuffle do |partition, tempfile|
|
64
|
+
# # store data e.g. on s3
|
65
|
+
# end
|
66
|
+
|
67
|
+
def shuffle(&block)
|
68
|
+
return enum_for(:shuffle) unless block_given?
|
69
|
+
|
70
|
+
write_chunk if @buffer_size > 0
|
71
|
+
|
72
|
+
partitions = {}
|
73
|
+
|
74
|
+
reduce_chunk(k_way_merge(@chunks), @implementation).each do |pair|
|
75
|
+
partition = @partitioner.call(pair[0])
|
76
|
+
|
77
|
+
(partitions[partition] ||= Tempfile.new).puts(JSON.generate(pair))
|
78
|
+
end
|
79
|
+
|
80
|
+
@chunks.each { |tempfile| tempfile.close(true) }
|
81
|
+
@chunks = []
|
82
|
+
|
83
|
+
partitions.each_value(&:rewind)
|
84
|
+
|
85
|
+
partitions.each do |partition, tempfile|
|
86
|
+
block.call(partition, tempfile)
|
87
|
+
end
|
88
|
+
|
89
|
+
partitions.each_value { |tempfile| tempfile.close(true) }
|
90
|
+
|
91
|
+
nil
|
92
|
+
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
def write_chunk
|
97
|
+
tempfile = Tempfile.new
|
98
|
+
|
99
|
+
@buffer.sort_by! { |item| JSON.generate(item.first) }
|
100
|
+
|
101
|
+
reduce_chunk(@buffer, @implementation).each do |pair|
|
102
|
+
tempfile.puts JSON.generate(pair)
|
103
|
+
end
|
104
|
+
|
105
|
+
tempfile.rewind
|
106
|
+
|
107
|
+
@chunks.push(tempfile)
|
108
|
+
|
109
|
+
@buffer_size = 0
|
110
|
+
@buffer = []
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::Mergeable mixin provides the k-way-merge operation used by
|
3
|
+
# mappers as well as reducers.
|
4
|
+
|
5
|
+
module Mergeable
|
6
|
+
private
|
7
|
+
|
8
|
+
# Performs the k-way-merge of the passed files using a priority queue using
|
9
|
+
# a binomial heap. The content of the passed files needs to be sorted. It
|
10
|
+
# starts by reading one item of each file and adding it to the priority
|
11
|
+
# queue. Afterwards, it continously pops an item from the queue, yields it
|
12
|
+
# and reads a new item from the file the popped item belongs to, adding the
|
13
|
+
# read item to the queue. This continues up until all items from the files
|
14
|
+
# have been read. This guarantees that the yielded key-value pairs are
|
15
|
+
# sorted without having all items in-memory.
|
16
|
+
#
|
17
|
+
# @param files [IO, Tempfile] The files to run the k-way-merge for. The
|
18
|
+
# content of the files must be sorted.
|
19
|
+
|
20
|
+
def k_way_merge(files)
|
21
|
+
return enum_for(:k_way_merge, files) unless block_given?
|
22
|
+
|
23
|
+
queue = PriorityQueue.new
|
24
|
+
|
25
|
+
files.each_with_index do |file, index|
|
26
|
+
line = file.eof? ? nil : file.readline
|
27
|
+
|
28
|
+
next unless line
|
29
|
+
|
30
|
+
key, value = JSON.parse(line)
|
31
|
+
|
32
|
+
queue.push([key, value, index], JSON.generate(key))
|
33
|
+
end
|
34
|
+
|
35
|
+
loop do
|
36
|
+
key, value, index = queue.pop
|
37
|
+
|
38
|
+
return unless index
|
39
|
+
|
40
|
+
yield([key, value])
|
41
|
+
|
42
|
+
line = files[index].yield_self { |file| file.eof? ? nil : file.readline }
|
43
|
+
|
44
|
+
next unless line
|
45
|
+
|
46
|
+
key, value = JSON.parse(line)
|
47
|
+
|
48
|
+
queue.push([key, value, index], JSON.generate(key))
|
49
|
+
end
|
50
|
+
|
51
|
+
files.each(&:rewind)
|
52
|
+
|
53
|
+
nil
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::PriorityQueue implements a min priority queue using a
|
3
|
+
# binomial heap.
|
4
|
+
|
5
|
+
class PriorityQueue
|
6
|
+
# Initializes the priority queue.
|
7
|
+
#
|
8
|
+
# @example
|
9
|
+
# MapReduce::PriorityQueue.new
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@queue = MinPriorityQueue.new
|
13
|
+
@sequence_number = 0
|
14
|
+
end
|
15
|
+
|
16
|
+
# Adds a new item to the priority queue while the key is used for sorting.
|
17
|
+
# The object and key can basically be everything, but the key must be some
|
18
|
+
# comparable object.
|
19
|
+
#
|
20
|
+
# @param object The object to add to the priority queue.
|
21
|
+
# @param key The key to use for sorting.
|
22
|
+
#
|
23
|
+
# @example
|
24
|
+
# priority_queue = MapReduce::PriorityQueue.new
|
25
|
+
# priority_queue.push("some object", "some key")
|
26
|
+
|
27
|
+
def push(object, key)
|
28
|
+
@queue.push([@sequence_number, object], key)
|
29
|
+
|
30
|
+
@sequence_number += 1
|
31
|
+
end
|
32
|
+
|
33
|
+
# Pops the min item from the queue.
|
34
|
+
#
|
35
|
+
# @returns The popped object.
|
36
|
+
#
|
37
|
+
# @example
|
38
|
+
# priority_queue = MapReduce::PriorityQueue.new
|
39
|
+
# priority_queue.push("object1", "key1")
|
40
|
+
# priority_queue.push("object2", "key2")
|
41
|
+
# priority_queue.pop
|
42
|
+
|
43
|
+
def pop
|
44
|
+
_, object = @queue.pop
|
45
|
+
|
46
|
+
object
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::Reduceable mixin allows to reduce an arbitrary chunk using
|
3
|
+
# the specified map-reduce implementation.
|
4
|
+
|
5
|
+
module Reduceable
|
6
|
+
private
|
7
|
+
|
8
|
+
# Reduces the specified chunk, e.g. some enumerable, using the specified
|
9
|
+
# map-reduce implementation using a lookahead of one to detect key changes.
|
10
|
+
# The reduce implementation is called up until a key change is detected,
|
11
|
+
# because the key change signals that the reduce operation is finished for
|
12
|
+
# the particular key, such that it will then be yielded.
|
13
|
+
#
|
14
|
+
# @param chunk The chunk to be reduced. Can e.g. be some enumerable.
|
15
|
+
# @param implementation The map-reduce implementation.
|
16
|
+
|
17
|
+
def reduce_chunk(chunk, implementation)
|
18
|
+
return enum_for(:reduce_chunk, chunk, implementation) unless block_given?
|
19
|
+
|
20
|
+
last_item = chunk.inject do |prev_item, cur_item|
|
21
|
+
prev_key = prev_item[0]
|
22
|
+
|
23
|
+
# Here we can compare without serializing the keys to json first,
|
24
|
+
# because we reduce a chunk which includes a deserialization step.
|
25
|
+
|
26
|
+
if prev_key == cur_item[0]
|
27
|
+
[prev_key, implementation.reduce(prev_key, prev_item[1], cur_item[1])]
|
28
|
+
else
|
29
|
+
yield(prev_item)
|
30
|
+
|
31
|
+
cur_item
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
yield(last_item) if last_item
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::Reducer class runs the reducer part of your map-reduce job.
|
3
|
+
|
4
|
+
class Reducer
|
5
|
+
include Mergeable
|
6
|
+
include Reduceable
|
7
|
+
include MonitorMixin
|
8
|
+
|
9
|
+
class InvalidChunkLimit < StandardError; end
|
10
|
+
|
11
|
+
# Initializes a new reducer.
|
12
|
+
#
|
13
|
+
# @param implementation Your map-reduce implementation, i.e. an object
|
14
|
+
# which responds to #map and #reduce.
|
15
|
+
#
|
16
|
+
# @example
|
17
|
+
# MapReduce::Reducer.new(MyImplementation.new)
|
18
|
+
|
19
|
+
def initialize(implementation)
|
20
|
+
super()
|
21
|
+
|
22
|
+
@implementation = implementation
|
23
|
+
|
24
|
+
@temp_paths ||= []
|
25
|
+
end
|
26
|
+
|
27
|
+
# Adds a chunk from the mapper-phase to the reducer by registering a
|
28
|
+
# tempfile and returning the path to that tempfile, such that you can
|
29
|
+
# download a chunk e.g. from s3 and write the content to this tempfile.
|
30
|
+
#
|
31
|
+
# @returns [String] The path to a tempfile.
|
32
|
+
#
|
33
|
+
# @example
|
34
|
+
# chunk_path = reducer.add_chunk
|
35
|
+
# File.write(chunk_path, "downloaded blob")
|
36
|
+
|
37
|
+
def add_chunk
|
38
|
+
temp_path = TempPath.new
|
39
|
+
|
40
|
+
synchronize do
|
41
|
+
@temp_paths.push(temp_path)
|
42
|
+
end
|
43
|
+
|
44
|
+
temp_path.path
|
45
|
+
end
|
46
|
+
|
47
|
+
# Performs a k-way-merge of the added chunks and yields the reduced
|
48
|
+
# key-value pairs. It performs multiple runs when more than `chunk_limit`
|
49
|
+
# chunks exist. A run means: it takes up to `chunk_limit` chunks,
|
50
|
+
# reduces them and pushes the result as a new chunk. At the end it
|
51
|
+
# removes all tempfiles, even if errors occur.
|
52
|
+
#
|
53
|
+
# @param chunk_limit [Integer] The maximum number of files to process
|
54
|
+
# during a single run. Most useful when you run on a system where the
|
55
|
+
# number of open file descriptors is limited. If your number of file
|
56
|
+
# descriptors is unlimited, you want to set it to a higher number to
|
57
|
+
# avoid the overhead of multiple runs.
|
58
|
+
#
|
59
|
+
# @example
|
60
|
+
# reducer = MapReduce::Reducer.new(MyImplementation.new)
|
61
|
+
#
|
62
|
+
# chunk1_path = reducer.add_chunk
|
63
|
+
# # write data to the file
|
64
|
+
#
|
65
|
+
# chunk2_path = reducer.add_chunk
|
66
|
+
# # write data to the file
|
67
|
+
#
|
68
|
+
# reducer.reduce(chunk_limit: 32) do |key, value|
|
69
|
+
# # ...
|
70
|
+
# end
|
71
|
+
|
72
|
+
def reduce(chunk_limit:, &block)
|
73
|
+
return enum_for(:reduce, chunk_limit: chunk_limit) unless block_given?
|
74
|
+
|
75
|
+
raise(InvalidChunkLimit, "Chunk limit must be >= 2") unless chunk_limit >= 2
|
76
|
+
|
77
|
+
begin
|
78
|
+
loop do
|
79
|
+
slice = @temp_paths.shift(chunk_limit)
|
80
|
+
files = slice.select { |temp_path| File.exist?(temp_path.path) }
|
81
|
+
.map { |temp_path| File.open(temp_path.path, "r") }
|
82
|
+
|
83
|
+
begin
|
84
|
+
if @temp_paths.empty?
|
85
|
+
reduce_chunk(k_way_merge(files), @implementation).each do |pair|
|
86
|
+
block.call(pair)
|
87
|
+
end
|
88
|
+
|
89
|
+
return
|
90
|
+
end
|
91
|
+
|
92
|
+
File.open(add_chunk, "w") do |file|
|
93
|
+
reduce_chunk(k_way_merge(files), @implementation).each do |pair|
|
94
|
+
file.puts JSON.generate(pair)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
ensure
|
98
|
+
files.each(&:close)
|
99
|
+
slice.each(&:delete)
|
100
|
+
end
|
101
|
+
end
|
102
|
+
ensure
|
103
|
+
@temp_paths.each(&:delete)
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
module MapReduce
|
2
|
+
# The MapReduce::TempPath generates a tempfile path and automatically deletes
|
3
|
+
# the file when the object is garbage collected or manually deleted. Using
|
4
|
+
# this class instead of Tempfile allows to have less open file descriptors.
|
5
|
+
|
6
|
+
class TempPath
|
7
|
+
attr_reader :path
|
8
|
+
|
9
|
+
# Initializes a new tempfile path.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# temp_path = MapReduce::TempPath.new
|
13
|
+
# File.write(temp_path.path, "blob")
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
@path = Dir::Tmpname.create("") do
|
17
|
+
# nothing
|
18
|
+
end
|
19
|
+
|
20
|
+
FileUtils.touch(@path)
|
21
|
+
|
22
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(@path))
|
23
|
+
end
|
24
|
+
|
25
|
+
# @api private
|
26
|
+
|
27
|
+
def self.finalize(path)
|
28
|
+
proc { FileUtils.rm_f(path) }
|
29
|
+
end
|
30
|
+
|
31
|
+
# Allows to manually delete the tempfile.
|
32
|
+
#
|
33
|
+
# @example
|
34
|
+
# temp_path = MapReduce::TempPath.new
|
35
|
+
# File.write(temp_path.path, "blob")
|
36
|
+
# temp_path.delete
|
37
|
+
|
38
|
+
def delete
|
39
|
+
FileUtils.rm_f(path)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require_relative "lib/map_reduce/version"

Gem::Specification.new do |spec|
  spec.name = "map-reduce-ruby"
  spec.version = MapReduce::VERSION
  spec.authors = ["Benjamin Vetter"]
  spec.email = ["vetter@flakks.com"]

  spec.summary = "The easiest way to write distributed, larger than memory map-reduce jobs"
  spec.description = "The MapReduce gem is the easiest way to write custom, distributed, larger "\
                     "than memory map-reduce jobs"
  spec.homepage = "https://github.com/mrkamel/map-reduce-ruby"
  spec.license = "MIT"

  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/mrkamel/map-reduce-ruby"
  # Fixed: the changelog previously pointed at "mrkamel/map-reduce", which does
  # not match the repository name used by homepage_uri and source_code_uri.
  spec.metadata["changelog_uri"] = "https://github.com/mrkamel/map-reduce-ruby/blob/master/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "rspec"
  spec.add_development_dependency "rubocop"

  spec.add_dependency "json"
  spec.add_dependency "lazy_priority_queue"

  # For more information and examples about making a new gem, checkout our
  # guide at: https://bundler.io/guides/creating_gem.html
end
|
metadata
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
# Generated RubyGems specification metadata for map-reduce-ruby 1.0.0.
# This file is produced by `gem build` from map-reduce-ruby.gemspec and is
# deserialized by RubyGems with its own YAML loader (the !ruby/object tags
# require RubyGems' permitted-class loading; a plain safe_load will reject
# them). Do not edit by hand — regenerate from the gemspec instead.
--- !ruby/object:Gem::Specification
name: map-reduce-ruby
version: !ruby/object:Gem::Version
  version: 1.0.0
platform: ruby
authors:
- Benjamin Vetter
autorequire:
bindir: exe
cert_chain: []
date: 2021-07-05 00:00:00.000000000 Z
# Runtime and development dependencies; all unconstrained (">= 0").
dependencies:
- !ruby/object:Gem::Dependency
  name: rspec
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: rubocop
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: json
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
- !ruby/object:Gem::Dependency
  name: lazy_priority_queue
  requirement: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
    requirements:
    - - ">="
      - !ruby/object:Gem::Version
        version: '0'
description: The MapReduce gem is the easiest way to write custom, distributed, larger
  than memory map-reduce jobs
email:
- vetter@flakks.com
executables: []
extensions: []
extra_rdoc_files: []
# Files packaged into the gem, as selected by `git ls-files` in the gemspec.
files:
- ".github/workflows/test.yml"
- ".gitignore"
- ".rspec"
- ".rubocop.yml"
- CHANGELOG.md
- Gemfile
- Gemfile.lock
- LICENSE.txt
- README.md
- Rakefile
- bin/console
- bin/setup
- lib/map-reduce-ruby.rb
- lib/map_reduce.rb
- lib/map_reduce/hash_partitioner.rb
- lib/map_reduce/mapper.rb
- lib/map_reduce/mergeable.rb
- lib/map_reduce/priority_queue.rb
- lib/map_reduce/reduceable.rb
- lib/map_reduce/reducer.rb
- lib/map_reduce/temp_path.rb
- lib/map_reduce/version.rb
- map-reduce-ruby.gemspec
homepage: https://github.com/mrkamel/map-reduce-ruby
licenses:
- MIT
metadata:
  homepage_uri: https://github.com/mrkamel/map-reduce-ruby
  source_code_uri: https://github.com/mrkamel/map-reduce-ruby
  # NOTE(review): repo name differs from homepage_uri ("map-reduce" vs
  # "map-reduce-ruby") — likely a typo in the gemspec; confirm link resolves.
  changelog_uri: https://github.com/mrkamel/map-reduce/blob/master/CHANGELOG.md
post_install_message:
rdoc_options: []
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: 2.5.0
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubygems_version: 3.0.3
signing_key:
specification_version: 4
summary: The easiest way to write distributed, larger than memory map-reduce jobs
test_files: []
|