map-reduce-ruby 2.0.0 → 2.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +1 -1
- data/README.md +4 -7
- data/lib/map_reduce/mapper.rb +8 -2
- data/lib/map_reduce/mergeable.rb +10 -0
- data/lib/map_reduce/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5545309a188291db41e8f5fc24af45a8d983c5084f2233d735cab309921c928c
|
4
|
+
data.tar.gz: 779a839704ace3780a304bc7295c8b1f27e834253ee0544e9ff6ae21eda93753
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eb577347b7e5c09dd34166e814fc0c50180b6f036fad84de3857dc93d28242be81637d5b0c7f19ea7a846c659eca0c12fcbae01cdac37c4f2bf50c6d9f8f27f6
|
7
|
+
data.tar.gz: 704b5d6a140583099c53902ceaab5af7b45c41c004f159fc215a942a4749063bf0990c2d60cde15af9663da726bab03a1258e085c0e789470150ab96caf895f7
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## v2.1.1
|
4
|
+
|
5
|
+
* Fix in `MapReduce::Mapper` when no `reduce` implementation is given
|
6
|
+
|
7
|
+
## v2.1.0
|
8
|
+
|
9
|
+
* Do not reduce in `MapReduce::Mapper` when no `reduce` implementation is given
|
10
|
+
|
3
11
|
## v2.0.0
|
4
12
|
|
5
13
|
* [BREAKING] Keys are no longer automatically converted to json before using
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,8 +7,7 @@ than memory map-reduce jobs by using your local disk and some arbitrary storage
|
|
7
7
|
layer like s3. You can specify how much memory you are willing to offer and
|
8
8
|
MapReduce will use its buffers accordingly. Finally, you can use your already
|
9
9
|
existing background job system like `sidekiq` or one of its various
|
10
|
-
alternatives.
|
11
|
-
serialized as json.
|
10
|
+
alternatives.
|
12
11
|
|
13
12
|
## Installation
|
14
13
|
|
@@ -30,9 +29,7 @@ Or install it yourself as:
|
|
30
29
|
|
31
30
|
Any map-reduce job consists of an implementation of your `map` function, your
|
32
31
|
`reduce` function and worker code. So let's start with an implementation for a
|
33
|
-
word count map-reduce task which fetches txt documents from the web.
|
34
|
-
note that your keys and values can be everything that can be serialized as
|
35
|
-
json, but nothing else.
|
32
|
+
word count map-reduce task which fetches txt documents from the web.
|
36
33
|
|
37
34
|
```ruby
|
38
35
|
class WordCounter
|
@@ -68,8 +65,8 @@ class WordCountMapper
|
|
68
65
|
end
|
69
66
|
```
|
70
67
|
|
71
|
-
Please note that `MapReduce::HashPartitioner.new(16)` states that we want
|
72
|
-
the dataset into 16 partitions (i.e. 0, 1, ... 15). Finally, we need some
|
68
|
+
Please note that `MapReduce::HashPartitioner.new(16)` states that we want to
|
69
|
+
split the dataset into 16 partitions (i.e. 0, 1, ... 15). Finally, we need some
|
73
70
|
worker code to run the reduce part:
|
74
71
|
|
75
72
|
```ruby
|
data/lib/map_reduce/mapper.rb
CHANGED
@@ -71,7 +71,10 @@ module MapReduce
|
|
71
71
|
|
72
72
|
partitions = {}
|
73
73
|
|
74
|
-
|
74
|
+
chunk = k_way_merge(@chunks)
|
75
|
+
chunk = reduce_chunk(chunk, @implementation) if @implementation.respond_to?(:reduce)
|
76
|
+
|
77
|
+
chunk.each do |pair|
|
75
78
|
partition = @partitioner.call(pair[0])
|
76
79
|
|
77
80
|
(partitions[partition] ||= Tempfile.new).puts(JSON.generate(pair))
|
@@ -98,7 +101,10 @@ module MapReduce
|
|
98
101
|
|
99
102
|
@buffer.sort_by!(&:first)
|
100
103
|
|
101
|
-
|
104
|
+
chunk = @buffer
|
105
|
+
chunk = reduce_chunk(chunk, @implementation) if @implementation.respond_to?(:reduce)
|
106
|
+
|
107
|
+
chunk.each do |pair|
|
102
108
|
tempfile.puts JSON.generate(pair)
|
103
109
|
end
|
104
110
|
|
data/lib/map_reduce/mergeable.rb
CHANGED
@@ -20,6 +20,16 @@ module MapReduce
|
|
20
20
|
def k_way_merge(files)
|
21
21
|
return enum_for(:k_way_merge, files) unless block_given?
|
22
22
|
|
23
|
+
if files.size == 1
|
24
|
+
files.first.each_line do |line|
|
25
|
+
yield(JSON.parse(line))
|
26
|
+
end
|
27
|
+
|
28
|
+
files.each(&:rewind)
|
29
|
+
|
30
|
+
return
|
31
|
+
end
|
32
|
+
|
23
33
|
queue = PriorityQueue.new
|
24
34
|
|
25
35
|
files.each_with_index do |file, index|
|
data/lib/map_reduce/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: map-reduce-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|