map-reduce-ruby 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/README.md +4 -7
- data/lib/map_reduce/mapper.rb +4 -1
- data/lib/map_reduce/mergeable.rb +10 -0
- data/lib/map_reduce/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f3eeb7739b733f1abdf325ddfcf5a4c42fb257edd837952781246fcda9f48fb
|
4
|
+
data.tar.gz: f40eb08a341fc522c8f7043b74ac1c47fb00af491a107f51a6a76ca67f389629
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6e91d7a1f55f0b89d333b317ecedc7978ca29a709dfad403906f6f2835b121a0a49afb29d03f55d8d7c8aa5c1b70628400404b7bf4d590046ae2e66ed48d7abc
|
7
|
+
data.tar.gz: cc4524aec895d935b70548163e5818f8725c5a05985e55f9b8d9330c34491968bb3ec54f02466ac0748eeb130f47a40d07baefa2627b04ed10dbac1c9638b1af
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -7,8 +7,7 @@ than memory map-reduce jobs by using your local disk and some arbitrary storage
|
|
7
7
|
layer like s3. You can specify how much memory you are willing to offer and
|
8
8
|
MapReduce will use its buffers accordingly. Finally, you can use your already
|
9
9
|
existing background job system like `sidekiq` or one of its various
|
10
|
-
alternatives.
|
11
|
-
serialized as json.
|
10
|
+
alternatives.
|
12
11
|
|
13
12
|
## Installation
|
14
13
|
|
@@ -30,9 +29,7 @@ Or install it yourself as:
|
|
30
29
|
|
31
30
|
Any map-reduce job consists of an implementation of your `map` function, your
|
32
31
|
`reduce` function and worker code. So let's start with an implementation for a
|
33
|
-
word count map-reduce task which fetches txt documents from the web. Please
|
34
|
-
note that your keys and values can be everything that can be serialized as
|
35
|
-
json, but nothing else.
|
32
|
+
word count map-reduce task which fetches txt documents from the web.
|
36
33
|
|
37
34
|
```ruby
|
38
35
|
class WordCounter
|
@@ -68,8 +65,8 @@ class WordCountMapper
|
|
68
65
|
end
|
69
66
|
```
|
70
67
|
|
71
|
-
Please note that `MapReduce::HashPartitioner.new(16)` states that we want
|
72
|
-
the dataset into 16 partitions (i.e. 0, 1, ... 15). Finally, we need some
|
68
|
+
Please note that `MapReduce::HashPartitioner.new(16)` states that we want to
|
69
|
+
split the dataset into 16 partitions (i.e. 0, 1, ... 15). Finally, we need some
|
73
70
|
worker code to run the reduce part:
|
74
71
|
|
75
72
|
```ruby
|
data/lib/map_reduce/mapper.rb
CHANGED
@@ -71,7 +71,10 @@ module MapReduce
|
|
71
71
|
|
72
72
|
partitions = {}
|
73
73
|
|
74
|
-
|
74
|
+
chunk = k_way_merge(@chunks)
|
75
|
+
chunk = reduce_chunk(chunk, @implementation) if @implementation.respond_to?(:reduce)
|
76
|
+
|
77
|
+
chunk.each do |pair|
|
75
78
|
partition = @partitioner.call(pair[0])
|
76
79
|
|
77
80
|
(partitions[partition] ||= Tempfile.new).puts(JSON.generate(pair))
|
data/lib/map_reduce/mergeable.rb
CHANGED
@@ -20,6 +20,16 @@ module MapReduce
|
|
20
20
|
def k_way_merge(files)
|
21
21
|
return enum_for(:k_way_merge, files) unless block_given?
|
22
22
|
|
23
|
+
if files.size == 1
|
24
|
+
files.first.each_line do |line|
|
25
|
+
yield(JSON.parse(line))
|
26
|
+
end
|
27
|
+
|
28
|
+
files.each(&:rewind)
|
29
|
+
|
30
|
+
return
|
31
|
+
end
|
32
|
+
|
23
33
|
queue = PriorityQueue.new
|
24
34
|
|
25
35
|
files.each_with_index do |file, index|
|
data/lib/map_reduce/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: map-reduce-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-10-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rspec
|