map-reduce-ruby 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19ac70d25d6622d30398997cae0a4d6730ebc13f8bbc31df896c5acf480923c4
4
- data.tar.gz: ede2a8f736053a268175a7948324a15954475526d0589bf47416347b57a50740
3
+ metadata.gz: 68967e02da6776738d27e48228374396dc613f4ee4a48f08268b71c92764eaa0
4
+ data.tar.gz: 0f0633adb6f2c51617ea3234284b30433da4a9ceb25d167ce8eaef2527dc09db
5
5
  SHA512:
6
- metadata.gz: 650125350b5f166ccc0fb1346c77dfa9f17471ac9ae9290b4cbc4731a5baec872a810551e0b15af064b37f73c7e103ab6281d48cd839f26d4aab6780d5cd66d1
7
- data.tar.gz: 70d9f8668143cb507537e369ad0ea79d9bb9f2c33469dffccd5952116f09758a7a880831d81aa4e3b67bc548b4b36aafe321a8d553946d8734fb78c11cd1686e
6
+ metadata.gz: d27e08793121dd81d4f8cc887f350d30ec9ea4039dad3490a4c7969164413cf575bdba0aa4c767097fc4af124aa3bb17b55f6781ff03919ef7d36164b3b14fa6
7
+ data.tar.gz: 71165ebbab647370de51c709b34f8e5cc8348300a41809354472ac125ef542255ec767a4613046cb964443b698fa7ec5dc8334d40df2a64728822aab0b170b43
data/.rubocop.yml CHANGED
@@ -49,3 +49,9 @@ Layout/LineLength:
49
49
 
50
50
  Style/FrozenStringLiteralComment:
51
51
  EnforcedStyle: never
52
+
53
+ Style/ObjectThen:
54
+ Enabled: false
55
+
56
+ Gemspec/RequireMFA:
57
+ Enabled: false
data/CHANGELOG.md CHANGED
@@ -1 +1,28 @@
1
1
  # CHANGELOG
2
+
3
+ ## v2.0.0
4
+
5
+ * [BREAKING] Keys are no longer automatically converted to json before using
6
+ them for sorting
7
+ * This allows for a proper semantic sort order of numeric keys in addition
8
+ to just the clustering of keys
9
+ * Examples of valid keys: `"key"`, `["foo", 1.0]`, `["foo", ["bar"]]`
10
+ * Examples of problematic keys: `nil`, `true`, `["foo", nil]`, `{ "foo" => "bar" }`
11
+ * For migration purposes it is recommended to convert your keys to and from
12
+ json manually if you have complex keys using `JSON.generate`/`JSON.parse`:
13
+
14
+ ```ruby
15
+ class WordCounter
16
+ def map(url)
17
+ HTTP.get(url).to_s.split.each do |word|
18
+ yield(JSON.generate("key" => word), 1) # if you use a hash for the key
19
+ end
20
+ end
21
+
22
+ def reduce(json_key, count1, count2)
23
+ key = JSON.parse(json_key) # if you want to access the original key
24
+
25
+ count1 + count2
26
+ end
27
+ end
28
+ ```
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- map-reduce-ruby (1.0.0)
4
+ map-reduce-ruby (2.0.0)
5
5
  json
6
6
  lazy_priority_queue
7
7
 
@@ -9,41 +9,42 @@ GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  ast (2.4.2)
12
- diff-lcs (1.4.4)
13
- json (2.5.1)
12
+ diff-lcs (1.5.0)
13
+ json (2.6.2)
14
14
  lazy_priority_queue (0.1.1)
15
- parallel (1.20.1)
16
- parser (3.0.0.0)
15
+ parallel (1.22.1)
16
+ parser (3.1.2.1)
17
17
  ast (~> 2.4.1)
18
- rainbow (3.0.0)
19
- regexp_parser (2.0.3)
20
- rexml (3.2.4)
21
- rspec (3.10.0)
22
- rspec-core (~> 3.10.0)
23
- rspec-expectations (~> 3.10.0)
24
- rspec-mocks (~> 3.10.0)
25
- rspec-core (3.10.1)
26
- rspec-support (~> 3.10.0)
27
- rspec-expectations (3.10.1)
18
+ rainbow (3.1.1)
19
+ regexp_parser (2.5.0)
20
+ rexml (3.2.5)
21
+ rspec (3.11.0)
22
+ rspec-core (~> 3.11.0)
23
+ rspec-expectations (~> 3.11.0)
24
+ rspec-mocks (~> 3.11.0)
25
+ rspec-core (3.11.0)
26
+ rspec-support (~> 3.11.0)
27
+ rspec-expectations (3.11.1)
28
28
  diff-lcs (>= 1.2.0, < 2.0)
29
- rspec-support (~> 3.10.0)
30
- rspec-mocks (3.10.1)
29
+ rspec-support (~> 3.11.0)
30
+ rspec-mocks (3.11.1)
31
31
  diff-lcs (>= 1.2.0, < 2.0)
32
- rspec-support (~> 3.10.0)
33
- rspec-support (3.10.1)
34
- rubocop (0.93.1)
32
+ rspec-support (~> 3.11.0)
33
+ rspec-support (3.11.1)
34
+ rubocop (1.36.0)
35
+ json (~> 2.3)
35
36
  parallel (~> 1.10)
36
- parser (>= 2.7.1.5)
37
+ parser (>= 3.1.2.1)
37
38
  rainbow (>= 2.2.2, < 4.0)
38
- regexp_parser (>= 1.8)
39
- rexml
40
- rubocop-ast (>= 0.6.0)
39
+ regexp_parser (>= 1.8, < 3.0)
40
+ rexml (>= 3.2.5, < 4.0)
41
+ rubocop-ast (>= 1.20.1, < 2.0)
41
42
  ruby-progressbar (~> 1.7)
42
- unicode-display_width (>= 1.4.0, < 2.0)
43
- rubocop-ast (1.4.1)
44
- parser (>= 2.7.1.5)
43
+ unicode-display_width (>= 1.4.0, < 3.0)
44
+ rubocop-ast (1.21.0)
45
+ parser (>= 3.1.1.0)
45
46
  ruby-progressbar (1.11.0)
46
- unicode-display_width (1.7.0)
47
+ unicode-display_width (2.3.0)
47
48
 
48
49
  PLATFORMS
49
50
  ruby
data/README.md CHANGED
@@ -120,6 +120,26 @@ mappers are finished.
120
120
 
121
121
  That's it.
122
122
 
123
+ ## Limitations for Keys
124
+
125
+ You have to make sure that your keys are properly sortable in Ruby. Please
126
+ note:
127
+
128
+ ```ruby
129
+ "key" < nil # comparison of String with nil failed (ArgumentError)
130
+
131
+ false < true # undefined method `<' for false:FalseClass (NoMethodError)
132
+
133
+ 1 > "key" # comparison of Integer with String failed (ArgumentError)
134
+
135
+ { "key" => "value1" } < { "key" => "value2" } #=> false
136
+ { "key" => "value1" } > { "key" => "value2" } #=> false
137
+ { "key" => "value1" } <=> { "key" => "value2" } #=> nil
138
+ ```
139
+
140
+ For those reasons, it is recommended to only use strings, numbers and arrays or
141
+ a combination of those.
142
+
123
143
  ## Internals
124
144
 
125
145
  To fully understand the performance details, the following outlines the inner
@@ -96,7 +96,7 @@ module MapReduce
96
96
  def write_chunk
97
97
  tempfile = Tempfile.new
98
98
 
99
- @buffer.sort_by! { |item| JSON.generate(item.first) }
99
+ @buffer.sort_by!(&:first)
100
100
 
101
101
  reduce_chunk(@buffer, @implementation).each do |pair|
102
102
  tempfile.puts JSON.generate(pair)
@@ -29,7 +29,7 @@ module MapReduce
29
29
 
30
30
  key, value = JSON.parse(line)
31
31
 
32
- queue.push([key, value, index], JSON.generate(key))
32
+ queue.push([key, value, index], key)
33
33
  end
34
34
 
35
35
  loop do
@@ -45,7 +45,7 @@ module MapReduce
45
45
 
46
46
  key, value = JSON.parse(line)
47
47
 
48
- queue.push([key, value, index], JSON.generate(key))
48
+ queue.push([key, value, index], key)
49
49
  end
50
50
 
51
51
  files.each(&:rewind)
@@ -1,4 +1,26 @@
1
1
  module MapReduce
2
+ # Since LazyPriorityQueue is using <= and >=, but not <=>, it does not
3
+ # support sorting array keys. Therefore we wrap the keys in SortKey, which
4
+ # provides those operators. See https://bugs.ruby-lang.org/issues/5574
5
+
6
+ class SortKey
7
+ include Comparable
8
+
9
+ attr_reader :object
10
+
11
+ def initialize(object)
12
+ @object = object
13
+ end
14
+
15
+ def <=>(other)
16
+ res = object <=> other.object
17
+
18
+ raise(ArgumentError, "Unable to compare #{@object.inspect} with #{other.object.inspect}") if res.nil?
19
+
20
+ res
21
+ end
22
+ end
23
+
2
24
  # The MapReduce::PriorityQueue implements a min priority queue using a
3
25
  # binomial heap.
4
26
 
@@ -25,7 +47,7 @@ module MapReduce
25
47
  # priority_queue.push("some object", "some key")
26
48
 
27
49
  def push(object, key)
28
- @queue.push([@sequence_number, object], key)
50
+ @queue.push([@sequence_number, object], SortKey.new(key))
29
51
 
30
52
  @sequence_number += 1
31
53
  end
@@ -1,3 +1,3 @@
1
1
  module MapReduce
2
- VERSION = "1.0.0"
2
+ VERSION = "2.0.0"
3
3
  end
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
7
7
  spec.email = ["vetter@flakks.com"]
8
8
 
9
9
  spec.summary = "The easiest way to write distributed, larger than memory map-reduce jobs"
10
- spec.description = "The MapReduce gem is the easiest way to write custom, distributed, larger "\
10
+ spec.description = "The MapReduce gem is the easiest way to write custom, distributed, larger " \
11
11
  "than memory map-reduce jobs"
12
12
  spec.homepage = "https://github.com/mrkamel/map-reduce-ruby"
13
13
  spec.license = "MIT"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: map-reduce-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Vetter
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-07-05 00:00:00.000000000 Z
11
+ date: 2022-09-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -104,7 +104,7 @@ metadata:
104
104
  homepage_uri: https://github.com/mrkamel/map-reduce-ruby
105
105
  source_code_uri: https://github.com/mrkamel/map-reduce-ruby
106
106
  changelog_uri: https://github.com/mrkamel/map-reduce/blob/master/CHANGELOG.md
107
- post_install_message:
107
+ post_install_message:
108
108
  rdoc_options: []
109
109
  require_paths:
110
110
  - lib
@@ -119,8 +119,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
119
  - !ruby/object:Gem::Version
120
120
  version: '0'
121
121
  requirements: []
122
- rubygems_version: 3.0.3
123
- signing_key:
122
+ rubygems_version: 3.3.3
123
+ signing_key:
124
124
  specification_version: 4
125
125
  summary: The easiest way to write distributed, larger than memory map-reduce jobs
126
126
  test_files: []