kraps 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57d06d20f406e72e26424dcba5c2af296a1551085113371c2cae9894f18e72ff
4
- data.tar.gz: a25c8c3440cdd26eeb9b32655e77556e4be4966d47bd3e290d0a702c3b4cde9f
3
+ metadata.gz: f5bb601e7ee415b95b4b258a0241c25e6fe19eb3e772c06d4149afbfcfbe6c3d
4
+ data.tar.gz: cb948c05947e48d2d8e970eebbc6e2c4a5b0a88cb162ad87bf0743196f6bcaef
5
5
  SHA512:
6
- metadata.gz: 655e2d0f525e136b72e87231a8d359eda952b7e3b30a8cd38b25f7b4fba27baebedacced0a00071915166325b621437869026c6785ab7353eea928ca736bc2a7
7
- data.tar.gz: 0d450b3032a6809d2a5300e18e3cfc9a948ca5f589f1161823d9923084939d249282d28bf0619c3b9ace10dc742118dfcf5895c898e2d9fe7c55f98d00e683b4
6
+ metadata.gz: 1d1c5a16205c5584626fed5bca9b6c7dd6fae3b4f3c725b158e7740f6fa05a17abdcb483b43cbdad813576e2fc2c7621b89b94d61b32776d85ae774f5a4332d1
7
+ data.tar.gz: 2670dbc002633e801d8cf98fc8454c8881295f72b505bd4baf6cf0c8685a8c97a8a2dbf26e8a617c74b452ef627e820807e7af6e05b20a627fb99ce2eb216a1a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## v0.3.0
4
+
5
+ * Changed partitioners to receive the number of partitions
6
+ as second parameter
7
+
3
8
  ## v0.2.0
4
9
 
5
10
  * Updated map-reduce-ruby to allow concurrent uploads
data/README.md CHANGED
@@ -143,17 +143,18 @@ split. Kraps assigns every `key` to a partition, either using a custom
143
143
  `partitioner` or the default built in hash partitioner. The hash partitioner
144
144
  simply calculates a hash of your key modulo the number of partitions and the
145
145
  resulting partition number is the partition where the respective key is
146
- assigned to. A partitioner is a callable which gets the key as argument and
147
- returns a partition number. The built in hash partitioner looks similar to this
148
- one:
146
+ assigned to. A partitioner is a callable which gets the key and the number of
147
+ partitions as arguments and returns a partition number. The built in hash
148
+ partitioner looks similar to this one:
149
149
 
150
150
  ```ruby
151
- partitioner = proc { |key| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) % 128 } # 128 partitions
151
+ partitioner = proc { |key, num_partitions| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) % num_partitions }
152
152
  ```
153
153
 
154
154
  Please note, it's important that the partitioner and the specified number of
155
155
  partitions stay in sync. When you use a custom partitioner, please make sure
156
- that the partitioner operates on the same number of partitions you specify.
156
+ that the partitioner correctly returns a partition number in the range of
157
+ `0...num_partitions`.
157
158
 
158
159
  ## Datatypes
159
160
 
data/lib/kraps/hash_partitioner.rb ADDED
@@ -0,0 +1,7 @@
1
+ module Kraps
2
+ class HashPartitioner
3
+ def call(key, num_partitions)
4
+ Digest::SHA1.hexdigest(JSON.generate(key))[0..4].to_i(16) % num_partitions
5
+ end
6
+ end
7
+ end
data/lib/kraps/job.rb CHANGED
@@ -6,10 +6,10 @@ module Kraps
6
6
  @worker = worker
7
7
  @steps = []
8
8
  @partitions = 0
9
- @partitioner = MapReduce::HashPartitioner.new(@partitions)
9
+ @partitioner = HashPartitioner.new
10
10
  end
11
11
 
12
- def parallelize(partitions:, partitioner: MapReduce::HashPartitioner.new(partitions), worker: @worker, &block)
12
+ def parallelize(partitions:, partitioner: HashPartitioner.new, worker: @worker, &block)
13
13
  fresh.tap do |job|
14
14
  job.instance_eval do
15
15
  @partitions = partitions
@@ -24,7 +24,7 @@ module Kraps
24
24
  fresh.tap do |job|
25
25
  job.instance_eval do
26
26
  @partitions = partitions if partitions
27
- @partitioner = partitioner || MapReduce::HashPartitioner.new(partitions) if partitioner || partitions
27
+ @partitioner = partitioner if partitioner
28
28
 
29
29
  @steps << Step.new(action: Actions::MAP, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
30
30
  end
data/lib/kraps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kraps
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/kraps/worker.rb CHANGED
@@ -181,7 +181,7 @@ module Kraps
181
181
  end
182
182
 
183
183
  def partitioner
184
- @partitioner ||= step.args[:partitioner]
184
+ @partitioner ||= proc { |key| step.args[:partitioner].call(key, step.args[:partitions]) }
185
185
  end
186
186
 
187
187
  def distributed_job
data/lib/kraps.rb CHANGED
@@ -2,6 +2,7 @@ require_relative "kraps/version"
2
2
  require_relative "kraps/drivers"
3
3
  require_relative "kraps/actions"
4
4
  require_relative "kraps/parallelizer"
5
+ require_relative "kraps/hash_partitioner"
5
6
  require_relative "kraps/temp_path"
6
7
  require_relative "kraps/temp_paths"
7
8
  require_relative "kraps/timeout_queue"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kraps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Vetter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-11-01 00:00:00.000000000 Z
11
+ date: 2022-11-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attachie
@@ -144,6 +144,7 @@ files:
144
144
  - lib/kraps/actions.rb
145
145
  - lib/kraps/drivers.rb
146
146
  - lib/kraps/frame.rb
147
+ - lib/kraps/hash_partitioner.rb
147
148
  - lib/kraps/interval.rb
148
149
  - lib/kraps/job.rb
149
150
  - lib/kraps/parallelizer.rb