kraps 0.1.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +28 -30
- data/README.md +6 -5
- data/lib/kraps/hash_partitioner.rb +7 -0
- data/lib/kraps/job.rb +3 -3
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +15 -7
- data/lib/kraps.rb +1 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f5bb601e7ee415b95b4b258a0241c25e6fe19eb3e772c06d4149afbfcfbe6c3d
|
4
|
+
data.tar.gz: cb948c05947e48d2d8e970eebbc6e2c4a5b0a88cb162ad87bf0743196f6bcaef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d1c5a16205c5584626fed5bca9b6c7dd6fae3b4f3c725b158e7740f6fa05a17abdcb483b43cbdad813576e2fc2c7621b89b94d61b32776d85ae774f5a4332d1
|
7
|
+
data.tar.gz: 2670dbc002633e801d8cf98fc8454c8881295f72b505bd4baf6cf0c8685a8c97a8a2dbf26e8a617c74b452ef627e820807e7af6e05b20a627fb99ce2eb216a1a
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
ADDED
data/Gemfile.lock
CHANGED
@@ -1,22 +1,21 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
kraps (0.
|
4
|
+
kraps (0.2.0)
|
5
5
|
attachie
|
6
6
|
distributed_job
|
7
|
-
map-reduce-ruby (>=
|
7
|
+
map-reduce-ruby (>= 3.0.0)
|
8
8
|
redis
|
9
9
|
ruby-progressbar
|
10
10
|
|
11
11
|
GEM
|
12
12
|
remote: https://rubygems.org/
|
13
13
|
specs:
|
14
|
-
activesupport (
|
14
|
+
activesupport (7.0.4)
|
15
15
|
concurrent-ruby (~> 1.0, >= 1.0.2)
|
16
16
|
i18n (>= 1.6, < 2)
|
17
17
|
minitest (>= 5.1)
|
18
18
|
tzinfo (~> 2.0)
|
19
|
-
zeitwerk (~> 2.3)
|
20
19
|
ast (2.4.2)
|
21
20
|
attachie (1.2.0)
|
22
21
|
activesupport
|
@@ -24,17 +23,17 @@ GEM
|
|
24
23
|
connection_pool
|
25
24
|
mime-types
|
26
25
|
aws-eventstream (1.2.0)
|
27
|
-
aws-partitions (1.
|
28
|
-
aws-sdk-core (3.
|
26
|
+
aws-partitions (1.654.0)
|
27
|
+
aws-sdk-core (3.166.0)
|
29
28
|
aws-eventstream (~> 1, >= 1.0.2)
|
30
|
-
aws-partitions (~> 1, >= 1.
|
31
|
-
aws-sigv4 (~> 1.
|
29
|
+
aws-partitions (~> 1, >= 1.651.0)
|
30
|
+
aws-sigv4 (~> 1.5)
|
32
31
|
jmespath (~> 1, >= 1.6.1)
|
33
|
-
aws-sdk-kms (1.
|
34
|
-
aws-sdk-core (~> 3, >= 3.
|
32
|
+
aws-sdk-kms (1.59.0)
|
33
|
+
aws-sdk-core (~> 3, >= 3.165.0)
|
35
34
|
aws-sigv4 (~> 1.1)
|
36
|
-
aws-sdk-s3 (1.
|
37
|
-
aws-sdk-core (~> 3, >= 3.
|
35
|
+
aws-sdk-s3 (1.117.1)
|
36
|
+
aws-sdk-core (~> 3, >= 3.165.0)
|
38
37
|
aws-sdk-kms (~> 1)
|
39
38
|
aws-sigv4 (~> 1.4)
|
40
39
|
aws-sigv4 (1.5.2)
|
@@ -49,7 +48,7 @@ GEM
|
|
49
48
|
jmespath (1.6.1)
|
50
49
|
json (2.6.2)
|
51
50
|
lazy_priority_queue (0.1.1)
|
52
|
-
map-reduce-ruby (
|
51
|
+
map-reduce-ruby (3.0.0)
|
53
52
|
json
|
54
53
|
lazy_priority_queue
|
55
54
|
mime-types (3.4.1)
|
@@ -63,40 +62,39 @@ GEM
|
|
63
62
|
rake (13.0.6)
|
64
63
|
redis (5.0.5)
|
65
64
|
redis-client (>= 0.9.0)
|
66
|
-
redis-client (0.
|
65
|
+
redis-client (0.11.0)
|
67
66
|
connection_pool
|
68
|
-
regexp_parser (2.
|
67
|
+
regexp_parser (2.6.0)
|
69
68
|
rexml (3.2.5)
|
70
|
-
rspec (3.
|
71
|
-
rspec-core (~> 3.
|
72
|
-
rspec-expectations (~> 3.
|
73
|
-
rspec-mocks (~> 3.
|
74
|
-
rspec-core (3.
|
75
|
-
rspec-support (~> 3.
|
76
|
-
rspec-expectations (3.
|
69
|
+
rspec (3.12.0)
|
70
|
+
rspec-core (~> 3.12.0)
|
71
|
+
rspec-expectations (~> 3.12.0)
|
72
|
+
rspec-mocks (~> 3.12.0)
|
73
|
+
rspec-core (3.12.0)
|
74
|
+
rspec-support (~> 3.12.0)
|
75
|
+
rspec-expectations (3.12.0)
|
77
76
|
diff-lcs (>= 1.2.0, < 2.0)
|
78
|
-
rspec-support (~> 3.
|
79
|
-
rspec-mocks (3.
|
77
|
+
rspec-support (~> 3.12.0)
|
78
|
+
rspec-mocks (3.12.0)
|
80
79
|
diff-lcs (>= 1.2.0, < 2.0)
|
81
|
-
rspec-support (~> 3.
|
82
|
-
rspec-support (3.
|
83
|
-
rubocop (1.
|
80
|
+
rspec-support (~> 3.12.0)
|
81
|
+
rspec-support (3.12.0)
|
82
|
+
rubocop (1.38.0)
|
84
83
|
json (~> 2.3)
|
85
84
|
parallel (~> 1.10)
|
86
85
|
parser (>= 3.1.2.1)
|
87
86
|
rainbow (>= 2.2.2, < 4.0)
|
88
87
|
regexp_parser (>= 1.8, < 3.0)
|
89
88
|
rexml (>= 3.2.5, < 4.0)
|
90
|
-
rubocop-ast (>= 1.
|
89
|
+
rubocop-ast (>= 1.23.0, < 2.0)
|
91
90
|
ruby-progressbar (~> 1.7)
|
92
91
|
unicode-display_width (>= 1.4.0, < 3.0)
|
93
|
-
rubocop-ast (1.
|
92
|
+
rubocop-ast (1.23.0)
|
94
93
|
parser (>= 3.1.1.0)
|
95
94
|
ruby-progressbar (1.11.0)
|
96
95
|
tzinfo (2.0.5)
|
97
96
|
concurrent-ruby (~> 1.0)
|
98
97
|
unicode-display_width (2.3.0)
|
99
|
-
zeitwerk (2.6.1)
|
100
98
|
|
101
99
|
PLATFORMS
|
102
100
|
ruby
|
data/README.md
CHANGED
@@ -143,17 +143,18 @@ split. Kraps assigns every `key` to a partition, either using a custom
|
|
143
143
|
`partitioner` or the default built-in hash partitioner. The hash partitioner
|
144
144
|
simply calculates a hash of your key modulo the number of partitions and the
|
145
145
|
resulting partition number is the partition where the respective key is
|
146
|
-
assigned to. A partitioner is a callable which gets the key
|
147
|
-
returns a partition number. The built in hash
|
148
|
-
one:
|
146
|
+
assigned to. A partitioner is a callable which gets the key and the number of
|
147
|
+
partitions as arguments and returns a partition number. The built-in hash
|
148
|
+
partitioner looks similar to this one:
|
149
149
|
|
150
150
|
```ruby
|
151
|
-
partitioner = proc { |key| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) %
|
151
|
+
partitioner = proc { |key, num_partitions| Digest::SHA1.hexdigest(key.inspect)[0..4].to_i(16) % num_partitions }
|
152
152
|
```
|
153
153
|
|
154
154
|
Please note, it's important that the partitioner and the specified number of
|
155
155
|
partitions stay in sync. When you use a custom partitioner, please make sure
|
156
|
-
that the partitioner
|
156
|
+
that the partitioner correctly returns a partition number in the range of
|
157
|
+
`0...num_partitions`.
|
157
158
|
|
158
159
|
## Datatypes
|
159
160
|
|
data/lib/kraps/job.rb
CHANGED
@@ -6,10 +6,10 @@ module Kraps
|
|
6
6
|
@worker = worker
|
7
7
|
@steps = []
|
8
8
|
@partitions = 0
|
9
|
-
@partitioner =
|
9
|
+
@partitioner = HashPartitioner.new
|
10
10
|
end
|
11
11
|
|
12
|
-
def parallelize(partitions:, partitioner:
|
12
|
+
def parallelize(partitions:, partitioner: HashPartitioner.new, worker: @worker, &block)
|
13
13
|
fresh.tap do |job|
|
14
14
|
job.instance_eval do
|
15
15
|
@partitions = partitions
|
@@ -24,7 +24,7 @@ module Kraps
|
|
24
24
|
fresh.tap do |job|
|
25
25
|
job.instance_eval do
|
26
26
|
@partitions = partitions if partitions
|
27
|
-
@partitioner = partitioner
|
27
|
+
@partitioner = partitioner if partitioner
|
28
28
|
|
29
29
|
@steps << Step.new(action: Actions::MAP, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
|
30
30
|
end
|
data/lib/kraps/version.rb
CHANGED
data/lib/kraps/worker.rb
CHANGED
@@ -31,8 +31,12 @@ module Kraps
|
|
31
31
|
mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
|
32
32
|
mapper.map(@args["item"])
|
33
33
|
|
34
|
-
mapper.shuffle do |
|
35
|
-
|
34
|
+
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
35
|
+
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
36
|
+
File.open(path) do |stream|
|
37
|
+
Kraps.driver.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json"), stream, Kraps.driver.bucket)
|
38
|
+
end
|
39
|
+
end
|
36
40
|
end
|
37
41
|
end
|
38
42
|
|
@@ -68,10 +72,14 @@ module Kraps
|
|
68
72
|
end
|
69
73
|
end
|
70
74
|
|
71
|
-
mapper.shuffle do |
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
+
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
76
|
+
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
77
|
+
File.open(path) do |stream|
|
78
|
+
Kraps.driver.driver.store(
|
79
|
+
Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json"), stream, Kraps.driver.bucket
|
80
|
+
)
|
81
|
+
end
|
82
|
+
end
|
75
83
|
end
|
76
84
|
ensure
|
77
85
|
temp_paths&.unlink
|
@@ -173,7 +181,7 @@ module Kraps
|
|
173
181
|
end
|
174
182
|
|
175
183
|
def partitioner
|
176
|
-
@partitioner ||= step.args[:partitioner]
|
184
|
+
@partitioner ||= proc { |key| step.args[:partitioner].call(key, step.args[:partitions]) }
|
177
185
|
end
|
178
186
|
|
179
187
|
def distributed_job
|
data/lib/kraps.rb
CHANGED
@@ -2,6 +2,7 @@ require_relative "kraps/version"
|
|
2
2
|
require_relative "kraps/drivers"
|
3
3
|
require_relative "kraps/actions"
|
4
4
|
require_relative "kraps/parallelizer"
|
5
|
+
require_relative "kraps/hash_partitioner"
|
5
6
|
require_relative "kraps/temp_path"
|
6
7
|
require_relative "kraps/temp_paths"
|
7
8
|
require_relative "kraps/timeout_queue"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kraps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attachie
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 3.0.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 3.0.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: redis
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
@@ -132,6 +132,7 @@ extra_rdoc_files: []
|
|
132
132
|
files:
|
133
133
|
- ".rspec"
|
134
134
|
- ".rubocop.yml"
|
135
|
+
- CHANGELOG.md
|
135
136
|
- CODE_OF_CONDUCT.md
|
136
137
|
- Gemfile
|
137
138
|
- Gemfile.lock
|
@@ -143,6 +144,7 @@ files:
|
|
143
144
|
- lib/kraps/actions.rb
|
144
145
|
- lib/kraps/drivers.rb
|
145
146
|
- lib/kraps/frame.rb
|
147
|
+
- lib/kraps/hash_partitioner.rb
|
146
148
|
- lib/kraps/interval.rb
|
147
149
|
- lib/kraps/job.rb
|
148
150
|
- lib/kraps/parallelizer.rb
|