kraps 0.9.0 → 0.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +4 -0
- data/README.md +9 -0
- data/lib/kraps/actions.rb +2 -1
- data/lib/kraps/job.rb +18 -0
- data/lib/kraps/runner.rb +15 -0
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +51 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ac0098a3c2f8acf49552f5b72621619345f9b59c83edd58368d7a999605f817
|
4
|
+
data.tar.gz: 3d522dd1149b57c9dce596ea4b38aa67ba5f2bc18a96bbc2840e80096e9eb192
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e859a2ecd0e635a067bde3cf3d51f76dc64702d91975342c590a43370d5a8a49da5c2272b6806550420537c74b3c7129ee0c76f58ff42ef0b154d7e8d0904417
|
7
|
+
data.tar.gz: 8e93aa69360b48b46f96a94cc2dd49d841b72fc858688c93071d46b99b86d50bf3b0758dd7f071cb50fd6ddc13cf532ed00d1ed1e4eee277d064568a11931f22
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -276,6 +276,15 @@ The `key` itself is also passed to the block for the case that you need to
|
|
276
276
|
customize the reduce calculation according to the value of the key. However,
|
277
277
|
most of the time, this is not necessary and the key can simply be ignored.
|
278
278
|
|
279
|
+
* `append`: Appends the results of 2 jobs, such that all key-value pairs
|
280
|
+
of both jobs will be in the result. `append` does not accept any block.
|
281
|
+
|
282
|
+
```ruby
|
283
|
+
job.append(other_job, worker: MyKrapsWorker, jobs: 8)
|
284
|
+
```
|
285
|
+
Please note that the partitioners and the number of partitions must match for
|
286
|
+
the jobs to be appended.
|
287
|
+
|
279
288
|
* `combine`: Combines the results of 2 jobs by combining every key available
|
280
289
|
in the current job result with the corresponding key from the passed job
|
281
290
|
result. When the passed job result does not have the corresponding key,
|
data/lib/kraps/actions.rb
CHANGED
data/lib/kraps/job.rb
CHANGED
@@ -103,6 +103,24 @@ module Kraps
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
+
def append(other_job, jobs: nil, worker: @worker, before: nil, &block)
|
107
|
+
fresh.tap do |job|
|
108
|
+
job.instance_eval do
|
109
|
+
@steps << Step.new(
|
110
|
+
action: Actions::APPEND,
|
111
|
+
jobs: [jobs, @partitions].compact.min,
|
112
|
+
partitions: @partitions,
|
113
|
+
partitioner: @partitioner,
|
114
|
+
worker: worker,
|
115
|
+
before: before,
|
116
|
+
block: block,
|
117
|
+
dependency: other_job,
|
118
|
+
options: { append_step_index: other_job.steps.size - 1 }
|
119
|
+
)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
106
124
|
def each_partition(jobs: nil, worker: @worker, before: nil, &block)
|
107
125
|
fresh.tap do |job|
|
108
126
|
job.instance_eval do
|
data/lib/kraps/runner.rb
CHANGED
@@ -91,6 +91,21 @@ module Kraps
|
|
91
91
|
Frame.new(token: token, partitions: @step.partitions)
|
92
92
|
end
|
93
93
|
|
94
|
+
def perform_append
|
95
|
+
append_job = @step.dependency
|
96
|
+
append_step = append_job.steps[@step.options[:append_step_index]]
|
97
|
+
|
98
|
+
raise(IncompatibleFrame, "Incompatible number of partitions") if append_step.partitions != @step.partitions
|
99
|
+
|
100
|
+
enum = (0...@frame.partitions).map do |partition|
|
101
|
+
{ partition: partition, append_frame: append_step.frame.to_h }
|
102
|
+
end
|
103
|
+
|
104
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
105
|
+
|
106
|
+
Frame.new(token: token, partitions: @step.partitions)
|
107
|
+
end
|
108
|
+
|
94
109
|
def perform_each_partition
|
95
110
|
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
96
111
|
push_and_wait(job_count: @step.jobs, enum: enum)
|
data/lib/kraps/version.rb
CHANGED
data/lib/kraps/worker.rb
CHANGED
@@ -86,6 +86,57 @@ module Kraps
|
|
86
86
|
temp_paths&.delete
|
87
87
|
end
|
88
88
|
|
89
|
+
def perform_append(payload)
|
90
|
+
temp_paths1 = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
91
|
+
temp_paths2 = download_all(token: payload["append_frame"]["token"], partition: payload["partition"])
|
92
|
+
|
93
|
+
implementation = Object.new
|
94
|
+
implementation.define_singleton_method(:map) do |key, value, &block|
|
95
|
+
block.call(key, value)
|
96
|
+
end
|
97
|
+
|
98
|
+
subsequent_step = next_step
|
99
|
+
|
100
|
+
if subsequent_step&.action == Actions::REDUCE
|
101
|
+
implementation.define_singleton_method(:reduce) do |key, value1, value2|
|
102
|
+
subsequent_step.block.call(key, value1, value2)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
mapper = MapReduce::Mapper.new(implementation, partitioner: partitioner, memory_limit: @memory_limit)
|
107
|
+
|
108
|
+
temp_paths1.each do |temp_path|
|
109
|
+
File.open(temp_path.path) do |stream|
|
110
|
+
stream.each_line do |line|
|
111
|
+
key, value = JSON.parse(line)
|
112
|
+
|
113
|
+
mapper.map(key, value)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
temp_paths2.each do |temp_path|
|
119
|
+
File.open(temp_path.path) do |stream|
|
120
|
+
stream.each_line do |line|
|
121
|
+
key, value = JSON.parse(line)
|
122
|
+
|
123
|
+
mapper.map(key, value)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
129
|
+
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
130
|
+
File.open(path) do |stream|
|
131
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
ensure
|
136
|
+
temp_paths1&.delete
|
137
|
+
temp_paths2&.delete
|
138
|
+
end
|
139
|
+
|
89
140
|
def perform_map_partitions(payload)
|
90
141
|
temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
91
142
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kraps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attachie
|