kraps 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +4 -0
- data/README.md +9 -0
- data/lib/kraps/actions.rb +2 -1
- data/lib/kraps/job.rb +18 -0
- data/lib/kraps/runner.rb +15 -0
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +51 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6ac0098a3c2f8acf49552f5b72621619345f9b59c83edd58368d7a999605f817
|
4
|
+
data.tar.gz: 3d522dd1149b57c9dce596ea4b38aa67ba5f2bc18a96bbc2840e80096e9eb192
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e859a2ecd0e635a067bde3cf3d51f76dc64702d91975342c590a43370d5a8a49da5c2272b6806550420537c74b3c7129ee0c76f58ff42ef0b154d7e8d0904417
|
7
|
+
data.tar.gz: 8e93aa69360b48b46f96a94cc2dd49d841b72fc858688c93071d46b99b86d50bf3b0758dd7f071cb50fd6ddc13cf532ed00d1ed1e4eee277d064568a11931f22
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -276,6 +276,15 @@ The `key` itself is also passed to the block for the case that you need to
|
|
276
276
|
customize the reduce calculation according to the value of the key. However,
|
277
277
|
most of the time, this is not necessary and the key can simply be ignored.
|
278
278
|
|
279
|
+
* `append`: Appends the results of 2 jobs, such that all key-value pairs
|
280
|
+
of both jobs will be in the result. `append` does not accept any block.
|
281
|
+
|
282
|
+
```ruby
|
283
|
+
job.append(other_job, worker: MyKrapsWorker, jobs: 8)
|
284
|
+
```
|
285
|
+
Please note that the partitioners and the number of partitions must match for
|
286
|
+
the jobs to be appended.
|
287
|
+
|
279
288
|
* `combine`: Combines the results of 2 jobs by combining every key available
|
280
289
|
in the current job result with the corresponding key from the passed job
|
281
290
|
result. When the passed job result does not have the corresponding key,
|
data/lib/kraps/actions.rb
CHANGED
data/lib/kraps/job.rb
CHANGED
@@ -103,6 +103,24 @@ module Kraps
|
|
103
103
|
end
|
104
104
|
end
|
105
105
|
|
106
|
+
def append(other_job, jobs: nil, worker: @worker, before: nil, &block)
|
107
|
+
fresh.tap do |job|
|
108
|
+
job.instance_eval do
|
109
|
+
@steps << Step.new(
|
110
|
+
action: Actions::APPEND,
|
111
|
+
jobs: [jobs, @partitions].compact.min,
|
112
|
+
partitions: @partitions,
|
113
|
+
partitioner: @partitioner,
|
114
|
+
worker: worker,
|
115
|
+
before: before,
|
116
|
+
block: block,
|
117
|
+
dependency: other_job,
|
118
|
+
options: { append_step_index: other_job.steps.size - 1 }
|
119
|
+
)
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
106
124
|
def each_partition(jobs: nil, worker: @worker, before: nil, &block)
|
107
125
|
fresh.tap do |job|
|
108
126
|
job.instance_eval do
|
data/lib/kraps/runner.rb
CHANGED
@@ -91,6 +91,21 @@ module Kraps
|
|
91
91
|
Frame.new(token: token, partitions: @step.partitions)
|
92
92
|
end
|
93
93
|
|
94
|
+
def perform_append
|
95
|
+
append_job = @step.dependency
|
96
|
+
append_step = append_job.steps[@step.options[:append_step_index]]
|
97
|
+
|
98
|
+
raise(IncompatibleFrame, "Incompatible number of partitions") if append_step.partitions != @step.partitions
|
99
|
+
|
100
|
+
enum = (0...@frame.partitions).map do |partition|
|
101
|
+
{ partition: partition, append_frame: append_step.frame.to_h }
|
102
|
+
end
|
103
|
+
|
104
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
105
|
+
|
106
|
+
Frame.new(token: token, partitions: @step.partitions)
|
107
|
+
end
|
108
|
+
|
94
109
|
def perform_each_partition
|
95
110
|
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
96
111
|
push_and_wait(job_count: @step.jobs, enum: enum)
|
data/lib/kraps/version.rb
CHANGED
data/lib/kraps/worker.rb
CHANGED
@@ -86,6 +86,57 @@ module Kraps
|
|
86
86
|
temp_paths&.delete
|
87
87
|
end
|
88
88
|
|
89
|
+
def perform_append(payload)
|
90
|
+
temp_paths1 = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
91
|
+
temp_paths2 = download_all(token: payload["append_frame"]["token"], partition: payload["partition"])
|
92
|
+
|
93
|
+
implementation = Object.new
|
94
|
+
implementation.define_singleton_method(:map) do |key, value, &block|
|
95
|
+
block.call(key, value)
|
96
|
+
end
|
97
|
+
|
98
|
+
subsequent_step = next_step
|
99
|
+
|
100
|
+
if subsequent_step&.action == Actions::REDUCE
|
101
|
+
implementation.define_singleton_method(:reduce) do |key, value1, value2|
|
102
|
+
subsequent_step.block.call(key, value1, value2)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
mapper = MapReduce::Mapper.new(implementation, partitioner: partitioner, memory_limit: @memory_limit)
|
107
|
+
|
108
|
+
temp_paths1.each do |temp_path|
|
109
|
+
File.open(temp_path.path) do |stream|
|
110
|
+
stream.each_line do |line|
|
111
|
+
key, value = JSON.parse(line)
|
112
|
+
|
113
|
+
mapper.map(key, value)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
temp_paths2.each do |temp_path|
|
119
|
+
File.open(temp_path.path) do |stream|
|
120
|
+
stream.each_line do |line|
|
121
|
+
key, value = JSON.parse(line)
|
122
|
+
|
123
|
+
mapper.map(key, value)
|
124
|
+
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
129
|
+
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
130
|
+
File.open(path) do |stream|
|
131
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
ensure
|
136
|
+
temp_paths1&.delete
|
137
|
+
temp_paths2&.delete
|
138
|
+
end
|
139
|
+
|
89
140
|
def perform_map_partitions(payload)
|
90
141
|
temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
91
142
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kraps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.0
|
4
|
+
version: 0.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attachie
|