kraps 0.9.0 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b24f67ff2122dc82d372eb421c379ac42f415674999958f42f71a2cdbee1a33
4
- data.tar.gz: d8297482eb38a30cb8ff6a7761a544833e3795a8f3a045343f6a10c370d886ca
3
+ metadata.gz: 6ac0098a3c2f8acf49552f5b72621619345f9b59c83edd58368d7a999605f817
4
+ data.tar.gz: 3d522dd1149b57c9dce596ea4b38aa67ba5f2bc18a96bbc2840e80096e9eb192
5
5
  SHA512:
6
- metadata.gz: 5543d1a8af8fa12007d38f00d9aa515eb1edf254d8bbc2aa8c133c0101dbe7ccaef5f0330929b6b84456a81eec7e5744091deace2d16ed2826f14cd56432db8f
7
- data.tar.gz: 14992d608157562da3af98207681a66cf0a9a9566861fe79c949ccdec9db6cb344bfb070147fc820eac70b076c15db373896f80b5bd8d5f8714f9ce75d7eb7c8
6
+ metadata.gz: e859a2ecd0e635a067bde3cf3d51f76dc64702d91975342c590a43370d5a8a49da5c2272b6806550420537c74b3c7129ee0c76f58ff42ef0b154d7e8d0904417
7
+ data.tar.gz: 8e93aa69360b48b46f96a94cc2dd49d841b72fc858688c93071d46b99b86d50bf3b0758dd7f071cb50fd6ddc13cf532ed00d1ed1e4eee277d064568a11931f22
data/.rubocop.yml CHANGED
@@ -54,6 +54,9 @@ Style/StringLiteralsInInterpolation:
54
54
  Enabled: true
55
55
  EnforcedStyle: double_quotes
56
56
 
57
+ Style/RedundantSelfAssignment:
58
+ Enabled: false
59
+
57
60
  Layout/LineLength:
58
61
  Max: 250
59
62
 
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## v0.10.0
4
+
5
+ * `append` operation added
6
+
3
7
  ## v0.9.0
4
8
 
5
9
  * Arguments are no longer passed to the `call` method, but to the
data/README.md CHANGED
@@ -276,6 +276,15 @@ The `key` itself is also passed to the block for the case that you need to
276
276
  customize the reduce calculation according to the value of the key. However,
277
277
  most of the time, this is not necessary and the key can simply be ignored.
278
278
 
279
+ * `append`: Appends the results of 2 jobs, such that all key-value pairs
280
+ of both jobs will be in the result. `append` does not accept any block.
281
+
282
+ ```ruby
283
+ job.append(other_job, worker: MyKrapsWorker, jobs: 8)
284
+ ```
285
+ Please note that the partitioners and the number of partitions must match for
286
+ the jobs to be appended.
287
+
279
288
  * `combine`: Combines the results of 2 jobs by combining every key available
280
289
  in the current job result with the corresponding key from the passed job
281
290
  result. When the passed job result does not have the corresponding key,
data/lib/kraps/actions.rb CHANGED
@@ -6,7 +6,8 @@ module Kraps
6
6
  MAP_PARTITIONS = "map_partitions",
7
7
  REDUCE = "reduce",
8
8
  COMBINE = "combine",
9
- EACH_PARTITION = "each_partition"
9
+ EACH_PARTITION = "each_partition",
10
+ APPEND = "append"
10
11
  ]
11
12
  end
12
13
  end
data/lib/kraps/job.rb CHANGED
@@ -103,6 +103,24 @@ module Kraps
103
103
  end
104
104
  end
105
105
 
106
+ def append(other_job, jobs: nil, worker: @worker, before: nil, &block)
107
+ fresh.tap do |job|
108
+ job.instance_eval do
109
+ @steps << Step.new(
110
+ action: Actions::APPEND,
111
+ jobs: [jobs, @partitions].compact.min,
112
+ partitions: @partitions,
113
+ partitioner: @partitioner,
114
+ worker: worker,
115
+ before: before,
116
+ block: block,
117
+ dependency: other_job,
118
+ options: { append_step_index: other_job.steps.size - 1 }
119
+ )
120
+ end
121
+ end
122
+ end
123
+
106
124
  def each_partition(jobs: nil, worker: @worker, before: nil, &block)
107
125
  fresh.tap do |job|
108
126
  job.instance_eval do
data/lib/kraps/runner.rb CHANGED
@@ -91,6 +91,21 @@ module Kraps
91
91
  Frame.new(token: token, partitions: @step.partitions)
92
92
  end
93
93
 
94
+ def perform_append
95
+ append_job = @step.dependency
96
+ append_step = append_job.steps[@step.options[:append_step_index]]
97
+
98
+ raise(IncompatibleFrame, "Incompatible number of partitions") if append_step.partitions != @step.partitions
99
+
100
+ enum = (0...@frame.partitions).map do |partition|
101
+ { partition: partition, append_frame: append_step.frame.to_h }
102
+ end
103
+
104
+ token = push_and_wait(job_count: @step.jobs, enum: enum)
105
+
106
+ Frame.new(token: token, partitions: @step.partitions)
107
+ end
108
+
94
109
  def perform_each_partition
95
110
  enum = (0...@frame.partitions).map { |partition| { partition: partition } }
96
111
  push_and_wait(job_count: @step.jobs, enum: enum)
data/lib/kraps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kraps
2
- VERSION = "0.9.0"
2
+ VERSION = "0.10.0"
3
3
  end
data/lib/kraps/worker.rb CHANGED
@@ -86,6 +86,57 @@ module Kraps
86
86
  temp_paths&.delete
87
87
  end
88
88
 
89
+ def perform_append(payload)
90
+ temp_paths1 = download_all(token: @args["frame"]["token"], partition: payload["partition"])
91
+ temp_paths2 = download_all(token: payload["append_frame"]["token"], partition: payload["partition"])
92
+
93
+ implementation = Object.new
94
+ implementation.define_singleton_method(:map) do |key, value, &block|
95
+ block.call(key, value)
96
+ end
97
+
98
+ subsequent_step = next_step
99
+
100
+ if subsequent_step&.action == Actions::REDUCE
101
+ implementation.define_singleton_method(:reduce) do |key, value1, value2|
102
+ subsequent_step.block.call(key, value1, value2)
103
+ end
104
+ end
105
+
106
+ mapper = MapReduce::Mapper.new(implementation, partitioner: partitioner, memory_limit: @memory_limit)
107
+
108
+ temp_paths1.each do |temp_path|
109
+ File.open(temp_path.path) do |stream|
110
+ stream.each_line do |line|
111
+ key, value = JSON.parse(line)
112
+
113
+ mapper.map(key, value)
114
+ end
115
+ end
116
+ end
117
+
118
+ temp_paths2.each do |temp_path|
119
+ File.open(temp_path.path) do |stream|
120
+ stream.each_line do |line|
121
+ key, value = JSON.parse(line)
122
+
123
+ mapper.map(key, value)
124
+ end
125
+ end
126
+ end
127
+
128
+ mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
129
+ Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
130
+ File.open(path) do |stream|
131
+ Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
132
+ end
133
+ end
134
+ end
135
+ ensure
136
+ temp_paths1&.delete
137
+ temp_paths2&.delete
138
+ end
139
+
89
140
  def perform_map_partitions(payload)
90
141
  temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
91
142
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kraps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Vetter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-03-13 00:00:00.000000000 Z
11
+ date: 2024-04-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attachie