kraps 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 15d08cf8952d4e5a083a6a4f9791fd16e9e2dbf67c1c71326f3af840c0c72eb8
4
- data.tar.gz: c6542584846c54e5897b7b59ef40e8ec282ee27521b2d5ff39551eb02755882d
3
+ metadata.gz: 921ae08326c96216136418861b88af7f11bce519c924cd1813216165f7f02690
4
+ data.tar.gz: '0913d31d3caeea0be664bc714e9d0da58227f515c047be31359e96040bc0c141'
5
5
  SHA512:
6
- metadata.gz: 21d1ef7a132edacf54e0b2df12b8d085af84ec1ed1cd019d258e43aba4cffbecdeada9b2b7f4baeefec4b59d115eb3e38400da94a3d7961ab19bbbb7dd2cf58c
7
- data.tar.gz: fde066e9fdc5f9df7e95be43142cb04a7a1c5279decb277f1d815db508c87d2c04be46ea9559069c8a2c9539ee2eaa949a2fe2fdc3bf862937f9211cdfd8fbd5
6
+ metadata.gz: d8e43e5229fc310019801e62a2e278470a1eb37b50e4aca27b9c64edb6666115f0f25c7a7375790516e2726fcf10980cdac1523c54dde8d3527a39fd919a2a5a
7
+ data.tar.gz: 30b1a9edcdd4f7ff476bfa4c070aef31debd727500e27a08b59f1df2663362c60e3cc3a3c860455d568abd994bb56a216f7eedf8baea6cc06ca73b1d0bdf9a07
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # CHANGELOG
2
2
 
3
+ ## v0.5.0
4
+
5
+ * Added a `before` option to specify a callable to run before
6
+ a step, e.g. to populate caches upfront.
7
+
3
8
  ## v0.4.0
4
9
 
5
10
  * Pre-reduce in a map step when the subsequent step is a
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- kraps (0.4.0)
4
+ kraps (0.5.0)
5
5
  attachie
6
6
  distributed_job
7
7
  map-reduce-ruby (>= 3.0.0)
data/README.md CHANGED
@@ -265,6 +265,19 @@ job.each_partition do |partition, pairs|
265
265
  end
266
266
  ```
267
267
 
268
+ Please note that every API method accepts a `before` callable:
269
+
270
+ ```ruby
271
+ before_block = proc do
272
+ # runs once before the map action in every worker, which can be useful to
273
+ # populate caches, for example.
274
+ end
275
+
276
+ job.map(before: before_block) do |key, value, collector|
277
+ # ...
278
+ end
279
+ ```
280
+
268
281
  ## More Complex Jobs
269
282
 
270
283
  Please note that a job class can return multiple jobs and jobs can build up on
data/lib/kraps/job.rb CHANGED
@@ -9,46 +9,74 @@ module Kraps
9
9
  @partitioner = HashPartitioner.new
10
10
  end
11
11
 
12
- def parallelize(partitions:, partitioner: HashPartitioner.new, worker: @worker, &block)
12
+ def parallelize(partitions:, partitioner: HashPartitioner.new, worker: @worker, before: nil, &block)
13
13
  fresh.tap do |job|
14
14
  job.instance_eval do
15
15
  @partitions = partitions
16
16
  @partitioner = partitioner
17
17
 
18
- @steps << Step.new(action: Actions::PARALLELIZE, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
18
+ @steps << Step.new(
19
+ action: Actions::PARALLELIZE,
20
+ partitions: @partitions,
21
+ partitioner: @partitioner,
22
+ worker: worker,
23
+ before: before,
24
+ block: block
25
+ )
19
26
  end
20
27
  end
21
28
  end
22
29
 
23
- def map(partitions: nil, partitioner: nil, worker: @worker, &block)
30
+ def map(partitions: nil, partitioner: nil, worker: @worker, before: nil, &block)
24
31
  fresh.tap do |job|
25
32
  job.instance_eval do
26
33
  @partitions = partitions if partitions
27
34
  @partitioner = partitioner if partitioner
28
35
 
29
- @steps << Step.new(action: Actions::MAP, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
36
+ @steps << Step.new(
37
+ action: Actions::MAP,
38
+ partitions: @partitions,
39
+ partitioner: @partitioner,
40
+ worker: worker,
41
+ before: before,
42
+ block: block
43
+ )
30
44
  end
31
45
  end
32
46
  end
33
47
 
34
- def reduce(worker: @worker, &block)
48
+ def reduce(worker: @worker, before: nil, &block)
35
49
  fresh.tap do |job|
36
50
  job.instance_eval do
37
- @steps << Step.new(action: Actions::REDUCE, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
51
+ @steps << Step.new(
52
+ action: Actions::REDUCE,
53
+ partitions: @partitions,
54
+ partitioner: @partitioner,
55
+ worker: worker,
56
+ before: before,
57
+ block: block
58
+ )
38
59
  end
39
60
  end
40
61
  end
41
62
 
42
- def each_partition(worker: @worker, &block)
63
+ def each_partition(worker: @worker, before: nil, &block)
43
64
  fresh.tap do |job|
44
65
  job.instance_eval do
45
- @steps << Step.new(action: Actions::EACH_PARTITION, args: { partitions: @partitions, partitioner: @partitioner, worker: worker }, block: block)
66
+ @steps << Step.new(
67
+ action: Actions::EACH_PARTITION,
68
+ partitions: @partitions,
69
+ partitioner: @partitioner,
70
+ worker: worker,
71
+ before: before,
72
+ block: block
73
+ )
46
74
  end
47
75
  end
48
76
  end
49
77
 
50
- def repartition(partitions:, partitioner: nil, worker: @worker)
51
- map(partitions: partitions, partitioner: partitioner, worker: worker) do |key, value, collector|
78
+ def repartition(partitions:, partitioner: nil, worker: @worker, before: nil)
79
+ map(partitions: partitions, partitioner: partitioner, worker: worker, before: before) do |key, value, collector|
52
80
  collector.call(key, value)
53
81
  end
54
82
  end
data/lib/kraps/runner.rb CHANGED
@@ -55,7 +55,7 @@ module Kraps
55
55
  enqueue(token: distributed_job.token, part: part, item: item)
56
56
  end
57
57
 
58
- Frame.new(token: distributed_job.token, partitions: @step.args[:partitions])
58
+ Frame.new(token: distributed_job.token, partitions: @step.partitions)
59
59
  end
60
60
  end
61
61
 
@@ -65,7 +65,7 @@ module Kraps
65
65
  enqueue(token: distributed_job.token, part: part, partition: partition)
66
66
  end
67
67
 
68
- Frame.new(token: distributed_job.token, partitions: @step.args[:partitions])
68
+ Frame.new(token: distributed_job.token, partitions: @step.partitions)
69
69
  end
70
70
  end
71
71
 
@@ -75,7 +75,7 @@ module Kraps
75
75
  enqueue(token: distributed_job.token, part: part, partition: partition)
76
76
  end
77
77
 
78
- Frame.new(token: distributed_job.token, partitions: @step.args[:partitions])
78
+ Frame.new(token: distributed_job.token, partitions: @step.partitions)
79
79
  end
80
80
  end
81
81
 
@@ -91,7 +91,7 @@ module Kraps
91
91
 
92
92
  def enqueue(token:, part:, **rest)
93
93
  Kraps.enqueuer.call(
94
- @step.args[:worker],
94
+ @step.worker,
95
95
  JSON.generate(
96
96
  job_index: @job_index,
97
97
  step_index: @step_index,
data/lib/kraps/step.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kraps
2
- Step = Struct.new(:action, :args, :block, :frame, keyword_init: true)
2
+ Step = Struct.new(:action, :partitioner, :partitions, :block, :worker, :before, :frame, keyword_init: true)
3
3
  end
data/lib/kraps/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Kraps
2
- VERSION = "0.4.0"
2
+ VERSION = "0.5.0"
3
3
  end
data/lib/kraps/worker.rb CHANGED
@@ -13,6 +13,8 @@ module Kraps
13
13
  raise(InvalidAction, "Invalid action #{step.action}") unless Actions::ALL.include?(step.action)
14
14
 
15
15
  with_retries(retries) do # TODO: allow to use queue based retries
16
+ step.before&.call
17
+
16
18
  send(:"perform_#{step.action}")
17
19
 
18
20
  distributed_job.done(@args["part"])
@@ -194,7 +196,7 @@ module Kraps
194
196
  end
195
197
 
196
198
  def partitioner
197
- @partitioner ||= proc { |key| step.args[:partitioner].call(key, step.args[:partitions]) }
199
+ @partitioner ||= proc { |key| step.partitioner.call(key, step.partitions) }
198
200
  end
199
201
 
200
202
  def distributed_job
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kraps
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Vetter
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-11-09 00:00:00.000000000 Z
11
+ date: 2022-11-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attachie