kraps 0.6.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +2 -5
- data/README.md +23 -13
- data/docker-compose.yml +1 -1
- data/lib/kraps/downloader.rb +19 -0
- data/lib/kraps/job.rb +26 -18
- data/lib/kraps/redis_queue.rb +151 -0
- data/lib/kraps/runner.rb +48 -79
- data/lib/kraps/step.rb +1 -1
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +42 -43
- data/lib/kraps.rb +16 -5
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d261c779e82209152e26decbc6c5a6c5c5ddb0fb40803884383617635727d3b2
+  data.tar.gz: 1b9c6fa8db7a7811cbac5a7a5db518e1f3ee75df583521b64417341e830425f4
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dcb05139042149be087b1a2c7f14a31cd5e28dedb1517aca83299f63b90046e4d05e0ab19dfaeede329e784880623abda19675252cdeaad04f8ccd87249afde5
+  data.tar.gz: 10fd07c322c659ae21a682832eba30416c830f9d2146af685d69168ad5137045ef4268c0a43cee4e879bb875edf900ca740bbe4cbfe8b91b34ad3df40763bce0
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
 # CHANGELOG
 
+## v0.8.0
+
+* Use number of partitions of previous step for `jobs` option by default
+* Changed `combine` to receive a `collector`
+* Added mandatory `concurrency` argument to `load`
+
+## v0.7.0
+
+* Added a `jobs` option to the actions to limit the concurrency
+  when e.g. accessing external data stores and to avoid overloading
+  them
+* Added a queue using redis for the jobs to avoid starving workers
+* Removed `distributed_job` dependency
+
 ## v0.6.0
 
 * Added `map_partitions`
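Of these changes, the new `combine` signature is the only one that breaks existing blocks. A minimal before/after sketch, using the `other_job` and `MyKrapsWorker` names from the README examples further below:

```ruby
# v0.7.0 and earlier: the block returned the combined value
job.combine(other_job, worker: MyKrapsWorker) do |key, value1, value2|
  (value1 || {}).merge(value2 || {})
end

# v0.8.0: the block receives a collector and emits pairs explicitly
job.combine(other_job, worker: MyKrapsWorker) do |key, value1, value2, collector|
  collector.call(key, (value1 || {}).merge(value2 || {}))
end
```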
data/Gemfile.lock
CHANGED
@@ -1,9 +1,8 @@
 PATH
   remote: .
   specs:
-    kraps (0.
+    kraps (0.7.0)
       attachie
-      distributed_job
       map-reduce-ruby (>= 3.0.0)
       redis
       ruby-progressbar
@@ -41,8 +40,6 @@ GEM
     concurrent-ruby (1.1.10)
     connection_pool (2.3.0)
     diff-lcs (1.5.0)
-    distributed_job (3.1.0)
-      redis (>= 4.1.0)
     i18n (1.12.0)
       concurrent-ruby (~> 1.0)
     jmespath (1.6.1)
@@ -62,7 +59,7 @@ GEM
     rake (13.0.6)
     redis (5.0.5)
       redis-client (>= 0.9.0)
-    redis-client (0.11.
+    redis-client (0.11.2)
       connection_pool
     regexp_parser (2.6.0)
     rexml (3.2.5)
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Kraps.configure(
   driver: Kraps::Drivers::S3Driver.new(s3_client: Aws::S3::Client.new("..."), bucket: "some-bucket", prefix: "temp/kraps/"),
   redis: Redis.new,
   namespace: "my-application", # An optional namespace to be used for redis keys, default: nil
-  job_ttl:
+  job_ttl: 7.days, # Job information in redis will automatically be removed after this amount of time, default: 4 days
   show_progress: true # Whether or not to show the progress in the terminal when executing jobs, default: true
   enqueuer: ->(worker, json) { worker.perform_async(json) } # Allows to customize the enqueueing of worker jobs
 )
@@ -220,7 +220,7 @@ items are used as keys and the values are set to `nil`.
 * `map`: Maps the key value pairs to other key value pairs
 
 ```ruby
-job.map(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker) do |key, value, collector|
+job.map(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8) do |key, value, collector|
   collector.call("changed #{key}", "changed #{value}")
 end
 ```
@@ -229,13 +229,22 @@ The block gets each key-value pair passed and the `collector` block can be
 called as often as neccessary. This is also the reason why `map` can not simply
 return the new key-value pair, but the `collector` must be used instead.
 
+The `jobs` argument can be useful when you need to access an external data
+source, like a relational database and you want to limit the number of workers
+accessing the store concurrently to avoid overloading it. If you don't specify
+it, it will be identical to the number of partitions of the previous step. It
+is recommended to only use it for steps where you need to throttle the
+concurrency, because it will of course slow down the processing. The `jobs`
+argument only applies to the current step. The following steps don't inherit
+the argument, but reset it.
+
 * `map_partitions`: Maps the key value pairs to other key value pairs, but the
   block receives all data of each partition as an enumerable and sorted by key.
   Please be aware that you should not call `to_a` or similar on the enumerable.
   Prefer `map` over `map_partitions` when possible.
 
 ```ruby
-job.map_partitions(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker) do |pairs, collector|
+job.map_partitions(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8) do |pairs, collector|
   pairs.each do |key, value|
     collector.call("changed #{key}", "changed #{value}")
   end
@@ -245,7 +254,7 @@ end
 * `reduce`: Reduces the values of pairs having the same key
 
 ```ruby
-job.reduce(worker: MyKrapsWorker) do |key, value1, value2|
+job.reduce(worker: MyKrapsWorker, jobs: 8) do |key, value1, value2|
   value1 + value2
 end
 ```
@@ -265,8 +274,8 @@ most of the time, this is not neccessary and the key can simply be ignored.
   passed job result are completely omitted.
 
 ```ruby
-job.combine(other_job, worker: MyKrapsWorker) do |key, value1, value2|
-  (value1 || {}).merge(value2 || {})
+job.combine(other_job, worker: MyKrapsWorker, jobs: 8) do |key, value1, value2, collector|
+  collector.call(key, (value1 || {}).merge(value2 || {}))
 end
 ```
 
@@ -279,7 +288,7 @@ since Kraps detects the dependency on its own.
 * `repartition`: Used to change the partitioning
 
 ```ruby
-job.repartition(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker)
+job.repartition(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8)
 ```
 
 Repartitions all data into the specified number of partitions and using the
@@ -290,7 +299,7 @@ specified partitioner.
 `to_a` or similar on the enumerable.
 
 ```ruby
-job.each_partition do |partition, pairs|
+job.each_partition(jobs: 8) do |partition, pairs|
   pairs.each do |key, value|
     # ...
   end
@@ -308,10 +317,12 @@ It creates a folder for every partition and stores one or more chunks in there.
 * `load`: Loads the previously dumped data
 
 ```ruby
-job.load(prefix: "path/to/dump", partitions: 32, partitioner: Kraps::HashPartitioner.new, worker: MyKrapsWorker)
+job.load(prefix: "path/to/dump", partitions: 32, concurrency: 8, partitioner: Kraps::HashPartitioner.new, worker: MyKrapsWorker)
 ```
 
-The number of partitions
+The number of partitions, the partitioner and concurrency must be specified.
+The concurrency specifies the number of threads used for downloading chunks in
+parallel.
 
 Please note that every API method accepts a `before` callable:
 
@@ -379,7 +390,8 @@ jobs only once.
 Kraps ships with an in-memory fake driver for storage, which you can use for
 testing purposes instead of the s3 driver:
 
-```ruby
+```ruby
+Kraps.configure(
   driver: Kraps::Drivers::FakeDriver.new(bucket: "kraps"),
   # ...
 )
@@ -425,8 +437,6 @@ The API of the driver is:
 Kraps is built on top of
 [map-reduce-ruby](https://github.com/mrkamel/map-reduce-ruby) for the
 map/reduce framework,
-[distributed_job](https://github.com/mrkamel/distributed_job)
-to keep track of the job/step status,
 [attachie](https://github.com/mrkamel/attachie) to interact with the storage
 layer (s3),
 [ruby-progressbar](https://github.com/jfelchner/ruby-progressbar) to
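The scoping rule from the new README paragraph ("the following steps don't inherit the argument, but reset it") in one short chain, reusing the README's names:

```ruby
# The reduce step is throttled to 4 concurrent jobs; the following map step
# does not inherit the limit and falls back to the default, i.e. the number
# of partitions of the previous step.
job = job.reduce(worker: MyKrapsWorker, jobs: 4) { |_key, value1, value2| value1 + value2 }
job = job.map(worker: MyKrapsWorker) { |key, value, collector| collector.call(key, value) }
```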
data/docker-compose.yml
CHANGED
data/lib/kraps/downloader.rb
ADDED
@@ -0,0 +1,19 @@
+module Kraps
+  class Downloader
+    def self.download_all(prefix:, concurrency:)
+      temp_paths = TempPaths.new
+
+      files = Kraps.driver.list(prefix: prefix).sort
+
+      temp_paths_index = files.each_with_object({}) do |file, hash|
+        hash[file] = temp_paths.add
+      end
+
+      Parallelizer.each(files, concurrency) do |file|
+        Kraps.driver.download(file, temp_paths_index[file].path)
+      end
+
+      temp_paths
+    end
+  end
+end
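This extracts the download loop that `job.rb` and `worker.rb` now share (see their diffs below). A usage sketch, assuming a configured driver and chunks stored under an existing `token/0/` prefix:

```ruby
temp_paths = Kraps::Downloader.download_all(prefix: "token/0/", concurrency: 8)

begin
  # Each entry is a tempfile holding one downloaded chunk
  temp_paths.each { |temp_path| puts temp_path.path }
ensure
  temp_paths.delete # remove the tempfiles, mirroring the ensure blocks in the diffs below
end
```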
data/lib/kraps/job.rb
CHANGED
@@ -27,14 +27,17 @@ module Kraps
       end
     end
 
-    def map(partitions: nil, partitioner: nil, worker: @worker, before: nil, &block)
+    def map(partitions: nil, partitioner: nil, jobs: nil, worker: @worker, before: nil, &block)
       fresh.tap do |job|
         job.instance_eval do
+          jobs = [jobs, @partitions].compact.min
+
           @partitions = partitions if partitions
           @partitioner = partitioner if partitioner
 
           @steps << Step.new(
             action: Actions::MAP,
+            jobs: jobs,
             partitions: @partitions,
             partitioner: @partitioner,
             worker: worker,
@@ -45,14 +48,17 @@ module Kraps
       end
     end
 
-    def map_partitions(partitions: nil, partitioner: nil, worker: @worker, before: nil, &block)
+    def map_partitions(partitions: nil, partitioner: nil, jobs: nil, worker: @worker, before: nil, &block)
       fresh.tap do |job|
         job.instance_eval do
+          jobs = [jobs, @partitions].compact.min
+
           @partitions = partitions if partitions
           @partitioner = partitioner if partitioner
 
           @steps << Step.new(
             action: Actions::MAP_PARTITIONS,
+            jobs: jobs,
             partitions: @partitions,
             partitioner: @partitioner,
             worker: worker,
@@ -63,11 +69,12 @@ module Kraps
       end
     end
 
-    def reduce(worker: @worker, before: nil, &block)
+    def reduce(jobs: nil, worker: @worker, before: nil, &block)
       fresh.tap do |job|
         job.instance_eval do
           @steps << Step.new(
             action: Actions::REDUCE,
+            jobs: [jobs, @partitions].compact.min,
             partitions: @partitions,
             partitioner: @partitioner,
             worker: worker,
@@ -78,11 +85,12 @@ module Kraps
       end
     end
 
-    def combine(other_job, worker: @worker, before: nil, &block)
+    def combine(other_job, jobs: nil, worker: @worker, before: nil, &block)
       fresh.tap do |job|
         job.instance_eval do
           @steps << Step.new(
             action: Actions::COMBINE,
+            jobs: [jobs, @partitions].compact.min,
             partitions: @partitions,
             partitioner: @partitioner,
             worker: worker,
@@ -95,11 +103,12 @@ module Kraps
       end
     end
 
-    def each_partition(worker: @worker, before: nil, &block)
+    def each_partition(jobs: nil, worker: @worker, before: nil, &block)
       fresh.tap do |job|
         job.instance_eval do
           @steps << Step.new(
             action: Actions::EACH_PARTITION,
+            jobs: [jobs, @partitions].compact.min,
             partitions: @partitions,
             partitioner: @partitioner,
             worker: worker,
@@ -110,8 +119,8 @@ module Kraps
       end
     end
 
-    def repartition(partitions:, partitioner: nil, worker: @worker, before: nil)
-      map(partitions: partitions, partitioner: partitioner, worker: worker, before: before) do |key, value, collector|
+    def repartition(partitions:, jobs: nil, partitioner: nil, worker: @worker, before: nil)
+      map(jobs: jobs, partitions: partitions, partitioner: partitioner, worker: worker, before: before) do |key, value, collector|
        collector.call(key, value)
      end
    end
@@ -130,7 +139,7 @@ module Kraps
       end
     end
 
-    def load(prefix:, partitions:, partitioner:, worker: @worker)
+    def load(prefix:, partitions:, partitioner:, concurrency:, worker: @worker)
       job = parallelize(partitions: partitions, partitioner: proc { |key, _| key }, worker: worker) do |collector|
         (0...partitions).each do |partition|
           collector.call(partition)
@@ -138,20 +147,19 @@ module Kraps
       end
 
       job.map_partitions(partitioner: partitioner, worker: worker) do |partition, _, collector|
-
+        temp_paths = Downloader.download_all(prefix: File.join(prefix, partition.to_s, "/"), concurrency: concurrency)
 
-
-
+        temp_paths.each do |temp_path|
+          File.open(temp_path.path) do |stream|
+            stream.each_line do |line|
+              key, value = JSON.parse(line)
 
-
-
-
-        key, value = JSON.parse(line)
-
-        collector.call(key, value)
+              collector.call(key, value)
+            end
+          end
       end
     ensure
-
+      temp_paths&.delete
     end
   end
 
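Each action computes its effective `jobs` value as `[jobs, @partitions].compact.min`, so the limit can never exceed the number of partitions of the previous step, and `nil` (the default) resolves to exactly that number. A quick check of the expression:

```ruby
# With 32 partitions of the previous step:
[100, 32].compact.min # => 32 (jobs: 100 is capped at the partition count)
[8, 32].compact.min   # => 8  (jobs: 8 is honored)
[nil, 32].compact.min # => 32 (no jobs option given)
```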
data/lib/kraps/redis_queue.rb
ADDED
@@ -0,0 +1,151 @@
+module Kraps
+  class RedisQueue
+    VISIBILITY_TIMEOUT = 60
+
+    attr_reader :token
+
+    def initialize(redis:, token:, namespace:, ttl:)
+      @redis = redis
+      @token = token
+      @namespace = namespace
+      @ttl = ttl
+    end
+
+    def size
+      @size_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+
+        return redis.call('llen', queue_key) + redis.call('zcard', pending_key)
+      SCRIPT
+
+      @redis.eval(@size_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl])
+    end
+
+    def enqueue(payload)
+      @enqueue_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
+
+        redis.call('rpush', queue_key, job)
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+      SCRIPT
+
+      @redis.eval(@enqueue_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, JSON.generate(payload)])
+    end
+
+    def dequeue
+      @dequeue_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl, visibility_timeout = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), tonumber(ARGV[5])
+
+        local zitem = redis.call('zrange', pending_key, 0, 0, 'WITHSCORES')
+        local job = zitem[1]
+
+        if not zitem[2] or tonumber(zitem[2]) > tonumber(redis.call('time')[1]) then
+          job = redis.call('lpop', queue_key)
+        end
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+
+        if not job then return nil end
+
+        redis.call('zadd', pending_key, tonumber(redis.call('time')[1]) + visibility_timeout, job)
+        redis.call('expire', pending_key, ttl)
+
+        return job
+      SCRIPT
+
+      job = @redis.eval(@dequeue_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, VISIBILITY_TIMEOUT])
+
+      unless job
+        yield(nil)
+        return
+      end
+
+      keep_alive(job) do
+        yield(JSON.parse(job)) if job
+      end
+
+      @remove_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
+
+        redis.call('zrem', pending_key, job)
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+      SCRIPT
+
+      @redis.eval(@remove_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, job])
+    end
+
+    def stop
+      @stop_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4])
+
+        redis.call('hset', status_key, 'stopped', 1)
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+      SCRIPT
+
+      @redis.eval(@stop_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl])
+    end
+
+    def stopped?
+      @stopped_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4])
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+
+        return redis.call('hget', status_key, 'stopped')
+      SCRIPT
+
+      @redis.eval(@stopped_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl]).to_i == 1
+    end
+
+    private
+
+    def keep_alive(job)
+      @keep_alive_script ||= <<~SCRIPT
+        local queue_key, pending_key, status_key, ttl, job, visibility_timeout = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5], tonumber(ARGV[6])
+
+        redis.call('zadd', pending_key, tonumber(redis.call('time')[1]) + visibility_timeout, job)
+
+        redis.call('expire', queue_key, ttl)
+        redis.call('expire', pending_key, ttl)
+        redis.call('expire', status_key, ttl)
+      SCRIPT
+
+      interval = Interval.new(5) do
+        @redis.eval(@keep_alive_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, job, VISIBILITY_TIMEOUT])
+      end
+
+      yield
+    ensure
+      interval&.stop
+    end
+
+    def redis_queue_key
+      [@namespace, "kraps", "queue", @token].compact.join(":")
+    end
+
+    def redis_pending_key
+      [@namespace, "kraps", "pending", @token].compact.join(":")
+    end
+
+    def redis_status_key
+      [@namespace, "kraps", "status", @token].compact.join(":")
+    end
+  end
+end
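A minimal lifecycle sketch (token and ttl are example values; `Interval` comes from kraps itself): `dequeue` pops a job from the list, or re-claims a pending job whose visibility timeout has expired, re-scores it in the pending set every 5 seconds while the block runs, and removes it afterwards, so a job from a crashed worker becomes visible to other workers again after `VISIBILITY_TIMEOUT` seconds:

```ruby
queue = Kraps::RedisQueue.new(redis: Redis.new, token: "some-token", namespace: nil, ttl: 4 * 24 * 60 * 60)

queue.enqueue(partition: 0, part: 0)
queue.size # => 1 (list entries plus pending entries)

queue.dequeue do |payload|
  # payload is nil when the queue is currently drained
  puts payload["partition"] if payload
end

queue.stopped? # => false; queue.stop flips this flag for all workers
```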
data/lib/kraps/runner.rb
CHANGED
@@ -45,48 +45,35 @@ module Kraps
 
     def perform_parallelize
       enum = Enumerator.new do |yielder|
-        collector = proc { |item| yielder << item }
+        collector = proc { |item| yielder << { item: item } }
 
         @step.block.call(collector)
       end
 
-
-      push_and_wait(distributed_job, enum) do |item, part|
-        enqueue(token: distributed_job.token, part: part, item: item)
-      end
+      token = push_and_wait(enum: enum)
 
-
-      end
+      Frame.new(token: token, partitions: @step.partitions)
     end
 
     def perform_map
-
-
-        enqueue(token: distributed_job.token, part: part, partition: partition)
-      end
+      enum = (0...@frame.partitions).map { |partition| { partition: partition } }
+      token = push_and_wait(job_count: @step.jobs, enum: enum)
 
-
-      end
+      Frame.new(token: token, partitions: @step.partitions)
     end
 
     def perform_map_partitions
-
-
-        enqueue(token: distributed_job.token, part: part, partition: partition)
-      end
+      enum = (0...@frame.partitions).map { |partition| { partition: partition } }
+      token = push_and_wait(job_count: @step.jobs, enum: enum)
 
-
-      end
+      Frame.new(token: token, partitions: @step.partitions)
     end
 
     def perform_reduce
-
-
-        enqueue(token: distributed_job.token, part: part, partition: partition)
-      end
+      enum = (0...@frame.partitions).map { |partition| { partition: partition } }
+      token = push_and_wait(job_count: @step.jobs, enum: enum)
 
-
-      end
+      Frame.new(token: token, partitions: @step.partitions)
     end
 
     def perform_combine
@@ -95,82 +82,64 @@ module Kraps
 
       raise(IncompatibleFrame, "Incompatible number of partitions") if combine_step.partitions != @step.partitions
 
-
-
-        enqueue(token: distributed_job.token, part: part, partition: partition, combine_frame: combine_step.frame.to_h)
-      end
-
-      Frame.new(token: distributed_job.token, partitions: @step.partitions)
+      enum = (0...@frame.partitions).map do |partition|
+        { partition: partition, combine_frame: combine_step.frame.to_h }
       end
+
+      token = push_and_wait(job_count: @step.jobs, enum: enum)
+
+      Frame.new(token: token, partitions: @step.partitions)
     end
 
     def perform_each_partition
-
-
-        enqueue(token: distributed_job.token, part: part, partition: partition)
-      end
+      enum = (0...@frame.partitions).map { |partition| { partition: partition } }
+      push_and_wait(job_count: @step.jobs, enum: enum)
 
-
-      end
+      @frame
     end
 
-    def
-      Kraps.
-
-        JSON.generate(
-          job_index: @job_index,
-          step_index: @step_index,
-          frame: @frame.to_h,
-          token: token,
-          part: part,
-          klass: @klass,
-          args: @args,
-          kwargs: @kwargs,
-          **rest
-        )
-      )
-    end
+    def push_and_wait(enum:, job_count: nil)
+      redis_queue = RedisQueue.new(redis: Kraps.redis, token: SecureRandom.hex, namespace: Kraps.namespace, ttl: Kraps.job_ttl)
+      progress_bar = build_progress_bar("#{@klass}: job #{@job_index + 1}/#{@jobs.size}, step #{@step_index + 1}/#{@job.steps.size}, #{@step.jobs || "?"} jobs, token #{redis_queue.token}, %a, %c/%C (%p%) => #{@step.action}")
 
-
-      distributed_job = Kraps.distributed_job_client.build(token: SecureRandom.hex)
+      total = 0
 
-
-
-
-
-      end
+      interval = Interval.new(1) do
+        # The interval is used to continously update the progress bar even
+        # when push_all is used and to avoid sessions being terminated due
+        # to inactivity etc
 
-
-
+        progress_bar.total = total
+        progress_bar.progress = [progress_bar.total - redis_queue.size, 0].max
+      end
 
-
-      total
+      enum.each_with_index do |item, part|
+        total += 1
 
-
-
-      end
+        redis_queue.enqueue(item.merge(part: part))
+      end
 
-
-
-      interval.fire(timeout: 1)
+      (job_count || total).times do
+        break if redis_queue.stopped?
 
-
-      end
-    ensure
-      interval&.stop
+        Kraps.enqueuer.call(@step.worker, JSON.generate(job_index: @job_index, step_index: @step_index, frame: @frame.to_h, token: redis_queue.token, klass: @klass, args: @args, kwargs: @kwargs))
       end
 
       loop do
-
-
-
-        break if distributed_job.finished? || distributed_job.stopped?
+        break if redis_queue.size.zero?
+        break if redis_queue.stopped?
 
         sleep(1)
       end
 
-      raise(JobStopped, "The job was stopped") if
+      raise(JobStopped, "The job was stopped") if redis_queue.stopped?
+
+      interval.fire(timeout: 1)
+
+      redis_queue.token
     ensure
+      redis_queue&.stop
+      interval&.stop
       progress_bar&.stop
     end
 
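Condensed, the new scheduling model works as sketched below. This is not the verbatim source; the state names are illustrative stand-ins for what `push_and_wait` holds, and `MyKrapsWorker` is the README's example worker. Every partition becomes one queue entry, but only `job_count` workers are enqueued, and they all pull from the same Redis queue, which limits per-step concurrency and keeps the rest of the worker pool free instead of starving it:

```ruby
require "json"
require "securerandom"

# Illustrative stand-ins for the runner's state
redis_queue = Kraps::RedisQueue.new(redis: Redis.new, token: SecureRandom.hex, namespace: nil, ttl: 4 * 24 * 60 * 60)
partitions  = 8
job_count   = 2

# One queue entry per partition
(0...partitions).each_with_index do |partition, part|
  redis_queue.enqueue(partition: partition, part: part)
end

# Fan out only job_count workers against the shared queue
job_count.times do
  MyKrapsWorker.perform_async(JSON.generate(token: redis_queue.token)) # plus job/step metadata in the real payload
end

# Wait until the queue drains or a worker stops the job
sleep(1) until redis_queue.size.zero? || redis_queue.stopped?
```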
data/lib/kraps/step.rb
CHANGED
@@ -1,3 +1,3 @@
 module Kraps
-  Step = Struct.new(:action, :partitioner, :partitions, :block, :worker, :before, :frame, :dependency, :options, keyword_init: true)
+  Step = Struct.new(:action, :partitioner, :partitions, :jobs, :block, :worker, :before, :frame, :dependency, :options, keyword_init: true)
 end
data/lib/kraps/version.rb
CHANGED
data/lib/kraps/worker.rb
CHANGED
@@ -11,22 +11,22 @@ module Kraps
     end
 
     def call(retries: 3)
-      return if
+      return if redis_queue.stopped?
 
       raise(InvalidAction, "Invalid action #{step.action}") unless Actions::ALL.include?(step.action)
 
-
-
+      dequeue do |payload|
+        with_retries(retries) do # TODO: allow to use queue based retries
+          step.before&.call
 
-
-
-          distributed_job.done(@args["part"])
+          send(:"perform_#{step.action}", payload)
+        end
       end
     end
 
     private
 
-    def perform_parallelize
+    def perform_parallelize(payload)
       implementation = Class.new do
         def map(key)
           yield(key, nil)
@@ -34,19 +34,19 @@ module Kraps
       end
 
       mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
-      mapper.map(
+      mapper.map(payload["item"])
 
       mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
           File.open(path) do |stream|
-            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
+            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["part"]}.json"), stream)
           end
         end
       end
     end
 
-    def perform_map
-      temp_paths = download_all(token: @args["frame"]["token"], partition:
+    def perform_map(payload)
+      temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
 
       current_step = step
 
@@ -78,7 +78,7 @@ module Kraps
       mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
           File.open(path) do |stream|
-            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
+            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
           end
         end
       end
@@ -86,11 +86,11 @@ module Kraps
       temp_paths&.delete
     end
 
-    def perform_map_partitions
-      temp_paths = download_all(token: @args["frame"]["token"], partition:
+    def perform_map_partitions(payload)
+      temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
 
       current_step = step
-      current_partition =
+      current_partition = payload["partition"]
 
       implementation = Object.new
       implementation.define_singleton_method(:map) do |enum, &block|
@@ -111,7 +111,7 @@ module Kraps
       mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
           File.open(path) do |stream|
-            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
+            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
           end
         end
       end
@@ -119,7 +119,7 @@ module Kraps
       temp_paths&.delete
     end
 
-    def perform_reduce
+    def perform_reduce(payload)
       current_step = step
 
       implementation = Object.new
@@ -129,7 +129,7 @@ module Kraps
 
       reducer = MapReduce::Reducer.new(implementation)
 
-      Parallelizer.each(Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{
+      Parallelizer.each(Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{payload["partition"]}/")), @concurrency) do |file|
         Kraps.driver.download(file, reducer.add_chunk)
       end
 
@@ -139,14 +139,14 @@ module Kraps
         tempfile.puts(JSON.generate([key, value]))
       end
 
-      Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{
+      Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{payload["partition"]}/chunk.#{payload["partition"]}.json"), tempfile.tap(&:rewind))
     ensure
       tempfile&.close(true)
     end
 
-    def perform_combine
-      temp_paths1 = download_all(token: @args["frame"]["token"], partition:
-      temp_paths2 = download_all(token:
+    def perform_combine(payload)
+      temp_paths1 = download_all(token: @args["frame"]["token"], partition: payload["partition"])
+      temp_paths2 = download_all(token: payload["combine_frame"]["token"], partition: payload["partition"])
 
       enum1 = k_way_merge(temp_paths1.each.to_a, chunk_limit: @chunk_limit)
       enum2 = k_way_merge(temp_paths2.each.to_a, chunk_limit: @chunk_limit)
@@ -157,7 +157,7 @@ module Kraps
       implementation = Object.new
       implementation.define_singleton_method(:map) do |&block|
         combine_method.call(enum1, enum2) do |key, value1, value2|
-
+          current_step.block.call(key, value1, value2, block)
         end
       end
 
@@ -167,7 +167,7 @@ module Kraps
       mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
         Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
           File.open(path) do |stream|
-            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
+            Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
           end
         end
       end
@@ -213,10 +213,10 @@ module Kraps
       end
     end
 
-    def perform_each_partition
+    def perform_each_partition(payload)
       temp_paths = TempPaths.new
 
-      files = Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{
+      files = Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{payload["partition"]}/")).sort
 
       temp_paths_index = files.each_with_object({}) do |file, hash|
         hash[file] = temp_paths.add
@@ -226,7 +226,7 @@ module Kraps
         Kraps.driver.download(file, temp_paths_index[file].path)
       end
 
-      step.block.call(
+      step.block.call(payload["partition"], k_way_merge(temp_paths.each.to_a, chunk_limit: @chunk_limit))
     ensure
       temp_paths&.delete
     end
@@ -237,11 +237,11 @@ module Kraps
       begin
         yield
       rescue Kraps::Error
-
+        redis_queue.stop
        raise
      rescue StandardError => e
        if retries >= num_retries
-
+          redis_queue.stop
           raise
         end
 
@@ -254,20 +254,23 @@ module Kraps
       end
     end
 
-    def
-
-
-
+    def dequeue
+      loop do
+        break if redis_queue.stopped?
+        break if redis_queue.size.zero?
 
-
-
+        redis_queue.dequeue do |payload|
+          payload ? yield(payload) : sleep(1)
+        end
       end
+    end
 
-
-
-
+    def redis_queue
+      @redis_queue ||= RedisQueue.new(redis: Kraps.redis, token: @args["token"], namespace: Kraps.namespace, ttl: Kraps.job_ttl)
+    end
 
-
+    def download_all(token:, partition:)
+      Downloader.download_all(prefix: Kraps.driver.with_prefix("#{token}/#{partition}/"), concurrency: @concurrency)
     end
 
     def jobs
@@ -301,9 +304,5 @@ module Kraps
     def partitioner
       @partitioner ||= proc { |key| step.partitioner.call(key, step.partitions) }
     end
-
-    def distributed_job
-      @distributed_job ||= Kraps.distributed_job_client.build(token: @args["token"])
-    end
   end
 end
data/lib/kraps.rb
CHANGED
@@ -1,4 +1,3 @@
-require "distributed_job"
 require "ruby-progressbar"
 require "ruby-progressbar/outputs/null"
 require "map_reduce"
@@ -9,6 +8,7 @@ require_relative "kraps/drivers"
 require_relative "kraps/actions"
 require_relative "kraps/parallelizer"
 require_relative "kraps/hash_partitioner"
+require_relative "kraps/redis_queue"
 require_relative "kraps/temp_path"
 require_relative "kraps/temp_paths"
 require_relative "kraps/timeout_queue"
@@ -19,6 +19,7 @@ require_relative "kraps/runner"
 require_relative "kraps/step"
 require_relative "kraps/frame"
 require_relative "kraps/worker"
+require_relative "kraps/downloader"
 
 module Kraps
   class Error < StandardError; end
@@ -27,9 +28,11 @@ module Kraps
   class JobStopped < Error; end
   class IncompatibleFrame < Error; end
 
-  def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
+  def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 4 * 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
     @driver = driver
-    @
+    @redis = redis
+    @namespace = namespace
+    @job_ttl = job_ttl.to_i
     @show_progress = show_progress
     @enqueuer = enqueuer
   end
@@ -38,8 +41,16 @@ module Kraps
     @driver
   end
 
-  def self.
-    @
+  def self.redis
+    @redis
+  end
+
+  def self.namespace
+    @namespace
+  end
+
+  def self.job_ttl
+    @job_ttl
   end
 
   def self.show_progress?
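Putting the configuration changes together: the `job_ttl` default grows from 1 day to 4, the value is coerced with `to_i` (so ActiveSupport durations like the README's `7.days` also work), and the removed `distributed_job_client` accessor gives way to `Kraps.redis`, `Kraps.namespace` and `Kraps.job_ttl`. An example configuration using the in-memory fake driver from the README:

```ruby
Kraps.configure(
  driver: Kraps::Drivers::FakeDriver.new(bucket: "kraps"),
  redis: Redis.new,
  namespace: "my-application",
  job_ttl: 7 * 24 * 60 * 60 # or 7.days with ActiveSupport loaded
)

Kraps.redis     # => the configured Redis client
Kraps.namespace # => "my-application"
Kraps.job_ttl   # => 604800
```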
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: kraps
 version: !ruby/object:Gem::Version
-  version: 0.6.0
+  version: 0.8.0
 platform: ruby
 authors:
 - Benjamin Vetter
 autorequire:
 bindir: exe
 cert_chain: []
-date:
+date: 2023-02-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: attachie
@@ -24,20 +24,6 @@ dependencies:
     - - ">="
      - !ruby/object:Gem::Version
        version: '0'
-- !ruby/object:Gem::Dependency
-  name: distributed_job
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-    - !ruby/object:Gem::Version
-      version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-    - !ruby/object:Gem::Version
-      version: '0'
 - !ruby/object:Gem::Dependency
   name: map-reduce-ruby
   requirement: !ruby/object:Gem::Requirement
@@ -142,6 +128,7 @@ files:
 - docker-compose.yml
 - lib/kraps.rb
 - lib/kraps/actions.rb
+- lib/kraps/downloader.rb
 - lib/kraps/drivers.rb
 - lib/kraps/frame.rb
 - lib/kraps/hash_partitioner.rb
@@ -149,6 +136,7 @@ files:
 - lib/kraps/job.rb
 - lib/kraps/job_resolver.rb
 - lib/kraps/parallelizer.rb
+- lib/kraps/redis_queue.rb
 - lib/kraps/runner.rb
 - lib/kraps/step.rb
 - lib/kraps/temp_path.rb