kraps 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +14 -0
- data/Gemfile.lock +2 -5
- data/README.md +23 -13
- data/docker-compose.yml +1 -1
- data/lib/kraps/downloader.rb +19 -0
- data/lib/kraps/job.rb +26 -18
- data/lib/kraps/redis_queue.rb +151 -0
- data/lib/kraps/runner.rb +48 -79
- data/lib/kraps/step.rb +1 -1
- data/lib/kraps/version.rb +1 -1
- data/lib/kraps/worker.rb +42 -43
- data/lib/kraps.rb +16 -5
- metadata +4 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d261c779e82209152e26decbc6c5a6c5c5ddb0fb40803884383617635727d3b2
|
4
|
+
data.tar.gz: 1b9c6fa8db7a7811cbac5a7a5db518e1f3ee75df583521b64417341e830425f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcb05139042149be087b1a2c7f14a31cd5e28dedb1517aca83299f63b90046e4d05e0ab19dfaeede329e784880623abda19675252cdeaad04f8ccd87249afde5
|
7
|
+
data.tar.gz: 10fd07c322c659ae21a682832eba30416c830f9d2146af685d69168ad5137045ef4268c0a43cee4e879bb875edf900ca740bbe4cbfe8b91b34ad3df40763bce0
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# CHANGELOG
|
2
2
|
|
3
|
+
## v0.8.0
|
4
|
+
|
5
|
+
* Use number of partitions of previous step for `jobs` option by default
|
6
|
+
* Changed `combine` to receive a `collector`
|
7
|
+
* Added mandatory `concurrency` argument to `load`
|
8
|
+
|
9
|
+
## v0.7.0
|
10
|
+
|
11
|
+
* Added a `jobs` option to the actions to limit the concurrency
|
12
|
+
when e.g. accessing external data stores and to avoid overloading
|
13
|
+
them
|
14
|
+
* Added a queue using redis for the jobs to avoid starving workers
|
15
|
+
* Removed `distributed_job` dependency
|
16
|
+
|
3
17
|
## v0.6.0
|
4
18
|
|
5
19
|
* Added `map_partitions`
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
kraps (0.
|
4
|
+
kraps (0.7.0)
|
5
5
|
attachie
|
6
|
-
distributed_job
|
7
6
|
map-reduce-ruby (>= 3.0.0)
|
8
7
|
redis
|
9
8
|
ruby-progressbar
|
@@ -41,8 +40,6 @@ GEM
|
|
41
40
|
concurrent-ruby (1.1.10)
|
42
41
|
connection_pool (2.3.0)
|
43
42
|
diff-lcs (1.5.0)
|
44
|
-
distributed_job (3.1.0)
|
45
|
-
redis (>= 4.1.0)
|
46
43
|
i18n (1.12.0)
|
47
44
|
concurrent-ruby (~> 1.0)
|
48
45
|
jmespath (1.6.1)
|
@@ -62,7 +59,7 @@ GEM
|
|
62
59
|
rake (13.0.6)
|
63
60
|
redis (5.0.5)
|
64
61
|
redis-client (>= 0.9.0)
|
65
|
-
redis-client (0.11.
|
62
|
+
redis-client (0.11.2)
|
66
63
|
connection_pool
|
67
64
|
regexp_parser (2.6.0)
|
68
65
|
rexml (3.2.5)
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ Kraps.configure(
|
|
30
30
|
driver: Kraps::Drivers::S3Driver.new(s3_client: Aws::S3::Client.new("..."), bucket: "some-bucket", prefix: "temp/kraps/"),
|
31
31
|
redis: Redis.new,
|
32
32
|
namespace: "my-application", # An optional namespace to be used for redis keys, default: nil
|
33
|
-
job_ttl:
|
33
|
+
job_ttl: 7.days, # Job information in redis will automatically be removed after this amount of time, default: 4 days
|
34
34
|
show_progress: true # Whether or not to show the progress in the terminal when executing jobs, default: true
|
35
35
|
enqueuer: ->(worker, json) { worker.perform_async(json) } # Allows customizing the enqueueing of worker jobs
|
36
36
|
)
|
@@ -220,7 +220,7 @@ items are used as keys and the values are set to `nil`.
|
|
220
220
|
* `map`: Maps the key value pairs to other key value pairs
|
221
221
|
|
222
222
|
```ruby
|
223
|
-
job.map(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker) do |key, value, collector|
|
223
|
+
job.map(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8) do |key, value, collector|
|
224
224
|
collector.call("changed #{key}", "changed #{value}")
|
225
225
|
end
|
226
226
|
```
|
@@ -229,13 +229,22 @@ The block gets each key-value pair passed and the `collector` block can be
|
|
229
229
|
called as often as necessary. This is also the reason why `map` cannot simply
|
230
230
|
return the new key-value pair, but the `collector` must be used instead.
|
231
231
|
|
232
|
+
The `jobs` argument can be useful when you need to access an external data
|
233
|
+
source, like a relational database, and you want to limit the number of workers
|
234
|
+
accessing the store concurrently to avoid overloading it. If you don't specify
|
235
|
+
it, it will be identical to the number of partitions of the previous step. It
|
236
|
+
is recommended to only use it for steps where you need to throttle the
|
237
|
+
concurrency, because it will of course slow down the processing. The `jobs`
|
238
|
+
argument only applies to the current step. The following steps don't inherit
|
239
|
+
the argument, but reset it.
|
240
|
+
|
232
241
|
* `map_partitions`: Maps the key value pairs to other key value pairs, but the
|
233
242
|
block receives all data of each partition as an enumerable and sorted by key.
|
234
243
|
Please be aware that you should not call `to_a` or similar on the enumerable.
|
235
244
|
Prefer `map` over `map_partitions` when possible.
|
236
245
|
|
237
246
|
```ruby
|
238
|
-
job.map_partitions(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker) do |pairs, collector|
|
247
|
+
job.map_partitions(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8) do |pairs, collector|
|
239
248
|
pairs.each do |key, value|
|
240
249
|
collector.call("changed #{key}", "changed #{value}")
|
241
250
|
end
|
@@ -245,7 +254,7 @@ end
|
|
245
254
|
* `reduce`: Reduces the values of pairs having the same key
|
246
255
|
|
247
256
|
```ruby
|
248
|
-
job.reduce(worker: MyKrapsWorker) do |key, value1, value2|
|
257
|
+
job.reduce(worker: MyKrapsWorker, jobs: 8) do |key, value1, value2|
|
249
258
|
value1 + value2
|
250
259
|
end
|
251
260
|
```
|
@@ -265,8 +274,8 @@ most of the time, this is not necessary and the key can simply be ignored.
|
|
265
274
|
passed job result are completely omitted.
|
266
275
|
|
267
276
|
```ruby
|
268
|
-
job.combine(other_job, worker: MyKrapsWorker) do |key, value1, value2|
|
269
|
-
(value1 || {}).merge(value2 || {})
|
277
|
+
job.combine(other_job, worker: MyKrapsWorker, jobs: 8) do |key, value1, value2, collector|
|
278
|
+
collector.call(key, (value1 || {}).merge(value2 || {}))
|
270
279
|
end
|
271
280
|
```
|
272
281
|
|
@@ -279,7 +288,7 @@ since Kraps detects the dependency on its own.
|
|
279
288
|
* `repartition`: Used to change the partitioning
|
280
289
|
|
281
290
|
```ruby
|
282
|
-
job.repartition(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker)
|
291
|
+
job.repartition(partitions: 128, partitioner: partitioner, worker: MyKrapsWorker, jobs: 8)
|
283
292
|
```
|
284
293
|
|
285
294
|
Repartitions all data into the specified number of partitions and using the
|
@@ -290,7 +299,7 @@ specified partitioner.
|
|
290
299
|
`to_a` or similar on the enumerable.
|
291
300
|
|
292
301
|
```ruby
|
293
|
-
job.each_partition do |partition, pairs|
|
302
|
+
job.each_partition(jobs: 8) do |partition, pairs|
|
294
303
|
pairs.each do |key, value|
|
295
304
|
# ...
|
296
305
|
end
|
@@ -308,10 +317,12 @@ It creates a folder for every partition and stores one or more chunks in there.
|
|
308
317
|
* `load`: Loads the previously dumped data
|
309
318
|
|
310
319
|
```ruby
|
311
|
-
job.load(prefix: "path/to/dump", partitions: 32, partitioner: Kraps::HashPartitioner.new, worker: MyKrapsWorker)
|
320
|
+
job.load(prefix: "path/to/dump", partitions: 32, concurrency: 8, partitioner: Kraps::HashPartitioner.new, worker: MyKrapsWorker)
|
312
321
|
```
|
313
322
|
|
314
|
-
The number of partitions
|
323
|
+
The number of partitions, the partitioner and concurrency must be specified.
|
324
|
+
The concurrency specifies the number of threads used for downloading chunks in
|
325
|
+
parallel.
|
315
326
|
|
316
327
|
Please note that every API method accepts a `before` callable:
|
317
328
|
|
@@ -379,7 +390,8 @@ jobs only once.
|
|
379
390
|
Kraps ships with an in-memory fake driver for storage, which you can use for
|
380
391
|
testing purposes instead of the s3 driver:
|
381
392
|
|
382
|
-
```ruby
|
393
|
+
```ruby
|
394
|
+
Kraps.configure(
|
383
395
|
driver: Kraps::Drivers::FakeDriver.new(bucket: "kraps"),
|
384
396
|
# ...
|
385
397
|
) ```
|
@@ -425,8 +437,6 @@ The API of the driver is:
|
|
425
437
|
Kraps is built on top of
|
426
438
|
[map-reduce-ruby](https://github.com/mrkamel/map-reduce-ruby) for the
|
427
439
|
map/reduce framework,
|
428
|
-
[distributed_job](https://github.com/mrkamel/distributed_job)
|
429
|
-
to keep track of the job/step status,
|
430
440
|
[attachie](https://github.com/mrkamel/attachie) to interact with the storage
|
431
441
|
layer (s3),
|
432
442
|
[ruby-progressbar](https://github.com/jfelchner/ruby-progressbar) to
|
data/docker-compose.yml
CHANGED
data/lib/kraps/downloader.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Kraps
|
2
|
+
class Downloader
|
3
|
+
def self.download_all(prefix:, concurrency:)
|
4
|
+
temp_paths = TempPaths.new
|
5
|
+
|
6
|
+
files = Kraps.driver.list(prefix: prefix).sort
|
7
|
+
|
8
|
+
temp_paths_index = files.each_with_object({}) do |file, hash|
|
9
|
+
hash[file] = temp_paths.add
|
10
|
+
end
|
11
|
+
|
12
|
+
Parallelizer.each(files, concurrency) do |file|
|
13
|
+
Kraps.driver.download(file, temp_paths_index[file].path)
|
14
|
+
end
|
15
|
+
|
16
|
+
temp_paths
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/kraps/job.rb
CHANGED
@@ -27,14 +27,17 @@ module Kraps
|
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
30
|
-
def map(partitions: nil, partitioner: nil, worker: @worker, before: nil, &block)
|
30
|
+
def map(partitions: nil, partitioner: nil, jobs: nil, worker: @worker, before: nil, &block)
|
31
31
|
fresh.tap do |job|
|
32
32
|
job.instance_eval do
|
33
|
+
jobs = [jobs, @partitions].compact.min
|
34
|
+
|
33
35
|
@partitions = partitions if partitions
|
34
36
|
@partitioner = partitioner if partitioner
|
35
37
|
|
36
38
|
@steps << Step.new(
|
37
39
|
action: Actions::MAP,
|
40
|
+
jobs: jobs,
|
38
41
|
partitions: @partitions,
|
39
42
|
partitioner: @partitioner,
|
40
43
|
worker: worker,
|
@@ -45,14 +48,17 @@ module Kraps
|
|
45
48
|
end
|
46
49
|
end
|
47
50
|
|
48
|
-
def map_partitions(partitions: nil, partitioner: nil, worker: @worker, before: nil, &block)
|
51
|
+
def map_partitions(partitions: nil, partitioner: nil, jobs: nil, worker: @worker, before: nil, &block)
|
49
52
|
fresh.tap do |job|
|
50
53
|
job.instance_eval do
|
54
|
+
jobs = [jobs, @partitions].compact.min
|
55
|
+
|
51
56
|
@partitions = partitions if partitions
|
52
57
|
@partitioner = partitioner if partitioner
|
53
58
|
|
54
59
|
@steps << Step.new(
|
55
60
|
action: Actions::MAP_PARTITIONS,
|
61
|
+
jobs: jobs,
|
56
62
|
partitions: @partitions,
|
57
63
|
partitioner: @partitioner,
|
58
64
|
worker: worker,
|
@@ -63,11 +69,12 @@ module Kraps
|
|
63
69
|
end
|
64
70
|
end
|
65
71
|
|
66
|
-
def reduce(worker: @worker, before: nil, &block)
|
72
|
+
def reduce(jobs: nil, worker: @worker, before: nil, &block)
|
67
73
|
fresh.tap do |job|
|
68
74
|
job.instance_eval do
|
69
75
|
@steps << Step.new(
|
70
76
|
action: Actions::REDUCE,
|
77
|
+
jobs: [jobs, @partitions].compact.min,
|
71
78
|
partitions: @partitions,
|
72
79
|
partitioner: @partitioner,
|
73
80
|
worker: worker,
|
@@ -78,11 +85,12 @@ module Kraps
|
|
78
85
|
end
|
79
86
|
end
|
80
87
|
|
81
|
-
def combine(other_job, worker: @worker, before: nil, &block)
|
88
|
+
def combine(other_job, jobs: nil, worker: @worker, before: nil, &block)
|
82
89
|
fresh.tap do |job|
|
83
90
|
job.instance_eval do
|
84
91
|
@steps << Step.new(
|
85
92
|
action: Actions::COMBINE,
|
93
|
+
jobs: [jobs, @partitions].compact.min,
|
86
94
|
partitions: @partitions,
|
87
95
|
partitioner: @partitioner,
|
88
96
|
worker: worker,
|
@@ -95,11 +103,12 @@ module Kraps
|
|
95
103
|
end
|
96
104
|
end
|
97
105
|
|
98
|
-
def each_partition(worker: @worker, before: nil, &block)
|
106
|
+
def each_partition(jobs: nil, worker: @worker, before: nil, &block)
|
99
107
|
fresh.tap do |job|
|
100
108
|
job.instance_eval do
|
101
109
|
@steps << Step.new(
|
102
110
|
action: Actions::EACH_PARTITION,
|
111
|
+
jobs: [jobs, @partitions].compact.min,
|
103
112
|
partitions: @partitions,
|
104
113
|
partitioner: @partitioner,
|
105
114
|
worker: worker,
|
@@ -110,8 +119,8 @@ module Kraps
|
|
110
119
|
end
|
111
120
|
end
|
112
121
|
|
113
|
-
def repartition(partitions:, partitioner: nil, worker: @worker, before: nil)
|
114
|
-
map(partitions: partitions, partitioner: partitioner, worker: worker, before: before) do |key, value, collector|
|
122
|
+
def repartition(partitions:, jobs: nil, partitioner: nil, worker: @worker, before: nil)
|
123
|
+
map(jobs: jobs, partitions: partitions, partitioner: partitioner, worker: worker, before: before) do |key, value, collector|
|
115
124
|
collector.call(key, value)
|
116
125
|
end
|
117
126
|
end
|
@@ -130,7 +139,7 @@ module Kraps
|
|
130
139
|
end
|
131
140
|
end
|
132
141
|
|
133
|
-
def load(prefix:, partitions:, partitioner:, worker: @worker)
|
142
|
+
def load(prefix:, partitions:, partitioner:, concurrency:, worker: @worker)
|
134
143
|
job = parallelize(partitions: partitions, partitioner: proc { |key, _| key }, worker: worker) do |collector|
|
135
144
|
(0...partitions).each do |partition|
|
136
145
|
collector.call(partition)
|
@@ -138,20 +147,19 @@ module Kraps
|
|
138
147
|
end
|
139
148
|
|
140
149
|
job.map_partitions(partitioner: partitioner, worker: worker) do |partition, _, collector|
|
141
|
-
|
150
|
+
temp_paths = Downloader.download_all(prefix: File.join(prefix, partition.to_s, "/"), concurrency: concurrency)
|
142
151
|
|
143
|
-
|
144
|
-
|
152
|
+
temp_paths.each do |temp_path|
|
153
|
+
File.open(temp_path.path) do |stream|
|
154
|
+
stream.each_line do |line|
|
155
|
+
key, value = JSON.parse(line)
|
145
156
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
key, value = JSON.parse(line)
|
150
|
-
|
151
|
-
collector.call(key, value)
|
157
|
+
collector.call(key, value)
|
158
|
+
end
|
159
|
+
end
|
152
160
|
end
|
153
161
|
ensure
|
154
|
-
|
162
|
+
temp_paths&.delete
|
155
163
|
end
|
156
164
|
end
|
157
165
|
|
data/lib/kraps/redis_queue.rb
ADDED
@@ -0,0 +1,151 @@
|
|
1
|
+
module Kraps
|
2
|
+
class RedisQueue
|
3
|
+
VISIBILITY_TIMEOUT = 60
|
4
|
+
|
5
|
+
attr_reader :token
|
6
|
+
|
7
|
+
def initialize(redis:, token:, namespace:, ttl:)
|
8
|
+
@redis = redis
|
9
|
+
@token = token
|
10
|
+
@namespace = namespace
|
11
|
+
@ttl = ttl
|
12
|
+
end
|
13
|
+
|
14
|
+
def size
|
15
|
+
@size_script ||= <<~SCRIPT
|
16
|
+
local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
|
17
|
+
|
18
|
+
redis.call('expire', queue_key, ttl)
|
19
|
+
redis.call('expire', pending_key, ttl)
|
20
|
+
redis.call('expire', status_key, ttl)
|
21
|
+
|
22
|
+
return redis.call('llen', queue_key) + redis.call('zcard', pending_key)
|
23
|
+
SCRIPT
|
24
|
+
|
25
|
+
@redis.eval(@size_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl])
|
26
|
+
end
|
27
|
+
|
28
|
+
def enqueue(payload)
|
29
|
+
@enqueue_script ||= <<~SCRIPT
|
30
|
+
local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
|
31
|
+
|
32
|
+
redis.call('rpush', queue_key, job)
|
33
|
+
|
34
|
+
redis.call('expire', queue_key, ttl)
|
35
|
+
redis.call('expire', pending_key, ttl)
|
36
|
+
redis.call('expire', status_key, ttl)
|
37
|
+
SCRIPT
|
38
|
+
|
39
|
+
@redis.eval(@enqueue_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, JSON.generate(payload)])
|
40
|
+
end
|
41
|
+
|
42
|
+
def dequeue
|
43
|
+
@dequeue_script ||= <<~SCRIPT
|
44
|
+
local queue_key, pending_key, status_key, ttl, visibility_timeout = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), tonumber(ARGV[5])
|
45
|
+
|
46
|
+
local zitem = redis.call('zrange', pending_key, 0, 0, 'WITHSCORES')
|
47
|
+
local job = zitem[1]
|
48
|
+
|
49
|
+
if not zitem[2] or tonumber(zitem[2]) > tonumber(redis.call('time')[1]) then
|
50
|
+
job = redis.call('lpop', queue_key)
|
51
|
+
end
|
52
|
+
|
53
|
+
redis.call('expire', queue_key, ttl)
|
54
|
+
redis.call('expire', pending_key, ttl)
|
55
|
+
redis.call('expire', status_key, ttl)
|
56
|
+
|
57
|
+
if not job then return nil end
|
58
|
+
|
59
|
+
redis.call('zadd', pending_key, tonumber(redis.call('time')[1]) + visibility_timeout, job)
|
60
|
+
redis.call('expire', pending_key, ttl)
|
61
|
+
|
62
|
+
return job
|
63
|
+
SCRIPT
|
64
|
+
|
65
|
+
job = @redis.eval(@dequeue_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, VISIBILITY_TIMEOUT])
|
66
|
+
|
67
|
+
unless job
|
68
|
+
yield(nil)
|
69
|
+
return
|
70
|
+
end
|
71
|
+
|
72
|
+
keep_alive(job) do
|
73
|
+
yield(JSON.parse(job)) if job
|
74
|
+
end
|
75
|
+
|
76
|
+
@remove_script ||= <<~SCRIPT
|
77
|
+
local queue_key, pending_key, status_key, ttl, job = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5]
|
78
|
+
|
79
|
+
redis.call('zrem', pending_key, job)
|
80
|
+
|
81
|
+
redis.call('expire', queue_key, ttl)
|
82
|
+
redis.call('expire', pending_key, ttl)
|
83
|
+
redis.call('expire', status_key, ttl)
|
84
|
+
SCRIPT
|
85
|
+
|
86
|
+
@redis.eval(@remove_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, job])
|
87
|
+
end
|
88
|
+
|
89
|
+
def stop
|
90
|
+
@stop_script ||= <<~SCRIPT
|
91
|
+
local queue_key, pending_key, status_key, ttl = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4])
|
92
|
+
|
93
|
+
redis.call('hset', status_key, 'stopped', 1)
|
94
|
+
|
95
|
+
redis.call('expire', queue_key, ttl)
|
96
|
+
redis.call('expire', pending_key, ttl)
|
97
|
+
redis.call('expire', status_key, ttl)
|
98
|
+
SCRIPT
|
99
|
+
|
100
|
+
@redis.eval(@stop_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl])
|
101
|
+
end
|
102
|
+
|
103
|
+
def stopped?
|
104
|
+
@stopped_script ||= <<~SCRIPT
|
105
|
+
local queue_key, pending_key, status_key, ttl = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4])
|
106
|
+
|
107
|
+
redis.call('expire', queue_key, ttl)
|
108
|
+
redis.call('expire', pending_key, ttl)
|
109
|
+
redis.call('expire', status_key, ttl)
|
110
|
+
|
111
|
+
return redis.call('hget', status_key, 'stopped')
|
112
|
+
SCRIPT
|
113
|
+
|
114
|
+
@redis.eval(@stopped_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl]).to_i == 1
|
115
|
+
end
|
116
|
+
|
117
|
+
private
|
118
|
+
|
119
|
+
def keep_alive(job)
|
120
|
+
@keep_alive_script ||= <<~SCRIPT
|
121
|
+
local queue_key, pending_key, status_key, ttl, job, visibility_timeout = ARGV[1], ARGV[2], ARGV[3], tonumber(ARGV[4]), ARGV[5], tonumber(ARGV[6])
|
122
|
+
|
123
|
+
redis.call('zadd', pending_key, tonumber(redis.call('time')[1]) + visibility_timeout, job)
|
124
|
+
|
125
|
+
redis.call('expire', queue_key, ttl)
|
126
|
+
redis.call('expire', pending_key, ttl)
|
127
|
+
redis.call('expire', status_key, ttl)
|
128
|
+
SCRIPT
|
129
|
+
|
130
|
+
interval = Interval.new(5) do
|
131
|
+
@redis.eval(@keep_alive_script, argv: [redis_queue_key, redis_pending_key, redis_status_key, @ttl, job, VISIBILITY_TIMEOUT])
|
132
|
+
end
|
133
|
+
|
134
|
+
yield
|
135
|
+
ensure
|
136
|
+
interval&.stop
|
137
|
+
end
|
138
|
+
|
139
|
+
def redis_queue_key
|
140
|
+
[@namespace, "kraps", "queue", @token].compact.join(":")
|
141
|
+
end
|
142
|
+
|
143
|
+
def redis_pending_key
|
144
|
+
[@namespace, "kraps", "pending", @token].compact.join(":")
|
145
|
+
end
|
146
|
+
|
147
|
+
def redis_status_key
|
148
|
+
[@namespace, "kraps", "status", @token].compact.join(":")
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
data/lib/kraps/runner.rb
CHANGED
@@ -45,48 +45,35 @@ module Kraps
|
|
45
45
|
|
46
46
|
def perform_parallelize
|
47
47
|
enum = Enumerator.new do |yielder|
|
48
|
-
collector = proc { |item| yielder << item }
|
48
|
+
collector = proc { |item| yielder << { item: item } }
|
49
49
|
|
50
50
|
@step.block.call(collector)
|
51
51
|
end
|
52
52
|
|
53
|
-
|
54
|
-
push_and_wait(distributed_job, enum) do |item, part|
|
55
|
-
enqueue(token: distributed_job.token, part: part, item: item)
|
56
|
-
end
|
53
|
+
token = push_and_wait(enum: enum)
|
57
54
|
|
58
|
-
|
59
|
-
end
|
55
|
+
Frame.new(token: token, partitions: @step.partitions)
|
60
56
|
end
|
61
57
|
|
62
58
|
def perform_map
|
63
|
-
|
64
|
-
|
65
|
-
enqueue(token: distributed_job.token, part: part, partition: partition)
|
66
|
-
end
|
59
|
+
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
60
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
67
61
|
|
68
|
-
|
69
|
-
end
|
62
|
+
Frame.new(token: token, partitions: @step.partitions)
|
70
63
|
end
|
71
64
|
|
72
65
|
def perform_map_partitions
|
73
|
-
|
74
|
-
|
75
|
-
enqueue(token: distributed_job.token, part: part, partition: partition)
|
76
|
-
end
|
66
|
+
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
67
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
77
68
|
|
78
|
-
|
79
|
-
end
|
69
|
+
Frame.new(token: token, partitions: @step.partitions)
|
80
70
|
end
|
81
71
|
|
82
72
|
def perform_reduce
|
83
|
-
|
84
|
-
|
85
|
-
enqueue(token: distributed_job.token, part: part, partition: partition)
|
86
|
-
end
|
73
|
+
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
74
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
87
75
|
|
88
|
-
|
89
|
-
end
|
76
|
+
Frame.new(token: token, partitions: @step.partitions)
|
90
77
|
end
|
91
78
|
|
92
79
|
def perform_combine
|
@@ -95,82 +82,64 @@ module Kraps
|
|
95
82
|
|
96
83
|
raise(IncompatibleFrame, "Incompatible number of partitions") if combine_step.partitions != @step.partitions
|
97
84
|
|
98
|
-
|
99
|
-
|
100
|
-
enqueue(token: distributed_job.token, part: part, partition: partition, combine_frame: combine_step.frame.to_h)
|
101
|
-
end
|
102
|
-
|
103
|
-
Frame.new(token: distributed_job.token, partitions: @step.partitions)
|
85
|
+
enum = (0...@frame.partitions).map do |partition|
|
86
|
+
{ partition: partition, combine_frame: combine_step.frame.to_h }
|
104
87
|
end
|
88
|
+
|
89
|
+
token = push_and_wait(job_count: @step.jobs, enum: enum)
|
90
|
+
|
91
|
+
Frame.new(token: token, partitions: @step.partitions)
|
105
92
|
end
|
106
93
|
|
107
94
|
def perform_each_partition
|
108
|
-
|
109
|
-
|
110
|
-
enqueue(token: distributed_job.token, part: part, partition: partition)
|
111
|
-
end
|
95
|
+
enum = (0...@frame.partitions).map { |partition| { partition: partition } }
|
96
|
+
push_and_wait(job_count: @step.jobs, enum: enum)
|
112
97
|
|
113
|
-
|
114
|
-
end
|
98
|
+
@frame
|
115
99
|
end
|
116
100
|
|
117
|
-
def
|
118
|
-
Kraps.
|
119
|
-
|
120
|
-
JSON.generate(
|
121
|
-
job_index: @job_index,
|
122
|
-
step_index: @step_index,
|
123
|
-
frame: @frame.to_h,
|
124
|
-
token: token,
|
125
|
-
part: part,
|
126
|
-
klass: @klass,
|
127
|
-
args: @args,
|
128
|
-
kwargs: @kwargs,
|
129
|
-
**rest
|
130
|
-
)
|
131
|
-
)
|
132
|
-
end
|
101
|
+
def push_and_wait(enum:, job_count: nil)
|
102
|
+
redis_queue = RedisQueue.new(redis: Kraps.redis, token: SecureRandom.hex, namespace: Kraps.namespace, ttl: Kraps.job_ttl)
|
103
|
+
progress_bar = build_progress_bar("#{@klass}: job #{@job_index + 1}/#{@jobs.size}, step #{@step_index + 1}/#{@job.steps.size}, #{@step.jobs || "?"} jobs, token #{redis_queue.token}, %a, %c/%C (%p%) => #{@step.action}")
|
133
104
|
|
134
|
-
|
135
|
-
distributed_job = Kraps.distributed_job_client.build(token: SecureRandom.hex)
|
105
|
+
total = 0
|
136
106
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
end
|
107
|
+
interval = Interval.new(1) do
|
108
|
+
# The interval is used to continuously update the progress bar even
|
109
|
+
# when push_all is used and to avoid sessions being terminated due
|
110
|
+
# to inactivity etc
|
142
111
|
|
143
|
-
|
144
|
-
|
112
|
+
progress_bar.total = total
|
113
|
+
progress_bar.progress = [progress_bar.total - redis_queue.size, 0].max
|
114
|
+
end
|
145
115
|
|
146
|
-
|
147
|
-
total
|
116
|
+
enum.each_with_index do |item, part|
|
117
|
+
total += 1
|
148
118
|
|
149
|
-
|
150
|
-
|
151
|
-
end
|
119
|
+
redis_queue.enqueue(item.merge(part: part))
|
120
|
+
end
|
152
121
|
|
153
|
-
|
154
|
-
|
155
|
-
interval.fire(timeout: 1)
|
122
|
+
(job_count || total).times do
|
123
|
+
break if redis_queue.stopped?
|
156
124
|
|
157
|
-
|
158
|
-
end
|
159
|
-
ensure
|
160
|
-
interval&.stop
|
125
|
+
Kraps.enqueuer.call(@step.worker, JSON.generate(job_index: @job_index, step_index: @step_index, frame: @frame.to_h, token: redis_queue.token, klass: @klass, args: @args, kwargs: @kwargs))
|
161
126
|
end
|
162
127
|
|
163
128
|
loop do
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
break if distributed_job.finished? || distributed_job.stopped?
|
129
|
+
break if redis_queue.size.zero?
|
130
|
+
break if redis_queue.stopped?
|
168
131
|
|
169
132
|
sleep(1)
|
170
133
|
end
|
171
134
|
|
172
|
-
raise(JobStopped, "The job was stopped") if
|
135
|
+
raise(JobStopped, "The job was stopped") if redis_queue.stopped?
|
136
|
+
|
137
|
+
interval.fire(timeout: 1)
|
138
|
+
|
139
|
+
redis_queue.token
|
173
140
|
ensure
|
141
|
+
redis_queue&.stop
|
142
|
+
interval&.stop
|
174
143
|
progress_bar&.stop
|
175
144
|
end
|
176
145
|
|
data/lib/kraps/step.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
module Kraps
|
2
|
-
Step = Struct.new(:action, :partitioner, :partitions, :block, :worker, :before, :frame, :dependency, :options, keyword_init: true)
|
2
|
+
Step = Struct.new(:action, :partitioner, :partitions, :jobs, :block, :worker, :before, :frame, :dependency, :options, keyword_init: true)
|
3
3
|
end
|
data/lib/kraps/version.rb
CHANGED
data/lib/kraps/worker.rb
CHANGED
@@ -11,22 +11,22 @@ module Kraps
|
|
11
11
|
end
|
12
12
|
|
13
13
|
def call(retries: 3)
|
14
|
-
return if
|
14
|
+
return if redis_queue.stopped?
|
15
15
|
|
16
16
|
raise(InvalidAction, "Invalid action #{step.action}") unless Actions::ALL.include?(step.action)
|
17
17
|
|
18
|
-
|
19
|
-
|
18
|
+
dequeue do |payload|
|
19
|
+
with_retries(retries) do # TODO: allow to use queue based retries
|
20
|
+
step.before&.call
|
20
21
|
|
21
|
-
|
22
|
-
|
23
|
-
distributed_job.done(@args["part"])
|
22
|
+
send(:"perform_#{step.action}", payload)
|
23
|
+
end
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
27
|
private
|
28
28
|
|
29
|
-
def perform_parallelize
|
29
|
+
def perform_parallelize(payload)
|
30
30
|
implementation = Class.new do
|
31
31
|
def map(key)
|
32
32
|
yield(key, nil)
|
@@ -34,19 +34,19 @@ module Kraps
|
|
34
34
|
end
|
35
35
|
|
36
36
|
mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
|
37
|
-
mapper.map(
|
37
|
+
mapper.map(payload["item"])
|
38
38
|
|
39
39
|
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
40
40
|
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
41
41
|
File.open(path) do |stream|
|
42
|
-
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
|
42
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["part"]}.json"), stream)
|
43
43
|
end
|
44
44
|
end
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
-
def perform_map
|
49
|
-
temp_paths = download_all(token: @args["frame"]["token"], partition:
|
48
|
+
def perform_map(payload)
|
49
|
+
temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
50
50
|
|
51
51
|
current_step = step
|
52
52
|
|
@@ -78,7 +78,7 @@ module Kraps
|
|
78
78
|
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
79
79
|
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
80
80
|
File.open(path) do |stream|
|
81
|
-
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
|
81
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
|
82
82
|
end
|
83
83
|
end
|
84
84
|
end
|
@@ -86,11 +86,11 @@ module Kraps
|
|
86
86
|
temp_paths&.delete
|
87
87
|
end
|
88
88
|
|
89
|
-
def perform_map_partitions
|
90
|
-
temp_paths = download_all(token: @args["frame"]["token"], partition:
|
89
|
+
def perform_map_partitions(payload)
|
90
|
+
temp_paths = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
91
91
|
|
92
92
|
current_step = step
|
93
|
-
current_partition =
|
93
|
+
current_partition = payload["partition"]
|
94
94
|
|
95
95
|
implementation = Object.new
|
96
96
|
implementation.define_singleton_method(:map) do |enum, &block|
|
@@ -111,7 +111,7 @@ module Kraps
|
|
111
111
|
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
112
112
|
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
113
113
|
File.open(path) do |stream|
|
114
|
-
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
|
114
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
|
115
115
|
end
|
116
116
|
end
|
117
117
|
end
|
@@ -119,7 +119,7 @@ module Kraps
|
|
119
119
|
temp_paths&.delete
|
120
120
|
end
|
121
121
|
|
122
|
-
def perform_reduce
|
122
|
+
def perform_reduce(payload)
|
123
123
|
current_step = step
|
124
124
|
|
125
125
|
implementation = Object.new
|
@@ -129,7 +129,7 @@ module Kraps
|
|
129
129
|
|
130
130
|
reducer = MapReduce::Reducer.new(implementation)
|
131
131
|
|
132
|
-
Parallelizer.each(Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{
|
132
|
+
Parallelizer.each(Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{payload["partition"]}/")), @concurrency) do |file|
|
133
133
|
Kraps.driver.download(file, reducer.add_chunk)
|
134
134
|
end
|
135
135
|
|
@@ -139,14 +139,14 @@ module Kraps
|
|
139
139
|
tempfile.puts(JSON.generate([key, value]))
|
140
140
|
end
|
141
141
|
|
142
|
-
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{
|
142
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{payload["partition"]}/chunk.#{payload["partition"]}.json"), tempfile.tap(&:rewind))
|
143
143
|
ensure
|
144
144
|
tempfile&.close(true)
|
145
145
|
end
|
146
146
|
|
147
|
-
def perform_combine
|
148
|
-
temp_paths1 = download_all(token: @args["frame"]["token"], partition:
|
149
|
-
temp_paths2 = download_all(token:
|
147
|
+
def perform_combine(payload)
|
148
|
+
temp_paths1 = download_all(token: @args["frame"]["token"], partition: payload["partition"])
|
149
|
+
temp_paths2 = download_all(token: payload["combine_frame"]["token"], partition: payload["partition"])
|
150
150
|
|
151
151
|
enum1 = k_way_merge(temp_paths1.each.to_a, chunk_limit: @chunk_limit)
|
152
152
|
enum2 = k_way_merge(temp_paths2.each.to_a, chunk_limit: @chunk_limit)
|
@@ -157,7 +157,7 @@ module Kraps
|
|
157
157
|
implementation = Object.new
|
158
158
|
implementation.define_singleton_method(:map) do |&block|
|
159
159
|
combine_method.call(enum1, enum2) do |key, value1, value2|
|
160
|
-
|
160
|
+
current_step.block.call(key, value1, value2, block)
|
161
161
|
end
|
162
162
|
end
|
163
163
|
|
@@ -167,7 +167,7 @@ module Kraps
|
|
167
167
|
mapper.shuffle(chunk_limit: @chunk_limit) do |partitions|
|
168
168
|
Parallelizer.each(partitions.to_a, @concurrency) do |partition, path|
|
169
169
|
File.open(path) do |stream|
|
170
|
-
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{
|
170
|
+
Kraps.driver.store(Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{payload["partition"]}.json"), stream)
|
171
171
|
end
|
172
172
|
end
|
173
173
|
end
|
@@ -213,10 +213,10 @@ module Kraps
|
|
213
213
|
end
|
214
214
|
end
|
215
215
|
|
216
|
-
def perform_each_partition
|
216
|
+
def perform_each_partition(payload)
|
217
217
|
temp_paths = TempPaths.new
|
218
218
|
|
219
|
-
files = Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{
|
219
|
+
files = Kraps.driver.list(prefix: Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{payload["partition"]}/")).sort
|
220
220
|
|
221
221
|
temp_paths_index = files.each_with_object({}) do |file, hash|
|
222
222
|
hash[file] = temp_paths.add
|
@@ -226,7 +226,7 @@ module Kraps
|
|
226
226
|
Kraps.driver.download(file, temp_paths_index[file].path)
|
227
227
|
end
|
228
228
|
|
229
|
-
step.block.call(
|
229
|
+
step.block.call(payload["partition"], k_way_merge(temp_paths.each.to_a, chunk_limit: @chunk_limit))
|
230
230
|
ensure
|
231
231
|
temp_paths&.delete
|
232
232
|
end
|
@@ -237,11 +237,11 @@ module Kraps
|
|
237
237
|
begin
|
238
238
|
yield
|
239
239
|
rescue Kraps::Error
|
240
|
-
|
240
|
+
redis_queue.stop
|
241
241
|
raise
|
242
242
|
rescue StandardError => e
|
243
243
|
if retries >= num_retries
|
244
|
-
|
244
|
+
redis_queue.stop
|
245
245
|
raise
|
246
246
|
end
|
247
247
|
|
@@ -254,20 +254,23 @@ module Kraps
|
|
254
254
|
end
|
255
255
|
end
|
256
256
|
|
257
|
-
def
|
258
|
-
|
259
|
-
|
260
|
-
|
257
|
+
def dequeue
|
258
|
+
loop do
|
259
|
+
break if redis_queue.stopped?
|
260
|
+
break if redis_queue.size.zero?
|
261
261
|
|
262
|
-
|
263
|
-
|
262
|
+
redis_queue.dequeue do |payload|
|
263
|
+
payload ? yield(payload) : sleep(1)
|
264
|
+
end
|
264
265
|
end
|
266
|
+
end
|
265
267
|
|
266
|
-
|
267
|
-
|
268
|
-
|
268
|
+
def redis_queue
|
269
|
+
@redis_queue ||= RedisQueue.new(redis: Kraps.redis, token: @args["token"], namespace: Kraps.namespace, ttl: Kraps.job_ttl)
|
270
|
+
end
|
269
271
|
|
270
|
-
|
272
|
+
def download_all(token:, partition:)
|
273
|
+
Downloader.download_all(prefix: Kraps.driver.with_prefix("#{token}/#{partition}/"), concurrency: @concurrency)
|
271
274
|
end
|
272
275
|
|
273
276
|
def jobs
|
@@ -301,9 +304,5 @@ module Kraps
|
|
301
304
|
def partitioner
|
302
305
|
@partitioner ||= proc { |key| step.partitioner.call(key, step.partitions) }
|
303
306
|
end
|
304
|
-
|
305
|
-
def distributed_job
|
306
|
-
@distributed_job ||= Kraps.distributed_job_client.build(token: @args["token"])
|
307
|
-
end
|
308
307
|
end
|
309
308
|
end
|
data/lib/kraps.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require "distributed_job"
|
2
1
|
require "ruby-progressbar"
|
3
2
|
require "ruby-progressbar/outputs/null"
|
4
3
|
require "map_reduce"
|
@@ -9,6 +8,7 @@ require_relative "kraps/drivers"
|
|
9
8
|
require_relative "kraps/actions"
|
10
9
|
require_relative "kraps/parallelizer"
|
11
10
|
require_relative "kraps/hash_partitioner"
|
11
|
+
require_relative "kraps/redis_queue"
|
12
12
|
require_relative "kraps/temp_path"
|
13
13
|
require_relative "kraps/temp_paths"
|
14
14
|
require_relative "kraps/timeout_queue"
|
@@ -19,6 +19,7 @@ require_relative "kraps/runner"
|
|
19
19
|
require_relative "kraps/step"
|
20
20
|
require_relative "kraps/frame"
|
21
21
|
require_relative "kraps/worker"
|
22
|
+
require_relative "kraps/downloader"
|
22
23
|
|
23
24
|
module Kraps
|
24
25
|
class Error < StandardError; end
|
@@ -27,9 +28,11 @@ module Kraps
|
|
27
28
|
class JobStopped < Error; end
|
28
29
|
class IncompatibleFrame < Error; end
|
29
30
|
|
30
|
-
def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
|
31
|
+
def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 4 * 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
|
31
32
|
@driver = driver
|
32
|
-
@
|
33
|
+
@redis = redis
|
34
|
+
@namespace = namespace
|
35
|
+
@job_ttl = job_ttl.to_i
|
33
36
|
@show_progress = show_progress
|
34
37
|
@enqueuer = enqueuer
|
35
38
|
end
|
@@ -38,8 +41,16 @@ module Kraps
|
|
38
41
|
@driver
|
39
42
|
end
|
40
43
|
|
41
|
-
def self.
|
42
|
-
@
|
44
|
+
def self.redis
|
45
|
+
@redis
|
46
|
+
end
|
47
|
+
|
48
|
+
def self.namespace
|
49
|
+
@namespace
|
50
|
+
end
|
51
|
+
|
52
|
+
def self.job_ttl
|
53
|
+
@job_ttl
|
43
54
|
end
|
44
55
|
|
45
56
|
def self.show_progress?
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kraps
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Benjamin Vetter
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attachie
|
@@ -24,20 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: distributed_job
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
28
|
name: map-reduce-ruby
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,6 +128,7 @@ files:
|
|
142
128
|
- docker-compose.yml
|
143
129
|
- lib/kraps.rb
|
144
130
|
- lib/kraps/actions.rb
|
131
|
+
- lib/kraps/downloader.rb
|
145
132
|
- lib/kraps/drivers.rb
|
146
133
|
- lib/kraps/frame.rb
|
147
134
|
- lib/kraps/hash_partitioner.rb
|
@@ -149,6 +136,7 @@ files:
|
|
149
136
|
- lib/kraps/job.rb
|
150
137
|
- lib/kraps/job_resolver.rb
|
151
138
|
- lib/kraps/parallelizer.rb
|
139
|
+
- lib/kraps/redis_queue.rb
|
152
140
|
- lib/kraps/runner.rb
|
153
141
|
- lib/kraps/step.rb
|
154
142
|
- lib/kraps/temp_path.rb
|