kraps 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +79 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +113 -0
- data/LICENSE.txt +21 -0
- data/README.md +333 -0
- data/Rakefile +6 -0
- data/docker-compose.yml +6 -0
- data/lib/kraps/actions.rb +10 -0
- data/lib/kraps/drivers.rb +37 -0
- data/lib/kraps/frame.rb +3 -0
- data/lib/kraps/interval.rb +34 -0
- data/lib/kraps/job.rb +62 -0
- data/lib/kraps/parallelizer.rb +29 -0
- data/lib/kraps/runner.rb +160 -0
- data/lib/kraps/step.rb +3 -0
- data/lib/kraps/temp_path.rb +29 -0
- data/lib/kraps/temp_paths.rb +34 -0
- data/lib/kraps/timeout_queue.rb +27 -0
- data/lib/kraps/version.rb +3 -0
- data/lib/kraps/worker.rb +183 -0
- data/lib/kraps.rb +48 -0
- metadata +182 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
require "attachie"
|
2
|
+
require "attachie/s3_driver"
|
3
|
+
require "attachie/fake_driver"
|
4
|
+
|
5
|
+
module Kraps
  # Wrappers that bundle an Attachie driver with the bucket and the optional
  # key prefix it is used with.
  module Drivers
    # Behaviour shared by the driver wrappers defined in this module.
    module Driver
      # Builds a storage key from the configured prefix and +path+.
      #
      # +nil+ components are dropped before joining, so when no prefix is
      # set only +path+ is passed to File.join.
      def with_prefix(path)
        segments = [@prefix, path].reject(&:nil?)

        File.join(*segments)
      end
    end

    # Wrapper around Attachie::S3Driver, built from the given S3 client.
    class S3Driver
      include Driver

      attr_reader :driver, :bucket, :prefix

      # s3_client - passed on to Attachie::S3Driver.new
      # bucket    - bucket exposed via #bucket
      # prefix    - optional key prefix used by #with_prefix
      def initialize(s3_client:, bucket:, prefix: nil)
        @driver = Attachie::S3Driver.new(s3_client)
        @bucket, @prefix = bucket, prefix
      end
    end

    # Wrapper around Attachie::FakeDriver; takes no client.
    class FakeDriver
      include Driver

      attr_reader :driver, :bucket, :prefix

      # bucket - bucket exposed via #bucket
      # prefix - optional key prefix used by #with_prefix
      def initialize(bucket:, prefix: nil)
        @driver = Attachie::FakeDriver.new
        @bucket, @prefix = bucket, prefix
      end
    end
  end
end
|
data/lib/kraps/frame.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Kraps
  # Runs a block on a background thread, either whenever +timeout+ seconds
  # pass without a request or immediately when #fire is called.
  class Interval
    include MonitorMixin

    # timeout - seconds the background thread waits for a #fire request
    #           before running the block on its own
    # block   - the work to run on every tick
    def initialize(timeout, &block)
      super()

      @stopped = false
      @main_queue = TimeoutQueue.new
      @thread_queue = TimeoutQueue.new

      @thread = Thread.new { tick_until_stopped(timeout, block) }
    end

    # Asks the background thread to run the block now and waits up to
    # +timeout+ seconds for its acknowledgement. Returns whatever the
    # acknowledgement queue hands back (nil when nothing arrived in time).
    def fire(timeout:)
      @thread_queue.enq(1)
      @main_queue.deq(timeout: timeout)
    end

    # Marks the interval as stopped, wakes the background thread with a nil
    # item so it notices the flag, and joins the thread.
    def stop
      @stopped = true
      @thread_queue.enq(nil)
      @thread.join
    end

    private

    # Body of the background thread. A truthy item means #fire requested the
    # tick, so it is acknowledged on the main queue after the block ran.
    def tick_until_stopped(timeout, block)
      until @stopped
        requested = @thread_queue.deq(timeout: timeout)

        block.call unless @stopped

        @main_queue.enq(1) if requested
      end
    end
  end
end
|
data/lib/kraps/job.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Kraps
  # Describes a job as an ordered list of steps. Every builder method leaves
  # the receiver untouched and returns a copy with one more step appended.
  class Job
    attr_reader :steps

    # worker - default worker recorded in the args of every step
    def initialize(worker:)
      @worker = worker
      @steps = []
      @partitions = 0
      @partitioner = MapReduce::HashPartitioner.new(@partitions)
    end

    # Returns a copy with a PARALLELIZE step appended. The given partitions
    # and partitioner become the copy's current ones.
    def parallelize(partitions:, partitioner: MapReduce::HashPartitioner.new(partitions), worker: @worker, &block)
      derive do
        @partitions = partitions
        @partitioner = partitioner

        append_step(Actions::PARALLELIZE, worker, block)
      end
    end

    # Returns a copy with a MAP step appended. The copy's partitions are
    # replaced only when +partitions+ is given; its partitioner is replaced
    # when either +partitioner+ or +partitions+ is given.
    def map(partitions: nil, partitioner: nil, worker: @worker, &block)
      derive do
        @partitions = partitions if partitions

        if partitioner || partitions
          @partitioner = partitioner || MapReduce::HashPartitioner.new(partitions)
        end

        append_step(Actions::MAP, worker, block)
      end
    end

    # Returns a copy with a REDUCE step appended.
    def reduce(worker: @worker, &block)
      derive { append_step(Actions::REDUCE, worker, block) }
    end

    # Returns a copy with an EACH_PARTITION step appended.
    def each_partition(worker: @worker, &block)
      derive { append_step(Actions::EACH_PARTITION, worker, block) }
    end

    # Returns a copy with a MAP step appended that passes every key/value
    # pair through unchanged while applying the new partitioning.
    def repartition(partitions:, partitioner: nil, worker: @worker)
      map(partitions: partitions, partitioner: partitioner, worker: worker) do |key, value, collector|
        collector.call(key, value)
      end
    end

    # Duplicates the job, giving the copy its own steps array so appending
    # to the copy does not affect the receiver.
    def fresh
      copy = dup
      copy.instance_variable_set(:@steps, @steps.dup)
      copy
    end

    private

    # Evaluates +changes+ in the context of a fresh copy and returns the copy.
    def derive(&changes)
      fresh.tap { |job| job.instance_eval(&changes) }
    end

    # Appends a step for +action+ using the current partitions/partitioner.
    def append_step(action, worker, block)
      step_args = { partitions: @partitions, partitioner: @partitioner, worker: worker }

      @steps << Step.new(action: action, args: step_args, block: block)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Kraps
  # Runs a block for every element of an enumerable using a fixed number of
  # threads.
  class Parallelizer
    # Yields every element of +enum+ to the given block, spreading the calls
    # over +num_threads+ threads.
    #
    # All elements are queued up front. When the block raises a
    # StandardError, the remaining threads stop picking up new elements and
    # the error is re-raised in the calling thread once all threads have
    # been joined. Returns +enum+.
    def self.each(enum, num_threads)
      queue = Queue.new

      enum.each { |element| queue.push(element) }

      stopped = false

      threads = Array.new(num_threads) do
        Thread.new do
          until stopped
            # Only the non-blocking pop is guarded: a ThreadError here means
            # the queue is drained. A ThreadError raised by the block itself
            # must be reported like any other error instead of being
            # mistaken for "queue empty".
            begin
              element = queue.pop(true)
            rescue ThreadError
              break
            end

            yield element
          end

          nil
        rescue StandardError => e
          stopped = true

          # Returned as the thread's value and re-raised by the caller below.
          e
        end
      end

      threads.each(&:join).each do |thread|
        raise thread.value if thread.value.is_a?(Exception)
      end

      enum
    end
  end
end
|
data/lib/kraps/runner.rb
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
module Kraps
  # Builds the jobs of a job class and runs all of their steps in order.
  class Runner
    # klass - job class; it is instantiated without arguments and its #call
    #         receives the arguments given to Runner#call
    def initialize(klass)
      @klass = klass
    end

    # Builds the jobs via +klass.new.call(*args, **kwargs)+ and runs every
    # step of every job through a StepRunner. The frame returned for one
    # step is handed to the next step of the same job; each job starts with
    # a nil frame. Returns the array of jobs.
    def call(*args, **kwargs)
      jobs = Array(@klass.new.call(*args, **kwargs))

      jobs.each_with_index do |job, job_index|
        frame = nil

        job.steps.each_index do |step_index|
          step_runner = StepRunner.new(
            klass: @klass,
            args: args,
            kwargs: kwargs,
            jobs: jobs,
            job_index: job_index,
            step_index: step_index,
            frame: frame
          )

          frame = step_runner.call
        end
      end

      jobs
    end

    # Runs a single step: enqueues one worker payload per item/partition
    # and waits until the distributed job is finished or stopped.
    class StepRunner
      def initialize(klass:, args:, kwargs:, jobs:, job_index:, step_index:, frame:)
        @klass = klass
        @args = args
        @kwargs = kwargs
        @jobs = jobs
        @job_index = job_index
        @step_index = step_index
        @frame = frame
        @job = @jobs[@job_index]
        @step = @job.steps[@step_index]
      end

      # Performs the step unless it already carries a frame and returns the
      # step's frame. Raises InvalidAction for actions not in Actions::ALL.
      def call
        raise(InvalidAction, "Invalid action #{@step.action}") unless Actions::ALL.include?(@step.action)

        @step.frame ||= send(:"perform_#{@step.action}")
      end

      private

      # Enqueues one payload per item collected from the step's block.
      def perform_parallelize
        items = Enumerator.new do |yielder|
          @step.block.call(proc { |item| yielder << item })
        end

        with_distributed_job do |distributed_job|
          push_and_wait(distributed_job, items) do |item, part|
            enqueue(token: distributed_job.token, part: part, item: item)
          end

          frame_for(distributed_job)
        end
      end

      # Enqueues one payload per partition of the incoming frame and
      # returns a new frame for this step's output.
      def perform_map
        with_distributed_job do |distributed_job|
          enqueue_frame_partitions(distributed_job)

          frame_for(distributed_job)
        end
      end

      # Same fan-out as #perform_map; the action is read by the worker
      # from the step itself.
      def perform_reduce
        with_distributed_job do |distributed_job|
          enqueue_frame_partitions(distributed_job)

          frame_for(distributed_job)
        end
      end

      # Enqueues one payload per partition and passes the incoming frame
      # on unchanged.
      def perform_each_partition
        with_distributed_job do |distributed_job|
          enqueue_frame_partitions(distributed_job)

          @frame
        end
      end

      # Pushes every partition index of the incoming frame and enqueues a
      # payload for each.
      def enqueue_frame_partitions(distributed_job)
        push_and_wait(distributed_job, 0...@frame.partitions) do |partition, part|
          enqueue(token: distributed_job.token, part: part, partition: partition)
        end
      end

      # Frame describing this step's output.
      def frame_for(distributed_job)
        Frame.new(token: distributed_job.token, partitions: @step.args[:partitions])
      end

      # Serializes the payload as JSON and hands it, together with the
      # step's worker, to the configured enqueuer.
      def enqueue(token:, part:, **rest)
        payload = JSON.generate(
          job_index: @job_index,
          step_index: @step_index,
          frame: @frame.to_h,
          token: token,
          part: part,
          klass: @klass,
          args: @args,
          kwargs: @kwargs,
          **rest
        )

        Kraps.enqueuer.call(@step.args[:worker], payload)
      end

      # Builds a distributed job with a random token and yields it. On
      # Interrupt the distributed job is stopped before re-raising.
      def with_distributed_job
        distributed_job = nil

        begin
          distributed_job = Kraps.distributed_job_client.build(token: SecureRandom.hex)

          yield(distributed_job)
        rescue Interrupt
          distributed_job&.stop
          raise
        end
      end

      # Pushes every element of +enum+ to the distributed job, yielding
      # each element with its part, then polls once per second until the
      # job is finished or stopped. Raises JobStopped when it was stopped.
      def push_and_wait(distributed_job, enum, &block)
        progress_bar = build_progress_bar("#{@klass}: job #{@job_index + 1}/#{@jobs.size}, step #{@step_index + 1}/#{@job.steps.size}, token #{distributed_job.token}, %a, %c/%C (%p%) => #{@step.action}")

        push_parts(distributed_job, enum, progress_bar, &block)
        await_completion(distributed_job, progress_bar)

        raise(JobStopped, "The job was stopped") if distributed_job.stopped?
      ensure
        progress_bar&.stop
      end

      # Push phase: the interval updates the progress bar total with the
      # number of parts pushed so far and is always stopped afterwards.
      def push_parts(distributed_job, enum, progress_bar)
        pushed = 0
        interval = Interval.new(1) { progress_bar.total = pushed }

        distributed_job.push_each(enum) do |item, part|
          pushed += 1
          interval.fire(timeout: 1)

          yield(item, part)
        end
      ensure
        interval&.stop
      end

      # Wait phase: refreshes the progress bar from the distributed job's
      # counters until the job is finished or stopped.
      def await_completion(distributed_job, progress_bar)
        loop do
          progress_bar.total = distributed_job.total
          progress_bar.progress = progress_bar.total - distributed_job.count

          break if distributed_job.finished? || distributed_job.stopped?

          sleep(1)
        end
      end

      # Creates the progress bar; output goes to the null output unless
      # Kraps.show_progress? is truthy.
      def build_progress_bar(format)
        options = { format: format, total: 1, autofinish: false }
        options[:output] = ProgressBar::Outputs::Null unless Kraps.show_progress?

        ProgressBar.create(options)
      end
    end
  end
end
|
data/lib/kraps/step.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module Kraps
  # A uniquely named, empty file in the system temp directory that is
  # removed again via #unlink, at the end of the block form, or by a
  # finalizer registered for the object.
  class TempPath
    attr_reader :path

    # Creates the file. The name is built from the optional prefix, 16
    # random hex characters, the process id and the optional suffix, joined
    # by dots.
    #
    # When a block is given, the TempPath is yielded to it and the file is
    # removed when the block returns or raises.
    #
    # Raises Errno::EEXIST when the path already exists (File::EXCL).
    def initialize(prefix: nil, suffix: nil)
      @path = File.join(Dir.tmpdir, [prefix, SecureRandom.hex[0, 16], Process.pid, suffix].compact.join("."))

      # 0600: the temp dir is usually shared, so the file must not be
      # readable by other users (same permissions Tempfile uses).
      File.open(@path, File::CREAT | File::EXCL, 0o600) {}

      # The finalizer proc is built by a class method so it does not
      # capture self.
      ObjectSpace.define_finalizer(self, self.class.finalize(@path))

      return unless block_given?

      begin
        # Yield self so the block can actually reach #path.
        yield(self)
      ensure
        unlink
      end
    end

    # Removes the file; a missing file is ignored.
    def unlink
      FileUtils.rm_f(@path)
    end

    # Returns the finalizer proc removing +path+.
    def self.finalize(path)
      proc { FileUtils.rm_f(path) }
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Kraps
  # A collection of TempPath objects whose operations all run under the
  # object's monitor.
  class TempPaths
    include MonitorMixin
    include Enumerable

    def initialize
      super()

      @temp_paths = []
    end

    # Creates a new TempPath, remembers it and returns it.
    def add
      synchronize do
        TempPath.new.tap { |created| @temp_paths << created }
      end
    end

    # Unlinks every remembered temp path.
    def unlink
      synchronize { @temp_paths.each(&:unlink) }
    end

    # Yields every remembered temp path; returns an enumerator when called
    # without a block.
    def each(&block)
      return enum_for(__method__) unless block_given?

      synchronize { @temp_paths.each(&block) }
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Kraps
  # FIFO queue guarded by a monitor whose #deq waits at most a given number
  # of seconds for an item.
  class TimeoutQueue
    include MonitorMixin

    def initialize
      super()

      @queue = []
      @cond = new_cond
    end

    # Appends +item+ and signals one waiting consumer.
    def enq(item)
      synchronize do
        @queue.push(item)
        @cond.signal
      end
    end

    # Removes and returns the oldest item. When the queue is empty, waits
    # once for up to +timeout+ seconds; returns nil if it is still empty
    # afterwards.
    def deq(timeout:)
      synchronize do
        @cond.wait(timeout) if @queue.empty?

        # Array#shift already returns nil for an empty array.
        @queue.shift
      end
    end
  end
end
|
data/lib/kraps/worker.rb
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
module Kraps
  # Executes one part of one step of a job, as described by the JSON
  # payload it is constructed with.
  class Worker
    # json         - payload string; the keys read by this class are
    #                "klass", "args", "kwargs", "job_index", "step_index",
    #                "frame", "token", "part", "item" and "partition"
    # memory_limit - passed to MapReduce::Mapper
    # chunk_limit  - passed to MapReduce::Reducer#reduce
    # concurrency  - number of threads used for downloading chunks
    def initialize(json, memory_limit:, chunk_limit:, concurrency:)
      @args = JSON.parse(json)
      @memory_limit = memory_limit
      @chunk_limit = chunk_limit
      @concurrency = concurrency
    end

    # Performs the step and marks the part as done. Does nothing when the
    # distributed job is already stopped.
    #
    # retries - how many times a failed attempt is retried (so up to
    #           retries + 1 attempts are made)
    #
    # Raises InvalidAction when the step's action is not in Actions::ALL.
    def call(retries: 3)
      return if distributed_job.stopped?

      raise(InvalidAction, "Invalid action #{step.action}") unless Actions::ALL.include?(step.action)

      with_retries(retries) do # TODO: allow to use queue based retries
        send(:"perform_#{step.action}")

        distributed_job.done(@args["part"])
      end
    end

    private

    # Maps the payload's item to the pair [item, nil] and stores the
    # shuffled chunks.
    def perform_parallelize
      implementation = Class.new do
        def map(key)
          yield(key, nil)
        end
      end

      mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
      mapper.map(@args["item"])

      mapper.shuffle do |partition, tempfile| # TODO: upload in parallel
        store_chunk(partition, tempfile)
      end
    end

    # Downloads the chunks of the incoming partition, feeds every JSON
    # line as key/value pair through the step's block and stores the
    # shuffled chunks.
    def perform_map
      with_downloaded_chunks do |temp_paths|
        current_step = step

        implementation = Object.new
        implementation.define_singleton_method(:map) do |key, value, &block|
          current_step.block.call(key, value, block)
        end

        mapper = MapReduce::Mapper.new(implementation, partitioner: partitioner, memory_limit: @memory_limit)

        temp_paths.each do |temp_path|
          File.open(temp_path.path) do |stream|
            stream.each_line do |line|
              key, value = JSON.parse(line)

              mapper.map(key, value)
            end
          end
        end

        mapper.shuffle do |partition, tempfile| # TODO: upload in parallel
          store_chunk(partition, tempfile)
        end
      end
    end

    # Downloads the chunks of the incoming partition into the reducer,
    # reduces them with the step's block and stores the result as a single
    # chunk of JSON lines for the same partition.
    def perform_reduce
      current_step = step

      implementation = Object.new
      implementation.define_singleton_method(:reduce) do |key, value1, value2|
        current_step.block.call(key, value1, value2)
      end

      reducer = MapReduce::Reducer.new(implementation)

      Parallelizer.each(Kraps.driver.driver.list(Kraps.driver.bucket, prefix: frame_partition_prefix), @concurrency) do |file|
        Kraps.driver.driver.download(file, Kraps.driver.bucket, reducer.add_chunk)
      end

      tempfile = Tempfile.new

      reducer.reduce(chunk_limit: @chunk_limit) do |key, value|
        tempfile.puts(JSON.generate([key, value]))
      end

      store_chunk(@args["partition"], tempfile.tap(&:rewind))
    ensure
      tempfile&.close(true)
    end

    # Downloads the chunks of the incoming partition and calls the step's
    # block with the partition and a lazy enumerator over the parsed JSON
    # lines of all chunks.
    def perform_each_partition
      with_downloaded_chunks do |temp_paths|
        enum = Enumerator::Lazy.new(temp_paths) do |yielder, temp_path|
          File.open(temp_path.path) do |stream|
            stream.each_line do |line|
              yielder << JSON.parse(line)
            end
          end
        end

        step.block.call(@args["partition"], enum)
      end
    end

    # Lists the chunks of the incoming frame's partition (sorted),
    # downloads them concurrently into temp paths and yields the temp
    # paths. The temp paths are always unlinked afterwards.
    def with_downloaded_chunks
      temp_paths = TempPaths.new

      files = Kraps.driver.driver.list(Kraps.driver.bucket, prefix: frame_partition_prefix).sort

      # Temp paths are created up front, in sorted file order, so the
      # threads below only look them up.
      temp_paths_index = files.each_with_object({}) do |file, hash|
        hash[file] = temp_paths.add
      end

      Parallelizer.each(files, @concurrency) do |file|
        Kraps.driver.driver.download(file, Kraps.driver.bucket, temp_paths_index[file].path)
      end

      yield(temp_paths)
    ensure
      temp_paths&.unlink
    end

    # Key prefix below which the chunks of the incoming partition live.
    def frame_partition_prefix
      Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{@args["partition"]}/")
    end

    # Stores +io+ as this part's chunk of +partition+ below the current
    # token.
    def store_chunk(partition, io)
      Kraps.driver.driver.store(
        Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json"), io, Kraps.driver.bucket
      )
    end

    # Runs the block, retrying it up to +num_retries+ times on
    # StandardError with a 5 second pause before each retry. Once the
    # retries are used up, the distributed job is stopped and the error is
    # re-raised.
    #
    # A Kraps::Error is never retried: the distributed job is stopped and
    # the error is not re-raised.
    # NOTE(review): swallowing Kraps::Error is kept from the original
    # implementation - confirm that not re-raising is intended.
    def with_retries(num_retries)
      retries = 0

      begin
        yield
      rescue Kraps::Error
        distributed_job.stop
      rescue StandardError
        # Check before counting, so num_retries really is the number of
        # retries, and give up right away instead of sleeping first.
        if retries >= num_retries
          distributed_job.stop
          raise
        end

        retries += 1
        sleep(5)

        retry
      end
    end

    # Rebuilds the jobs by instantiating the payload's class and calling
    # it with the payload's args and (symbolized) kwargs.
    def jobs
      # Object.const_get resolves namespaced names ("A::B") without
      # relying on ActiveSupport's String#constantize.
      @jobs ||= Array(Object.const_get(@args["klass"]).new.call(*@args["args"], **@args["kwargs"].transform_keys(&:to_sym)))
    end

    # The job at the payload's job_index.
    def job
      @job ||= begin
        job_index = @args["job_index"]

        jobs[job_index] || raise(InvalidJob, "Can't find job #{job_index}")
      end
    end

    def steps
      @steps ||= job.steps
    end

    # The step at the payload's step_index; raises InvalidStep when there
    # is none.
    def step
      @step ||= begin
        step_index = @args["step_index"]

        steps[step_index] || raise(InvalidStep, "Can't find step #{step_index}")
      end
    end

    def partitioner
      @partitioner ||= step.args[:partitioner]
    end

    # Distributed job identified by the payload's token.
    def distributed_job
      @distributed_job ||= Kraps.distributed_job_client.build(token: @args["token"])
    end
  end
end
|
data/lib/kraps.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require_relative "kraps/version"
|
2
|
+
require_relative "kraps/drivers"
|
3
|
+
require_relative "kraps/actions"
|
4
|
+
require_relative "kraps/parallelizer"
|
5
|
+
require_relative "kraps/temp_path"
|
6
|
+
require_relative "kraps/temp_paths"
|
7
|
+
require_relative "kraps/timeout_queue"
|
8
|
+
require_relative "kraps/interval"
|
9
|
+
require_relative "kraps/job"
|
10
|
+
require_relative "kraps/runner"
|
11
|
+
require_relative "kraps/step"
|
12
|
+
require_relative "kraps/frame"
|
13
|
+
require_relative "kraps/worker"
|
14
|
+
require "distributed_job"
|
15
|
+
require "ruby-progressbar"
|
16
|
+
require "ruby-progressbar/outputs/null"
|
17
|
+
require "map_reduce"
|
18
|
+
require "redis"
|
19
|
+
|
20
|
+
# Top-level namespace holding the error classes and the global
# configuration used by the runner and the workers.
module Kraps
  class Error < StandardError; end
  class InvalidAction < Error; end
  # Raised by Worker when the payload's job index does not match any job.
  # Worker references this constant, so it must be defined here.
  class InvalidJob < Error; end
  class InvalidStep < Error; end
  class JobStopped < Error; end

  # Stores the global configuration.
  #
  # driver        - storage driver returned by Kraps.driver
  # redis         - redis connection passed to DistributedJob::Client
  #                 (default: Redis.new)
  # namespace     - namespace passed to DistributedJob::Client
  # job_ttl       - passed to DistributedJob::Client as default_ttl
  #                 (default: 24 * 60 * 60)
  # show_progress - value returned by Kraps.show_progress?
  # enqueuer      - callable receiving (worker, json); the default calls
  #                 worker.perform_async(json)
  def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
    @driver = driver
    @distributed_job_client = DistributedJob::Client.new(redis: redis, namespace: namespace, default_ttl: job_ttl)
    @show_progress = show_progress
    @enqueuer = enqueuer
  end

  # The configured storage driver (nil before .configure was called).
  def self.driver
    @driver
  end

  # The DistributedJob::Client built by .configure.
  def self.distributed_job_client
    @distributed_job_client
  end

  # Whether progress output was requested via .configure.
  def self.show_progress?
    @show_progress
  end

  # The callable used to enqueue worker payloads.
  def self.enqueuer
    @enqueuer
  end
end
|