kraps 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +79 -0
- data/CODE_OF_CONDUCT.md +84 -0
- data/Gemfile +8 -0
- data/Gemfile.lock +113 -0
- data/LICENSE.txt +21 -0
- data/README.md +333 -0
- data/Rakefile +6 -0
- data/docker-compose.yml +6 -0
- data/lib/kraps/actions.rb +10 -0
- data/lib/kraps/drivers.rb +37 -0
- data/lib/kraps/frame.rb +3 -0
- data/lib/kraps/interval.rb +34 -0
- data/lib/kraps/job.rb +62 -0
- data/lib/kraps/parallelizer.rb +29 -0
- data/lib/kraps/runner.rb +160 -0
- data/lib/kraps/step.rb +3 -0
- data/lib/kraps/temp_path.rb +29 -0
- data/lib/kraps/temp_paths.rb +34 -0
- data/lib/kraps/timeout_queue.rb +27 -0
- data/lib/kraps/version.rb +3 -0
- data/lib/kraps/worker.rb +183 -0
- data/lib/kraps.rb +48 -0
- metadata +182 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
require "attachie"
|
2
|
+
require "attachie/s3_driver"
|
3
|
+
require "attachie/fake_driver"
|
4
|
+
|
5
|
+
module Kraps
|
6
|
+
# Storage driver wrappers. Each wrapper bundles an Attachie driver instance
# with the bucket to use and an optional key prefix.
module Drivers
  # Behaviour shared by all driver wrappers.
  module Driver
    # Prepends the configured prefix (when one is set) to +path+.
    #
    # @param path [String, nil] the key relative to the prefix
    # @return [String] the joined key; nil segments are skipped
    def with_prefix(path)
      segments = [@prefix, path].reject(&:nil?)

      File.join(*segments)
    end
  end

  # Wrapper around Attachie::S3Driver.
  class S3Driver
    include Driver

    attr_reader :driver, :bucket, :prefix

    # @param s3_client the client object handed to Attachie::S3Driver.new
    # @param bucket the bucket used for all stored objects
    # @param prefix [String, nil] optional prefix applied by #with_prefix
    def initialize(s3_client:, bucket:, prefix: nil)
      @bucket = bucket
      @prefix = prefix
      @driver = Attachie::S3Driver.new(s3_client)
    end
  end

  # Wrapper around Attachie::FakeDriver.
  class FakeDriver
    include Driver

    attr_reader :driver, :bucket, :prefix

    # @param bucket the bucket used for all stored objects
    # @param prefix [String, nil] optional prefix applied by #with_prefix
    def initialize(bucket:, prefix: nil)
      @bucket = bucket
      @prefix = prefix
      @driver = Attachie::FakeDriver.new
    end
  end
end
|
37
|
+
end
|
data/lib/kraps/frame.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
module Kraps
|
2
|
+
# Runs a block on a background thread, either when #fire is called or when
# +timeout+ seconds pass without a #fire, whichever comes first.
class Interval
  include MonitorMixin

  # @param timeout [Numeric] seconds the background thread waits for a #fire
  #   before invoking the block anyway
  # @param block the callback run on the background thread
  def initialize(timeout, &block)
    super()

    @stopped = false
    @thread_queue = TimeoutQueue.new
    @main_queue = TimeoutQueue.new

    @thread = Thread.new { run(timeout, block) }
  end

  # Wakes the background thread and waits up to +timeout+ seconds for it to
  # acknowledge that the callback ran.
  #
  # @return [Object, nil] the acknowledgement item, or nil when none arrived
  def fire(timeout:)
    @thread_queue.enq(1)
    @main_queue.deq(timeout: timeout)
  end

  # Marks the interval as stopped, wakes the background thread and joins it.
  def stop
    @stopped = true
    @thread_queue.enq(nil)
    @thread.join
  end

  private

  # Body of the background thread.
  def run(timeout, callback)
    loop do
      break if @stopped

      fired = @thread_queue.deq(timeout: timeout)

      # #stop may have been called while we were waiting.
      callback.call unless @stopped

      # Only acknowledge explicit fires, not timeouts or the stop signal.
      @main_queue.enq(1) if fired
    end
  end
end
|
34
|
+
end
|
data/lib/kraps/job.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
module Kraps
|
2
|
+
# Immutable-style builder for a list of steps. Every builder method returns a
# copy of the job with one more step appended; the receiver is left untouched.
class Job
  attr_reader :steps

  # @param worker the default worker recorded in each step's args
  def initialize(worker:)
    @worker = worker
    @steps = []
    @partitions = 0
    @partitioner = MapReduce::HashPartitioner.new(@partitions)
  end

  # Appends a PARALLELIZE step and sets the partition count and partitioner
  # used by the following steps.
  def parallelize(partitions:, partitioner: MapReduce::HashPartitioner.new(partitions), worker: @worker, &block)
    derive do
      @partitions = partitions
      @partitioner = partitioner

      append_step(Actions::PARALLELIZE, worker, block)
    end
  end

  # Appends a MAP step. When +partitions+ and/or +partitioner+ are given they
  # replace the values inherited from the previous steps.
  def map(partitions: nil, partitioner: nil, worker: @worker, &block)
    derive do
      @partitions = partitions if partitions

      if partitioner
        @partitioner = partitioner
      elsif partitions
        @partitioner = MapReduce::HashPartitioner.new(partitions)
      end

      append_step(Actions::MAP, worker, block)
    end
  end

  # Appends a REDUCE step.
  def reduce(worker: @worker, &block)
    derive { append_step(Actions::REDUCE, worker, block) }
  end

  # Appends an EACH_PARTITION step.
  def each_partition(worker: @worker, &block)
    derive { append_step(Actions::EACH_PARTITION, worker, block) }
  end

  # Appends a MAP step that passes every pair through unchanged, using the
  # new partition count (and optional partitioner).
  def repartition(partitions:, partitioner: nil, worker: @worker)
    map(partitions: partitions, partitioner: partitioner, worker: worker) do |key, value, collector|
      collector.call(key, value)
    end
  end

  # @return [Job] a copy of this job with its own steps array
  def fresh
    copy = dup
    copy.instance_variable_set(:@steps, @steps.dup)
    copy
  end

  private

  # Copies the job, evaluates +changes+ in the context of the copy and
  # returns the copy.
  def derive(&changes)
    job = fresh
    job.instance_eval(&changes)
    job
  end

  # Pushes a step carrying the current partitioning state.
  def append_step(action, worker, block)
    step_args = { partitions: @partitions, partitioner: @partitioner, worker: worker }

    @steps << Step.new(action: action, args: step_args, block: block)
  end
end
|
62
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Kraps
|
2
|
+
# Processes the elements of an enumerable with a fixed number of threads.
class Parallelizer
  # Yields every element of +enum+ to the given block, using +num_threads+
  # threads. Once a thread sees a StandardError, the other threads stop
  # taking new elements, and the error is re-raised in the calling thread
  # after all threads have finished.
  #
  # @param enum [#each] the elements to process
  # @param num_threads [Integer] number of worker threads to start
  # @return [Object] +enum+ itself
  # @raise [StandardError] an error raised by the block
  def self.each(enum, num_threads)
    queue = Queue.new

    enum.each { |element| queue.push(element) }

    stopped = false

    threads = Array.new(num_threads) do
      Thread.new do
        until stopped
          # Only the non-blocking pop is guarded: a ThreadError raised by the
          # caller's block must be reported, not mistaken for "queue empty".
          element = begin
            queue.pop(true)
          rescue ThreadError
            break
          end

          yield element
        end

        nil
      rescue StandardError => e
        stopped = true

        e
      end
    end

    threads.each(&:join).each do |thread|
      raise thread.value if thread.value.is_a?(Exception)
    end

    enum
  end
end
|
29
|
+
end
|
data/lib/kraps/runner.rb
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
module Kraps
|
2
|
+
# Drives all steps of all jobs returned by a job class, one step at a time.
class Runner
  # @param klass the job class; instantiated with +new+ and invoked via +call+
  def initialize(klass)
    @klass = klass
  end

  # Builds the jobs by calling the job class with the given arguments and
  # runs every step of every job in order. The frame returned by one step is
  # handed to the next step of the same job.
  #
  # @return [Array] the jobs
  def call(*args, **kwargs)
    jobs = Array(@klass.new.call(*args, **kwargs))

    jobs.each_with_index do |job, job_index|
      frame = nil

      job.steps.each_with_index do |_step, step_index|
        frame = StepRunner.new(
          klass: @klass,
          args: args,
          kwargs: kwargs,
          jobs: jobs,
          job_index: job_index,
          step_index: step_index,
          frame: frame
        ).call
      end
    end

    jobs
  end

  # Runs a single step: enqueues one message per item/partition through
  # Kraps.enqueuer and waits until the distributed job finished or stopped.
  class StepRunner
    def initialize(klass:, args:, kwargs:, jobs:, job_index:, step_index:, frame:)
      @klass = klass
      @args = args
      @kwargs = kwargs
      @jobs = jobs
      @job_index = job_index
      @step_index = step_index
      @frame = frame

      @job = @jobs[@job_index]
      @step = @job.steps[@step_index]
    end

    # Performs the step unless it already has a frame.
    #
    # @return the step's frame
    # @raise [InvalidAction] when the step's action is not in Actions::ALL
    def call
      action = @step.action

      raise(InvalidAction, "Invalid action #{action}") unless Actions::ALL.include?(action)

      @step.frame ||= send("perform_#{action}")
    end

    private

    def perform_parallelize
      # The step's block receives a collector; each collected item becomes
      # one enqueued message.
      items = Enumerator.new do |yielder|
        @step.block.call(proc { |item| yielder << item })
      end

      with_distributed_job do |distributed_job|
        push_and_wait(distributed_job, items) do |item, part|
          enqueue(token: distributed_job.token, part: part, item: item)
        end

        build_frame(distributed_job)
      end
    end

    def perform_map
      with_distributed_job do |distributed_job|
        enqueue_partitions(distributed_job)

        build_frame(distributed_job)
      end
    end

    def perform_reduce
      with_distributed_job do |distributed_job|
        enqueue_partitions(distributed_job)

        build_frame(distributed_job)
      end
    end

    # Unlike the other actions this does not produce a new frame; the
    # incoming frame is passed on.
    def perform_each_partition
      with_distributed_job do |distributed_job|
        enqueue_partitions(distributed_job)

        @frame
      end
    end

    # Enqueues one message per partition of the incoming frame and waits.
    def enqueue_partitions(distributed_job)
      push_and_wait(distributed_job, 0...@frame.partitions) do |partition, part|
        enqueue(token: distributed_job.token, part: part, partition: partition)
      end
    end

    # Frame describing the output of the current step.
    def build_frame(distributed_job)
      Frame.new(token: distributed_job.token, partitions: @step.args[:partitions])
    end

    # Hands the JSON encoded message to Kraps.enqueuer together with the
    # worker recorded in the step's args.
    def enqueue(token:, part:, **rest)
      payload = {
        job_index: @job_index,
        step_index: @step_index,
        frame: @frame.to_h,
        token: token,
        part: part,
        klass: @klass,
        args: @args,
        kwargs: @kwargs
      }.merge(rest)

      Kraps.enqueuer.call(@step.args[:worker], JSON.generate(payload))
    end

    # Builds a distributed job with a random token and yields it. The
    # distributed job is stopped when an Interrupt is raised.
    def with_distributed_job
      distributed_job = Kraps.distributed_job_client.build(token: SecureRandom.hex)

      yield(distributed_job)
    rescue Interrupt
      distributed_job&.stop
      raise
    end

    # Pushes every element of +enum+ to the distributed job, yielding each
    # element together with its part, then polls once per second until the
    # distributed job finished or stopped.
    #
    # @raise [JobStopped] when the distributed job was stopped
    def push_and_wait(distributed_job, enum)
      progress_bar = build_progress_bar(progress_format(distributed_job))

      begin
        pushed = 0

        # Refreshes the progress bar total while items are being pushed.
        ticker = Interval.new(1) { progress_bar.total = pushed }

        distributed_job.push_each(enum) do |item, part|
          pushed += 1
          ticker.fire(timeout: 1)

          yield(item, part)
        end
      ensure
        ticker&.stop
      end

      wait_for(distributed_job, progress_bar)

      raise(JobStopped, "The job was stopped") if distributed_job.stopped?
    ensure
      progress_bar&.stop
    end

    # Updates the progress bar once per second until the job is done.
    def wait_for(distributed_job, progress_bar)
      loop do
        progress_bar.total = distributed_job.total
        progress_bar.progress = progress_bar.total - distributed_job.count

        return if distributed_job.finished? || distributed_job.stopped?

        sleep(1)
      end
    end

    def progress_format(distributed_job)
      "#{@klass}: job #{@job_index + 1}/#{@jobs.size}, step #{@step_index + 1}/#{@job.steps.size}, token #{distributed_job.token}, %a, %c/%C (%p%) => #{@step.action}"
    end

    # Creates the progress bar; output goes to the null output unless
    # Kraps.show_progress? is set.
    def build_progress_bar(format)
      settings = { format: format, total: 1, autofinish: false }
      settings = settings.merge(output: ProgressBar::Outputs::Null) unless Kraps.show_progress?

      ProgressBar.create(settings)
    end
  end
end
|
160
|
+
end
|
data/lib/kraps/step.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module Kraps
|
2
|
+
# A uniquely named, empty file inside Dir.tmpdir. The file is removed by
# #unlink, at the end of the block passed to .new, or by a finalizer once the
# object is garbage collected.
class TempPath
  attr_reader :path

  # Creates the file. When a block is given, the new TempPath is yielded to
  # it and the file is removed once the block returns or raises.
  #
  # @param prefix [String, nil] optional first segment of the file name
  # @param suffix [String, nil] optional last segment of the file name
  # @yieldparam temp_path [TempPath] the object being initialized
  # @raise [Errno::EEXIST] when the generated path already exists
  def initialize(prefix: nil, suffix: nil)
    name = [prefix, SecureRandom.hex[0, 16], Process.pid, suffix].compact.join(".")
    @path = File.join(Dir.tmpdir, name)

    # EXCL makes creation fail instead of reusing an existing file.
    File.open(@path, File::CREAT | File::EXCL) {}

    ObjectSpace.define_finalizer(self, self.class.finalize(@path))

    return unless block_given?

    begin
      # Pass self: without it the block has no way to learn the path.
      yield(self)
    ensure
      unlink
    end
  end

  # Removes the file; a missing file is not an error.
  def unlink
    FileUtils.rm_f(@path)
  end

  # Builds the finalizer. It captures only the path, not the TempPath object,
  # so the object itself can still be garbage collected.
  #
  # @return [Proc] a proc removing +path+
  def self.finalize(path)
    proc { FileUtils.rm_f(path) }
  end
end
|
29
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Kraps
|
2
|
+
# A synchronized, enumerable collection of TempPath objects.
class TempPaths
  include MonitorMixin
  include Enumerable

  def initialize
    super

    @temp_paths = []
  end

  # Creates a new TempPath and remembers it.
  #
  # @return [TempPath] the newly created temp path
  def add
    synchronize do
      TempPath.new.tap { |created| @temp_paths << created }
    end
  end

  # Calls #unlink on every remembered temp path.
  def unlink
    synchronize { @temp_paths.each(&:unlink) }
  end

  # Yields every remembered temp path while holding the monitor.
  #
  # @return [Enumerator] when no block is given
  def each(&block)
    return to_enum(:each) unless block

    synchronize { @temp_paths.each(&block) }
  end
end
|
34
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Kraps
|
2
|
+
# A monitor-protected FIFO queue whose #deq gives up after a timeout.
class TimeoutQueue
  include MonitorMixin

  def initialize
    super

    @queue = []
    @cond = new_cond
  end

  # Appends +item+ and wakes up one waiting consumer.
  def enq(item)
    synchronize do
      @queue.push(item)
      @cond.signal
    end
  end

  # Removes and returns the oldest item. When the queue is empty, waits up to
  # +timeout+ seconds for an item first.
  #
  # @param timeout [Numeric] maximum number of seconds to wait
  # @return [Object, nil] the item, or nil when the queue is still empty
  def deq(timeout:)
    synchronize do
      @cond.wait(timeout) if @queue.empty?

      # Array#shift returns nil for an empty array.
      @queue.shift
    end
  end
end
|
27
|
+
end
|
data/lib/kraps/worker.rb
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
module Kraps
|
2
|
+
# Executes one enqueued part of a step: loads the input chunks of the
# previous frame, runs the step's block and stores the resulting chunks.
class Worker
  # @param json [String] the JSON encoded message to process
  # @param memory_limit passed on to MapReduce::Mapper
  # @param chunk_limit passed on to MapReduce::Reducer#reduce
  # @param concurrency [Integer] number of threads used for downloads
  def initialize(json, memory_limit:, chunk_limit:, concurrency:)
    @args = JSON.parse(json)
    @memory_limit = memory_limit
    @chunk_limit = chunk_limit
    @concurrency = concurrency
  end

  # Performs the step's action and marks the part as done. Does nothing when
  # the distributed job is already stopped.
  #
  # @param retries [Integer] maximum number of attempts
  # @raise [InvalidAction] when the step's action is not in Actions::ALL
  def call(retries: 3)
    return if distributed_job.stopped?

    raise(InvalidAction, "Invalid action #{step.action}") unless Actions::ALL.include?(step.action)

    with_retries(retries) do # TODO: allow to use queue based retries
      send(:"perform_#{step.action}")

      distributed_job.done(@args["part"])
    end
  end

  private

  def perform_parallelize
    # Emits the item as key with a nil value.
    implementation = Class.new do
      def map(key)
        yield(key, nil)
      end
    end

    mapper = MapReduce::Mapper.new(implementation.new, partitioner: partitioner, memory_limit: @memory_limit)
    mapper.map(@args["item"])

    mapper.shuffle do |partition, tempfile| # TODO: upload in parallel
      store_chunk(partition, tempfile)
    end
  end

  def perform_map
    temp_paths = TempPaths.new

    download_frame_files(temp_paths)

    current_step = step

    implementation = Object.new
    implementation.define_singleton_method(:map) do |key, value, &block|
      current_step.block.call(key, value, block)
    end

    mapper = MapReduce::Mapper.new(implementation, partitioner: partitioner, memory_limit: @memory_limit)

    temp_paths.each do |temp_path|
      File.open(temp_path.path) do |stream|
        stream.each_line do |line|
          key, value = JSON.parse(line)

          mapper.map(key, value)
        end
      end
    end

    mapper.shuffle do |partition, tempfile| # TODO: upload in parallel
      store_chunk(partition, tempfile)
    end
  ensure
    temp_paths&.unlink
  end

  def perform_reduce
    current_step = step

    implementation = Object.new
    implementation.define_singleton_method(:reduce) do |key, value1, value2|
      current_step.block.call(key, value1, value2)
    end

    reducer = MapReduce::Reducer.new(implementation)

    Parallelizer.each(frame_files, @concurrency) do |file|
      Kraps.driver.driver.download(file, Kraps.driver.bucket, reducer.add_chunk)
    end

    tempfile = Tempfile.new

    reducer.reduce(chunk_limit: @chunk_limit) do |key, value|
      tempfile.puts(JSON.generate([key, value]))
    end

    store_chunk(@args["partition"], tempfile.tap(&:rewind))
  ensure
    tempfile&.close(true)
  end

  def perform_each_partition
    temp_paths = TempPaths.new

    download_frame_files(temp_paths)

    # Lazy: lines are only read and parsed as the step's block consumes them.
    enum = Enumerator::Lazy.new(temp_paths) do |yielder, temp_path|
      File.open(temp_path.path) do |stream|
        stream.each_line do |line|
          yielder << JSON.parse(line)
        end
      end
    end

    step.block.call(@args["partition"], enum)
  ensure
    temp_paths&.unlink
  end

  # Lists the stored files of the incoming frame for this message's partition.
  def frame_files
    prefix = Kraps.driver.with_prefix("#{@args["frame"]["token"]}/#{@args["partition"]}/")

    Kraps.driver.driver.list(Kraps.driver.bucket, prefix: prefix)
  end

  # Downloads the sorted frame files concurrently, each into a temp path
  # added to +temp_paths+ in the same sorted order.
  def download_frame_files(temp_paths)
    files = frame_files.sort

    temp_paths_index = files.each_with_object({}) do |file, hash|
      hash[file] = temp_paths.add
    end

    Parallelizer.each(files, @concurrency) do |file|
      Kraps.driver.driver.download(file, Kraps.driver.bucket, temp_paths_index[file].path)
    end
  end

  # Stores +io+ as this part's chunk of the given output partition.
  def store_chunk(partition, io)
    path = Kraps.driver.with_prefix("#{@args["token"]}/#{partition}/chunk.#{@args["part"]}.json")

    Kraps.driver.driver.store(path, io, Kraps.driver.bucket)
  end

  # Runs the block, making at most +num_retries+ attempts with a 5 second
  # pause between them. A Kraps::Error stops the distributed job without
  # retrying and is not re-raised. Once the attempts are used up the
  # distributed job is stopped and the last error is re-raised.
  def with_retries(num_retries)
    retries = 0

    begin
      yield
    rescue Kraps::Error
      distributed_job.stop
    rescue StandardError
      retries += 1

      # Check the budget before sleeping, so the final failure is reported
      # right away instead of after a pointless wait.
      if retries >= num_retries
        distributed_job.stop
        raise
      end

      sleep(5)

      retry
    end
  end

  # Rebuilds the jobs by calling the job class named in the message.
  def jobs
    @jobs ||= Array(@args["klass"].constantize.new.call(*@args["args"], **@args["kwargs"].transform_keys(&:to_sym)))
  end

  def job
    @job ||= begin
      job_index = @args["job_index"]

      jobs[job_index] || raise(InvalidJob, "Can't find job #{job_index}")
    end
  end

  def steps
    @steps ||= job.steps
  end

  def step
    @step ||= begin
      step_index = @args["step_index"]

      steps[step_index] || raise(InvalidStep, "Can't find step #{step_index}")
    end
  end

  def partitioner
    @partitioner ||= step.args[:partitioner]
  end

  def distributed_job
    @distributed_job ||= Kraps.distributed_job_client.build(token: @args["token"])
  end
end
|
183
|
+
end
|
data/lib/kraps.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require_relative "kraps/version"
|
2
|
+
require_relative "kraps/drivers"
|
3
|
+
require_relative "kraps/actions"
|
4
|
+
require_relative "kraps/parallelizer"
|
5
|
+
require_relative "kraps/temp_path"
|
6
|
+
require_relative "kraps/temp_paths"
|
7
|
+
require_relative "kraps/timeout_queue"
|
8
|
+
require_relative "kraps/interval"
|
9
|
+
require_relative "kraps/job"
|
10
|
+
require_relative "kraps/runner"
|
11
|
+
require_relative "kraps/step"
|
12
|
+
require_relative "kraps/frame"
|
13
|
+
require_relative "kraps/worker"
|
14
|
+
require "distributed_job"
|
15
|
+
require "ruby-progressbar"
|
16
|
+
require "ruby-progressbar/outputs/null"
|
17
|
+
require "map_reduce"
|
18
|
+
require "redis"
|
19
|
+
|
20
|
+
# Top-level namespace holding the error classes and the global configuration.
module Kraps
  # Base class of all errors raised by Kraps.
  class Error < StandardError; end
  class InvalidAction < Error; end
  class InvalidStep < Error; end
  # Raised by the worker when the job index of a message cannot be resolved.
  # Without this definition that raise fails with a NameError instead.
  class InvalidJob < Error; end
  class JobStopped < Error; end

  # Stores the global configuration.
  #
  # @param driver the storage driver, later returned by .driver
  # @param redis the redis connection handed to DistributedJob::Client
  # @param namespace the namespace handed to DistributedJob::Client
  # @param job_ttl [Integer] handed to DistributedJob::Client as default_ttl;
  #   defaults to 24 * 60 * 60
  # @param show_progress whether progress output is shown
  # @param enqueuer a callable receiving a worker and a JSON string; by
  #   default it calls +perform_async+ on the worker
  def self.configure(driver:, redis: Redis.new, namespace: nil, job_ttl: 24 * 60 * 60, show_progress: true, enqueuer: ->(worker, json) { worker.perform_async(json) })
    @driver = driver
    @distributed_job_client = DistributedJob::Client.new(redis: redis, namespace: namespace, default_ttl: job_ttl)
    @show_progress = show_progress
    @enqueuer = enqueuer
  end

  # @return the configured storage driver
  def self.driver
    @driver
  end

  # @return the DistributedJob::Client built by .configure
  def self.distributed_job_client
    @distributed_job_client
  end

  # @return whether progress output is shown
  def self.show_progress?
    @show_progress
  end

  # @return the configured enqueuer
  def self.enqueuer
    @enqueuer
  end
end
|