skyrunner 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +1 -0
- data/bin/skyrunner +2 -3
- data/lib/generators/sky_runner/install/templates/skyrunner.rb +2 -6
- data/lib/skyrunner/job.rb +83 -9
- data/lib/skyrunner/version.rb +1 -1
- data/lib/skyrunner.rb +103 -60
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b4147a678d048c85f857479136563287a39576e
|
4
|
+
data.tar.gz: b1ccac1353150bc7948332135aefd8c90bbefb5b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4c80e760302f36ccdafc30b7c76b9f86838728ee479f578fd71f8e401285845fcb07a7c4003787f2d0d843959f6a9edf7dcddf7d6decec56aaff462aeee2ed8
|
7
|
+
data.tar.gz: a56cfeae042ce3387ffaa1cb757f06d26639f65b2b9ed4789aefe04bdc02442b65367943ca2625161e4826bac8a5bfac010e38c062bf981aa2bdbb13aa68562d
|
data/Gemfile
CHANGED
data/bin/skyrunner
CHANGED
@@ -38,8 +38,7 @@ end
|
|
38
38
|
|
39
39
|
SkyRunner.dynamo_db_table_name = opts[:dynamo_db_table_name]
|
40
40
|
SkyRunner.sqs_queue_name = opts[:sqs_queue_name]
|
41
|
-
SkyRunner.
|
42
|
-
SkyRunner.num_threads = opts[:num_threads].to_i
|
41
|
+
SkyRunner.consumer_threads = opts[:num_threads].to_i
|
43
42
|
|
44
43
|
COMMANDS = ["init", "purge", "consume", "test"]
|
45
44
|
|
@@ -57,6 +56,6 @@ when "consume"
|
|
57
56
|
when "test"
|
58
57
|
$: << "."
|
59
58
|
require "#{File.dirname(__FILE__)}/../jobs/example_job"
|
60
|
-
ExampleJobModule::ExampleJob.new.execute!(number_of_tasks:
|
59
|
+
ExampleJobModule::ExampleJob.new.execute!(number_of_tasks: 1000)
|
61
60
|
SkyRunner.consume!
|
62
61
|
end
|
data/lib/generators/sky_runner/install/templates/skyrunner.rb
CHANGED
@@ -5,17 +5,13 @@ SkyRunner.setup do |config|
|
|
5
5
|
config.dynamo_db_table_name = "skyrunner_jobs_#{Rails.env}"
|
6
6
|
config.sqs_queue_name = "skyrunner_tasks_#{Rails.env}"
|
7
7
|
|
8
|
-
# Set the number of tasks for a consumer to pull and run from SQS at a time. (Max 10, default 10)
|
9
|
-
#
|
10
|
-
# config.consumer_batch_size = 10
|
11
|
-
|
12
8
|
# Set the visibility timeout of queue items. If the consumer batch size (above) is set to 10,
|
13
9
|
# this should provide sufficient time for a consumer to process 10 tasks, for example. (default 90)
|
14
10
|
#
|
15
11
|
# config.visibility_timeout = 90
|
16
12
|
|
17
|
-
# Set the number of concurrent threads
|
13
|
+
# Set the number of concurrent consumer threads when running the consumer.
|
18
14
|
# (If greater than one, you obviously need to make sure your tasks are thread-safe.)
|
19
15
|
#
|
20
|
-
# config.
|
16
|
+
# config.consumer_threads = 10
|
21
17
|
end
|
data/lib/skyrunner/job.rb
CHANGED
@@ -34,13 +34,17 @@ module SkyRunner::Job
|
|
34
34
|
table = SkyRunner.dynamo_db_table
|
35
35
|
queue = SkyRunner.sqs_queue
|
36
36
|
|
37
|
-
record =
|
37
|
+
record = nil
|
38
|
+
|
39
|
+
SkyRunner::retry_dynamo_db do
|
40
|
+
record = table.items.put(id: job_id, task_id: job_id, class: self.class.name, args: args.to_json, total_tasks: 1, completed_tasks: 0, done: 0, failed: 0)
|
41
|
+
end
|
38
42
|
|
39
43
|
pending_args = []
|
40
44
|
|
41
45
|
flush = lambda do
|
42
46
|
messages = pending_args.map do |task_args|
|
43
|
-
{ job_id: job_id, task_id: SecureRandom.hex, task_args: task_args }.to_json
|
47
|
+
{ job_id: job_id, task_id: SecureRandom.hex, task_args: task_args, job_class: self.class.name }.to_json
|
44
48
|
end
|
45
49
|
|
46
50
|
dropped_message_count = 0
|
@@ -57,7 +61,9 @@ module SkyRunner::Job
|
|
57
61
|
end
|
58
62
|
end
|
59
63
|
|
60
|
-
|
64
|
+
SkyRunner::retry_dynamo_db do
|
65
|
+
record.attributes.add({ total_tasks: messages.size - dropped_message_count })
|
66
|
+
end
|
61
67
|
end
|
62
68
|
|
63
69
|
self.run(args) do |*task_args|
|
@@ -94,7 +100,7 @@ module SkyRunner::Job
|
|
94
100
|
private
|
95
101
|
|
96
102
|
def dynamo_db_record
|
97
|
-
SkyRunner.dynamo_db_table.items[self.skyrunner_job_id]
|
103
|
+
SkyRunner.dynamo_db_table.items[self.skyrunner_job_id, self.skyrunner_job_id]
|
98
104
|
end
|
99
105
|
|
100
106
|
def handle_task_failed!
|
@@ -102,7 +108,10 @@ module SkyRunner::Job
|
|
102
108
|
|
103
109
|
begin
|
104
110
|
record = dynamo_db_record
|
105
|
-
|
111
|
+
|
112
|
+
SkyRunner::retry_dynamo_db do
|
113
|
+
record.attributes.add({ failed: 1 })
|
114
|
+
end
|
106
115
|
|
107
116
|
(self.class.job_event_methods[:failed] || []).each do |method|
|
108
117
|
if self.method(method).arity == 0 && self.method(method).parameters.size == 0
|
@@ -111,6 +120,8 @@ module SkyRunner::Job
|
|
111
120
|
self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
|
112
121
|
end
|
113
122
|
end
|
123
|
+
|
124
|
+
delete_task_records! rescue nil
|
114
125
|
rescue Exception => e
|
115
126
|
end
|
116
127
|
end
|
@@ -119,15 +130,20 @@ module SkyRunner::Job
|
|
119
130
|
return false unless self.skyrunner_job_id
|
120
131
|
|
121
132
|
record = dynamo_db_record
|
133
|
+
new_attributes = nil
|
122
134
|
|
123
|
-
|
135
|
+
SkyRunner::retry_dynamo_db do
|
136
|
+
new_attributes = record.attributes.add({ completed_tasks: 1 }, return: :all_new)
|
137
|
+
end
|
124
138
|
|
125
139
|
if new_attributes["total_tasks"] == new_attributes["completed_tasks"]
|
126
140
|
begin
|
127
141
|
if_condition = { completed_tasks: new_attributes["total_tasks"], done: 0 }
|
128
142
|
|
129
|
-
|
130
|
-
|
143
|
+
SkyRunner::retry_dynamo_db do
|
144
|
+
record.attributes.update(if: if_condition) do |u|
|
145
|
+
u.add(done: 1)
|
146
|
+
end
|
131
147
|
end
|
132
148
|
|
133
149
|
(self.class.job_event_methods[:completed] || []).each do |method|
|
@@ -137,11 +153,69 @@ module SkyRunner::Job
|
|
137
153
|
self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
|
138
154
|
end
|
139
155
|
end
|
156
|
+
|
157
|
+
delete_task_records! rescue nil
|
140
158
|
rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
|
141
|
-
# This is OK, we had a double finisher.
|
159
|
+
# This is OK, we had a double finisher so lets block them.
|
142
160
|
end
|
143
161
|
end
|
144
162
|
|
145
163
|
true
|
146
164
|
end
|
165
|
+
|
166
|
+
def delete_task_records!
|
167
|
+
delete_batch_queue = Queue.new
|
168
|
+
mutex = Mutex.new
|
169
|
+
delete_items_queued = false
|
170
|
+
threads = []
|
171
|
+
|
172
|
+
1.upto([1, (SkyRunner.consumer_threads / 4.0).floor].max) do
|
173
|
+
threads << Thread.new do
|
174
|
+
|
175
|
+
db_table = SkyRunner.dynamo_db_table
|
176
|
+
|
177
|
+
loop do
|
178
|
+
should_break = false
|
179
|
+
|
180
|
+
mutex.synchronize do
|
181
|
+
should_break = (SkyRunner::stop_consuming? || delete_items_queued) && delete_batch_queue.empty?
|
182
|
+
end
|
183
|
+
|
184
|
+
break if should_break
|
185
|
+
|
186
|
+
if delete_batch_queue.size > 0
|
187
|
+
batch = delete_batch_queue.pop
|
188
|
+
|
189
|
+
if batch
|
190
|
+
SkyRunner::retry_dynamo_db do
|
191
|
+
db_table.batch_delete(batch)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
else
|
195
|
+
sleep 1
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
200
|
+
|
201
|
+
items_to_delete = []
|
202
|
+
table = SkyRunner.dynamo_db_table
|
203
|
+
|
204
|
+
table.items.query(hash_value: "#{self.skyrunner_job_id}-tasks", select: [:id, :task_id]) do |task_item|
|
205
|
+
items_to_delete << [task_item.attributes["id"], task_item.attributes["task_id"]]
|
206
|
+
|
207
|
+
if items_to_delete.size >= 25
|
208
|
+
delete_batch_queue << items_to_delete
|
209
|
+
items_to_delete = []
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
delete_batch_queue << items_to_delete unless items_to_delete.empty?
|
214
|
+
|
215
|
+
mutex.synchronize do
|
216
|
+
delete_items_queued = true
|
217
|
+
end
|
218
|
+
|
219
|
+
threads.each(&:join)
|
220
|
+
end
|
147
221
|
end
|
data/lib/skyrunner/version.rb
CHANGED
data/lib/skyrunner.rb
CHANGED
@@ -5,6 +5,7 @@ require "active_support/core_ext"
|
|
5
5
|
require "log4r"
|
6
6
|
require "json"
|
7
7
|
require "set"
|
8
|
+
require "retries"
|
8
9
|
|
9
10
|
module SkyRunner
|
10
11
|
require "skyrunner/engine" if defined?(Rails)
|
@@ -33,7 +34,8 @@ module SkyRunner
|
|
33
34
|
table = dynamo_db.tables.create(table_name,
|
34
35
|
SkyRunner.dynamo_db_read_capacity,
|
35
36
|
SkyRunner.dynamo_db_write_capacity,
|
36
|
-
hash_key: {
|
37
|
+
hash_key: { id: :string },
|
38
|
+
range_key: { task_id: :string })
|
37
39
|
|
38
40
|
sleep 1 while table.status == :creating
|
39
41
|
end
|
@@ -61,92 +63,124 @@ module SkyRunner
|
|
61
63
|
end
|
62
64
|
|
63
65
|
def self.consume!(&block)
|
64
|
-
|
65
|
-
table
|
66
|
-
raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless queue
|
67
|
-
raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless table && table.exists?
|
66
|
+
raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless sqs_queue
|
67
|
+
raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless dynamo_db_table && dynamo_db_table.exists?
|
68
68
|
|
69
69
|
local_queue = Queue.new
|
70
|
-
error_queue = Queue.new
|
71
70
|
|
72
71
|
threads = []
|
73
72
|
|
74
|
-
1.upto(SkyRunner::
|
73
|
+
1.upto(SkyRunner::consumer_threads) do
|
75
74
|
threads << Thread.new do
|
75
|
+
table = SkyRunner::dynamo_db_table
|
76
|
+
|
76
77
|
loop do
|
77
|
-
|
78
|
+
begin
|
79
|
+
if local_queue.empty?
|
80
|
+
break if SkyRunner::stop_consuming?
|
81
|
+
|
82
|
+
sleep 1
|
83
|
+
next
|
84
|
+
end
|
78
85
|
|
79
|
-
|
86
|
+
klass, job_id, task_id, task_args, message = local_queue.pop
|
80
87
|
|
81
|
-
|
88
|
+
if klass
|
89
|
+
begin
|
90
|
+
# Avoid running the same task twice, enter record and raise error if exists already.
|
82
91
|
|
83
|
-
|
84
|
-
|
92
|
+
SkyRunner::retry_dynamo_db do
|
93
|
+
table.items.put({ id: "#{job_id}-tasks", task_id: task_id }, unless_exists: ["id", "task_id"])
|
94
|
+
end
|
85
95
|
|
86
|
-
|
87
|
-
job.skyrunner_job_id = job_id
|
96
|
+
SkyRunner::log :info, "Run Task: #{task_args} Job: #{job_id} Message: #{message.id}"
|
88
97
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
98
|
+
job = klass.new
|
99
|
+
job.skyrunner_job_id = job_id
|
100
|
+
|
101
|
+
begin
|
102
|
+
job.consume!(task_args)
|
103
|
+
message.delete
|
104
|
+
rescue Exception => e
|
105
|
+
message.delete rescue nil
|
106
|
+
block.call(e) if block_given?
|
107
|
+
SkyRunner::log :error, "Task Failed: #{task_args} Job: #{job_id} #{e.message} #{e.backtrace.join("\n")}"
|
108
|
+
end
|
109
|
+
rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
|
110
|
+
message.delete rescue nil
|
111
|
+
end
|
96
112
|
end
|
113
|
+
rescue Exception => e
|
114
|
+
puts e.message
|
115
|
+
puts e.backtrace.join("\n")
|
116
|
+
raise e
|
97
117
|
end
|
98
118
|
end
|
99
119
|
end
|
100
120
|
end
|
101
121
|
|
102
|
-
|
122
|
+
1.upto((SkyRunner::consumer_threads.to_f / SQS_MAX_BATCH_SIZE).ceil + 1) do
|
123
|
+
threads << Thread.new do
|
124
|
+
begin
|
125
|
+
loop do
|
126
|
+
table = SkyRunner::dynamo_db_table
|
127
|
+
queue = sqs_queue
|
103
128
|
|
104
|
-
|
105
|
-
if error_queue.size > 0
|
106
|
-
SkyRunner::stop_consuming!
|
129
|
+
break if SkyRunner::stop_consuming?
|
107
130
|
|
108
|
-
|
109
|
-
error = error_queue.pop
|
110
|
-
yield error if block_given?
|
111
|
-
end
|
112
|
-
end
|
131
|
+
sleep 1 while local_queue.size >= SkyRunner.consumer_threads
|
113
132
|
|
114
|
-
|
133
|
+
received_messages = []
|
115
134
|
|
116
|
-
|
135
|
+
queue.receive_messages(limit: SQS_MAX_BATCH_SIZE, wait_time_seconds: 5) do |message|
|
136
|
+
received_messages << [message, JSON.parse(message.body)]
|
137
|
+
end
|
117
138
|
|
118
|
-
|
139
|
+
next unless received_messages.size > 0
|
119
140
|
|
120
|
-
|
141
|
+
job_ids = received_messages.map { |m| [m[1]["job_id"], m[1]["job_id"]] }.uniq
|
121
142
|
|
122
|
-
|
123
|
-
received_messages << [message, JSON.parse(message.body)]
|
124
|
-
end
|
143
|
+
job_records = {}
|
125
144
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
145
|
+
SkyRunner::retry_dynamo_db do
|
146
|
+
# Read DynamoDB records into job and task lookup tables.
|
147
|
+
table.batch_get(["id", "task_id", "failed"], job_ids.uniq, consistent_read: true) do |record|
|
148
|
+
job_records[record["id"]] = record
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
received_messages.each do |received_message|
|
153
|
+
message, message_data = received_message
|
154
|
+
job_id = message_data["job_id"]
|
155
|
+
task_id = message_data["task_id"]
|
156
|
+
|
157
|
+
job_record = job_records[job_id]
|
158
|
+
|
159
|
+
if job_record && job_record["failed"] == 0
|
160
|
+
begin
|
161
|
+
klass = Kernel.const_get(message_data["job_class"])
|
162
|
+
task_args = message_data["task_args"]
|
163
|
+
local_queue.push([klass, job_id, task_id, task_args, message])
|
164
|
+
rescue NameError => e
|
165
|
+
block.call(e) if block_given?
|
166
|
+
message.delete rescue nil
|
167
|
+
log :error, "Task Failed: No such class #{message_data["job_class"]} #{e.message}"
|
168
|
+
end
|
169
|
+
else
|
170
|
+
message.delete rescue nil
|
171
|
+
end
|
142
172
|
end
|
143
|
-
else
|
144
|
-
message.delete
|
145
173
|
end
|
174
|
+
rescue Exception => e
|
175
|
+
puts e.message
|
176
|
+
puts e.backtrace.join("\n")
|
177
|
+
raise e
|
146
178
|
end
|
147
179
|
end
|
148
180
|
end
|
149
181
|
|
182
|
+
log :info, "Consumer started."
|
183
|
+
|
150
184
|
threads.each(&:join)
|
151
185
|
|
152
186
|
true
|
@@ -188,14 +222,11 @@ module SkyRunner
|
|
188
222
|
mattr_accessor :sqs_message_retention_period
|
189
223
|
@@sqs_message_retention_period = 345600
|
190
224
|
|
191
|
-
mattr_accessor :consumer_batch_size
|
192
|
-
@@consumer_batch_size = 10
|
193
|
-
|
194
225
|
mattr_accessor :logger
|
195
226
|
@@logger = Log4r::Logger.new("skyrunner")
|
196
227
|
|
197
|
-
mattr_accessor :
|
198
|
-
@@
|
228
|
+
mattr_accessor :consumer_threads
|
229
|
+
@@consumer_threads = 10
|
199
230
|
|
200
231
|
mattr_accessor :stop_consuming_flag
|
201
232
|
|
@@ -217,6 +248,18 @@ module SkyRunner
|
|
217
248
|
end
|
218
249
|
end
|
219
250
|
|
251
|
+
def self.retry_dynamo_db(&block)
|
252
|
+
handler = Proc.new do |exception, num, delay|
|
253
|
+
if exception
|
254
|
+
SkyRunner.log :warn, "Having to retry DynamoDB requests. #{exception.message}"
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
with_retries(handler: handler, max_tries: 100, rescue: AWS::DynamoDB::Errors::ProvisionedThroughputExceededException, base_sleep_seconds: 2, max_sleep_seconds: 60) do
|
259
|
+
block.call
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
220
263
|
private
|
221
264
|
|
222
265
|
def self.dynamo_db
|