skyrunner 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b828d1a9dbc216ff8aca55ce2e8007f07ff1c47d
4
- data.tar.gz: 21ed7eb84af89b74578726cb69faadc981c6fdf6
3
+ metadata.gz: 3b4147a678d048c85f857479136563287a39576e
4
+ data.tar.gz: b1ccac1353150bc7948332135aefd8c90bbefb5b
5
5
  SHA512:
6
- metadata.gz: f78e396093a0dcbe578b1976911b29e4eab5fdf01ecaffff0405ebbc7394b642923c5b3b47da2da6c4419168cbb8b5932646eddc3391a79c319e43b5b6e44e2a
7
- data.tar.gz: 43e38c229e0f32c9cddc66ffa375657f96f1e2b8de18e26cf030c14e96a16d065cd813f895281b8eb7ab4cd95fc0807a778117b8f75a8dc8d1766fa0c3c9e25a
6
+ metadata.gz: c4c80e760302f36ccdafc30b7c76b9f86838728ee479f578fd71f8e401285845fcb07a7c4003787f2d0d843959f6a9edf7dcddf7d6decec56aaff462aeee2ed8
7
+ data.tar.gz: a56cfeae042ce3387ffaa1cb757f06d26639f65b2b9ed4789aefe04bdc02442b65367943ca2625161e4826bac8a5bfac010e38c062bf981aa2bdbb13aa68562d
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gem "aws-sdk"
7
7
  gem "activesupport"
8
8
  gem "log4r"
9
9
  gem "trollop"
10
+ gem "retries"
data/bin/skyrunner CHANGED
@@ -38,8 +38,7 @@ end
38
38
 
39
39
  SkyRunner.dynamo_db_table_name = opts[:dynamo_db_table_name]
40
40
  SkyRunner.sqs_queue_name = opts[:sqs_queue_name]
41
- SkyRunner.consumer_batch_size = opts[:batch_size].to_i
42
- SkyRunner.num_threads = opts[:num_threads].to_i
41
+ SkyRunner.consumer_threads = opts[:num_threads].to_i
43
42
 
44
43
  COMMANDS = ["init", "purge", "consume", "test"]
45
44
 
@@ -57,6 +56,6 @@ when "consume"
57
56
  when "test"
58
57
  $: << "."
59
58
  require "#{File.dirname(__FILE__)}/../jobs/example_job"
60
- ExampleJobModule::ExampleJob.new.execute!(number_of_tasks: 100)
59
+ ExampleJobModule::ExampleJob.new.execute!(number_of_tasks: 1000)
61
60
  SkyRunner.consume!
62
61
  end
@@ -5,17 +5,13 @@ SkyRunner.setup do |config|
5
5
  config.dynamo_db_table_name = "skyrunner_jobs_#{Rails.env}"
6
6
  config.sqs_queue_name = "skyrunner_tasks_#{Rails.env}"
7
7
 
8
- # Set the number of tasks for a consumer to pull and run from SQS at a time. (Max 10, default 10)
9
- #
10
- # config.consumer_batch_size = 10
11
-
12
8
  # Set the visibility timeout of queue items. If the consumer batch size (above) is set to 10,
13
9
  # this should provide sufficient time for a consumer to process 10 tasks, for example. (default 90)
14
10
  #
15
11
  # config.visibility_timeout = 90
16
12
 
17
- # Set the number of concurrent threads for the consumer process.
13
+ # Set the number of concurrent consumer threads when running the consumer.
18
14
  # (If greater than one, you obviously need to make sure your tasks are thread-safe.)
19
15
  #
20
- # config.num_threads = 10
16
+ # config.consumer_threads = 10
21
17
  end
data/lib/skyrunner/job.rb CHANGED
@@ -34,13 +34,17 @@ module SkyRunner::Job
34
34
  table = SkyRunner.dynamo_db_table
35
35
  queue = SkyRunner.sqs_queue
36
36
 
37
- record = table.items.put(job_id: job_id, class: self.class.name, args: args.to_json, total_tasks: 1, completed_tasks: 0, done: 0, failed: 0)
37
+ record = nil
38
+
39
+ SkyRunner::retry_dynamo_db do
40
+ record = table.items.put(id: job_id, task_id: job_id, class: self.class.name, args: args.to_json, total_tasks: 1, completed_tasks: 0, done: 0, failed: 0)
41
+ end
38
42
 
39
43
  pending_args = []
40
44
 
41
45
  flush = lambda do
42
46
  messages = pending_args.map do |task_args|
43
- { job_id: job_id, task_id: SecureRandom.hex, task_args: task_args }.to_json
47
+ { job_id: job_id, task_id: SecureRandom.hex, task_args: task_args, job_class: self.class.name }.to_json
44
48
  end
45
49
 
46
50
  dropped_message_count = 0
@@ -57,7 +61,9 @@ module SkyRunner::Job
57
61
  end
58
62
  end
59
63
 
60
- record.attributes.add({ total_tasks: messages.size - dropped_message_count })
64
+ SkyRunner::retry_dynamo_db do
65
+ record.attributes.add({ total_tasks: messages.size - dropped_message_count })
66
+ end
61
67
  end
62
68
 
63
69
  self.run(args) do |*task_args|
@@ -94,7 +100,7 @@ module SkyRunner::Job
94
100
  private
95
101
 
96
102
  def dynamo_db_record
97
- SkyRunner.dynamo_db_table.items[self.skyrunner_job_id]
103
+ SkyRunner.dynamo_db_table.items[self.skyrunner_job_id, self.skyrunner_job_id]
98
104
  end
99
105
 
100
106
  def handle_task_failed!
@@ -102,7 +108,10 @@ module SkyRunner::Job
102
108
 
103
109
  begin
104
110
  record = dynamo_db_record
105
- record.attributes.add({ failed: 1 })
111
+
112
+ SkyRunner::retry_dynamo_db do
113
+ record.attributes.add({ failed: 1 })
114
+ end
106
115
 
107
116
  (self.class.job_event_methods[:failed] || []).each do |method|
108
117
  if self.method(method).arity == 0 && self.method(method).parameters.size == 0
@@ -111,6 +120,8 @@ module SkyRunner::Job
111
120
  self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
112
121
  end
113
122
  end
123
+
124
+ delete_task_records! rescue nil
114
125
  rescue Exception => e
115
126
  end
116
127
  end
@@ -119,15 +130,20 @@ module SkyRunner::Job
119
130
  return false unless self.skyrunner_job_id
120
131
 
121
132
  record = dynamo_db_record
133
+ new_attributes = nil
122
134
 
123
- new_attributes = record.attributes.add({ completed_tasks: 1 }, return: :all_new)
135
+ SkyRunner::retry_dynamo_db do
136
+ new_attributes = record.attributes.add({ completed_tasks: 1 }, return: :all_new)
137
+ end
124
138
 
125
139
  if new_attributes["total_tasks"] == new_attributes["completed_tasks"]
126
140
  begin
127
141
  if_condition = { completed_tasks: new_attributes["total_tasks"], done: 0 }
128
142
 
129
- record.attributes.update(if: if_condition) do |u|
130
- u.add(done: 1)
143
+ SkyRunner::retry_dynamo_db do
144
+ record.attributes.update(if: if_condition) do |u|
145
+ u.add(done: 1)
146
+ end
131
147
  end
132
148
 
133
149
  (self.class.job_event_methods[:completed] || []).each do |method|
@@ -137,11 +153,69 @@ module SkyRunner::Job
137
153
  self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
138
154
  end
139
155
  end
156
+
157
+ delete_task_records! rescue nil
140
158
  rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
141
- # This is OK, we had a double finisher.
159
+ # This is OK, we had a double finisher so lets block them.
142
160
  end
143
161
  end
144
162
 
145
163
  true
146
164
  end
165
+
166
+ def delete_task_records!
167
+ delete_batch_queue = Queue.new
168
+ mutex = Mutex.new
169
+ delete_items_queued = false
170
+ threads = []
171
+
172
+ 1.upto([1, (SkyRunner.consumer_threads / 4.0).floor].max) do
173
+ threads << Thread.new do
174
+
175
+ db_table = SkyRunner.dynamo_db_table
176
+
177
+ loop do
178
+ should_break = false
179
+
180
+ mutex.synchronize do
181
+ should_break = (SkyRunner::stop_consuming? || delete_items_queued) && delete_batch_queue.empty?
182
+ end
183
+
184
+ break if should_break
185
+
186
+ if delete_batch_queue.size > 0
187
+ batch = delete_batch_queue.pop
188
+
189
+ if batch
190
+ SkyRunner::retry_dynamo_db do
191
+ db_table.batch_delete(batch)
192
+ end
193
+ end
194
+ else
195
+ sleep 1
196
+ end
197
+ end
198
+ end
199
+ end
200
+
201
+ items_to_delete = []
202
+ table = SkyRunner.dynamo_db_table
203
+
204
+ table.items.query(hash_value: "#{self.skyrunner_job_id}-tasks", select: [:id, :task_id]) do |task_item|
205
+ items_to_delete << [task_item.attributes["id"], task_item.attributes["task_id"]]
206
+
207
+ if items_to_delete.size >= 25
208
+ delete_batch_queue << items_to_delete
209
+ items_to_delete = []
210
+ end
211
+ end
212
+
213
+ delete_batch_queue << items_to_delete unless items_to_delete.empty?
214
+
215
+ mutex.synchronize do
216
+ delete_items_queued = true
217
+ end
218
+
219
+ threads.each(&:join)
220
+ end
147
221
  end
@@ -1,3 +1,3 @@
1
1
  module Skyrunner
2
- VERSION = "0.1.3"
2
+ VERSION = "0.1.4"
3
3
  end
data/lib/skyrunner.rb CHANGED
@@ -5,6 +5,7 @@ require "active_support/core_ext"
5
5
  require "log4r"
6
6
  require "json"
7
7
  require "set"
8
+ require "retries"
8
9
 
9
10
  module SkyRunner
10
11
  require "skyrunner/engine" if defined?(Rails)
@@ -33,7 +34,8 @@ module SkyRunner
33
34
  table = dynamo_db.tables.create(table_name,
34
35
  SkyRunner.dynamo_db_read_capacity,
35
36
  SkyRunner.dynamo_db_write_capacity,
36
- hash_key: { job_id: :string })
37
+ hash_key: { id: :string },
38
+ range_key: { task_id: :string })
37
39
 
38
40
  sleep 1 while table.status == :creating
39
41
  end
@@ -61,92 +63,124 @@ module SkyRunner
61
63
  end
62
64
 
63
65
  def self.consume!(&block)
64
- queue = sqs_queue
65
- table = dynamo_db_table
66
- raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless queue
67
- raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless table && table.exists?
66
+ raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless sqs_queue
67
+ raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless dynamo_db_table && dynamo_db_table.exists?
68
68
 
69
69
  local_queue = Queue.new
70
- error_queue = Queue.new
71
70
 
72
71
  threads = []
73
72
 
74
- 1.upto(SkyRunner::num_threads) do
73
+ 1.upto(SkyRunner::consumer_threads) do
75
74
  threads << Thread.new do
75
+ table = SkyRunner::dynamo_db_table
76
+
76
77
  loop do
77
- break if SkyRunner::stop_consuming? && local_queue.empty?
78
+ begin
79
+ if local_queue.empty?
80
+ break if SkyRunner::stop_consuming?
81
+
82
+ sleep 1
83
+ next
84
+ end
78
85
 
79
- sleep 1 unless local_queue.size > 0
86
+ klass, job_id, task_id, task_args, message = local_queue.pop
80
87
 
81
- klass, job_id, task_args, message = local_queue.pop
88
+ if klass
89
+ begin
90
+ # Avoid running the same task twice, enter record and raise error if exists already.
82
91
 
83
- if klass
84
- SkyRunner::log :info, "Run Task: #{task_args} Job: #{job_id} Message: #{message.id}"
92
+ SkyRunner::retry_dynamo_db do
93
+ table.items.put({ id: "#{job_id}-tasks", task_id: task_id }, unless_exists: ["id", "task_id"])
94
+ end
85
95
 
86
- job = klass.new
87
- job.skyrunner_job_id = job_id
96
+ SkyRunner::log :info, "Run Task: #{task_args} Job: #{job_id} Message: #{message.id}"
88
97
 
89
- begin
90
- job.consume!(task_args)
91
- message.delete
92
- rescue Exception => e
93
- message.delete rescue nil
94
- error_queue.push(e)
95
- SkyRunner::log :error, "Task Failed: #{task_args} Job: #{job_id} #{e.message} #{e.backtrace.join("\n")}"
98
+ job = klass.new
99
+ job.skyrunner_job_id = job_id
100
+
101
+ begin
102
+ job.consume!(task_args)
103
+ message.delete
104
+ rescue Exception => e
105
+ message.delete rescue nil
106
+ block.call(e) if block_given?
107
+ SkyRunner::log :error, "Task Failed: #{task_args} Job: #{job_id} #{e.message} #{e.backtrace.join("\n")}"
108
+ end
109
+ rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
110
+ message.delete rescue nil
111
+ end
96
112
  end
113
+ rescue Exception => e
114
+ puts e.message
115
+ puts e.backtrace.join("\n")
116
+ raise e
97
117
  end
98
118
  end
99
119
  end
100
120
  end
101
121
 
102
- log :info, "Consumer started."
122
+ 1.upto((SkyRunner::consumer_threads.to_f / SQS_MAX_BATCH_SIZE).ceil + 1) do
123
+ threads << Thread.new do
124
+ begin
125
+ loop do
126
+ table = SkyRunner::dynamo_db_table
127
+ queue = sqs_queue
103
128
 
104
- loop do
105
- if error_queue.size > 0
106
- SkyRunner::stop_consuming!
129
+ break if SkyRunner::stop_consuming?
107
130
 
108
- while error_queue.size > 0
109
- error = error_queue.pop
110
- yield error if block_given?
111
- end
112
- end
131
+ sleep 1 while local_queue.size >= SkyRunner.consumer_threads
113
132
 
114
- return true if stop_consuming?
133
+ received_messages = []
115
134
 
116
- sleep 1 while local_queue.size >= SkyRunner.num_threads
135
+ queue.receive_messages(limit: SQS_MAX_BATCH_SIZE, wait_time_seconds: 5) do |message|
136
+ received_messages << [message, JSON.parse(message.body)]
137
+ end
117
138
 
118
- received_messages = []
139
+ next unless received_messages.size > 0
119
140
 
120
- batch_size = [1, [SkyRunner.consumer_batch_size, SQS_MAX_BATCH_SIZE].min].max
141
+ job_ids = received_messages.map { |m| [m[1]["job_id"], m[1]["job_id"]] }.uniq
121
142
 
122
- queue.receive_messages(limit: batch_size, wait_time_seconds: 5) do |message|
123
- received_messages << [message, JSON.parse(message.body)]
124
- end
143
+ job_records = {}
125
144
 
126
- next unless received_messages.size > 0
127
-
128
- table.batch_get(:all, received_messages.map { |m| m[1]["job_id"] }.uniq, consistent_read: true) do |record|
129
- received_messages.select { |m| m[1]["job_id"] == record["job_id"] }.each do |received_message|
130
- message, message_data = received_message
131
- job_id = message_data["job_id"]
132
-
133
- if record["failed"] == 0 && error_queue.empty?
134
- begin
135
- klass = Kernel.const_get(record["class"])
136
- task_args = message_data["task_args"]
137
- local_queue.push([klass, job_id, task_args, message])
138
- rescue NameError => e
139
- message.delete rescue nil
140
- log :error, "Task Failed: No such class #{record["class"]} #{e.message}"
141
- yield e if block_given?
145
+ SkyRunner::retry_dynamo_db do
146
+ # Read DynamoDB records into job and task lookup tables.
147
+ table.batch_get(["id", "task_id", "failed"], job_ids.uniq, consistent_read: true) do |record|
148
+ job_records[record["id"]] = record
149
+ end
150
+ end
151
+
152
+ received_messages.each do |received_message|
153
+ message, message_data = received_message
154
+ job_id = message_data["job_id"]
155
+ task_id = message_data["task_id"]
156
+
157
+ job_record = job_records[job_id]
158
+
159
+ if job_record && job_record["failed"] == 0
160
+ begin
161
+ klass = Kernel.const_get(message_data["job_class"])
162
+ task_args = message_data["task_args"]
163
+ local_queue.push([klass, job_id, task_id, task_args, message])
164
+ rescue NameError => e
165
+ block.call(e) if block_given?
166
+ message.delete rescue nil
167
+ log :error, "Task Failed: No such class #{message_data["job_class"]} #{e.message}"
168
+ end
169
+ else
170
+ message.delete rescue nil
171
+ end
142
172
  end
143
- else
144
- message.delete
145
173
  end
174
+ rescue Exception => e
175
+ puts e.message
176
+ puts e.backtrace.join("\n")
177
+ raise e
146
178
  end
147
179
  end
148
180
  end
149
181
 
182
+ log :info, "Consumer started."
183
+
150
184
  threads.each(&:join)
151
185
 
152
186
  true
@@ -188,14 +222,11 @@ module SkyRunner
188
222
  mattr_accessor :sqs_message_retention_period
189
223
  @@sqs_message_retention_period = 345600
190
224
 
191
- mattr_accessor :consumer_batch_size
192
- @@consumer_batch_size = 10
193
-
194
225
  mattr_accessor :logger
195
226
  @@logger = Log4r::Logger.new("skyrunner")
196
227
 
197
- mattr_accessor :num_threads
198
- @@num_threads = 10
228
+ mattr_accessor :consumer_threads
229
+ @@consumer_threads = 10
199
230
 
200
231
  mattr_accessor :stop_consuming_flag
201
232
 
@@ -217,6 +248,18 @@ module SkyRunner
217
248
  end
218
249
  end
219
250
 
251
+ def self.retry_dynamo_db(&block)
252
+ handler = Proc.new do |exception, num, delay|
253
+ if exception
254
+ SkyRunner.log :warn, "Having to retry DynamoDB requests. #{exception.message}"
255
+ end
256
+ end
257
+
258
+ with_retries(handler: handler, max_tries: 100, rescue: AWS::DynamoDB::Errors::ProvisionedThroughputExceededException, base_sleep_seconds: 2, max_sleep_seconds: 60) do
259
+ block.call
260
+ end
261
+ end
262
+
220
263
  private
221
264
 
222
265
  def self.dynamo_db
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: skyrunner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Fodor