skyrunner 0.1.3 → 0.1.4

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: b828d1a9dbc216ff8aca55ce2e8007f07ff1c47d
- data.tar.gz: 21ed7eb84af89b74578726cb69faadc981c6fdf6
+ metadata.gz: 3b4147a678d048c85f857479136563287a39576e
+ data.tar.gz: b1ccac1353150bc7948332135aefd8c90bbefb5b
  SHA512:
- metadata.gz: f78e396093a0dcbe578b1976911b29e4eab5fdf01ecaffff0405ebbc7394b642923c5b3b47da2da6c4419168cbb8b5932646eddc3391a79c319e43b5b6e44e2a
- data.tar.gz: 43e38c229e0f32c9cddc66ffa375657f96f1e2b8de18e26cf030c14e96a16d065cd813f895281b8eb7ab4cd95fc0807a778117b8f75a8dc8d1766fa0c3c9e25a
+ metadata.gz: c4c80e760302f36ccdafc30b7c76b9f86838728ee479f578fd71f8e401285845fcb07a7c4003787f2d0d843959f6a9edf7dcddf7d6decec56aaff462aeee2ed8
+ data.tar.gz: a56cfeae042ce3387ffaa1cb757f06d26639f65b2b9ed4789aefe04bdc02442b65367943ca2625161e4826bac8a5bfac010e38c062bf981aa2bdbb13aa68562d
data/Gemfile CHANGED
@@ -7,3 +7,4 @@ gem "aws-sdk"
  gem "activesupport"
  gem "log4r"
  gem "trollop"
+ gem "retries"
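The new `retries` dependency provides the `with_retries` helper that `SkyRunner.retry_dynamo_db` (added further down in this diff) wraps around DynamoDB calls. A minimal sketch of how that helper behaves, using a hypothetical `ThrottledError` in place of a real DynamoDB exception:

```ruby
require "retries"

# Hypothetical stand-in for a throttled DynamoDB call.
class ThrottledError < StandardError; end

attempts = 0

# Called before each retry with the exception, attempt number, and total delay so far.
handler = Proc.new do |exception, attempt_number, total_delay|
  warn "retry ##{attempt_number} after #{exception.class} (#{total_delay.round(2)}s elapsed)"
end

# with_retries re-runs the block on the listed exception class,
# sleeping with exponential backoff between attempts.
result = with_retries(max_tries: 5,
                      rescue: ThrottledError,
                      handler: handler,
                      base_sleep_seconds: 0.5,
                      max_sleep_seconds: 2.0) do
  attempts += 1
  raise ThrottledError, "throughput exceeded" if attempts < 3
  :ok
end

puts result # => :ok after two retried failures
```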
data/bin/skyrunner CHANGED
@@ -38,8 +38,7 @@ end
 
  SkyRunner.dynamo_db_table_name = opts[:dynamo_db_table_name]
  SkyRunner.sqs_queue_name = opts[:sqs_queue_name]
- SkyRunner.consumer_batch_size = opts[:batch_size].to_i
- SkyRunner.num_threads = opts[:num_threads].to_i
+ SkyRunner.consumer_threads = opts[:num_threads].to_i
 
  COMMANDS = ["init", "purge", "consume", "test"]
 
@@ -57,6 +56,6 @@ when "consume"
  when "test"
  $: << "."
  require "#{File.dirname(__FILE__)}/../jobs/example_job"
- ExampleJobModule::ExampleJob.new.execute!(number_of_tasks: 100)
+ ExampleJobModule::ExampleJob.new.execute!(number_of_tasks: 1000)
  SkyRunner.consume!
  end
@@ -5,17 +5,13 @@ SkyRunner.setup do |config|
  config.dynamo_db_table_name = "skyrunner_jobs_#{Rails.env}"
  config.sqs_queue_name = "skyrunner_tasks_#{Rails.env}"
 
- # Set the number of tasks for a consumer to pull and run from SQS at a time. (Max 10, default 10)
- #
- # config.consumer_batch_size = 10
-
  # Set the visibility timeout of queue items. If the consumer batch size (above) is set to 10,
  # this should provide sufficient time for a consumer to process 10 tasks, for example. (default 90)
  #
  # config.visibility_timeout = 90
 
- # Set the number of concurrent threads for the consumer process.
+ # Set the number of concurrent consumer threads when running the consumer.
  # (If greater than one, you obviously need to make sure your tasks are thread-safe.)
  #
- # config.num_threads = 10
+ # config.consumer_threads = 10
  end
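For reference, a minimal initializer using the renamed setting. Only options that appear in this diff are shown; the table and queue names are placeholders:

```ruby
SkyRunner.setup do |config|
  config.dynamo_db_table_name = "skyrunner_jobs_production"
  config.sqs_queue_name       = "skyrunner_tasks_production"

  # Renamed in 0.1.4 (was config.num_threads); consumer_batch_size is gone.
  config.consumer_threads = 10

  # Should give a consumer enough time to finish the tasks it has in flight.
  config.visibility_timeout = 90
end
```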
data/lib/skyrunner/job.rb CHANGED
@@ -34,13 +34,17 @@ module SkyRunner::Job
  table = SkyRunner.dynamo_db_table
  queue = SkyRunner.sqs_queue
 
- record = table.items.put(job_id: job_id, class: self.class.name, args: args.to_json, total_tasks: 1, completed_tasks: 0, done: 0, failed: 0)
+ record = nil
+
+ SkyRunner::retry_dynamo_db do
+ record = table.items.put(id: job_id, task_id: job_id, class: self.class.name, args: args.to_json, total_tasks: 1, completed_tasks: 0, done: 0, failed: 0)
+ end
 
  pending_args = []
 
  flush = lambda do
  messages = pending_args.map do |task_args|
- { job_id: job_id, task_id: SecureRandom.hex, task_args: task_args }.to_json
+ { job_id: job_id, task_id: SecureRandom.hex, task_args: task_args, job_class: self.class.name }.to_json
  end
 
  dropped_message_count = 0
@@ -57,7 +61,9 @@ module SkyRunner::Job
  end
  end
 
- record.attributes.add({ total_tasks: messages.size - dropped_message_count })
+ SkyRunner::retry_dynamo_db do
+ record.attributes.add({ total_tasks: messages.size - dropped_message_count })
+ end
  end
 
  self.run(args) do |*task_args|
@@ -94,7 +100,7 @@ module SkyRunner::Job
  private
 
  def dynamo_db_record
- SkyRunner.dynamo_db_table.items[self.skyrunner_job_id]
+ SkyRunner.dynamo_db_table.items[self.skyrunner_job_id, self.skyrunner_job_id]
  end
 
  def handle_task_failed!
@@ -102,7 +108,10 @@ module SkyRunner::Job
 
  begin
  record = dynamo_db_record
- record.attributes.add({ failed: 1 })
+
+ SkyRunner::retry_dynamo_db do
+ record.attributes.add({ failed: 1 })
+ end
 
  (self.class.job_event_methods[:failed] || []).each do |method|
  if self.method(method).arity == 0 && self.method(method).parameters.size == 0
@@ -111,6 +120,8 @@ module SkyRunner::Job
  self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
  end
  end
+
+ delete_task_records! rescue nil
  rescue Exception => e
  end
  end
@@ -119,15 +130,20 @@ module SkyRunner::Job
  return false unless self.skyrunner_job_id
 
  record = dynamo_db_record
+ new_attributes = nil
 
- new_attributes = record.attributes.add({ completed_tasks: 1 }, return: :all_new)
+ SkyRunner::retry_dynamo_db do
+ new_attributes = record.attributes.add({ completed_tasks: 1 }, return: :all_new)
+ end
 
  if new_attributes["total_tasks"] == new_attributes["completed_tasks"]
  begin
  if_condition = { completed_tasks: new_attributes["total_tasks"], done: 0 }
 
- record.attributes.update(if: if_condition) do |u|
- u.add(done: 1)
+ SkyRunner::retry_dynamo_db do
+ record.attributes.update(if: if_condition) do |u|
+ u.add(done: 1)
+ end
  end
 
  (self.class.job_event_methods[:completed] || []).each do |method|
@@ -137,11 +153,69 @@ module SkyRunner::Job
  self.send(method, JSON.parse(record.attributes["args"]).symbolize_keys)
  end
  end
+
+ delete_task_records! rescue nil
  rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
- # This is OK, we had a double finisher.
+ # This is OK, we had a double finisher so lets block them.
  end
  end
 
  true
  end
+
+ def delete_task_records!
+ delete_batch_queue = Queue.new
+ mutex = Mutex.new
+ delete_items_queued = false
+ threads = []
+
+ 1.upto([1, (SkyRunner.consumer_threads / 4.0).floor].max) do
+ threads << Thread.new do
+
+ db_table = SkyRunner.dynamo_db_table
+
+ loop do
+ should_break = false
+
+ mutex.synchronize do
+ should_break = (SkyRunner::stop_consuming? || delete_items_queued) && delete_batch_queue.empty?
+ end
+
+ break if should_break
+
+ if delete_batch_queue.size > 0
+ batch = delete_batch_queue.pop
+
+ if batch
+ SkyRunner::retry_dynamo_db do
+ db_table.batch_delete(batch)
+ end
+ end
+ else
+ sleep 1
+ end
+ end
+ end
+ end
+
+ items_to_delete = []
+ table = SkyRunner.dynamo_db_table
+
+ table.items.query(hash_value: "#{self.skyrunner_job_id}-tasks", select: [:id, :task_id]) do |task_item|
+ items_to_delete << [task_item.attributes["id"], task_item.attributes["task_id"]]
+
+ if items_to_delete.size >= 25
+ delete_batch_queue << items_to_delete
+ items_to_delete = []
+ end
+ end
+
+ delete_batch_queue << items_to_delete unless items_to_delete.empty?
+
+ mutex.synchronize do
+ delete_items_queued = true
+ end
+
+ threads.each(&:join)
+ end
  end
@@ -1,3 +1,3 @@
  module Skyrunner
- VERSION = "0.1.3"
+ VERSION = "0.1.4"
  end
data/lib/skyrunner.rb CHANGED
@@ -5,6 +5,7 @@ require "active_support/core_ext"
  require "log4r"
  require "json"
  require "set"
+ require "retries"
 
  module SkyRunner
  require "skyrunner/engine" if defined?(Rails)
@@ -33,7 +34,8 @@ module SkyRunner
  table = dynamo_db.tables.create(table_name,
  SkyRunner.dynamo_db_read_capacity,
  SkyRunner.dynamo_db_write_capacity,
- hash_key: { job_id: :string })
+ hash_key: { id: :string },
+ range_key: { task_id: :string })
 
  sleep 1 while table.status == :creating
  end
@@ -61,92 +63,124 @@ module SkyRunner
  end
 
  def self.consume!(&block)
- queue = sqs_queue
- table = dynamo_db_table
- raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless queue
- raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless table && table.exists?
+ raise "Queue #{SkyRunner::sqs_queue_name} not found. Try running 'skyrunner init'" unless sqs_queue
+ raise "DynamoDB table #{SkyRunner::dynamo_db_table_name} not found. Try running 'skyrunner init'" unless dynamo_db_table && dynamo_db_table.exists?
 
  local_queue = Queue.new
- error_queue = Queue.new
 
  threads = []
 
- 1.upto(SkyRunner::num_threads) do
+ 1.upto(SkyRunner::consumer_threads) do
  threads << Thread.new do
+ table = SkyRunner::dynamo_db_table
+
  loop do
- break if SkyRunner::stop_consuming? && local_queue.empty?
+ begin
+ if local_queue.empty?
+ break if SkyRunner::stop_consuming?
+
+ sleep 1
+ next
+ end
 
- sleep 1 unless local_queue.size > 0
+ klass, job_id, task_id, task_args, message = local_queue.pop
 
- klass, job_id, task_args, message = local_queue.pop
+ if klass
+ begin
+ # Avoid running the same task twice, enter record and raise error if exists already.
 
- if klass
- SkyRunner::log :info, "Run Task: #{task_args} Job: #{job_id} Message: #{message.id}"
+ SkyRunner::retry_dynamo_db do
+ table.items.put({ id: "#{job_id}-tasks", task_id: task_id }, unless_exists: ["id", "task_id"])
+ end
 
- job = klass.new
- job.skyrunner_job_id = job_id
+ SkyRunner::log :info, "Run Task: #{task_args} Job: #{job_id} Message: #{message.id}"
 
- begin
- job.consume!(task_args)
- message.delete
- rescue Exception => e
- message.delete rescue nil
- error_queue.push(e)
- SkyRunner::log :error, "Task Failed: #{task_args} Job: #{job_id} #{e.message} #{e.backtrace.join("\n")}"
+ job = klass.new
+ job.skyrunner_job_id = job_id
+
+ begin
+ job.consume!(task_args)
+ message.delete
+ rescue Exception => e
+ message.delete rescue nil
+ block.call(e) if block_given?
+ SkyRunner::log :error, "Task Failed: #{task_args} Job: #{job_id} #{e.message} #{e.backtrace.join("\n")}"
+ end
+ rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException => e
+ message.delete rescue nil
+ end
  end
+ rescue Exception => e
+ puts e.message
+ puts e.backtrace.join("\n")
+ raise e
  end
  end
  end
  end
 
- log :info, "Consumer started."
+ 1.upto((SkyRunner::consumer_threads.to_f / SQS_MAX_BATCH_SIZE).ceil + 1) do
+ threads << Thread.new do
+ begin
+ loop do
+ table = SkyRunner::dynamo_db_table
+ queue = sqs_queue
 
- loop do
- if error_queue.size > 0
- SkyRunner::stop_consuming!
+ break if SkyRunner::stop_consuming?
 
- while error_queue.size > 0
- error = error_queue.pop
- yield error if block_given?
- end
- end
+ sleep 1 while local_queue.size >= SkyRunner.consumer_threads
 
- return true if stop_consuming?
+ received_messages = []
 
- sleep 1 while local_queue.size >= SkyRunner.num_threads
+ queue.receive_messages(limit: SQS_MAX_BATCH_SIZE, wait_time_seconds: 5) do |message|
+ received_messages << [message, JSON.parse(message.body)]
+ end
 
- received_messages = []
+ next unless received_messages.size > 0
 
- batch_size = [1, [SkyRunner.consumer_batch_size, SQS_MAX_BATCH_SIZE].min].max
+ job_ids = received_messages.map { |m| [m[1]["job_id"], m[1]["job_id"]] }.uniq
 
- queue.receive_messages(limit: batch_size, wait_time_seconds: 5) do |message|
- received_messages << [message, JSON.parse(message.body)]
- end
+ job_records = {}
 
- next unless received_messages.size > 0
-
- table.batch_get(:all, received_messages.map { |m| m[1]["job_id"] }.uniq, consistent_read: true) do |record|
- received_messages.select { |m| m[1]["job_id"] == record["job_id"] }.each do |received_message|
- message, message_data = received_message
- job_id = message_data["job_id"]
-
- if record["failed"] == 0 && error_queue.empty?
- begin
- klass = Kernel.const_get(record["class"])
- task_args = message_data["task_args"]
- local_queue.push([klass, job_id, task_args, message])
- rescue NameError => e
- message.delete rescue nil
- log :error, "Task Failed: No such class #{record["class"]} #{e.message}"
- yield e if block_given?
+ SkyRunner::retry_dynamo_db do
+ # Read DynamoDB records into job and task lookup tables.
+ table.batch_get(["id", "task_id", "failed"], job_ids.uniq, consistent_read: true) do |record|
+ job_records[record["id"]] = record
+ end
+ end
+
+ received_messages.each do |received_message|
+ message, message_data = received_message
+ job_id = message_data["job_id"]
+ task_id = message_data["task_id"]
+
+ job_record = job_records[job_id]
+
+ if job_record && job_record["failed"] == 0
+ begin
+ klass = Kernel.const_get(message_data["job_class"])
+ task_args = message_data["task_args"]
+ local_queue.push([klass, job_id, task_id, task_args, message])
+ rescue NameError => e
+ block.call(e) if block_given?
+ message.delete rescue nil
+ log :error, "Task Failed: No such class #{message_data["job_class"]} #{e.message}"
+ end
+ else
+ message.delete rescue nil
+ end
  end
- else
- message.delete
  end
+ rescue Exception => e
+ puts e.message
+ puts e.backtrace.join("\n")
+ raise e
  end
  end
  end
 
+ log :info, "Consumer started."
+
  threads.each(&:join)
 
  true
@@ -188,14 +222,11 @@ module SkyRunner
  mattr_accessor :sqs_message_retention_period
  @@sqs_message_retention_period = 345600
 
- mattr_accessor :consumer_batch_size
- @@consumer_batch_size = 10
-
  mattr_accessor :logger
  @@logger = Log4r::Logger.new("skyrunner")
 
- mattr_accessor :num_threads
- @@num_threads = 10
+ mattr_accessor :consumer_threads
+ @@consumer_threads = 10
 
  mattr_accessor :stop_consuming_flag
 
@@ -217,6 +248,18 @@ module SkyRunner
  end
  end
 
+ def self.retry_dynamo_db(&block)
+ handler = Proc.new do |exception, num, delay|
+ if exception
+ SkyRunner.log :warn, "Having to retry DynamoDB requests. #{exception.message}"
+ end
+ end
+
+ with_retries(handler: handler, max_tries: 100, rescue: AWS::DynamoDB::Errors::ProvisionedThroughputExceededException, base_sleep_seconds: 2, max_sleep_seconds: 60) do
+ block.call
+ end
+ end
+
  private
 
  def self.dynamo_db
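The schema change above (a composite `id`/`task_id` key) is what lets each consumer claim a task with a conditional put before running it. A rough sketch of that idempotency check, assuming the aws-sdk v1 item API used throughout this diff; the table name, schema setters, and IDs below are placeholders, not part of the gem:

```ruby
require "aws-sdk" # v1-era API, as listed in the Gemfile

dynamo_db = AWS::DynamoDB.new
table = dynamo_db.tables["skyrunner_jobs_production"] # placeholder table name
table.hash_key  = [:id, :string]       # assumed schema setters for sdk v1
table.range_key = [:task_id, :string]

job_id, task_id = "abc123", "def456"   # placeholder IDs

begin
  # The conditional put fails if a row with this id/task_id already exists,
  # so only the first consumer to claim the task gets to run it.
  table.items.put({ id: "#{job_id}-tasks", task_id: task_id },
                  unless_exists: ["id", "task_id"])
  # ... safe to run the task here ...
rescue AWS::DynamoDB::Errors::ConditionalCheckFailedException
  # Another consumer already claimed this task; skip it.
end
```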
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: skyrunner
  version: !ruby/object:Gem::Version
- version: 0.1.3
+ version: 0.1.4
  platform: ruby
  authors:
  - Greg Fodor