sqewer 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitlab-ci.yml +12 -0
- data/.yardopts +1 -0
- data/DETAILS.md +180 -0
- data/FAQ.md +54 -0
- data/Gemfile +18 -0
- data/README.md +69 -0
- data/Rakefile +41 -0
- data/example.env +6 -0
- data/lib/sqewer.rb +11 -0
- data/lib/sqewer/atomic_counter.rb +22 -0
- data/lib/sqewer/cli.rb +44 -0
- data/lib/sqewer/connection.rb +66 -0
- data/lib/sqewer/contrib/appsignal_wrapper.rb +29 -0
- data/lib/sqewer/contrib/performable.rb +23 -0
- data/lib/sqewer/execution_context.rb +55 -0
- data/lib/sqewer/isolator.rb +33 -0
- data/lib/sqewer/middleware_stack.rb +44 -0
- data/lib/sqewer/null_logger.rb +9 -0
- data/lib/sqewer/serializer.rb +71 -0
- data/lib/sqewer/simple_job.rb +78 -0
- data/lib/sqewer/submitter.rb +18 -0
- data/lib/sqewer/version.rb +3 -0
- data/lib/sqewer/worker.rb +200 -0
- data/spec/conveyor_belt_spec.rb +14 -0
- data/spec/spec_helper.rb +49 -0
- data/spec/sqewer/atomic_counter_spec.rb +15 -0
- data/spec/sqewer/cli_app.rb +13 -0
- data/spec/sqewer/cli_spec.rb +57 -0
- data/spec/sqewer/connection_spec.rb +57 -0
- data/spec/sqewer/execution_context_spec.rb +43 -0
- data/spec/sqewer/middleware_stack_spec.rb +69 -0
- data/spec/sqewer/serializer_spec.rb +123 -0
- data/spec/sqewer/simple_job_spec.rb +69 -0
- data/spec/sqewer/submitter_spec.rb +59 -0
- data/spec/sqewer/worker_spec.rb +130 -0
- data/sqewer.gemspec +108 -0
- metadata +248 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
# Adapter that handles communication with a specific queue. In the future this
# could be switched to a Google PubSub queue, or to AMQP, or to any other queue
# with guaranteed re-delivery without ACK. The required queue semantics are
# very simple:
#
# * no message should be deleted if the receiving client has not deleted it explicitly
# * any execution that ends with an exception should cause the message to be re-enqueued
class Sqewer::Connection
  DEFAULT_TIMEOUT_SECONDS = 5
  BATCH_RECEIVE_SIZE = 10

  # SQS only accepts a long-poll wait between 0 and 20 seconds; larger values
  # are rejected by the service, so `poll` caps the wait at this value.
  MAX_WAIT_TIME_SECONDS = 20

  # Returns the default adapter, connected to the queue set via the `SQS_QUEUE_URL`
  # environment variable.
  #
  # @return [Sqewer::Connection]
  # @raise [RuntimeError] if `SQS_QUEUE_URL` is not present in the environment
  def self.default
    new(ENV.fetch('SQS_QUEUE_URL'))
  rescue KeyError
    raise "SQS_QUEUE_URL not set in the environment. This is the queue URL that the default Sqewer connection uses"
  end

  # Initializes a new adapter, with access to the SQS queue at the given URL.
  #
  # @param queue_url[String] the SQS queue URL (the URL can be copied from your AWS console)
  def initialize(queue_url)
    # Loaded lazily, on first instantiation, rather than when this file is read.
    require 'aws-sdk'
    @queue_url = queue_url
  end

  # Poll for messages, and return if no records are received within the given period.
  # The period is used both as the poller idle timeout and (capped at
  # MAX_WAIT_TIME_SECONDS) as the long-poll wait of each receive call.
  #
  # @param timeout[Fixnum] the number of seconds to wait before returning if no messages appear on the queue
  # @yield [String, String] the receipt identifier and contents of the message body
  # @return [void]
  def poll(timeout = DEFAULT_TIMEOUT_SECONDS)
    idle_seconds = timeout.to_i
    wait_seconds = [idle_seconds, MAX_WAIT_TIME_SECONDS].min

    poller = ::Aws::SQS::QueuePoller.new(@queue_url)
    # SDK v2 automatically deletes messages if the block returns normally, but we want
    # it to happen manually from the caller - hence skip_delete.
    poller.poll(max_number_of_messages: BATCH_RECEIVE_SIZE, skip_delete: true,
                idle_timeout: idle_seconds, wait_time_seconds: wait_seconds) do |sqs_messages|
      sqs_messages.each do |sqs_message|
        yield [sqs_message.receipt_handle, sqs_message.body]
      end
    end
  end

  # Send a message to the backing queue
  #
  # @param message_body[String] the message to send
  # @param kwargs_for_send[Hash] additional arguments for the submit (such as `delay_seconds`).
  #     Passes the arguments to the AWS SDK.
  # @return [void]
  def send_message(message_body, **kwargs_for_send)
    client = ::Aws::SQS::Client.new
    client.send_message(queue_url: @queue_url, message_body: message_body, **kwargs_for_send)
  end

  # Deletes a message after it has been successfully decoded and processed
  #
  # @param message_identifier[String] the ID of the message to delete. For SQS, it is the receipt handle
  # @return [void]
  def delete_message(message_identifier)
    client = ::Aws::SQS::Client.new
    client.delete_message(queue_url: @queue_url, receipt_handle: message_identifier)
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Sqewer
  module Contrib
    # Can be used as a wrapper middleware in an ExecutionContext to log exceptions
    # to Appsignal and to monitor performance. Will only activate
    # if the Appsignal gem is loaded within the current process and active.
    class AppsignalWrapper
      # Wraps the deserialization of a job in an Appsignal transaction named
      # 'perform_job.demarshal'. When Appsignal is not defined or not active
      # the block is simply yielded to.
      #
      # @param serializer the serializer in use (its class name is reported)
      # @param msg_id the message identifier, reported as the `receipt_handle` param
      # @param msg_payload the message body (not used by this wrapper)
      # @return whatever the given block returns when Appsignal is inactive,
      #     otherwise whatever `Appsignal.monitor_transaction` returns
      def around_deserialization(serializer, msg_id, msg_payload)
        return yield unless (defined?(Appsignal) && Appsignal.active?)

        Appsignal.monitor_transaction('perform_job.demarshal',
          :class => serializer.class.to_s, :params => {:receipt_handle => msg_id}, :method => 'deserialize') do
          yield
        end
      end

      # Run the job with Appsignal monitoring, in a transaction named
      # 'perform_job.sqewer'. The object that Appsignal passes to the block
      # is stored in the context under 'appsignal.transaction'.
      #
      # @param job[#to_h] the job about to be run (its class name and `to_h` are reported)
      # @param context[#[]=] the execution context of the job
      # @return whatever the given block returns when Appsignal is inactive,
      #     otherwise whatever `Appsignal.monitor_transaction` returns
      def around_execution(job, context)
        return yield unless (defined?(Appsignal) && Appsignal.active?)

        Appsignal.monitor_transaction('perform_job.sqewer',
          :class => job.class.to_s, :params => job.to_h, :method => 'run') do |t|
          context['appsignal.transaction'] = t
          yield
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Sqewer
  module Contrib
    # A job class that can be used to adapt Jobs from ActiveJob and friends. They use
    # the `perform` method which gets the arguments.
    class Performable
      # @param performable_class[String, Class] the name of the class that responds to
      #     `perform` (the class itself is accepted too and is reduced to its name)
      # @param perform_arguments[Array] the arguments that will be splatted into `perform`
      def initialize(performable_class:, perform_arguments:)
        # Always keep the name, never the constant: the name is what `to_h` has to
        # hand to the serializer and what `run` resolves again via const_get.
        @class = performable_class.to_s
        @args = perform_arguments
      end

      # @return [Hash] the keyword arguments needed to re-create this object
      def to_h
        {performable_class: @class, perform_arguments: @args}
      end

      # @return [String] the class name followed by the inspected arguments
      def inspect
        '<%s{%s}>' % [@class, @args.inspect]
      end

      # Resolves the class by name and calls `perform` on it with the stored arguments.
      #
      # @param context the execution context (not used)
      # @return whatever `perform` returns
      def run(context)
        Kernel.const_get(@class).perform(*@args)
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
# The object handed to every Job as the argument of its `#run` method.
# A job can use it to submit follow-up jobs, or to reach things that belong
# to the execution environment (database/key-value store connections,
# error handling transaction and so on). Keys are always stored as Strings.
class Sqewer::ExecutionContext
  # Builds a context around a submitter and an optional Hash of extra values.
  #
  # @param submitter[Sqewer::Submitter] the object to submit new jobs through. Used when jobs want to submit jobs
  # @param extra_variables[Hash] any extra data to pass around to each Job
  def initialize(submitter, extra_variables={})
    @submitter = submitter
    @params = {}
    # Go through []= so that every key gets stringified the same way
    extra_variables.each_pair do |name, value|
      self[name] = value
    end
  end

  # Hands one or more jobs over to the submitter, together with the
  # given execution options.
  def submit!(*jobs, **execution_options)
    @submitter.submit!(*jobs, **execution_options)
  end

  # Stores a value in the execution environment
  #
  # @param key[#to_s] the key to set
  # @param value the value to set
  def []=(key, value)
    name = key.to_s
    @params[name] = value
  end

  # Looks up a value of the execution environment by name
  #
  # @param key[#to_s] the key to get
  def [](key)
    name = key.to_s
    @params[name]
  end

  # Looks up a value of the execution environment, falling back to the
  # given block when the key has not been set
  #
  # @param key[#to_s] the key to get
  # @param blk the block to execute if no such key is present
  def fetch(key, &blk)
    name = key.to_s
    @params.fetch(name, &blk)
  end

  # The logger stored in the execution environment under 'logger', or
  # Sqewer::NullLogger when that key has not been set. Can be used to
  # supply a logger prefixed with job parameters per job.
  #
  # @return [Logger] the logger to send messages to.
  def logger
    return @params['logger'] if @params.key?('logger')

    Sqewer::NullLogger
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Decides how the execution environment of a job is isolated. Jobs can be run
# each in a separate process (a-la Resque) or, which is the default, directly
# in the calling thread (a-la Sidekiq).
class Sqewer::Isolator
  # Runs each job in a separate process.
  class PerProcess
    # Wraps a particular job flow (both instantiation and execution) in a
    # forked process, using the `exceptional_fork` library which is loaded
    # on first use.
    def isolate
      require 'exceptional_fork'
      ExceptionalFork.fork_and_wait do
        yield
      end
    end
  end

  class << self
    # The memoized Isolator that runs each job unserialization and execution
    # in a separate process.
    #
    # @return [Sqewer::Isolator::PerProcess] the isolator
    def process
      @per_process ||= PerProcess.new
    end

    # The memoized default Isolator, which just calls the instantiation/execution block
    #
    # @return [Sqewer::Isolator] the isolator
    def default
      @default ||= new
    end
  end

  # Wraps a particular job flow (both instantiation and execution).
  # The default implementation adds no isolation and yields straight away.
  def isolate
    yield
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Wraps the job deserialization and job execution procedures in an arbitrary
# number of handlers ("middleware"). Handlers added first end up outermost.
class Sqewer::MiddlewareStack

  # The memoized default middleware stack. It starts out without any handlers.
  #
  # @return [MiddlewareStack] the default empty stack
  def self.default
    @instance ||= new
  end

  # Creates a new MiddlewareStack. Once created, handlers can be added using `:<<`
  def initialize
    @handlers = []
  end

  # Adds a handler. A handler may respond to #around_deserialization, to
  # #around_execution, or to both - a hook it does not respond to is skipped.
  #
  # @param handler[#around_deserialization, #around_execution] The middleware item to insert
  # @return [void]
  def <<(handler)
    # TODO: cache the wrapping proc
    @handlers << handler
  end

  # Runs the given block wrapped in the `around_execution` hook of every
  # handler that responds to it.
  #
  # @param job the job that is about to be executed
  # @param context the execution context of the job
  # @return the result of the outermost handler (or of the block if there are no handlers)
  def around_execution(job, context, &inner_block)
    return yield if @handlers.empty?

    build_chain(:around_execution, inner_block, job, context).call
  end

  # Runs the given block wrapped in the `around_deserialization` hook of every
  # handler that responds to it.
  #
  # @param serializer the serializer that is going to restore the job
  # @param message_id the identifier of the message
  # @param message_body the body of the message
  # @return the result of the outermost handler (or of the block if there are no handlers)
  def around_deserialization(serializer, message_id, message_body, &inner_block)
    return yield if @handlers.empty?

    build_chain(:around_deserialization, inner_block, serializer, message_id, message_body).call
  end

  private

  # Nests the innermost callable inside one lambda per responding handler.
  # Walks the handlers last-to-first so that the first handler added becomes
  # the outermost wrapper.
  def build_chain(hook_name, innermost, *hook_args)
    chain = innermost
    @handlers.reverse_each do |handler|
      next unless handler.respond_to?(hook_name)

      # Separate local per iteration, so each lambda closes over its own inner step
      wrapped = chain
      chain = -> { handler.public_send(hook_name, *hook_args, &wrapped) }
    end
    chain
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Turns jobs into strings that can be sent to the job queue, and restores
# jobs from those strings. If you want to use, say, Marshal to store your
# jobs instead of the default, or if you want to generate custom job objects
# from S3 bucket notifications, you might want to override this class and
# feed the overridden instance to {Sqewer::Worker}.
class Sqewer::Serializer
  # Raised by #serialize when the class of the job cannot be looked up by name
  class AnonymousJobClass < StandardError; end

  # Raised by #unserialize when the job class rejects the stored parameters
  class ArityMismatch < ArgumentError; end

  # The default Serializer. Only one instance is kept
  # (because the serializer is stateless).
  #
  # @return [Serializer] the instance of the default JSON serializer
  def self.default
    @instance ||= new
  end

  # Instantiate a Job object from a message body string. If the
  # returned result is `nil`, the job will be skipped.
  #
  # NOTE(review): the class name is taken from the message body and resolved
  # with const_get, so whoever can write to the queue chooses which class
  # gets instantiated - confirm the queue is only written to by trusted code.
  #
  # @param message_body[String] a string in JSON containing the job parameters
  # @return [#run, NilClass] an object that responds to `run()` or nil.
  # @raise [RuntimeError] if the body is not a JSON object or names no job class
  # @raise [ArityMismatch] if the job class raises ArgumentError for the stored parameters
  def unserialize(message_body)
    ticket = JSON.parse(message_body, symbolize_names: true)
    raise "Job ticket must unmarshal into a Hash" unless ticket.is_a?(Hash)

    # Tickets in the old format carry :job_class next to the parameters
    ticket = convert_old_ticket_format(ticket) if ticket[:job_class]

    class_name, params = ticket.values_at(:_job_class, :_job_params)
    raise ":_job_class not set in the ticket" unless class_name

    job_class = Kernel.const_get(class_name)
    return job_class.new if params.nil? || params.empty? # no args

    begin
      # The parameters of the ticket are keyword arguments for the job
      job_class.new(**params)
    rescue ArgumentError
      raise ArityMismatch, "Could not instantiate #{job_class} because it did not accept the arguments #{params.inspect}"
    end
  end

  # Converts the given Job into a string, which can be submitted to the queue
  #
  # @param job[#to_h] an object that supports `to_h`
  # @return [String] serialized string ready to be put into the queue
  # @raise [AnonymousJobClass] if the class of the job cannot be resolved by name
  def serialize(job)
    class_name = job.class.to_s
    ensure_resolvable!(job, class_name)

    params = job.to_h if job.respond_to?(:to_h)
    JSON.dump({_job_class: class_name, _job_params: params})
  end

  private

  # Verifies that the class name can be turned back into a constant, since
  # that is what #unserialize will have to do with it later.
  def ensure_resolvable!(job, class_name)
    Kernel.const_get(class_name)
  rescue NameError
    raise AnonymousJobClass, "The class of #{job.inspect} could not be resolved and will not restore to a Job"
  end

  # Rewrites an old-format ticket (class name under :job_class, parameters
  # alongside it) into the current shape.
  def convert_old_ticket_format(hash_of_properties)
    class_name = hash_of_properties.delete(:job_class)
    {_job_class: class_name, _job_params: hash_of_properties}
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# A module that you can include into your Job class.
# It adds the following features:
#
# * initialize() will have keyword access to all accessors, and will ensure you have called each one of them
# * to_h() will produce a symbolized Hash with all the properties that were given to initialize()
# * inspect() will provide a sensible default string representation for logging
module Sqewer::SimpleJob
  UnknownJobAttribute = Class.new(StandardError)
  MissingAttribute = Class.new(StandardError)

  # Matches the name of a writer method (`something=`), capturing the attribute name
  EQ_END = /(\w+)(\=)$/

  # Returns the list of methods on the object that have corresponding accessors.
  # This is then used by #inspect to compose a list of the job parameters, formatted
  # as an inspected Hash.
  #
  # @return [Array<Symbol>] the array of attributes to show via inspect
  def inspectable_attributes
    # All the attributes that have accessors
    methods.grep(EQ_END).map{|e| e.to_s.gsub(EQ_END, '\1')}.map(&:to_sym)
  end

  # Returns the inspection string with the job and all of its instantiation keyword attributes.
  # If `inspectable_attributes` has been overridden, the attributes returned by that method will be the
  # ones returned in the inspection string. The attributes are rendered with Hash#inspect.
  #
  # @return [String] the object inspect string
  def inspect
    key_attrs = inspectable_attributes
    hash_repr = to_h
    h = key_attrs.each_with_object({}) do |k, o|
      o[k] = hash_repr[k]
    end
    "<#{self.class}:#{h.inspect}>"
  end

  # Initializes a new Job with the given job args. Will check for presence of
  # accessor methods for each of the arguments, and call them with the arguments given.
  #
  # If one of the accessors was not triggered during the call, an exception will be raised
  # (because you most likely forgot a parameter for a job, or the job class changed whereas
  # the queue still contains jobs in old formats).
  #
  # @param jobargs[Hash] the keyword arguments, mapping 1 to 1 to the accessors of the job
  # @raise [UnknownJobAttribute] if an argument has no matching writer method
  # @raise [MissingAttribute] if a writer method exists that received no argument
  def initialize(**jobargs)
    @simple_job_args = jobargs.keys

    jobargs.each do |k, v|
      accessor = "#{k}="
      unless respond_to?(accessor)
        raise UnknownJobAttribute, "Unknown attribute #{k.inspect} for #{self.class}"
      end

      send(accessor, v)
    end

    # Deliberately not using #inspectable_attributes here, since that method
    # may be overridden to limit what gets shown by #inspect.
    accessors = methods.grep(EQ_END).map{|method_name| method_name.to_s.gsub(EQ_END, '\1').to_sym }

    # Plain Array difference keeps the order of the accessors and needs no extra library
    missing_attributes = accessors - jobargs.keys
    unless missing_attributes.empty?
      raise MissingAttribute, "Missing job attribute #{missing_attributes.first.inspect}"
    end
  end

  # @return [Hash] the arguments given to initialize, with the values read
  #     back through the reader methods of the same names
  def to_h
    @simple_job_args.each_with_object({}) do |k, h|
      h[k] = send(k)
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# A thin shim that puts jobs on the queue. It is made of a connection
# (something that responds to `#send_message`) and a serializer
# (something that responds to `#serialize`) which converts the job
# into the string that will be put in the queue.
class Sqewer::Submitter < Struct.new(:connection, :serializer)

  # Builds a Submitter out of the default connection
  # and the default serializer.
  def self.default
    new(Sqewer::Connection.default, Sqewer::Serializer.default)
  end

  # Serializes every given job and sends it through the connection, one
  # message per job.
  #
  # @param jobs the jobs to submit
  # @param kwargs_for_send[Hash] extra keyword arguments, handed to `send_message` of the connection as-is
  def submit!(*jobs, **kwargs_for_send)
    jobs.each do |job|
      payload = serializer.serialize(job)
      connection.send_message(payload, **kwargs_for_send)
    end
  end
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'thread'
|
3
|
+
require 'very_tiny_state_machine'
|
4
|
+
require 'fiber'
|
5
|
+
|
6
|
+
# A massively threaded worker engine
class Sqewer::Worker
  DEFAULT_NUM_THREADS = 4
  SLEEP_SECONDS_ON_EMPTY_QUEUE = 1
  THROTTLE_FACTOR = 2

  # The key under which the logger is handed to the execution context
  STR_logger = 'logger'

  # Returns the default Worker instance, configured based on the default components
  #
  # @return [Sqewer::Worker]
  def self.default
    @default ||= new
  end

  # Creates a new Worker. The Worker, unlike it is in the Rails tradition, is only responsible for
  # the actual processing of jobs, and not for the job arguments.
  #
  # @param connection[Sqewer::Connection] the object that handles polling and submitting
  # @param serializer[#serialize, #unserialize] the serializer/unserializer for the jobs
  # @param execution_context_class[Class] the class for the execution context (will be instantiated by
  # the worker for each job execution)
  # @param submitter_class[Class] the class used for submitting jobs (will be instantiated by the worker for each job execution)
  # @param middleware_stack[Sqewer::MiddlewareStack] the middleware stack that is going to be used
  # @param logger[Logger] the logger to log execution to and to pass to the jobs
  # @param isolator[Sqewer::Isolator] the isolator to encapsulate job instantiation and execution, if desired
  # @param num_threads[Fixnum] how many worker threads to spawn
  # @raise [ArgumentError] if num_threads is not greater than zero
  def initialize(connection: Sqewer::Connection.default,
      serializer: Sqewer::Serializer.default,
      execution_context_class: Sqewer::ExecutionContext,
      submitter_class: Sqewer::Submitter,
      middleware_stack: Sqewer::MiddlewareStack.default,
      logger: Logger.new($stderr),
      isolator: Sqewer::Isolator.default,
      num_threads: DEFAULT_NUM_THREADS)

    @logger = logger
    @connection = connection
    @serializer = serializer
    @middleware_stack = middleware_stack
    @execution_context_class = execution_context_class
    @submitter_class = submitter_class
    @isolator = isolator
    @num_threads = num_threads

    raise ArgumentError, "num_threads must be > 0" unless num_threads > 0

    @execution_counter = Sqewer::AtomicCounter.new

    @state = VeryTinyStateMachine.new(:stopped)
    @state.permit_state :starting, :running, :stopping, :stopped, :failed
    @state.permit_transition :stopped => :starting, :starting => :running, :running => :stopping, :stopping => :stopped
    @state.permit_transition :starting => :failed # Failed to start
  end

  # Start listening on the queue, spin up the consumer threads that will execute the jobs
  # (as many as `num_threads` given at instantiation), plus one provider thread that
  # polls the queue and one thread that triggers GC.
  #
  # Note that this sets `Thread.abort_on_exception` for the whole process.
  #
  # @return [void]
  def start
    @state.transition! :starting

    Thread.abort_on_exception = true

    @logger.info { '[worker] Starting with %d consumer threads' % @num_threads }
    @execution_queue = Queue.new

    consumers = spawn_consumers
    provider = spawn_provider
    gc = spawn_gc_thread

    @threads = [provider, gc] + consumers

    # If any of our threads are already dead, it means there is some misconfiguration and startup failed
    if @threads.any?{|t| !t.alive? }
      @threads.each(&:kill)
      @state.transition! :failed
      @logger.fatal { '[worker] Failed to start (one or more threads died on startup)' }
    else
      @state.transition! :running
      @logger.info { '[worker] Started, %d consumer threads' % consumers.length }
    end
  end

  # Attempts to softly stop the running consumers and the producer. Once the call is made,
  # all the threads will stop at their next loop iteration.
  def stop
    @state.transition! :stopping
    @logger.info { '[worker] Stopping (clean shutdown), will wait for threads to terminate'}
    @threads.each(&:join)
    @logger.info { '[worker] Stopped'}
    @state.transition! :stopped
  end

  # Performs a hard shutdown by killing all the threads
  def kill
    @state.transition! :stopping
    @logger.info { '[worker] Killing (unclean shutdown), will kill all threads'}
    @threads.each(&:kill)
    @logger.info { '[worker] Stopped'}
    @state.transition! :stopped
  end

  private

  def stopping?
    @state.in_state?(:stopping)
  end

  # Spawns the threads that take messages off the local buffer queue and execute them.
  #
  # @return [Array<Thread>]
  def spawn_consumers
    (1..@num_threads).map do
      Thread.new do
        loop do
          break if stopping?
          take_and_execute
        end
      end
    end
  end

  # Create a fiber-based provider thread. When the execution queue is exhausted, use
  # the fiber to take a new job and place it on the queue. We use a fiber to have a way
  # to "suspend" the polling loop in the SQS client when the local buffer queue fills up.
  #
  # @return [Thread]
  def spawn_provider
    Thread.new do
      feeder_fiber = Fiber.new do
        loop do
          break if stopping?
          @connection.poll do |message_id, message_body|
            break if stopping?
            Fiber.yield([message_id, message_body])
          end
        end
      end

      loop do
        break if !feeder_fiber.alive?
        break if stopping?

        if @execution_queue.length < (@num_threads * THROTTLE_FACTOR)
          # When the fiber finishes, resume returns a non-message value;
          # take_and_execute ignores entries without a message ID.
          @execution_queue << feeder_fiber.resume
        else
          @logger.debug "Suspending poller (%d items buffered)" % @execution_queue.length
          sleep 0.2
          Thread.pass
        end
      end
    end
  end

  # It makes sense to have one GC caller per process, since a GC cuts across threads.
  # The intent is to perform a full GC cycle after the same number of jobs as our consumer
  # thread count - so not on every job, but still as often as we can to keep the memory
  # use in check.
  #
  # NOTE(review): nothing in this class ever increments @execution_counter, so the
  # modulo presumably stays at zero and GC.start runs on every pass - confirm against
  # Sqewer::AtomicCounter and decide where the counter should be bumped.
  #
  # @return [Thread]
  def spawn_gc_thread
    Thread.new do
      loop do
        break if stopping?
        GC.start if (@execution_counter.to_i % @num_threads).zero?
        sleep 0.5
      end
    end
  end

  # Takes one message off the local buffer queue, runs the job it describes and
  # deletes the message afterwards. A message with an empty body is deleted right
  # away. If anything at or below StandardError is raised the message is NOT
  # deleted; the failure is only logged.
  def take_and_execute
    # Non-blocking pop, raises ThreadError when the queue is empty
    message_id, message_body = @execution_queue.pop(true)
    return unless message_id
    return @connection.delete_message(message_id) unless message_body && !message_body.empty?

    @isolator.isolate do
      deserialize_and_run(message_id, message_body)
    end

    @connection.delete_message(message_id)
  rescue ThreadError # Queue is empty
    sleep SLEEP_SECONDS_ON_EMPTY_QUEUE
    Thread.pass
  rescue SystemExit, SignalException, Interrupt # Time to quit
    @logger.error { "[worker] Signaled, will quit the consumer" }
    nil
  rescue => e # anything else, at or below StandardError that does not need us to quit
    @logger.fatal { "[worker] Failed #{message_id} with #{e}" }
    @logger.fatal(e.class)
    @logger.fatal(e.message)
    e.backtrace.each { |s| @logger.fatal{"\t#{s}"} }
  end

  # Restores the job from the message body and runs it, both wrapped in the
  # middleware stack. Does nothing further when the serializer returns nil or false.
  # An exception raised by the job is logged and raised again.
  def deserialize_and_run(message_id, message_body)
    job = @middleware_stack.around_deserialization(@serializer, message_id, message_body) do
      @serializer.unserialize(message_body)
    end
    return unless job # the serializer may return nil or false to have the message skipped

    started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    submitter = @submitter_class.new(@connection, @serializer)
    context = @execution_context_class.new(submitter, {STR_logger => @logger})

    begin
      @middleware_stack.around_execution(job, context) do
        # Jobs may define `run` with or without the context argument
        job.method(:run).arity.zero? ? job.run : job.run(context)
      end
      # The inspect string goes in as a format argument, not into the template:
      # a "%" within it would otherwise raise here, after the job has succeeded,
      # and cause the message to be delivered again.
      @logger.info do
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
        "[worker] Finished %s in %0.2fs" % [job.inspect, elapsed]
      end
    rescue => e
      @logger.error { "[worker] Failed #{job.inspect} with a #{e}" }
      raise e
    end
  end
end
|