sqewer 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitlab-ci.yml +12 -0
- data/.yardopts +1 -0
- data/DETAILS.md +180 -0
- data/FAQ.md +54 -0
- data/Gemfile +18 -0
- data/README.md +69 -0
- data/Rakefile +41 -0
- data/example.env +6 -0
- data/lib/sqewer.rb +11 -0
- data/lib/sqewer/atomic_counter.rb +22 -0
- data/lib/sqewer/cli.rb +44 -0
- data/lib/sqewer/connection.rb +66 -0
- data/lib/sqewer/contrib/appsignal_wrapper.rb +29 -0
- data/lib/sqewer/contrib/performable.rb +23 -0
- data/lib/sqewer/execution_context.rb +55 -0
- data/lib/sqewer/isolator.rb +33 -0
- data/lib/sqewer/middleware_stack.rb +44 -0
- data/lib/sqewer/null_logger.rb +9 -0
- data/lib/sqewer/serializer.rb +71 -0
- data/lib/sqewer/simple_job.rb +78 -0
- data/lib/sqewer/submitter.rb +18 -0
- data/lib/sqewer/version.rb +3 -0
- data/lib/sqewer/worker.rb +200 -0
- data/spec/conveyor_belt_spec.rb +14 -0
- data/spec/spec_helper.rb +49 -0
- data/spec/sqewer/atomic_counter_spec.rb +15 -0
- data/spec/sqewer/cli_app.rb +13 -0
- data/spec/sqewer/cli_spec.rb +57 -0
- data/spec/sqewer/connection_spec.rb +57 -0
- data/spec/sqewer/execution_context_spec.rb +43 -0
- data/spec/sqewer/middleware_stack_spec.rb +69 -0
- data/spec/sqewer/serializer_spec.rb +123 -0
- data/spec/sqewer/simple_job_spec.rb +69 -0
- data/spec/sqewer/submitter_spec.rb +59 -0
- data/spec/sqewer/worker_spec.rb +130 -0
- data/sqewer.gemspec +108 -0
- metadata +248 -0
@@ -0,0 +1,66 @@
|
|
1
|
+
# Adapter that handles communication with a specific queue. In the future this
|
2
|
+
# could be switched to a Google PubSub queue, or to AMQP, or to any other queue
|
3
|
+
# with guaranteed re-delivery without ACK. The required queue semantics are
|
4
|
+
# very simple:
|
5
|
+
#
|
6
|
+
# * no message should be deleted if the receiving client has not deleted it explicitly
|
7
|
+
# * any execution that ends with an exception should cause the message to be re-enqueued
|
8
|
+
class Sqewer::Connection
  # Default number of seconds `#poll` waits for messages before returning.
  DEFAULT_TIMEOUT_SECONDS = 5

  # Maximum number of messages requested from SQS per receive call.
  BATCH_RECEIVE_SIZE = 10

  # Returns the default adapter, connected to the queue set via the `SQS_QUEUE_URL`
  # environment variable.
  #
  # @return [Sqewer::Connection] a connection for the queue URL found in the environment
  # @raise [RuntimeError] if `SQS_QUEUE_URL` is not set in the environment
  def self.default
    new(ENV.fetch('SQS_QUEUE_URL'))
  rescue KeyError
    raise "SQS_QUEUE_URL not set in the environment. This is the queue URL that Sqewer uses by default"
  end

  # Initializes a new adapter, with access to the SQS queue at the given URL.
  #
  # @param queue_url[String] the SQS queue URL (the URL can be copied from your AWS console)
  def initialize(queue_url)
    # Loaded lazily, so the AWS SDK is only needed once a connection is actually created
    require 'aws-sdk'
    @queue_url = queue_url
  end

  # Poll for messages, and return if no records are received within the given period.
  # Messages are never deleted here: the caller is expected to call `#delete_message`
  # with the yielded receipt handle once the message has been handled.
  #
  # @param timeout[Fixnum] the number of seconds to wait before returning if no messages appear on the queue
  # @yield [String, String] the receipt identifier and contents of the message body
  # @return [void]
  def poll(timeout = DEFAULT_TIMEOUT_SECONDS)
    poller = ::Aws::SQS::QueuePoller.new(@queue_url)
    wait_seconds = timeout.to_i
    # SDK v2 automatically deletes messages if the block returns normally, but we want it to happen manually
    # from the caller, hence `skip_delete`.
    # NOTE(review): the same value is used for the long-poll wait and the idle timeout; SQS is
    # believed to cap the long-poll wait at 20 seconds - confirm before passing larger timeouts.
    poller.poll(max_number_of_messages: BATCH_RECEIVE_SIZE, skip_delete: true,
                idle_timeout: wait_seconds, wait_time_seconds: wait_seconds) do |sqs_messages|
      sqs_messages.each do |sqs_message|
        # Yielded as a two-element array, so a block taking two parameters destructures it
        yield [sqs_message.receipt_handle, sqs_message.body]
      end
    end
  end

  # Send a message to the backing queue
  #
  # @param message_body[String] the message to send
  # @param kwargs_for_send[Hash] additional arguments for the submit (such as `delay_seconds`).
  #     Passes the arguments to the AWS SDK.
  # @return [void]
  def send_message(message_body, **kwargs_for_send)
    client = ::Aws::SQS::Client.new
    client.send_message(queue_url: @queue_url, message_body: message_body, **kwargs_for_send)
  end

  # Deletes a message after it has been successfully decoded and processed
  #
  # @param message_identifier[String] the ID of the message to delete. For SQS, it is the receipt handle
  # @return [void]
  def delete_message(message_identifier)
    client = ::Aws::SQS::Client.new
    client.delete_message(queue_url: @queue_url, receipt_handle: message_identifier)
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Sqewer
  module Contrib
    # Can be used as a wrapper middleware in an ExecutionContext to log exceptions
    # to Appsignal and to monitor performance. Will only activate
    # if the Appsignal gem is loaded within the current process and active.
    class AppsignalWrapper
      # Wraps job deserialization in an Appsignal transaction. When Appsignal is not
      # loaded or not active the block is simply yielded to.
      #
      # @param serializer[Object] the serializer in use (its class name is reported)
      # @param msg_id[String] the message identifier, reported as the `:receipt_handle` param
      # @param msg_payload[String] the raw message body (not reported)
      # @return the value of the given block
      def around_deserialization(serializer, msg_id, msg_payload)
        return yield unless appsignal_active?

        Appsignal.monitor_transaction('perform_job.demarshal',
          :class => serializer.class.to_s, :params => {:receipt_handle => msg_id}, :method => 'deserialize') do
          yield
        end
      end

      # Run the job with Appsignal monitoring. The value Appsignal passes to the
      # monitoring block is stored in the context under 'appsignal.transaction'
      # so that the job can reach it.
      #
      # @param job[#to_h] the job about to be executed (its `to_h` is reported as params)
      # @param context[#[]=] the execution context of the job
      # @return the value of the given block
      def around_execution(job, context)
        return yield unless appsignal_active?

        Appsignal.monitor_transaction('perform_job.sqewer',
          :class => job.class.to_s, :params => job.to_h, :method => 'run') do |t|
          context['appsignal.transaction'] = t
          yield
        end
      end

      private

      # Tells whether the Appsignal constant is defined and reports itself as active.
      def appsignal_active?
        defined?(Appsignal) && Appsignal.active?
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Sqewer
  module Contrib
    # Adapter job for classes in the ActiveJob style: the wrapped class is looked up
    # by name and its class-level `perform` receives the stored arguments.
    class Performable
      # @param performable_class[String] name of the class that responds to `perform`
      # @param perform_arguments[Array] positional arguments to pass to `perform`
      def initialize(performable_class:, perform_arguments:)
        @class = performable_class
        @args = perform_arguments
      end

      # @return [Hash] the keyword arguments needed to rebuild this job
      def to_h
        { performable_class: @class, perform_arguments: @args }
      end

      # @return [String] the class name followed by the inspected arguments
      def inspect
        format('<%s{%s}>', @class, @args.inspect)
      end

      # Resolves the stored class name and calls `perform` on it with the stored arguments.
      #
      # @param context the execution context (not used by this job)
      # @return whatever `perform` returns
      def run(context)
        target = Kernel.const_get(@class)
        target.perform(*@args)
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
# Is passed to each Job when executing (is the argument for the `#run` method
|
4
|
+
# of the job). The job can use this object to submit extra jobs, or to get
|
5
|
+
# at the things specific for the execution context (database/key-value store
|
6
|
+
# connections, error handling transaction and so on).
|
7
|
+
class Sqewer::ExecutionContext
  # Builds a context around a submitter and an optional set of initial variables.
  # Every initial variable goes through `#[]=`, so keys end up stringified.
  #
  # @param submitter[Sqewer::Submitter] the object to submit new jobs through. Used when jobs want to submit jobs
  # @param extra_variables[Hash] any extra data to pass around to each Job
  def initialize(submitter, extra_variables={})
    @submitter = submitter
    @params = {}
    extra_variables.each_pair do |name, value|
      self[name] = value
    end
  end

  # Hands the given jobs, together with any options, over to the submitter.
  #
  # @param jobs the jobs to submit
  # @param execution_options[Hash] options forwarded as-is to the submitter
  def submit!(*jobs, **execution_options)
    @submitter.submit!(*jobs, **execution_options)
  end

  # Stores a value in the execution environment under the stringified key.
  #
  # @param key[#to_s] the key to set
  # @param value the value to set
  def []=(key, value)
    name = key.to_s
    @params[name] = value
  end

  # Reads a value of the execution environment by its stringified key.
  #
  # @param key[#to_s] the key to get
  # @return the stored value, or nil when the key was never set
  def [](key)
    name = key.to_s
    @params[name]
  end

  # Same as `Hash#fetch` on the execution environment, with the key stringified:
  # the block is used when the key is not set.
  #
  # @param key[#to_s] the key to get
  # @param blk the block to execute if no such key is present
  def fetch(key, &blk)
    @params.fetch(key.to_s, &blk)
  end

  # The logger stored under the 'logger' key, falling back to Sqewer::NullLogger
  # when none was set. Can be used to supply a logger prefixed with job
  # parameters per job.
  #
  # @return [Logger] the logger to send messages to.
  def logger
    @params.key?('logger') ? @params['logger'] : Sqewer::NullLogger
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# Used to isolate the execution environment of the jobs. You can use it to run each
|
2
|
+
# job in a separate process (a-la Resque) or stick to the default of running those jobs
|
3
|
+
# in threads (a-la Sidekiq).
|
4
|
+
class Sqewer::Isolator
  # Isolator variant that hands the block to ExceptionalFork.fork_and_wait,
  # so that each job runs in a separate process.
  class PerProcess
    # The method called to isolate a particular job flow (both instantiation and execution)
    def isolate
      # The dependency is loaded on first use only
      require 'exceptional_fork'
      ExceptionalFork.fork_and_wait { yield }
    end
  end

  class << self
    # Memoized isolator that runs each job unserialization and execution
    # as a separate process.
    #
    # @return [Sqewer::Isolator::PerProcess] the isolator
    def process
      @per_process ||= PerProcess.new
    end

    # Memoized default isolator, which just calls the instantiation/execution block
    # in place.
    #
    # @return [Sqewer::Isolator] the isolator
    def default
      @default ||= new
    end
  end

  # The method called to isolate a particular job flow (both instantiation and execution).
  # The default implementation adds no isolation and yields directly.
  def isolate
    yield
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# Allows arbitrary wrapping of the job deserialization and job execution procedures
|
2
|
+
class Sqewer::MiddlewareStack
  # Returns the default middleware stack: a single memoized instance that
  # starts out with no handlers.
  #
  # @return [MiddlewareStack] the default stack
  def self.default
    @instance ||= new
  end

  # Creates a new MiddlewareStack. Once created, handlers can be added using `:<<`
  def initialize
    @handlers = []
  end

  # Adds a handler. Handlers may respond to #around_deserialization and/or
  # #around_execution; a handler is skipped for the hooks it does not respond to.
  #
  # @param handler[#around_deserialization, #around_execution] The middleware item to insert
  # @return [void]
  def <<(handler)
    @handlers << handler
    # TODO: cache the wrapping proc
  end

  # Runs the given block wrapped in the #around_execution hook of every handler.
  # Handlers added first end up outermost.
  #
  # @param job the job that is about to be executed
  # @param context the execution context for the job
  # @return the value returned by the outermost handler (or by the block when there are no handlers)
  def around_execution(job, context, &inner_block)
    return yield if @handlers.empty?

    wrap_with_handlers(:around_execution, [job, context], inner_block).call
  end

  # Runs the given block wrapped in the #around_deserialization hook of every handler.
  # Handlers added first end up outermost.
  #
  # @param serializer the serializer that is going to restore the job
  # @param message_id the identifier of the message being processed
  # @param message_body the raw body of the message being processed
  # @return the value returned by the outermost handler (or by the block when there are no handlers)
  def around_deserialization(serializer, message_id, message_body, &inner_block)
    return yield if @handlers.empty?

    wrap_with_handlers(:around_deserialization, [serializer, message_id, message_body], inner_block).call
  end

  private

  # Builds the nested callable for one hook: starting from the innermost block, each
  # responding handler (last added first) wraps what has been built so far.
  def wrap_with_handlers(hook_name, hook_args, innermost)
    responders = @handlers.select { |candidate| candidate.respond_to?(hook_name) }
    responders.reverse_each.inject(innermost) do |wrapped, middleware_object|
      -> { middleware_object.public_send(hook_name, *hook_args, &wrapped) }
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# Converts jobs into strings that can be sent to the job queue, and
|
2
|
+
# restores jobs from those strings. If you want to use, say, Marshal
|
3
|
+
# to store your jobs instead of the default, or if you want to generate
|
4
|
+
# custom job objects from S3 bucket notifications, you might want to override this
|
5
|
+
# class and feed the overridden instance to {Sqewer::Worker}.
|
6
|
+
class Sqewer::Serializer

  # Returns the default Serializer. One instance is memoized and shared
  # (the serializer keeps no per-job state).
  #
  # @return [Serializer] the instance of the default JSON serializer
  def self.default
    @instance ||= new
  end

  # Raised by #serialize when the class name of the job cannot be resolved back to a constant
  AnonymousJobClass = Class.new(StandardError)

  # Raised by #unserialize when the job class rejects the keyword arguments from the ticket
  ArityMismatch = Class.new(ArgumentError)

  # Instantiate a Job object from a message body string. If the
  # returned result is `nil`, the job will be skipped.
  #
  # The ticket is a JSON object with `_job_class` (the class name) and `_job_params`
  # (keyword arguments for the constructor). Tickets in the older format, with a
  # top-level `job_class` key, are converted first.
  #
  # NOTE(review): the class name is taken from the message and resolved via const_get,
  # so this presumably relies on only trusted producers writing to the queue - confirm.
  #
  # @param message_body[String] a string in JSON containing the job parameters
  # @return [#run, NilClass] an object that responds to `run()` or nil.
  # @raise [RuntimeError] if the ticket is not a Hash or has no job class
  # @raise [ArityMismatch] if the job class does not accept the given parameters
  def unserialize(message_body)
    ticket = JSON.parse(message_body, symbolize_names: true)
    raise "Job ticket must unmarshal into a Hash" unless ticket.is_a?(Hash)

    ticket = convert_old_ticket_format(ticket) if ticket[:job_class]

    class_name = ticket.delete(:_job_class)
    raise ":_job_class not set in the ticket" unless class_name
    job_class = Kernel.const_get(class_name)

    params = ticket.delete(:_job_params)
    # Without parameters the job gets built with a bare constructor call
    return job_class.new if params.nil? || params.empty?

    begin
      # The parameters of the ticket are keyword arguments for the job
      job_class.new(**params)
    rescue ArgumentError
      raise ArityMismatch, "Could not instantiate #{job_class} because it did not accept the arguments #{params.inspect}"
    end
  end

  # Converts the given Job into a string, which can be submitted to the queue.
  # The class of the job has to be resolvable by name, otherwise the job
  # could never be restored on the receiving end.
  #
  # @param job[#to_h] an object that supports `to_h`
  # @return [String] serialized string ready to be put into the queue
  # @raise [AnonymousJobClass] if the class name of the job does not resolve to a constant
  def serialize(job)
    class_name = job.class.to_s

    begin
      Kernel.const_get(class_name)
    rescue NameError
      raise AnonymousJobClass, "The class of #{job.inspect} could not be resolved and will not restore to a Job"
    end

    # Jobs that do not respond to to_h are stored with nil parameters
    params = job.to_h if job.respond_to?(:to_h)
    JSON.dump({_job_class: class_name, _job_params: params})
  end

  private

  # Rewrites an old-format ticket (class under :job_class, everything else being
  # the parameters) into the current :_job_class / :_job_params shape.
  def convert_old_ticket_format(hash_of_properties)
    legacy_class_name = hash_of_properties.delete(:job_class)
    {_job_class: legacy_class_name, _job_params: hash_of_properties}
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# A module that you can include into your Job class.
|
2
|
+
# It adds the following features:
|
3
|
+
#
|
4
|
+
# * initialize() will have keyword access to all accessors, and will ensure you have called each one of them
|
5
|
+
# * to_h() will produce a symbolized Hash with all the properties defined using attr_accessor, and the job_class_name
|
6
|
+
# * inspect() will provide a sensible default string representation for logging
|
7
|
+
module Sqewer::SimpleJob
  # Raised when a keyword argument has no matching writer method on the job
  UnknownJobAttribute = Class.new(StandardError)

  # Raised when a writer method exists on the job but no value was given for it
  MissingAttribute = Class.new(StandardError)

  # Matches method names that end with "=", capturing the attribute name
  EQ_END = /(\w+)(\=)$/

  # Returns the names of all attributes for which the object has a writer method.
  # This is then used by #inspect to compose a list of the job parameters, formatted
  # as an inspected Hash.
  #
  # @return [Array<Symbol>] the array of attributes to show via inspect
  def inspectable_attributes
    methods.grep(EQ_END).map { |writer| writer.to_s.gsub(EQ_END, '\1').to_sym }
  end

  # Returns the inspection string with the job class and its attributes.
  # If `inspectable_attributes` has been overridden, the attributes returned by that method will be the
  # ones returned in the inspection string. Values are taken from `to_h`.
  #
  # @return [String] the object inspect string
  def inspect
    names = inspectable_attributes
    values = to_h
    shown = {}
    names.each { |name| shown[name] = values[name] }
    "<#{self.class}:#{shown.inspect}>"
  end

  # Initializes a new Job with the given job args. Each argument is assigned through
  # the writer method of the same name.
  #
  # If an argument has no writer, or if a writer exists that received no argument, an
  # exception will be raised (because you most likely forgot a parameter for a job, or
  # the job class changed whereas the queue still contains jobs in old formats).
  #
  # @param jobargs[Hash] the keyword arguments, mapping 1 to 1 to the accessors of the job
  # @raise [UnknownJobAttribute] if an argument has no corresponding writer method
  # @raise [MissingAttribute] if a writer method was not given a value
  def initialize(**jobargs)
    @simple_job_args = jobargs.keys
    provided = Set.new

    jobargs.each_pair do |name, value|
      writer = "#{name}="
      provided << name
      raise UnknownJobAttribute, "Unknown attribute #{name.inspect} for #{self.class}" unless respond_to?(writer)

      send(writer, value)
    end

    settable = Set.new(methods.grep(EQ_END).map { |method_name| method_name.to_s.gsub(EQ_END, '\1').to_sym })
    missing = settable - provided

    # Only the first missing attribute gets reported
    raise MissingAttribute, "Missing job attribute #{missing.first.inspect}" unless missing.empty?
  end

  # Returns the arguments given at initialization, with their current values
  # read back through the reader methods.
  #
  # @return [Hash] attribute name => current value
  def to_h
    @simple_job_args.map { |name| [name, send(name)] }.to_h
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# A shim for submitting jobs to the queue. Accepts a connection
|
2
|
+
# (something that responds to `#send_message`)
|
3
|
+
# and the serializer (something that responds to `#serialize`) to
|
4
|
+
# convert the job into the string that will be put in the queue.
|
5
|
+
class Sqewer::Submitter < Struct.new(:connection, :serializer)

  # Builds a Submitter from the default connection and the default serializer.
  #
  # @return [Sqewer::Submitter]
  def self.default
    new(Sqewer::Connection.default, Sqewer::Serializer.default)
  end

  # Serializes every given job and sends it through the connection, one message per job.
  #
  # @param jobs the jobs to submit
  # @param kwargs_for_send[Hash] extra arguments, passed as-is to `send_message` for each job
  def submit!(*jobs, **kwargs_for_send)
    jobs.each do |job|
      payload = serializer.serialize(job)
      connection.send_message(payload, **kwargs_for_send)
    end
  end
end
|
@@ -0,0 +1,200 @@
|
|
1
|
+
require 'logger'
|
2
|
+
require 'thread'
|
3
|
+
require 'very_tiny_state_machine'
|
4
|
+
require 'fiber'
|
5
|
+
|
6
|
+
# A massively threaded worker engine
|
7
|
+
class Sqewer::Worker
  # Number of consumer threads used when `num_threads` is not given
  DEFAULT_NUM_THREADS = 4

  # How long a consumer thread sleeps when it finds the local execution queue empty
  SLEEP_SECONDS_ON_EMPTY_QUEUE = 1

  # The provider stops pulling messages once the local execution queue holds
  # `num_threads * THROTTLE_FACTOR` items
  THROTTLE_FACTOR = 2

  # Returns the default Worker instance, configured based on the default components.
  # The instance is memoized on the class.
  #
  # @return [Sqewer::Worker]
  def self.default
    @default ||= new
  end

  # Creates a new Worker. The Worker, unlike it is in the Rails tradition, is only responsible for
  # the actual processing of jobs, and not for the job arguments.
  #
  # @param connection[Sqewer::Connection] the object that handles polling and submitting
  # @param serializer[#serialize, #unserialize] the serializer/unserializer for the jobs
  # @param execution_context_class[Class] the class for the execution context (will be instantiated by
  # the worker for each job execution)
  # @param submitter_class[Class] the class used for submitting jobs (will be instantiated by the worker for each job execution)
  # @param middleware_stack[Sqewer::MiddlewareStack] the middleware stack that is going to be used
  # @param logger[Logger] the logger to log execution to and to pass to the jobs
  # @param isolator[Sqewer::Isolator] the isolator to encapsulate job instantiation and execution, if desired
  # @param num_threads[Fixnum] how many worker threads to spawn
  # @raise [ArgumentError] if num_threads is not greater than zero
  def initialize(connection: Sqewer::Connection.default,
      serializer: Sqewer::Serializer.default,
      execution_context_class: Sqewer::ExecutionContext,
      submitter_class: Sqewer::Submitter,
      middleware_stack: Sqewer::MiddlewareStack.default,
      logger: Logger.new($stderr),
      isolator: Sqewer::Isolator.default,
      num_threads: DEFAULT_NUM_THREADS)

    @logger = logger
    @connection = connection
    @serializer = serializer
    @middleware_stack = middleware_stack
    @execution_context_class = execution_context_class
    @submitter_class = submitter_class
    @isolator = isolator
    @num_threads = num_threads

    raise ArgumentError, "num_threads must be > 0" unless num_threads > 0

    @execution_counter = Sqewer::AtomicCounter.new

    # Lifecycle: stopped -> starting -> running -> stopping -> stopped, with
    # starting -> failed as the only other permitted transition.
    @state = VeryTinyStateMachine.new(:stopped)
    @state.permit_state :starting, :running, :stopping, :stopped, :failed
    @state.permit_transition :stopped => :starting, :starting => :running, :running => :stopping, :stopping => :stopped
    @state.permit_transition :starting => :failed # Failed to start
  end

  # Start listening on the queue, spin up a number of consumer threads that will execute the jobs.
  # Takes no arguments: the number of consumer threads is the `num_threads` given to the constructor.
  # Besides the consumers, one provider thread (polls the connection) and one GC thread are started.
  #
  # Note that this sets `Thread.abort_on_exception` for the whole process.
  #
  # @return [void]
  def start
    @state.transition! :starting

    Thread.abort_on_exception = true

    @logger.info { '[worker] Starting with %d consumer threads' % @num_threads }
    @execution_queue = Queue.new

    # Each consumer keeps taking items off the execution queue until the worker is stopping
    consumers = (1..@num_threads).map do
      Thread.new do
        loop {
          break if @state.in_state?(:stopping)
          take_and_execute
        }
      end
    end

    # Create a fiber-based provider thread. When the execution queue is exhausted, use
    # the fiber to take a new job and place it on the queue. We use a fiber to have a way
    # to "suspend" the polling loop in the SQS client when the local buffer queue fills up.
    provider = Thread.new do
      feeder_fiber = Fiber.new do
        loop do
          break if @state.in_state?(:stopping)
          @connection.poll do |message_id, message_body|
            break if @state.in_state?(:stopping)
            # Hands one message to whoever resumed the fiber, and suspends polling until the next resume
            Fiber.yield([message_id, message_body])
          end
        end
      end

      loop do
        break if !feeder_fiber.alive?
        break if stopping?

        # Only pull another message while the local buffer is below the throttle limit
        if @execution_queue.length < (@num_threads * THROTTLE_FACTOR)
          @execution_queue << feeder_fiber.resume
        else
          @logger.debug "Suspending poller (%d items buffered)" % @execution_queue.length
          sleep 0.2
          Thread.pass
        end
      end
    end

    # It makes sense to have one GC caller per process, since a GC cuts across threads.
    # We will perform a full GC cycle after the same number of jobs as our consumer thread
    # count - so not on every job, but still as often as we can to keep the memory use in check.
    # NOTE(review): nothing in this class increments @execution_counter, so the modulo check
    # below presumably holds on every pass (every 0.5s) - confirm against Sqewer::AtomicCounter.
    gc = Thread.new do
      loop do
        break if stopping?
        GC.start if (@execution_counter.to_i % @num_threads).zero?
        sleep 0.5
      end
    end

    @threads = [provider, gc] + consumers

    # If any of our threads are already dead, it means there is some misconfiguration and startup failed
    if @threads.any?{|t| !t.alive? }
      @threads.map(&:kill)
      @state.transition! :failed
      @logger.fatal { '[worker] Failed to start (one or more threads died on startup)' }
    else
      @state.transition! :running
      @logger.info { '[worker] Started, %d consumer threads' % consumers.length }
    end
  end

  # Attempts to softly stop the running consumers and the producer. Once the call is made,
  # all the threads will stop at their next loop iteration. Blocks until every thread has been joined.
  def stop
    @state.transition! :stopping
    @logger.info { '[worker] Stopping (clean shutdown), will wait for threads to terminate'}
    @threads.map(&:join)
    @logger.info { '[worker] Stopped'}
    @state.transition! :stopped
  end

  # Performs a hard shutdown by killing all the threads
  def kill
    @state.transition! :stopping
    @logger.info { '[worker] Killing (unclean shutdown), will kill all threads'}
    @threads.map(&:kill)
    @logger.info { '[worker] Stopped'}
    @state.transition! :stopped
  end

  private

  # Tells whether the worker is in the :stopping state
  def stopping?
    @state.in_state?(:stopping)
  end


  # Takes one message off the local execution queue without blocking, restores the job from it
  # and runs it inside the isolator and the middleware stack. The message is deleted from the
  # connection when the job finished without raising, when the serializer returned nil/false
  # (job skipped), or when the message body is empty.
  def take_and_execute
    # Non-blocking pop: raises ThreadError when the queue is empty (handled below)
    message_id, message_body = @execution_queue.pop(nonblock=true)
    return unless message_id
    # Messages without a body cannot hold a job, so they are removed right away
    return @connection.delete_message(message_id) unless message_body && !message_body.empty?

    @isolator.isolate do
      job = @middleware_stack.around_deserialization(@serializer, message_id, message_body) do
        @serializer.unserialize(message_body)
      end

      if job # if the serializer returns a nil or false
        t = Time.now
        submitter = @submitter_class.new(@connection, @serializer)
        context = @execution_context_class.new(submitter, {STR_logger => @logger})

        begin
          @middleware_stack.around_execution(job, context) do
            # Jobs may define `run` with or without the context argument
            job.method(:run).arity.zero? ? job.run : job.run(context)
          end
          @logger.info { "[worker] Finished #{job.inspect} in %0.2fs" % (Time.now - t) }
        rescue => e
          @logger.error { "[worker] Failed #{job.inspect} with a #{e}" }
          # Re-raised so that the outer rescue runs and the message is not deleted
          raise e
        end
      end
    end

    @connection.delete_message(message_id)
  rescue ThreadError # Queue is empty
    sleep SLEEP_SECONDS_ON_EMPTY_QUEUE
    Thread.pass
  rescue SystemExit, SignalException, Interrupt => e # Time to quit
    # NOTE(review): this only returns from the current call; the consumer loop in #start
    # keeps running unless the state is :stopping - confirm this is intended.
    @logger.error { "[worker] Signaled, will quit the consumer" }
    return
  rescue => e # anything else, at or below StandardError that does not need us to quit
    # The message is left undeleted here
    @logger.fatal { "[worker] Failed #{message_id} with #{e}" }
    @logger.fatal(e.class)
    @logger.fatal(e.message)
    e.backtrace.each { |s| @logger.fatal{"\t#{s}"} }
  end

  # Key under which the worker's logger is passed to the execution context
  STR_logger = 'logger'
end
|