chore-core 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/LICENSE.txt +20 -0
- data/README.md +260 -0
- data/Rakefile +32 -0
- data/bin/chore +34 -0
- data/chore-core.gemspec +46 -0
- data/lib/chore/cli.rb +232 -0
- data/lib/chore/configuration.rb +13 -0
- data/lib/chore/consumer.rb +52 -0
- data/lib/chore/duplicate_detector.rb +56 -0
- data/lib/chore/fetcher.rb +31 -0
- data/lib/chore/hooks.rb +25 -0
- data/lib/chore/job.rb +103 -0
- data/lib/chore/json_encoder.rb +18 -0
- data/lib/chore/manager.rb +47 -0
- data/lib/chore/publisher.rb +29 -0
- data/lib/chore/queues/filesystem/consumer.rb +128 -0
- data/lib/chore/queues/filesystem/filesystem_queue.rb +49 -0
- data/lib/chore/queues/filesystem/publisher.rb +45 -0
- data/lib/chore/queues/sqs/consumer.rb +121 -0
- data/lib/chore/queues/sqs/publisher.rb +55 -0
- data/lib/chore/queues/sqs.rb +38 -0
- data/lib/chore/railtie.rb +18 -0
- data/lib/chore/signal.rb +175 -0
- data/lib/chore/strategies/consumer/batcher.rb +76 -0
- data/lib/chore/strategies/consumer/single_consumer_strategy.rb +34 -0
- data/lib/chore/strategies/consumer/threaded_consumer_strategy.rb +81 -0
- data/lib/chore/strategies/worker/forked_worker_strategy.rb +221 -0
- data/lib/chore/strategies/worker/single_worker_strategy.rb +39 -0
- data/lib/chore/tasks/queues.task +11 -0
- data/lib/chore/unit_of_work.rb +17 -0
- data/lib/chore/util.rb +18 -0
- data/lib/chore/version.rb +9 -0
- data/lib/chore/worker.rb +117 -0
- data/lib/chore-core.rb +1 -0
- data/lib/chore.rb +218 -0
- data/spec/chore/cli_spec.rb +182 -0
- data/spec/chore/consumer_spec.rb +36 -0
- data/spec/chore/duplicate_detector_spec.rb +62 -0
- data/spec/chore/fetcher_spec.rb +38 -0
- data/spec/chore/hooks_spec.rb +44 -0
- data/spec/chore/job_spec.rb +80 -0
- data/spec/chore/json_encoder_spec.rb +11 -0
- data/spec/chore/manager_spec.rb +39 -0
- data/spec/chore/queues/filesystem/filesystem_consumer_spec.rb +71 -0
- data/spec/chore/queues/sqs/consumer_spec.rb +136 -0
- data/spec/chore/queues/sqs/publisher_spec.rb +74 -0
- data/spec/chore/queues/sqs_spec.rb +37 -0
- data/spec/chore/signal_spec.rb +244 -0
- data/spec/chore/strategies/consumer/batcher_spec.rb +93 -0
- data/spec/chore/strategies/consumer/single_consumer_strategy_spec.rb +23 -0
- data/spec/chore/strategies/consumer/threaded_consumer_strategy_spec.rb +105 -0
- data/spec/chore/strategies/worker/forked_worker_strategy_spec.rb +281 -0
- data/spec/chore/strategies/worker/single_worker_strategy_spec.rb +36 -0
- data/spec/chore/worker_spec.rb +134 -0
- data/spec/chore_spec.rb +108 -0
- data/spec/spec_helper.rb +58 -0
- data/spec/test_job.rb +7 -0
- metadata +194 -0
@@ -0,0 +1,52 @@
|
|
1
|
+
module Chore
  # Raised when Chore cannot continue: either it is booting with a configuration it cannot
  # start from, or a queue has reached a state that forces a shutdown. The logfiles usually
  # hold more detail about the cause.
  class TerribleMistake < Exception
    # Raise this if your queue is in a terrible state and must shut down.
  end

  # Abstract base class for Chore Consumers. Custom consumers subclass it and implement
  # #consume, #reject and #complete; the running flag handling is provided here.
  class Consumer

    attr_reader :queue_name
    attr_writer :queue_name

    # +queue_name+ is stored on the instance; +opts+ is accepted but not used by the base class.
    # A freshly built consumer reports itself as running.
    def initialize(queue_name, opts={})
      @running = true
      @queue_name = queue_name
    end

    class << self
      # Hook for resetting the connection shared by all consumers of this class, e.g. when a
      # consumer is used across a fork. A no-op here; subclasses may override it (optional).
      def reset_connection!
      end
    end

    # Must be implemented by subclasses. Takes a block of arity two, called with
    # |message_id, message_body|, where message_id is whatever object the consumer needs
    # in order to act on the message later (reject, complete, etc).
    def consume(&block)
      raise NotImplementedError
    end

    # Must be implemented by subclasses. Puts the message identified by +message_id+
    # (as handed out by #consume) back on the queue so it is processed again later.
    def reject(message_id)
      raise NotImplementedError
    end

    # Must be implemented by subclasses. Marks the message identified by +message_id+
    # (as handed out by #consume) as finished.
    def complete(message_id)
      raise NotImplementedError
    end

    # Clears the running flag so that the consumer stops consuming messages.
    def stop
      @running = false
    end

    # True while the consumer has not been stopped.
    def running?
      @running
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Chore
  class DuplicateDetector #:nodoc:

    # Recognised +opts+ keys (all optional):
    # * +:dupe_on_cache_failure+ - what to answer when the cache lookup raises (default false)
    # * +:timeout+ - fallback timeout for queues without a visibility timeout (default 0)
    # * +:servers+ - handed to Dalli::Client.new (default nil)
    # * +:memcached_client+ - a ready-made client; when absent a Dalli::Client is built
    #
    # Re-raises the LoadError if the dalli gem cannot be required.
    def initialize(opts={})
      # dalli is only needed when duplicate detection is in use, so it is required lazily here.
      begin
        require 'dalli'
      rescue LoadError => load_error
        Chore.logger.error "Unable to load dalli gem. It is required if duplicate " \
                           "detection is enabled. Install it with 'gem install dalli'."
        raise load_error
      end

      @timeouts              = {}
      @dupe_on_cache_failure = opts.fetch(:dupe_on_cache_failure, false)
      @timeout               = opts.fetch(:timeout, 0)
      @servers               = opts.fetch(:servers, nil)
      @memcached_client      = opts.fetch(:memcached_client) do
        client_options = {
          :auto_eject_hosts    => false,
          :cache_lookups       => false,
          :tcp_nodelay         => true,
          :socket_max_failures => 5,
          :socket_timeout      => 2
        }
        Dalli::Client.new(@servers, client_options)
      end
    end

    # Asks the dedupe cache whether +msg+ has been seen before.
    # Returns false for unique messages and true for duplicates. Messages that are nil or
    # carry no queue are always reported as unique. The answer is the negation of the
    # client's +add+ result (presumably +add+ is falsy when the key already exists - confirm
    # against the client in use). If the cache call raises a StandardError the failure is
    # logged and the configured +dupe_on_cache_failure+ policy decides the answer.
    def found_duplicate?(msg)
      return false unless msg
      return false unless msg.respond_to?(:queue)
      return false unless msg.queue

      ttl = queue_timeout(msg.queue)
      begin
        added = @memcached_client.add(msg.id, "1", ttl)
        !added
      rescue StandardError => e
        if @dupe_on_cache_failure
          Chore.logger.error "Error accessing duplicate cache server. Assuming message is a duplicate. #{e}\n#{e.backtrace * "\n"}"
          true
        else
          Chore.logger.error "Error accessing duplicate cache server. Assuming message is not a duplicate. #{e}\n#{e.backtrace * "\n"}"
          false
        end
      end
    end

    # Timeout (in seconds) for +queue+: the window during which a repeated message is still
    # treated as a duplicate. Once it has passed, the next copy is considered unique and is
    # processed. Uses the queue's visibility timeout, falling back to the configured default,
    # and remembers the answer per queue url.
    def queue_timeout(queue)
      key = queue.url
      @timeouts[key] ||= (queue.visibility_timeout || @timeout)
    end

  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Chore
  class Fetcher #:nodoc:
    attr_reader :manager, :consumers

    # Keeps a reference to +manager+ and instantiates the configured consumer strategy,
    # handing it this fetcher.
    def initialize(manager)
      @manager  = manager
      @stopping = false
      @strategy = Chore.config.consumer_strategy.new(self)
    end

    # Logs the start-up and delegates to the consumer strategy's +fetch+, which begins
    # consuming messages from your queue.
    def start
      Chore.logger.info "Fetcher starting up"
      @strategy.fetch
    end

    # Stops the fetcher so no further messages are pulled from the queue. Only the first
    # call has any effect; later calls return immediately.
    def stop!
      return if @stopping

      Chore.logger.info "Fetcher shutting down"
      @stopping = true
      @strategy.stop!
    end

    # Whether the fetcher is in the process of stopping.
    def stopping?
      @stopping
    end
  end
end
|
data/lib/chore/hooks.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
module Chore
  # Abstracts the notion of registering and running hooks during certain points in the lifecycle of chore
  # processing work.
  module Hooks

    # Looks up and executes the hooks for +event+, passing +args+ to each one.
    #
    # Global hooks (the callables returned by Chore.hooks_for) run first, followed by the
    # methods defined on +self+ whose names start with the event name,
    # e.g. +before_perform_log+ for the +before_perform+ event, in sorted name order.
    #
    # Returns +false+ if any hook (global or method-based) returned exactly +false+,
    # otherwise a flat Array with every hook's return value.
    def run_hooks_for(event,*args)
      results = global_hooks_for(event).map { |prc| prc.call(*args) }
      # concat rather than <<: appending the mapped Array as a single nested element would
      # hide a +false+ returned by a method-based hook from the check below.
      results.concat(hooks_for(event).map { |method| send(method, *args) })
      return false if results.any? { |r| false == r }
      results
    end

    private

    # Names of the methods on +self+ (excluding those in Object.methods) that start with
    # +event+, sorted so that hooks run in a stable order.
    def hooks_for(event)
      (self.methods - Object.methods).grep(/^#{event}/).sort
    end

    # The globally registered hooks for +event+, as provided by Chore.hooks_for.
    def global_hooks_for(event)
      Chore.hooks_for(event)
    end
  end
end
|
data/lib/chore/job.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'chore/hooks'
|
2
|
+
|
3
|
+
module Chore

  # <tt>Chore::Job</tt> is the module which gives your job classes the methods they need to be published
  # and run within Chore. You cannot have a Job in Chore that does not include this module
  module Job

    # An exception to represent a job choosing to forcibly reject a given instance of itself.
    # The reasoning behind rejecting the job and the message that spawned it are left to
    # the developer to decide to use or not to use.
    class RejectMessageException < Exception
      # Throw a RejectMessageException from your job to signal that the message should be rejected.
      # The semantics of +reject+ are queue implementation dependent.
    end

    # Names of the classes that have included Chore::Job so far (empty if none).
    def self.job_classes #:nodoc:
      @classes || []
    end

    # Records the including class by name and gives it the class-level API and hook support.
    def self.included(base) #:nodoc:
      @classes ||= []
      @classes << base.name
      base.extend(ClassMethods)
      base.extend(Hooks)
    end

    module ClassMethods
      DEFAULT_OPTIONS = { }

      # Configures the including class's use of Chore::Job.
      # The options are built by merging, in increasing precedence: the previously stored
      # options (or DEFAULT_OPTIONS), the required options found in the global Chore.config,
      # and +opts+. After merging, every key in #required_options must be set, e.g.
      # * +:name+: which should map to the name of the queue this job should be published to.
      #
      # Returns the merged options Hash.
      # Raises ArgumentError if a required option is missing.
      def queue_options(opts = {})
        @chore_options = (@chore_options || DEFAULT_OPTIONS).merge(opts_from_cli).merge(opts)
        required_options.each do |k|
          raise ArgumentError.new("#{self.to_s} :#{k} is a required option for Chore::Job") unless @chore_options[k]
        end
        # Return the Hash explicitly: #options memoizes this method's return value, and
        # the value of the +each+ above is the Array of required keys, not the options.
        @chore_options
      end

      # This is a method so it can be overridden to create additional required
      # queue_options params. This also determines what options get pulled
      # from the global Chore.config.
      def required_options
        [:name, :publisher, :max_attempts]
      end

      # The options Hash for this job class, resolved via #queue_options on first use.
      def options #:nodoc:#
        @chore_options ||= queue_options
      end

      # The subset of the global Chore.config that corresponds to #required_options (memoized).
      def opts_from_cli #:nodoc:#
        @from_cli ||= Chore.config.marshal_dump.select { |k, _v| required_options.include?(k) }
      end

      # Execute the current job. We create an instance of the job to do the perform
      # as this allows the jobs themselves to do initialization that might require access
      # to the parameters of the job.
      def perform(*args)
        job = self.new(args)
        job.perform(*args)
      end

      # Publish a job using an instance of job. Similar to perform we do this so that a job
      # can perform initialization logic before the perform_async is begun. This, in addition to
      # hooks, allows for rather complex jobs to be written simply.
      def perform_async(*args)
        job = self.new(args)
        job.perform_async(*args)
      end

      # Resque/Sidekiq compatible serialization. No reason to change what works
      def job_hash(job_params)
        {:class => self.to_s, :args => job_params}
      end

      # The name of the configured queue, combined with an optional prefix
      def prefixed_queue_name
        "#{Chore.config.queue_prefix}#{self.options[:name]}"
      end
    end #ClassMethods

    # This is handy to override in an included job to be able to do job setup that requires
    # access to a job's arguments to be able to perform any context specific initialization that may
    # be required.
    def initialize(args=nil)
    end

    # This needs to be overridden by the object that is including this module.
    def perform(*args)
      raise NotImplementedError
    end

    # Use the current configured publisher to send this job into a queue. Runs the
    # +before_publish+ hooks, publishes the job hash to the prefixed queue name, then runs
    # the +after_publish+ hooks.
    def perform_async(*args)
      self.class.run_hooks_for(:before_publish,*args)
      @chore_publisher ||= self.class.options[:publisher]
      @chore_publisher.publish(self.class.prefixed_queue_name,self.class.job_hash(args))
      self.class.run_hooks_for(:after_publish,*args)
    end

  end #Job
end #Chore
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Chore
  # JSON serialization and deserialization of jobs.
  module JsonEncoder
    # Serializes +job+ (anything responding to +to_hash+) into a JSON string using the
    # standard ruby JSON library.
    def self.encode(job)
      job_hash = job.to_hash
      JSON.generate(job_hash)
    end

    # Parses the JSON string +job+ back into a ruby Hash using the standard ruby JSON library.
    def self.decode(job)
      JSON.parse(job)
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'chore/worker'
|
3
|
+
require 'chore/fetcher'
|
4
|
+
|
5
|
+
module Chore
  # Coordinates fetching messages (Consumer Strategy) with working over them (Worker Strategy).
  class Manager

    # Logs the boot message and configuration, then instantiates the configured worker
    # strategy and fetcher, handing each of them this manager.
    def initialize()
      Chore.logger.info "Booting Chore #{Chore::VERSION}"
      Chore.logger.debug { Chore.config.inspect }
      @processed = 0
      @stopping = false
      @started_at = nil
      @worker_strategy = Chore.config.worker_strategy.new(self)
      @fetcher = Chore.config.fetcher.new(self)
    end

    # Starts the Manager: records the start time, then calls #start on the configured
    # Worker Strategy followed by Fetcher#start.
    def start
      @started_at = Time.now
      @worker_strategy.start
      @fetcher.start
    end

    # Shuts down the Manager, the Fetcher, and the Worker Strategy (in that order) after
    # running the +:before_shutdown+ hook. Only the first call has any effect.
    def shutdown!
      return if @stopping

      Chore.logger.info "Manager shutting down"
      @stopping = true
      Chore.run_hooks_for(:before_shutdown)
      @fetcher.stop!
      @worker_strategy.stop!
    end

    # Takes an amount of +work+ (either an Array of, or a single UnitOfWork) and passes it to the
    # worker strategy to process. <b>This method is blocking</b>: it keeps trying to hand the work
    # to the worker strategy until the strategy accepts it, and the strategy decides when it may
    # accept work. The blocking semantic keeps the Fetcher from pulling messages off of the queue
    # faster than they can be consumed. Nothing is assigned once the manager is stopping.
    def assign(work)
      Chore.logger.debug { "Manager#assign: No. of UnitsOfWork: #{work.length})" }
      return if @stopping

      @worker_strategy.assign(work)
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Chore
  # Base class for Chore Publishers: the minimal interface a custom publisher has to provide.
  class Publisher
    DEFAULT_OPTIONS = { :encoder => JsonEncoder }

    attr_accessor :options

    # Stores DEFAULT_OPTIONS overlaid with +opts+ as this publisher's options.
    def initialize(opts={})
      self.options = DEFAULT_OPTIONS.merge(opts)
    end

    # Publishes +job+ to the queue identified by +queue_name+ by building a publisher with
    # default options and delegating to its #publish. Not designed to be used directly.
    def self.publish(queue_name,job)
      publisher = new
      publisher.publish(queue_name, job)
    end

    # Always raises NotImplementedError; override this in your custom publisher class.
    def publish(queue_name,job)
      raise NotImplementedError
    end

    protected

    # Serializes +job+ with the encoder configured under the +:encoder+ option.
    def encode_job(job)
      encoder = options[:encoder]
      encoder.encode(job)
    end

  end
end
|
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'chore/queues/filesystem/filesystem_queue'
|
3
|
+
|
4
|
+
module Chore
  module Queues
    module Filesystem

      # This is the consuming side of the file system queue. This class consumes jobs created by
      # FilesystemPublisher#publish. The root of the file system queue is configured in
      # Chore.config.fs_queue_root. In there a directory will be created for each queue name.
      # Each queue directory contains a directory called "new" and one called "inprogress".
      # FilesystemPublisher#publish creates new job files in the "new" directory. This consumer
      # polls that directory every 5 seconds for new jobs which are moved to "inprogress".
      #
      # Once complete job files are deleted.
      # If rejected they are moved back into new and will be processed again. This may not be the
      # desired behavior long term and we may want to add configuration to this class to allow more
      # configurable failure handling and retrying.
      class Consumer < Chore::Consumer
        include FilesystemQueue

        Chore::CLI.register_option 'fs_queue_root', '--fs-queue-root DIRECTORY', 'Root directory for fs based queue'

        # One Mutex per queue name, shared by every consumer of that queue in this process.
        FILE_QUEUE_MUTEXES = {}

        # The amount of time units of work can run before the queue considers
        # them timed out. For filesystem queues, this is the global default.
        attr_reader :queue_timeout

        # Registers the per-queue Mutex and resolves the "new" and "inprogress" directories
        # for +queue_name+.
        def initialize(queue_name, opts={})
          super(queue_name, opts)

          # Even though putting these Mutexes in this hash is, by itself, not particularly threadsafe
          # as long as some Mutex ends up in the queue after all consumers are created we're good
          # as they are pulled from the queue and synchronized for file operations below
          FILE_QUEUE_MUTEXES[@queue_name] ||= Mutex.new

          @in_progress_dir = in_progress_dir(queue_name)
          @new_dir = new_dir(queue_name)
          @queue_timeout = Chore.config.default_queue_timeout
        end

        # Polls the "new" directory while the consumer is running, handing every job found to
        # +handler+. Errors raised while handling a batch are logged and polling continues;
        # each pass is followed by a 5 second sleep.
        def consume(&handler)
          Chore.logger.info "Starting consuming file system queue #{@queue_name} in #{queue_dir(queue_name)}"
          while running?
            begin
              #TODO move expired job files to new directory?
              handle_jobs(&handler)
            rescue => e
              Chore.logger.error { "#{self.class}#consume: #{e} #{e.backtrace * "\n"}" }
            ensure
              sleep 5
            end
          end
        end

        # Moves the job file +id+ back to the "new" directory with its attempt counter bumped.
        def reject(id)
          Chore.logger.debug "Rejecting: #{id}"
          make_new_again(id)
        end

        # Deletes the in-progress job file +id+.
        def complete(id)
          Chore.logger.debug "Completing (deleting): #{id}"
          FileUtils.rm(File.join(@in_progress_dir, id))
        end

        private

        # finds all new job files, moves them to in progress and starts the job
        # Returns a list of the job files processed
        def handle_jobs(&block)
          # all consumers on a single queue share a lock on handling files.
          # Each consumer comes along, processes all present files and release the lock.
          # This isn't particularly useful but is here to allow the configuration of
          # ThreadedConsumerStrategy with multiple threads on a queue safely although you
          # probably wouldn't want to do that.
          FILE_QUEUE_MUTEXES[@queue_name].synchronize do
            job_files.each do |job_file|
              Chore.logger.debug "Found a new job #{job_file}"

              job_json = File.read(make_in_progress(job_file))
              _, previous_attempts = file_info(job_file)

              # job_file is just the name which is the job id
              block.call(job_file, queue_name, queue_timeout, job_json, previous_attempts)
              Chore.run_hooks_for(:on_fetch, job_file, job_json)
            end
          end
        end

        # Moves +job+ from "new" to "inprogress" and returns its new full path.
        def make_in_progress(job)
          move_job(File.join(@new_dir, job), File.join(@in_progress_dir, job))
        end

        # Moves +job+ from "inprogress" back to "new", renaming it so that the attempt
        # counter encoded in the filename is incremented. Returns the new full path.
        def make_new_again(job)
          basename, previous_attempts = file_info(job)
          move_job(File.join(@in_progress_dir, job), File.join(@new_dir, "#{basename}.#{previous_attempts + 1}.job"))
        end

        # Moves the job file +from+ to +to+ while holding an exclusive lock on it and
        # returns +to+. The block form of File.open closes the handle once the move is
        # done, so file descriptors are not leaked for every job that passes through.
        def move_job(from, to)
          File.open(from, "r") do |f|
            # wait on the lock a publisher in another process might have.
            # Once we get the lock the file is ours to move to mark it in progress
            f.flock(File::LOCK_EX)
            begin
              FileUtils.mv(f.path, to)
            ensure
              f.flock(File::LOCK_UN) # yes we can unlock it after its been moved, I checked
            end
          end
          to
        end

        # Entries of the "new" directory, skipping anything whose name starts with a dot.
        def job_files
          Dir.entries(@new_dir).reject { |e| e.start_with?(".") }
        end

        # Grabs the unique identifier for the job filename and the number of times
        # it's been attempted (also based on the filename)
        def file_info(job_file)
          id, previous_attempts = File.basename(job_file, '.job').split('.')
          [id, previous_attempts.to_i]
        end
      end
    end
  end
end
|
128
|
+
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# Common methods used by FilesystemConsumer and FilesystemPublisher for dealing with the
|
2
|
+
# directories which implement the queue.
|
3
|
+
module Chore::FilesystemQueue

  # Local directory for new jobs to be placed
  NEW_JOB_DIR = "new"
  # Local directory for jobs currently in-process to be moved
  IN_PROGRESS_DIR = "inprogress"

  # Retrieves the directory for in-process messages to go. If the directory for the +queue_name+ doesn't exist,
  # it will be created for you. If the directory is not writable, an IOError will be raised
  def in_progress_dir(queue_name)
    validate_dir(queue_name, IN_PROGRESS_DIR)
  end

  # Retrieves the directory for newly received messages to go. If the directory for the +queue_name+ doesn't exist,
  # it will be created for you. If the directory is not writable, an IOError will be raised
  def new_dir(queue_name)
    validate_dir(queue_name, NEW_JOB_DIR)
  end

  # Returns the root directory where messages are placed (Chore.config.fs_queue_root, expanded
  # to an absolute path and created if missing). The result is memoized.
  def root_dir
    @root_dir ||= prepare_dir(File.expand_path(Chore.config.fs_queue_root))
  end

  # Returns the fully qualified path to the directory for +queue_name+
  def queue_dir(queue_name)
    prepare_dir(File.join(root_dir, queue_name))
  end

  private
  # Returns the directory for the given +queue_name+ and +task_state+. If the directory doesn't exist, it will be
  # created for you. If the directory is not writable, an IOError will be raised
  def validate_dir(queue_name, task_state)
    prepare_dir(File.join(queue_dir(queue_name), task_state))
  end

  # Creates a directory if it does not exist. Returns the directory.
  # Raises IOError when the directory is not writable.
  def prepare_dir(dir)
    # Dir.exist? rather than the deprecated Dir.exists? alias, which newer Rubies no longer provide.
    unless Dir.exist?(dir)
      FileUtils.mkdir_p(dir)
    end

    raise IOError.new("directory for file system queue does not have write permission: #{dir}") unless File.writable?(dir)
    dir
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'chore/queues/filesystem/filesystem_queue'
|
2
|
+
|
3
|
+
module Chore
  module Queues
    module Filesystem

      # Publisher for writing jobs to the local filesystem. Useful for testing in offline environments or
      # when queuing implementations are irrelevant to the task at hand, such as local development of new jobs.
      class Publisher < Chore::Publisher
        # See the top of FilesystemConsumer for comments on how this works
        include FilesystemQueue

        # Mutex serializing job file creation across the threads of this process
        FILE_MUTEX = Mutex.new

        # Writes +job+ (a Hash with at least a +:class+ entry) as JSON into a fresh file in
        # the "new" directory of +queue_name+.
        #
        # use of mutex and file locking should make this both threadsafe and safe for multiple
        # processes to use the same queue directory simultaneously.
        #
        # Errors raised while serializing or writing the job propagate to the caller.
        def publish(queue_name,job)
          # Serialize before touching the filesystem so that a serialization failure does not
          # leave an empty job file behind.
          payload = job.to_json

          FILE_MUTEX.synchronize do
            published = false
            until published
              # keep trying to get a file with nothing in it meaning we just created it
              # as opposed to us getting someone else's file that hasn't been processed yet.
              # Append mode creates the file when missing but, unlike "w", never truncates an
              # existing one, so the size check below is meaningful. The block form closes
              # the handle on every path.
              File.open(filename(queue_name, job[:class].to_s), "a") do |f|
                if f.flock(File::LOCK_EX | File::LOCK_NB) && f.size == 0
                  begin
                    f.write(payload)
                    published = true
                  ensure
                    f.flock(File::LOCK_UN)
                  end
                end
              end
            end
          end
        end

        # create a unique filename for a job in a queue based on queue name, job name and date
        # (down to the microsecond). New files always start with an attempt counter of 0.
        def filename(queue_name, job_name)
          now = Time.now.strftime "%Y%m%d-%H%M%S-%6N"
          previous_attempts = 0
          File.join(new_dir(queue_name), "#{queue_name}-#{job_name}-#{now}.#{previous_attempts}.job")
        end
      end
    end
  end
end
|