gouda 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  # Rails integration for Gouda. Registers the `preload` rake task, hands the
  # application executor to the Gouda configuration, mixes the interrupt
  # handling into ActiveJob and copies `config.gouda` settings into
  # `Gouda.config` whenever the application is prepared.
  class Railtie < Rails::Railtie
    # Settings which are copied from `Rails.application.config.gouda` as-is.
    RAILS_CONFIGURABLE_SETTINGS = %i[
      cleanup_preserved_jobs_before
      preserve_job_records
      polling_sleep_interval_seconds
      worker_thread_count
    ].freeze

    rake_tasks do
      task preload: :setup do
        if defined?(Rails) && Rails.respond_to?(:application)
          if Rails.application.config.eager_load
            ActiveSupport.run_load_hooks(:before_eager_load, Rails.application)
            Rails.application.config.eager_load_namespaces.each(&:eager_load!)
          end
        end
      end
    end

    initializer "gouda.configure_rails_initialization" do
      Gouda.config.app_executor = if defined?(Rails) && Rails.respond_to?(:application)
        Rails.application.executor
      else
        ActiveSupport::Executor
      end
    end

    initializer "gouda.active_job.extensions" do
      ActiveSupport.on_load :active_job do
        include Gouda::ActiveJobExtensions::Interrupts
      end
    end

    generators do
      require "generators/gouda/install_generator"
    end

    # The `to_prepare` block which is executed once in production
    # and before each request in development.
    config.to_prepare do
      Gouda::Scheduler.update_schedule_from_config!

      if defined?(Rails) && Rails.respond_to?(:application)
        config_from_rails = Rails.application.config.try(:gouda)
        if config_from_rails
          RAILS_CONFIGURABLE_SETTINGS.each do |setting|
            value = config_from_rails[setting]
            # A key the application did not set reads as nil. Copying that nil would
            # wipe the Gouda default (e.g. a nil thread count or a nil sleep interval),
            # so only values which were actually provided are applied.
            Gouda.config.public_send(:"#{setting}=", value) unless value.nil?
          end
          if Gouda.config.logger
            Gouda.config.logger.level = config_from_rails[:log_level] || Gouda.config.log_level
          end
        end
      else
        Gouda.config.preserve_job_records = false
        Gouda.config.polling_sleep_interval_seconds = 0.2
        # The logger is configurable and may have been set to nil
        Gouda.config.logger&.level = Gouda.config.log_level
      end
    end
  end
end
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Timers handles jobs which run either on a Cron schedule or using arbitrary time intervals
4
+
5
+ require "fugit"
6
+ module Gouda::Scheduler
7
# One line of the schedule: a job class plus either a Cron pattern or an interval
# in seconds, together with the arguments used to instantiate the job.
# `args`, `kwargs` and `set` may be plain values or callables which produce them.
class Entry < Struct.new(:name, :cron, :interval_seconds, :job_class, :kwargs, :args, :set, keyword_init: true)
  # @return [String] the non-nil values of name, interval, cron pattern and job class joined with "_"
  def scheduler_key
    key_parts = [name, interval_seconds, cron, job_class]
    key_parts.compact.join("_")
  end

  # @return [Time, nil] when the next run is due, or nil if the entry has neither an interval nor a cron pattern
  # @raise [ArgumentError] if the cron pattern cannot be parsed
  def next_at
    return next_time_for_interval if interval_seconds
    return next_time_for_cron if cron

    nil
  end

  # Instantiates the job for the next run and stamps it with the run time and the scheduler key.
  #
  # @return [ActiveJob::Base, nil] the job (after `set` if options were given), or nil if there is no next run
  def build_active_job
    run_at = next_at
    return unless run_at

    klass = job_class.constantize

    # Keyword arguments are only splatted when there are any, so that jobs
    # which take no keywords can be instantiated too
    active_job = if kwargs_value.present?
      klass.new(*args_value, **kwargs_value)
    else
      klass.new(*args_value)
    end
    active_job.scheduled_at = run_at
    active_job.scheduler_key = scheduler_key

    if set_value.present?
      active_job.set(set_value)
    else
      active_job
    end
  end

  private

  # Interval entries are scheduled relative to the furthest future workload
  # carrying the same scheduler key, or relative to now if there is none.
  def next_time_for_interval
    latest_scheduled_at = Gouda::Workload
      .where(scheduler_key: scheduler_key)
      .where("scheduled_at > NOW()")
      .order("scheduled_at DESC")
      .pluck(:scheduled_at)
      .first
    (latest_scheduled_at || Time.now.utc) + interval_seconds
  end

  def next_time_for_cron
    parsed_pattern = Fugit::Cron.parse(cron)
    raise ArgumentError, "Unable to parse cron pattern #{cron.inspect}" unless parsed_pattern
    Time.at(parsed_pattern.next_time.to_i).utc
  end

  def set_value
    options = set || {}
    return options.call if options.respond_to?(:call)
    options
  end

  def args_value
    positional = args || []
    return positional.call if positional.respond_to?(:call)
    positional
  end

  def kwargs_value
    keywords = kwargs || nil
    return keywords.call if keywords.respond_to?(:call)
    keywords
  end
end
55
+
56
# Rebuilds the in-memory schedule (`@cron_table`).
#
# When no table is passed, it is taken from `Rails.application.config.gouda`
# (keys `:cron` and `:enable_cron`) if that is present, otherwise from
# `Gouda.config` (`cron` and `enable_cron`). If cron is not enabled the
# existing schedule is left untouched.
#
# @param cron_table_hash [Hash, nil] entry name => entry parameters. The job class
#   may be given under `:job_class` or `:class`.
# @return [Array<Entry>, nil] the new schedule, or nil if nothing was updated
def self.update_schedule_from_config!(cron_table_hash = nil)
  Gouda.logger.info "Updating scheduled workload entries..."
  if cron_table_hash.blank?
    config_from_rails = Rails.application.config.try(:gouda)

    cron_table_hash = if config_from_rails.present?
      config_from_rails.dig(:cron).to_h if config_from_rails.dig(:enable_cron)
    elsif Gouda.config.enable_cron
      Gouda.config.cron
    end

    return unless cron_table_hash
  end

  defaults = {cron: nil, interval_seconds: nil, kwargs: nil, args: nil}
  @cron_table = cron_table_hash.map do |(name, cron_entry_params)|
    # `merge` gives us a copy to work on. The hash we were given is application
    # configuration (possibly frozen, and read again on every reload) so it must not be modified.
    entry_params = defaults.merge(cron_entry_params)
    # `class` is a reserved keyword and a method that exists on every Ruby object so...
    class_name_given_as_class = entry_params.delete(:class)
    entry_params[:job_class] ||= class_name_given_as_class
    Entry.new(name: name, **entry_params)
  end
end
78
+
79
# Enqueues the next run for the schedule entry which produced the given workload.
# Does nothing if the workload has no scheduler key, or if no entry of the
# current schedule has that key.
#
# @param finished_workload [#scheduler_key] the workload which has just finished
def self.enqueue_next_scheduled_workload_for(finished_workload)
  key = finished_workload.scheduler_key
  return unless key

  entries_by_key = @cron_table.to_a.index_by(&:scheduler_key)
  matching_entry = entries_by_key[key]
  return unless matching_entry

  next_job = matching_entry.build_active_job
  Gouda.enqueue_jobs_via_their_adapters([next_job])
end
88
+
89
# @return [Array<Entry>] the current schedule, empty if none has been loaded yet
def self.entries
  @cron_table.to_a
end
92
+
93
# Brings the workloads table in line with the current schedule: workloads whose
# scheduler key is no longer part of the schedule are deleted, and the next run
# of every schedule entry gets enqueued. Both happen in one transaction.
def self.update_scheduled_workloads!
  table_entries = @cron_table || []

  # Remove any cron keyed workloads which no longer match config-wise
  known_keys = table_entries.map(&:scheduler_key).uniq
  Gouda::Workload.transaction do
    # The IS NOT NULL condition is essential. With an empty schedule `known_keys` is []
    # and ActiveRecord turns `where.not(scheduler_key: [])` into a condition which is true
    # for every row - without the guard all workloads would be deleted, including
    # the ones which were never enqueued by the scheduler.
    Gouda::Workload.where.not(scheduler_key: nil).where.not(scheduler_key: known_keys).delete_all

    # Insert the next iteration for every "next" entry in the crontab.
    active_jobs_to_enqueue = table_entries.filter_map(&:build_active_job)
    Gouda.logger.info "#{active_jobs_to_enqueue.size} job(s) to enqueue from the scheduler."
    enqjobs = Gouda.enqueue_jobs_via_their_adapters(active_jobs_to_enqueue)
    Gouda.logger.info "#{enqjobs.size} scheduled job(s) enqueued."
  end
end
108
+ end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  # Version of the gouda gem
  VERSION = "0.1.0"
end
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "securerandom"
4
+ require "gouda/version"
5
+
6
+ module Gouda
7
+ POLL_INTERVAL_DURATION_SECONDS = 1
8
+
9
# Keeps the IDs of the jobs currently executing on this worker, guarded by a mutex.
# The IDs are used to update the heartbeat timestamps during execution. Only three
# operations are needed: add to the set, remove from the set, and convert the set
# into an array for a SQL query with `WHERE id IN`.
class ThreadSafeSet
  def initialize
    @mutex = Mutex.new
    @set = Set.new
  end

  # @return the value which was added
  def add(value)
    @mutex.synchronize do
      @set << value
    end
    value
  end

  # @return the value which was removed
  def delete(value)
    @mutex.synchronize do
      @set.delete(value)
    end
    value
  end

  # @return [Array] a snapshot of the current members
  def to_a
    @mutex.synchronize do
      @set.to_a
    end
  end
end
33
+
34
# Returns `true` once the given number of seconds has elapsed since the check
# was created. Useful to terminate a worker after a certain amount of time.
class TimerShutdownCheck
  # @param seconds_float [Numeric] how long to wait before `call` starts returning true
  def initialize(seconds_float)
    @timeout_seconds = seconds_float
    @started_at = monotonic_now
  end

  # @return [Boolean] whether more than the configured number of seconds has passed
  def call
    elapsed_seconds = monotonic_now - @started_at
    elapsed_seconds > @timeout_seconds
  end

  private

  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
46
+
47
# Captures UNIX signals (TERM and INT) and returns true once one of them was received.
# Initializing this check installs signal handlers, meaning that the worker will not
# raise `Interrupt` from any threads but will get the space it needs to terminate cleanly.
# At least for SIGINT and SIGTERM this is very desirable. This is the default shutdown check.
class TrapShutdownCheck
  def initialize
    @did_trap = false
    @did_log = false
    %i[TERM INT].each do |signal_name|
      Signal.trap(signal_name) { @did_trap = signal_name }
    end
  end

  # @return [Boolean] whether a TERM or INT signal has been received
  def call
    return false unless @did_trap

    # Report the signal only the first time it gets noticed
    unless @did_log
      warn("Gouda worker signaled to terminate via SIG#{@did_trap}")
      @did_log = true
    end
    true
  end
end
75
+
76
# This shutdown check returns `true` once there are no workloads waiting to
# start for this worker. It can be used to run a worker just as long as there
# are jobs to handle and then to let it quit by itself (handy for spot
# instances and the like)
class EmptyQueueShutdownCheck
  # @param queue_constraint the queue constraint passed on to `Gouda::Workload.waiting_to_start`
  def initialize(queue_constraint = Gouda::AnyQueue)
    @queue_constraint = queue_constraint
  end

  # @return [Boolean] true if nothing is waiting to start; false if something is waiting,
  #   or if the check itself failed
  def call
    # return false unless Rails.application # Rails is still booting and there is no application defined

    begin
      Gouda.config.app_executor.wrap do
        waiting = Gouda::Workload.waiting_to_start(queue_constraint: @queue_constraint)
        waiting.none?
      end
    rescue # If the DB connection cannot be checked out etc
      false
    end
  end
end
95
+
96
# A wrapping callable which returns `true` if any of the given callables
# returns true. This can be used to combine a timed shutdown ("in 30 seconds")
# with a signal handler shutdown ("shutdown on SIGTERM/SIGINT")
class CombinedShutdownCheck
  # @param callables_for_condition[#call] other shutdown checks
  def initialize(*callables_for_condition)
    @conditions = callables_for_condition
  end

  # @return [Boolean] whether any of the wrapped checks asked for a shutdown
  def call
    # Once one shutdown check told us to shut down there is no point to query all the others
    return @memo if @memo

    @memo = @conditions.any? { |condition| condition.call }
  end
end
110
+
111
# Start looping, taking work from the queue and performing it, over multiple worker threads.
# Once the `check_shutdown` callable returns `true` the threads will cleanly terminate and the method will return (so it is blocking).
#
# @param n_threads[Integer] how many _worker_ threads to start. Housekeeping runs on the calling thread, so ideally this should be the size of your connection pool minus 1
# @param check_shutdown[#call, Array<#call>] A callable object (can be a Proc etc.), or an array of them. Once it starts returning `true` the worker threads and the housekeeping loop will cleanly exit
# @param queue_constraint the queue constraint passed on to `Gouda::Workload.checkout_and_perform_one`
# @raise [ArgumentError] if fewer than 1 worker thread is requested
def self.worker_loop(n_threads:, check_shutdown: TrapShutdownCheck.new, queue_constraint: Gouda::AnyQueue)
  # We need quite a few things when starting the loop - we have to be far enough into the Rails bootup sequence
  # that both the application and the executor are available
  #
  # raise "Rails is not loaded yet" unless defined?(Rails) && Rails.respond_to?(:application)
  # raise "Rails application is not loaded yet" unless Rails.application
  # raise "Rails executor not available yet" unless Rails.application.executor

  raise ArgumentError, "You need at least 1 worker thread, but you requested #{n_threads}" if n_threads < 1

  check_shutdown = CombinedShutdownCheck.new(*check_shutdown) if !check_shutdown.respond_to?(:call) && check_shutdown.is_a?(Array)

  worker_id = [Socket.gethostname, Process.pid, SecureRandom.uuid].join("-")

  executing_workload_ids = ThreadSafeSet.new

  # How long a worker thread eases off when it found nothing to do. The value is
  # the same for every iteration, so it is computed once.
  idle_sleep_interval = POLL_INTERVAL_DURATION_SECONDS + (POLL_INTERVAL_DURATION_SECONDS * 0.25)

  worker_threads = n_threads.times.map do
    Thread.new do
      worker_id_and_thread_id = [worker_id, "t0x#{Thread.current.object_id.to_s(16)}"].join("-")
      loop do
        break if check_shutdown.call

        did_process = Gouda.config.app_executor.wrap do
          Gouda::Workload.checkout_and_perform_one(executing_on: worker_id_and_thread_id, queue_constraint: queue_constraint, in_progress: executing_workload_ids)
        end

        # If no job was retrieved the queue is likely empty. Relax the polling then and ease off.
        # If a job was retrieved it is likely that a burst has just been enqueued, and we do not
        # sleep but proceed to attempt to retrieve the next job right after.
        sleep_with_interruptions(idle_sleep_interval, check_shutdown) unless did_process
      rescue => e
        warn "Uncaught exception during perform (#{e.class} - #{e})"
        # Ease off before the next attempt. Whatever failed (for example the database
        # being unreachable) is likely to fail again right away, and retrying without
        # a pause would spin the CPU and flood the log. A plain `sleep` is used because
        # it cannot fail the way the checks inside `sleep_with_interruptions` can.
        sleep(POLL_INTERVAL_DURATION_SECONDS)
      end
    end
  end

  # Do the housekeeping tasks on main
  loop do
    break if check_shutdown.call

    Gouda.config.app_executor.wrap do
      # Mark known executing jobs as such. If a worker process is killed or the machine it is running on dies,
      # a stale timestamp can indicate to us that the job was orphaned and is marked as "executing"
      # even though the worker it was running on has failed for whatever reason.
      # Later on we can figure out what to do with those jobs (re-enqueue them or toss them)
      Gouda::Workload.where(id: executing_workload_ids.to_a, state: "executing").update_all(executing_on: worker_id, last_execution_heartbeat_at: Time.now.utc)

      # Find jobs which just hung and clean them up (mark them as "finished" and enqueue replacement workloads if possible)
      Gouda::Workload.reap_zombie_workloads
    rescue => e
      # Appsignal.add_exception(e)
      warn "Uncaught exception during housekeeping (#{e.class} - #{e})"
    end

    # Jitter the sleep so that the workers booted at the same time do not all dogpile
    randomized_sleep_duration_s = POLL_INTERVAL_DURATION_SECONDS + (POLL_INTERVAL_DURATION_SECONDS.to_f * rand)
    sleep_with_interruptions(randomized_sleep_duration_s, check_shutdown)
  end
ensure
  # Wait for the worker threads (if any were started) to wind down
  worker_threads&.map(&:join)
end
177
+
178
# Sleeps for `n_seconds`, waking up every `Gouda.config.polling_sleep_interval_seconds`
# to ask `must_abort_proc` whether the sleep should be cut short.
#
# @param n_seconds [Numeric] the total time to sleep
# @param must_abort_proc [#call] once it returns a truthy value the method returns
# @return [nil]
def self.sleep_with_interruptions(n_seconds, must_abort_proc)
  deadline_seconds = Process.clock_gettime(Process::CLOCK_MONOTONIC) + n_seconds
  check_interval_seconds = Gouda.config.polling_sleep_interval_seconds
  loop do
    return if must_abort_proc.call

    remaining_seconds = deadline_seconds - Process.clock_gettime(Process::CLOCK_MONOTONIC)
    return if remaining_seconds <= 0

    # Never sleep past the deadline: with a check interval longer than the time
    # which is left we would otherwise sleep for more than `n_seconds` in total.
    sleep([check_interval_seconds, remaining_seconds].min)
  end
end
188
+ end
@@ -0,0 +1,214 @@
1
+ # # frozen_string_literal: true
2
+
3
+ # This model is called "workload" for a reason. The ActiveJob can be enqueued multiple times with
4
+ # the same job ID which gets generated by Rails. These multiple enqueues of the same job are not
5
+ # exactly copies of one another. When you use job-iteration for example, your job will be retried with a different
6
+ # cursor position value. When you use ActiveJob `rescue_from` as well - the job will be retried and keep the same
7
+ # active job ID, but it then gets returned into the queue "in some way". What we want is that the records in our
8
+ # table represent a unit of work that the worker has to execute "at some point". If the same job gets enqueued multiple
9
+ # times due to retries or pause/resume we want the enqueues to be separate workloads, which can fail or succeed
10
+ # independently. This also allows the queue records to be "append-only" which allows the records to be pruned
11
+ # on a regular basis. This is why they are called "workloads" and not "jobs". "Executions" is a great term used
12
+ # by good_job but it seems that it is not clear what has the "identity". With the Workload the ID of the workload
13
+ # is the "provider ID" for ActiveJob. It is therefore possible (and likely) that multiple Workloads will exist
14
+ # sharing the same ActiveJob ID.
15
+ class Gouda::Workload < ActiveRecord::Base
16
+ ZOMBIE_MAX_THRESHOLD = "5 minutes"
17
+
18
+ self.table_name = "gouda_workloads"
19
# GoodJob calls these "enqueued" but they are more like
# "waiting to start" - workloads which are in the "enqueued" state, match the
# queue constraint, are scheduled for now or earlier, and do not share their
# execution concurrency key with a workload which is executing right now.
scope :waiting_to_start, ->(queue_constraint: Gouda::AnyQueue) {
  # The IS NULL branch is needed because of SQL three-valued logic: for a workload
  # without a concurrency key `NULL NOT IN (<non-empty list>)` evaluates to NULL,
  # not TRUE. Without it, every workload without a key would drop out of this
  # scope for as long as any workload with a key is executing.
  condition_for_ready_to_execute_jobs = <<~SQL
    #{queue_constraint.to_sql}
    AND (
      execution_concurrency_key IS NULL
      OR execution_concurrency_key NOT IN (
        SELECT execution_concurrency_key FROM #{quoted_table_name} WHERE state = 'executing' AND execution_concurrency_key IS NOT NULL
      )
    )
    AND state = 'enqueued'
    AND (scheduled_at <= clock_timestamp())
  SQL

  where(Arel.sql(condition_for_ready_to_execute_jobs))
}
35
+
36
# Workloads which have a non-empty error recorded
scope :errored, lambda { where("error != '{}'") }
# Unfinished workloads whose serialized job carries a non-empty "exception_executions"
scope :retried, lambda { where("(serialized_params -> 'exception_executions') != '{}' AND state != 'finished'") }
# One scope per workload state
scope :finished, lambda { where(state: "finished") }
scope :enqueued, lambda { where(state: "enqueued") }
scope :executing, lambda { where(state: "executing") }
41
+
42
# @return [Array<String>] the distinct queue names found in the workloads table, in ascending order
def self.queue_names
  distinct_names_sql = "SELECT DISTINCT(queue_name) FROM #{quoted_table_name} ORDER BY queue_name ASC"
  connection.select_values(distinct_names_sql)
end
45
+
46
# Deletes finished workloads. When `Gouda.config.preserve_job_records` is set only
# the workloads which finished longer than `Gouda.config.cleanup_preserved_jobs_before`
# ago are deleted, otherwise all finished workloads are.
#
# @return the result of `delete_all`
def self.prune
  finished_workloads = where(state: "finished")
  if Gouda.config.preserve_job_records
    # The retention period is a setting on the configuration object, not a method of the Gouda module
    retention_period = Gouda.config.cleanup_preserved_jobs_before
    finished_workloads.where("execution_finished_at < ?", retention_period.ago).delete_all
  else
    finished_workloads.delete_all
  end
end
53
+
54
# Re-enqueue zombie workloads which have been left to rot due to machine kills, worker OOM kills and the like.
# A workload counts as a zombie if it is "executing" and its last heartbeat is older than ZOMBIE_MAX_THRESHOLD.
# Rows are locked so that no single zombie gets enqueued more than once, and every workload is handled in
# its own `with_lock` so that one of them can be rolled back without rolling back the entire batch.
def self.reap_zombie_workloads
  stale_heartbeat_condition = "state = 'executing' AND last_execution_heartbeat_at < (clock_timestamp() - interval '#{ZOMBIE_MAX_THRESHOLD}')"

  uncached do # again needed due to the use of clock_timestamp() in the SQL
    transaction do
      zombies = Gouda::Workload.lock("FOR UPDATE SKIP LOCKED").where(stale_heartbeat_condition)
      zombies.find_each(batch_size: 1000) do |workload|
        # with_lock will start its own transaction
        workload.with_lock("FOR UPDATE SKIP LOCKED") do
          Gouda.logger.info { "Reviving (re-enqueueing) Gouda workload #{workload.id} after interruption" }

          # Appsignal.increment_counter("gouda_workloads_revived", 1, job_class: workload.active_job_class_name)

          last_heartbeat = workload.last_execution_heartbeat_at
          workload.update!(state: "finished", interrupted_at: last_heartbeat, last_execution_heartbeat_at: Time.now.utc, execution_finished_at: Time.now.utc)

          # Carry the time of the interruption over to the replacement job, so that the job can
          # raise an interruption error when it gets executed. That error can then be handled
          # like any other ActiveJob exception (using rescue_from or similar).
          replacement_job = ActiveJob::Base.deserialize(workload.active_job_data)
          replacement_job.interrupted_at = last_heartbeat
          replacement_job.enqueue
        end
      rescue ActiveRecord::RecordNotFound
        # This will happen if we have selected the zombie workload in the outer block, but
        # by the point we reload it and take a FOR UPDATE SKIP LOCKED lock another worker is
        # already reaping it - a call to `reload` will cause a RecordNotFound, since Postgres
        # will hide the row from us. This is what we want in fact - we want to progress to
        # the next row. So we allow the code to proceed, as we expect that the other worker
        # (which stole the workload from us) will have set it to "state=finished" by the time we reattempt
        # our SELECT with conditions
        Gouda.logger.debug { "Gouda workload #{workload.id} cannot be reaped as it was hijacked by another worker" }
      end
    end
  end
end
89
+
90
# Lock the next workload and mark it as executing.
# Candidates are "enqueued" workloads matching the queue constraint which are due and which
# do not share their execution concurrency key with an executing workload. The one with the
# lowest priority value (NULLs last) is picked.
#
# @param executing_on [String] stored on the workload to tell where it is executing
# @param queue_constraint [#to_sql] limits the queues to pick from
# @return [Gouda::Workload, nil] the workload which was checked out, or nil if there was none
#   (or if another worker won the race for the same concurrency key)
def self.checkout_and_lock_one(executing_on:, queue_constraint: Gouda::AnyQueue)
  ready_condition = <<~SQL
    #{queue_constraint.to_sql}
    AND workloads.state = 'enqueued'
    AND NOT EXISTS (
    SELECT NULL
    FROM #{quoted_table_name} AS concurrent
    WHERE concurrent.state = 'executing'
    AND concurrent.execution_concurrency_key = workloads.execution_concurrency_key
    )
    AND workloads.scheduled_at <= clock_timestamp()
  SQL
  # Enter a txn just to mark this job as being executed "by us". This allows us to avoid any
  # locks during execution itself, including advisory locks
  candidates = Gouda::Workload
    .select("workloads.*")
    .from("#{quoted_table_name} AS workloads")
    .where(ready_condition)
    .order("workloads.priority ASC NULLS LAST")
    .lock("FOR UPDATE SKIP LOCKED")
    .limit(1)

  instrumentation_payload = {queue_constraint: queue_constraint.to_sql}
  ActiveSupport::Notifications.instrument("checkout_and_lock_one.gouda", instrumentation_payload) do |payload|
    payload[:condition_sql] = candidates.to_sql
    payload[:retried_checkouts_due_to_concurrent_exec] = 0
    uncached do # Necessary because we SELECT with a clock_timestamp() which otherwise gets cached by ActiveRecord query cache
      transaction do
        candidates.first.tap do |picked|
          picked&.update!(state: "executing", executing_on: executing_on, last_execution_heartbeat_at: Time.now.utc, execution_started_at: Time.now.utc)
        end
      rescue ActiveRecord::RecordNotUnique
        # It can happen that due to a race the concurrency key check does not capture
        # a job which _just_ entered the "executing" state, apparently after we do our SELECT. This will happen regardless
        # whether we are using a CTE or a sub-SELECT
        payload[:retried_checkouts_due_to_concurrent_exec] += 1
        nil
      end
    end
  end
end
131
+
132
# Get a new workload and call perform
# @param executing_on [String] passed on to `checkout_and_lock_one`
# @param queue_constraint passed on to `checkout_and_lock_one`
# @param in_progress[#add,#delete] Used for tracking work in progress for heartbeats
# @return whatever `perform_and_update_state!` returned, or nil if no workload was checked out
def self.checkout_and_perform_one(executing_on:, queue_constraint: Gouda::AnyQueue, in_progress: Set.new)
  # Select a job and mark it as "executing" which will make it unavailable to any other
  workload = checkout_and_lock_one(executing_on: executing_on, queue_constraint: queue_constraint)
  return unless workload

  in_progress.add(workload.id)
  workload.perform_and_update_state!
ensure
  # Stop tracking the workload no matter how the execution went
  in_progress.delete(workload.id) if workload
end
144
+
145
# @return [Time, nil] the "enqueued_at" value of the serialized job, parsed; nil if there is none
def enqueued_at
  raw_timestamp = serialized_params["enqueued_at"]
  return unless raw_timestamp

  Time.parse(raw_timestamp)
end
148
+
149
# Executes the job of this workload and marks the workload as "finished", whatever the outcome.
# If a fuse exists for the job class the job is not executed and a "WorkloadSkippedError"
# is recorded instead. Exceptions are recorded in `error` and are not re-raised.
# Afterwards the next run gets enqueued if the workload came from the scheduler.
#
# @return the result of the job; the exception if one reached us; the recorded error hash if the job was skipped
def perform_and_update_state!
  ActiveSupport::Notifications.instrument("perform_job.gouda", {workload: self}) do |payload|
    finishing_attributes = {}
    if Gouda::JobFuse.exists?(active_job_class_name: active_job_class_name)
      finishing_attributes[:error] = {class_name: "WorkloadSkippedError", message: "Skipped because of a fuse at #{Time.now.utc}"}
    else
      outcome = ActiveJob::Base.execute(active_job_data)

      handled_error = nil
      if outcome.is_a?(Exception)
        # When an exception is handled, let's say we have a retry_on <exception> in our job, we end up here
        # and it won't be rescued
        handled_error = outcome
        update!(error: error_hash(handled_error))
      end

      payload[:value] = outcome
      payload[:handled_error] = handled_error

      outcome
    end
  rescue => unretried_exception
    # When a job fails and is not retryable it will end up here.
    update!(error: error_hash(unretried_exception))
    payload[:unhandled_error] = unretried_exception
    Gouda.logger.error { unretried_exception }
    unretried_exception # Return the exception instead of re-raising it
  ensure
    update!(state: "finished", last_execution_heartbeat_at: Time.now.utc, execution_finished_at: Time.now.utc, **finishing_attributes)
    # If the workload that just finished was a scheduled workload (via timer/cron) enqueue the next execution.
    # Otherwise the next job will only get enqueued once the config is reloaded
    Gouda::Scheduler.enqueue_next_scheduled_workload_for(self)
  end
end
182
+
183
# Makes the workload due right away by setting `scheduled_at` to the current time.
# Workloads which are not in the "enqueued" state are left alone.
def schedule_now!
  with_lock do
    return unless state == "enqueued"

    update!(scheduled_at: Time.now.utc)
  end
end
190
+
191
# Marks the workload as finished by hand, recording a "RemovedError" as its error, and
# enqueues the next run if the workload came from the scheduler. A workload which is
# finished already is left alone.
def mark_finished!
  with_lock do
    return if state == "finished"

    now = Time.now.utc
    # Keep the recorded start time if the workload did start executing. (Assigning to a
    # bare `execution_started_at` would create a local variable which is always nil at that
    # point, so the real start time would always be replaced with `now`.)
    started_at = execution_started_at || now

    update!(
      state: "finished", last_execution_heartbeat_at: now,
      execution_finished_at: now, execution_started_at: started_at,
      error: {class_name: "RemovedError", message: "Manually removed at #{now}"}
    )
    Gouda::Scheduler.enqueue_next_scheduled_workload_for(self)
  end
end
206
+
207
# @param error [Exception]
# @return [Hash] class name, backtrace (empty if the error has none) and message of the error
def error_hash(error)
  {
    class_name: error.class.to_s,
    backtrace: Array(error.backtrace),
    message: error.message
  }
end
210
+
211
# @return [Hash] a deep copy of the serialized job with "provider_job_id", "interrupted_at"
#   and "scheduler_key" taken from this workload
def active_job_data
  workload_attributes = {
    "provider_job_id" => id,
    "interrupted_at" => interrupted_at,
    "scheduler_key" => scheduler_key
  }
  serialized_params.deep_dup.merge(workload_attributes) # TODO: is this memory-economical?
end
214
+ end
data/lib/gouda.rb ADDED
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "active_support"
4
+ require "active_support/core_ext/numeric/time"
5
+ require "active_support/configurable"
6
+ require "rails/railtie"
7
+ require_relative "gouda/bulk"
8
+ require_relative "gouda/adapter"
9
+ require_relative "gouda/scheduler"
10
+ require_relative "gouda/railtie" if defined?(Rails::Railtie)
11
+ require_relative "gouda/workload"
12
+ require_relative "gouda/worker"
13
+ require_relative "gouda/job_fuse"
14
+ require_relative "gouda/queue_constraints"
15
+ require_relative "gouda/active_job_extensions/interrupts"
16
+ require_relative "gouda/active_job_extensions/concurrency"
17
+ require_relative "active_job/queue_adapters/gouda_adapter"
18
+
19
+ module Gouda
20
# The settings of Gouda, with their defaults.
class Gouda::Configuration
  include ActiveSupport::Configurable

  config_accessor :preserve_job_records, default: false
  config_accessor :cleanup_preserved_jobs_before, default: 3.hours
  config_accessor :polling_sleep_interval_seconds, default: 0.2
  config_accessor :worker_thread_count, default: 1
  config_accessor :logger, default: ActiveSupport::Logger.new($stdout)
  config_accessor :app_executor
  config_accessor :cron, default: {}
  config_accessor :enable_cron, default: true
  # Log levels are:
  # constant | level
  # Logger::DEBUG (0)
  # Logger::INFO (1)
  # Logger::WARN (2)
  # Logger::ERROR (3)
  # Logger::FATAL (4)
  # Logger::UNKNOWN (5)
  config_accessor :log_level, default: Logger::DEBUG
end
41
+
42
# NOTE(review): presumably raised when a job which was interrupted gets executed again - confirm against the Interrupts extension
class InterruptError < StandardError; end
44
+
45
# NOTE(review): presumably raised by the Concurrency extension when a concurrency limit is hit - confirm
class ConcurrencyExceededError < StandardError; end
47
+
48
# Updates the scheduled workloads and then runs the worker loop with the configured
# number of threads. The queues to work on can be limited using the GOUDA_QUEUES
# environment variable. Blocks until the worker loop returns.
def self.start
  Gouda::Scheduler.update_scheduled_workloads!

  queues_from_env = ENV["GOUDA_QUEUES"]
  queue_constraint = queues_from_env ? Gouda.parse_queue_constraint(ENV["GOUDA_QUEUES"]) : Gouda::AnyQueue

  Gouda.logger.info("Gouda version: #{Gouda::VERSION}")
  Gouda.logger.info("Worker threads: #{Gouda.config.worker_thread_count}")

  Gouda.worker_loop(n_threads: Gouda.config.worker_thread_count, queue_constraint: queue_constraint)
end
62
+
63
# @return [Gouda::Configuration] the configuration object, created on first use
def self.config
  return @config if @config

  @config = Configuration.new
end
66
+
67
# Yields the configuration object so that settings can be changed in a block.
# @yieldparam config the object returned by `config`
def self.configure
  yield(config)
end
70
+
71
# @return the logger set on the configuration
def self.logger
  configuration = Gouda.config
  configuration.logger
end
74
+
75
# Creates the database objects used by Gouda: the `gouda_workload_state` enum, the
# `gouda_workloads` table with its indexes, and the `gouda_job_fuses` table.
#
# @param active_record_schema the object to call `create_enum`, `create_table` and `add_index` on
def self.create_tables(active_record_schema)
  active_record_schema.create_enum :gouda_workload_state, %w[enqueued executing finished]
  active_record_schema.create_table :gouda_workloads, id: :uuid do |t|
    t.uuid :active_job_id, null: false
    t.timestamp :scheduled_at, null: false
    t.timestamp :execution_started_at
    t.timestamp :execution_finished_at
    t.timestamp :last_execution_heartbeat_at
    t.timestamp :interrupted_at, null: true

    t.string :scheduler_key, null: true
    t.string :queue_name, null: false, default: "default"
    t.integer :priority
    t.string :active_job_class_name, null: false
    t.jsonb :serialized_params
    t.jsonb :error, default: {}, null: false
    t.enum :state, enum_type: :gouda_workload_state, default: "enqueued", null: false
    t.string :execution_concurrency_key
    t.string :enqueue_concurrency_key
    t.string :executing_on
    t.integer :position_in_bulk

    t.timestamps
  end

  # Columns and options of every index on gouda_workloads, in creation order.
  # The three unique partial indexes are the guards against double enqueueing,
  # double scheduling and concurrent execution under the same key.
  workload_indexes = [
    [[:priority, :id, :scheduled_at], {where: "state = 'enqueued'", name: :gouda_checkout_all_index}],
    [[:id, :last_execution_heartbeat_at], {where: "state = 'executing'", name: :gouda_last_heartbeat_index}],
    [[:enqueue_concurrency_key], {where: "state = 'enqueued' AND enqueue_concurrency_key IS NOT NULL", unique: true, name: :guard_double_enqueue}],
    [[:scheduler_key], {where: "state = 'enqueued' AND scheduler_key IS NOT NULL", unique: true, name: :guard_double_schedule}],
    [[:execution_concurrency_key], {where: "state = 'executing' AND execution_concurrency_key IS NOT NULL", unique: true, name: :guard_double_exec}],
    [[:active_job_id], {name: :same_job_display_idx}],
    [[:priority], {order: {priority: "ASC NULLS LAST"}, name: :ordered_priority_idx}],
    [[:last_execution_heartbeat_at], {name: :index_gouda_workloads_on_last_execution_heartbeat_at}],
    [[:scheduler_key], {name: :index_gouda_workloads_on_scheduler_key}]
  ]
  workload_indexes.each do |columns, options|
    active_record_schema.add_index :gouda_workloads, columns, **options
  end

  active_record_schema.create_table :gouda_job_fuses, id: false do |t|
    t.string :active_job_class_name, null: false

    t.timestamps
  end
end
116
+ end