gouda 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: f36a7c7b361cb8008f34b4a178c6bf1eb2a358e95502d0550fc14fef78ef5ed7
4
+ data.tar.gz: 6d2bc5778d6284f212189f4311e575393c5d9c1da13e4618f0b9a5d6ace9e954
5
+ SHA512:
6
+ metadata.gz: 71a324a3bae3ee17c2ed547915b1cc841442059c30f0c85138f1b220a7c9dc2b307e6d0363fba12069be0b893ce74e1cefe81f5d3fc1ceec427b56a7331be256
7
+ data.tar.gz: 8e9d521dc92ccd14175611e21535ed9e5b71610e74c22fd4038e11a16c104b9fc73d93431e1e9aae38bb37e739d91447f3739ddd8099083e4d6e821db1426ea5
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ - push
5
+
6
+ env:
7
+ BUNDLE_PATH: vendor/bundle
8
+
9
+ jobs:
10
+ test:
11
+ name: Tests
12
+ runs-on: ubuntu-latest
13
+ services:
14
+ postgres:
15
+ image: postgres:15-alpine
16
+ env:
17
+ POSTGRES_PASSWORD: postgres
18
+ ports:
19
+ - 5432:5432
20
+ options: >-
21
+ --health-cmd pg_isready
22
+ --health-interval 100ms
23
+ --health-timeout 1s
24
+ --health-retries 100
25
+
26
+ if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
27
+ steps:
28
+ - name: Checkout
29
+ uses: actions/checkout@v4
30
+ - name: Setup Ruby
31
+ uses: ruby/setup-ruby@v1
32
+ with:
33
+ ruby-version: '3.2'
34
+ bundler-cache: true
35
+ - name: "Tests and Lint"
36
+ run: bundle exec rake
data/.gitignore ADDED
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ Gemfile.lock
data/.rubocop.yml ADDED
@@ -0,0 +1,10 @@
1
+ require: standard
2
+
3
+ AllCops:
4
+ TargetRubyVersion: 3.1
5
+
6
+ inherit_gem:
7
+ standard: config/base.yml
8
+
9
+ # Don't define any actual rubocop config here - this file is only used for
10
+ # proper editor support, and not used on CI, formatters, nor anywhere else.
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.2.2
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2023-05-27
4
+
5
+ - Initial release
6
+
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+ gemspec
5
+
6
+ gem "standard", require: false
7
+ gem "rake", "~> 13.0"
8
+ gem "minitest", "~> 5.0"
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Cheddar Payments BV
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,17 @@
1
+ Gouda is an ActiveJob adapter used at Cheddar. It requires PostgreSQL and a recent version of Rails.
2
+
3
+ ⚠️ At the moment Gouda is only used internally at Cheddar. We do not provide support for it, nor do we accept
4
+ issues or feature requests. This is likely to change in the future.
5
+
6
+ ## Installation
7
+
8
+ ```
9
+ $ bundle add gouda
10
+ $ bundle install
11
+ $ bin/rails g gouda:install
12
+ ```
13
+
14
+ ## Usage
15
+
16
+ At the moment the Gouda UI is proprietary, so this gem only provides a "headless" implementation. We expect this to change in the future.
17
+
data/Rakefile ADDED
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
# Defines the `test` task. A single test file can be run by passing its path as
# the second command-line argument (`rake test test/some_test.rb`); otherwise
# every file matching test/**/*_test.rb is run.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"

  # Only treat the second CLI argument as a test file when it actually is a
  # file. Otherwise flags or extra task names (e.g. `rake test --trace`) would
  # be handed to the test loader as if they were test files.
  file_name = ARGV[1]
  file_name = nil unless file_name && File.file?(file_name)

  t.test_files = file_name ? [file_name] : FileList["test/**/*_test.rb"]
end
18
+
19
+ task default: :test
data/gouda.gemspec ADDED
@@ -0,0 +1,32 @@
1
+ require_relative "lib/gouda/version"
2
+
3
# Gem specification for Gouda.
Gem::Specification.new do |spec|
  spec.name = "gouda"
  spec.version = Gouda::VERSION
  spec.summary = "Job Scheduler"
  spec.description = "Job Scheduler for Rails"
  spec.authors = ["Sebastian van Hesteren", "Julik Tarkhanov"]
  spec.email = ["sebastian@cheddar.me", "me@julik.nl"]
  spec.homepage = "https://rubygems.org/gems/gouda"
  spec.license = "MIT"
  # The library uses the shorthand hash syntax (`{key:}`), which is a syntax
  # error before Ruby 3.1 - so do not advertise support for older versions.
  spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
  spec.require_paths = ["lib"]

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/cheddar-me/gouda"
  # Files on GitHub are served under /blob/<branch>/.
  # NOTE(review): assumes the default branch is called "main" - confirm.
  spec.metadata["changelog_uri"] = "https://github.com/cheddar-me/gouda/blob/main/CHANGELOG.md"

  # Package everything tracked by git except for tests/specs/features
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:test|spec|features)/}) }
  end

  spec.add_dependency "activerecord", "~> 7"
  spec.add_dependency "activesupport", "~> 7"
  spec.add_dependency "railties", "~> 7"
  spec.add_dependency "activejob", "~> 7"
  spec.add_dependency "fugit", "~> 1.10.1"

  spec.add_development_dependency "pg"
  spec.add_development_dependency "debug"
  spec.add_development_dependency "pry"
end
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
# Exposes the Gouda adapter under the ActiveJob::QueueAdapters namespace as
# GoudaAdapter. The class adds no behaviour of its own - everything is
# inherited from Gouda::Adapter.
# NOTE(review): presumably this naming is what lets ActiveJob resolve the
# adapter by the name "gouda" - confirm against ActiveJob's adapter lookup.
module ActiveJob # :nodoc:
  module QueueAdapters # :nodoc:
    class GoudaAdapter < Gouda::Adapter
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rails/generators"
4
+ require "rails/generators/active_record"
5
+
6
module Gouda
  # Rails generator which sets Gouda up inside a Rails application by copying
  # the migration that creates its tables.
  # Invoke it with +bin/rails g gouda:install+.
  class InstallGenerator < Rails::Generators::Base
    include ActiveRecord::Generators::Migration

    # Directory holding the templates used by this generator
    TEMPLATES = File.join(File.dirname(__FILE__), "templates/install")
    source_paths << TEMPLATES

    class_option :database,
      type: :string,
      aliases: %i[--db],
      desc: "The database for your migration. By default, the current environment's primary database is used."

    # Copies the single migration template, which contains all database
    # changes, into the migrations directory of the application.
    def create_migration_file
      destination = File.join(db_migrate_path, "create_gouda_tables.rb")
      migration_template "migrations/create_gouda_tables.rb.erb", destination
    end

    private

    # The "[major.minor]" suffix interpolated after ActiveRecord::Migration
    # in the migration template.
    def migration_version
      version = ActiveRecord::VERSION::STRING.to_f
      "[#{version}]"
    end
  end
end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
# The install generator writes this template to a file named
# create_gouda_tables.rb, and Rails derives the migration class name from the
# file name - so the class has to be called CreateGoudaTables.
class CreateGoudaTables < ActiveRecord::Migration<%= migration_version %>
  def change
    # All schema changes are delegated to Gouda.create_tables
    Gouda.create_tables(self)
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  module ActiveJobExtensions
    # Lets a job class declare enqueue/perform concurrency limits via
    # +gouda_control_concurrency_with+, and provides the methods which compute
    # the resulting concurrency keys for a job instance.
    module Concurrency
      extend ActiveSupport::Concern

      # Types which a configured concurrency key (or the value returned by a callable key) may have
      VALID_TYPES = [String, Symbol, Numeric, Date, Time, TrueClass, FalseClass, NilClass].freeze

      included do
        class_attribute :gouda_concurrency_config, instance_accessor: false, default: {}
      end

      class_methods do
        # Configures concurrency control for the job class.
        #
        # @param total_limit [Integer, nil] when given, is used as both the perform and the enqueue limit
        # @param perform_limit [Integer, nil] limit applied while performing
        # @param enqueue_limit [Integer, nil] limit applied while enqueueing
        # @param key [Object, #call, nil] the concurrency key, or a callable which gets evaluated
        #   in the context of the job instance to produce the key. When blank, a key is generated
        #   automatically from the job class and arguments.
        # @raise [ArgumentError] if no limit is given, or if an Integer limit other than 1 is given
        def gouda_control_concurrency_with(total_limit: nil, perform_limit: nil, enqueue_limit: nil, key: nil)
          raise ArgumentError, "Need one of total_limit, perform_limit, enqueue_limit" if [total_limit, perform_limit, enqueue_limit].all?(&:blank?)
          raise ArgumentError, "The only available limit is 1" if [total_limit, perform_limit, enqueue_limit].any? { |v| v.is_a?(Integer) && v != 1 }

          if total_limit
            perform_limit = total_limit
            enqueue_limit = total_limit
          end

          self.gouda_concurrency_config = {perform_limit:, enqueue_limit:, key:}
        end
      end

      # This method will be tried by the Gouda adapter
      #
      # @return [Object, nil] the enqueue concurrency key, or nil if no enqueue limit is configured
      def enqueue_concurrency_key
        job_config = self.class.try(:gouda_concurrency_config)
        return unless job_config
        return unless job_config[:enqueue_limit]

        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
      end

      # This method will be tried by the Gouda adapter
      #
      # @return [Object, nil] the execution concurrency key, or nil if no perform limit is configured
      def execution_concurrency_key
        job_config = self.class.try(:gouda_concurrency_config)
        return unless job_config
        return unless job_config[:perform_limit]

        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
      end

      # Generates an automatic key: the SHA1 hex digest of a subset of the serialized job
      #
      # @return [String]
      def _gouda_concurrency_extension_automatic_key_from_class_and_args
        # To have a stable serialization of an ActiveJob we can re-use the method defined by
        # ActiveJob itself. We need to have the job class name and all the arguments, and for arguments
        # which are ActiveRecords or derivatives - we want them converted into global IDs. This also avoids
        # having attributes of the argument ActiveModels contribute to the concurrency key.
        # Add "cursor_position" from job-iteration so that different offsets of the same job can run
        # concurrently.
        pertinent_job_attributes = serialize.slice("job_class", "arguments", "priority", "cursor_position")
        Digest::SHA1.hexdigest(JSON.dump(pertinent_job_attributes))
      end

      # Generates the concurrency key from the configuration. A callable key gets
      # evaluated in the context of the job instance.
      #
      # @return [Object, nil] the key, or nil if no key has been configured
      # @raise [TypeError] if the resulting key is not an instance of one of VALID_TYPES
      def _gouda_concurrency_extension_key_via_config
        key = self.class.gouda_concurrency_config[:key]
        return if key.blank?

        key = key.respond_to?(:call) ? instance_exec(&key) : key
        unless VALID_TYPES.any? { |type| key.is_a?(type) }
          # List every accepted type - a String is not the only valid key
          raise TypeError, "Concurrency key must be one of #{VALID_TYPES.join(", ")}; was a #{key.class}"
        end

        key
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  module ActiveJobExtensions
    # Makes a job raise Gouda::InterruptError when it starts performing with
    # an "interrupted_at" value set, and carries the "interrupted_at" and
    # "scheduler_key" values over when a job is deserialized.
    module Interrupts
      extend ActiveSupport::Concern

      included do
        around_perform do |job, block|
          # The @gouda_workload_interrupted_at ivar gets set on the job when the Workload
          # gets reaped as a zombie. It contains the last known heartbeat of the job, assuming
          # that it got interrupted around that particular time. The ivar gets persisted not
          # into the original Workload (which gets marked "finished") but into the new Workload
          # which the reap_zombie_workloads method enqueues.
          previously_interrupted_at = job.interrupted_at
          if previously_interrupted_at
            Gouda.logger.warn { "Job: #{job.class.name} #{job.job_id} was previously interrupted" }
            # The job is going to be re-enqueued if InterruptError is marked as retriable. We need
            # to remove `interrupted_at`, otherwise the error will get raised again once that new job
            # starts executing - which is not what we want
            job.interrupted_at = nil

            raise Gouda::InterruptError, "Job was interrupted around #{previously_interrupted_at}"
          end
          block.call
        end

        # This overrides ActiveJob::Base to also set the "interrupted_at" value, which Gouda
        # supplies in the active_job_data hash. The value is needed so that the job can correctly
        # raise an InterruptError after an interruption, and we have to do it here so that we can
        # still use ActiveJob::Base.execute, which Appsignal overloads.
        # We also need to retain the scheduler_key value so that retries which ActiveJob does for us
        # preserve that value when remarshaling the job
        def self.deserialize(active_job_data)
          job = super
          job.interrupted_at = active_job_data["interrupted_at"]
          job.scheduler_key = active_job_data["scheduler_key"]
          job
        end

        attr_accessor :interrupted_at, :scheduler_key
      end
    end
  end
end
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Acts as an ActiveJob adapter
4
+
5
class Gouda::Adapter
  prepend Gouda::BulkAdapterExtension

  ENQUEUE_ERROR_MESSAGE = <<~ERR
    The job has been rejected due to a matching enqueue concurrency key
  ERR

  # Enqueues the ActiveJob job to be performed.
  # For use by Rails; you should generally not call this directly.
  # @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
  # @return [String, nil] the ID of the inserted workload or nil if the insert did not go through
  # @raise [ActiveJob::EnqueueError] if the job got rejected (due to concurrency)
  def enqueue(active_job)
    # This is the method that gets called by ActiveJob internally (from inside the ActiveJob::Base instance
    # method). This is also when ActiveJob runs the enqueue callbacks. After this method returns
    # ActiveJob will set @successfully_enqueued inside the job to `true` as long as no
    # EnqueueError has been raised. This is, of course, incompatible with bulk-enqueueing (which we want
    # to use by default). What we can do is verify the value of the property set by our `enqueue_all` method,
    # and raise the exception based on that.
    enqueue_single_and_raise_on_rejection(active_job)
  end

  # Enqueues an ActiveJob job to be run at a specific time.
  # For use by Rails; you should generally not call this directly.
  # @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
  # @param timestamp_int [Numeric] the epoch time to perform the job
  # @return [String, nil] the ID of the inserted workload or nil if the insert did not go through
  # @raise [ActiveJob::EnqueueError] if the job got rejected (due to concurrency)
  def enqueue_at(active_job, timestamp_int)
    active_job.scheduled_at = Time.at(timestamp_int).utc
    enqueue_single_and_raise_on_rejection(active_job)
  end

  # Enqueues multiple ActiveJobs.
  # For use by Rails; you should generally not call this directly.
  # Every job gets its `successfully_enqueued` flag set; inserted jobs receive a `provider_job_id`
  # and rejected jobs receive an `enqueue_error`.
  # @param active_jobs [Array<ActiveJob::Base>] the jobs to be enqueued
  # @return [Integer] the number of jobs which were successfully sent to the queue
  def enqueue_all(active_jobs)
    t_now = Time.now.utc
    bulk_insert_attributes = active_jobs.map.with_index do |active_job, i|
      # We can't pregenerate an ID because we want to INSERT .. ON CONFLICT DO NOTHING
      # and we want Postgres to use _all_ unique indexes for it, which would include a conflict of IDs -
      # so some jobs could get silently rejected because of a duplicate ID. However unlikely, this is better prevented.
      # We can't tell Postgres to ignore conflicts on _both_ the scheduler key and the enqueue concurrency key but not on
      # the ID - it is either "all indexes" or "just one", but never "this index and that index". MERGE https://www.postgresql.org/docs/current/sql-merge.html
      # is in theory capable of solving this but let's not complicate things all too hastily, the hour is getting late
      {
        active_job_id: active_job.job_id, # Multiple jobs can have the same ID due to retries, job-iteration etc.
        scheduled_at: active_job.scheduled_at || t_now,
        scheduler_key: active_job.scheduler_key, # So that the scheduler_key gets retained between retries
        priority: active_job.priority,
        execution_concurrency_key: extract_execution_concurrency_key(active_job),
        enqueue_concurrency_key: extract_enqueue_concurrency_key(active_job),
        queue_name: active_job.queue_name || "default",
        active_job_class_name: active_job.class.to_s,
        serialized_params: active_job.serialize.except("provider_job_id"), # For when a job gets retried
        interrupted_at: active_job.interrupted_at, # So that an exception can be raised when this job gets executed
        position_in_bulk: i,
        state: "enqueued"
      }
    end

    # Filter out all the jobs with the same (and present) concurrency key and scheduler key
    bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :enqueue_concurrency_key)
    bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :scheduler_key)

    # Do a bulk insert. For jobs with an enqueue concurrency key there will be no enqueue
    # as the default for insert_all is to DO NOTHING. An exception would be nice but we are after performance here.
    # Use batches of 500 so that we do not exceed the maximum statement size or do not create a transaction for the
    # insert which times out
    inserted_ids_and_positions = bulk_insert_attributes.each_slice(500).flat_map do |chunk|
      ActiveSupport::Notifications.instrument("insert_all.gouda", {n_rows: chunk.size}) do |payload|
        rows = Gouda::Workload.insert_all(chunk, returning: [:id, :position_in_bulk])
        payload[:inserted_jobs] = rows.length
        payload[:rejected_jobs] = chunk.size - rows.length
        rows
      end
    end

    # Mark all the jobs we ended up not enqueuing as such. If these jobs are getting enqueued "one by one"
    # then their callbacks have already run, and they are already set to `successfully_enqueued = true`. If
    # they are enqueued using `enqueue_all` directly there are no guarantees, as `enqueue_all` is a fairly new
    # Rails feature. Now is the moment we need to "fish out" our bulk enqueue position and use it to detect
    # which jobs did get enqueued and which didn't. Yes, this is a bit roundabout - but otherwise we could
    # have a unique index and DO NOTHING just on the enqueue concurrency key
    inserted_ids_and_positions.each do |row|
      i = row.fetch("position_in_bulk")
      active_jobs[i].provider_job_id = row.fetch("id")
      active_jobs[i].successfully_enqueued = true
    end
    _, failed_enqueue = active_jobs.partition(&:successfully_enqueued?)
    failed_enqueue.each do |active_job|
      active_job.successfully_enqueued = false
      active_job.enqueue_error = ActiveJob::EnqueueError.new(ENQUEUE_ERROR_MESSAGE)
    end

    # And return how many jobs we _did_ enqueue
    inserted_ids_and_positions.length
  end

  # The whole point of Gouda is actually co-committing jobs with the business objects they use. The
  # changes in Rails are directed towards shifting the job enqueues into an after_commit hook, so
  # that the jobs - when they start executing - will always find the committed business-objects in
  # the database. It is their attempt at ensuring read-after-write consistency in the face of two
  # separate data stores. However, with a DB-based job queue which is using the same database
  # as the rest of the application, we actually want the opposite - if a transaction commits,
  # we want it to commit both the jobs to be done on the business objects and the business objects
  # themselves. Folding the job enqueues into the same transaction can also be a great improvement
  # to performance. Some of our jobs also imply that a job was generated as a result of a business
  # model change. With after_commit, there is a subtle race condition where your application may
  # crash between you doing the COMMIT on your transaction and the after_commit hooks executing.
  # We want to avoid this in Gouda and always have a guarantee that if our main models committed,
  # so did the jobs that use them.
  # So: tell ActiveJob that we prefer the jobs to be co-committed.
  #
  # See https://github.com/rails/rails/pull/51426
  # @return [Boolean] always false
  def enqueue_after_transaction_commit?
    false
  end

  private

  # Shared tail of `enqueue` and `enqueue_at`: sends a single job through `enqueue_all`
  # and converts the `enqueue_error` which `enqueue_all` sets on a rejected job into an exception.
  # @param active_job [ActiveJob::Base]
  # @return [String, nil] the `provider_job_id` of the job
  def enqueue_single_and_raise_on_rejection(active_job)
    enqueue_all([active_job])
    if active_job.enqueue_error
      Gouda.logger.warn { "Error #{active_job.enqueue_error.inspect} for Gouda workload (#{active_job.job_id})" }
      raise active_job.enqueue_error
    end
    active_job.provider_job_id
  end

  # Joins the non-nil components into one ":"-separated enqueue concurrency key value.
  def combine_enqueue_concurrency_key(enqueue_concurrency_key, scheduler_key, cursor_position)
    # We also include the scheduler key into the enqueue key. This is done for the following reasons:
    # Our scheduler always schedules "next subsequent" job once a job completes or fails. If we already have
    # a job scheduled for execution way in the future (say - next month), and the enqueue concurrency key is set,
    # we will need to manually remove it from the queue if we want to run its instance sooner. We could define a
    # unique index on (enqueue_concurrency_key, scheduler_key) - but that would make our enqueue concurrency keys
    # ineffective for jobs without a scheduler key, because NULLs in the scheduler_key are not considered equal
    # to each other. We could modify our index statement
    # with NULLS NOT DISTINCT - see https://www.postgresql.org/docs/current/indexes-unique.html - but that would
    # create another problem. We want NULLs to _be_ distinct for the enqueue_concurrency_key column, but we want them
    # to _not_ be distinct for the scheduler_key column (one off-scheduler job enqueued at most for the same
    # scheduler_key value). Postgres does not give us this ability, sadly. So the way to go about it is to
    # mix the scheduler key (name of the scheduled task + cron pattern and whatnot) into the enqueue_concurrency_key
    # value itself - this provides us with all the necessary properties.
    # For job-iteration we need to do the same so that we can have multiple jobs enqueued with the same key but
    # different cursor positions
    [enqueue_concurrency_key, scheduler_key, cursor_position].compact.join(":")
  end

  # Computes the value for the enqueue_concurrency_key column, or nil if the job does not supply a key.
  # Note that the value mixed into the key is only a marker ("scheduled" or "immediate") which tells
  # whether the job has a scheduler key - not the scheduler key itself.
  def extract_enqueue_concurrency_key(active_job)
    ck_value = active_job.try(:enqueue_concurrency_key)
    return unless ck_value.present?

    enqueueing_as = active_job.try(:scheduler_key).present? ? "scheduled" : "immediate"
    combine_enqueue_concurrency_key(ck_value, enqueueing_as, active_job.try(:cursor_position))
  end

  # Returns the execution concurrency key of the job, or nil if the job does not supply one.
  def extract_execution_concurrency_key(active_job)
    active_job.try(:execution_concurrency_key)
  end

  # Finds all hashes in the given attributes which have the same value of the given attribute and preserves just the
  # first one in the returned array. Hashes where the value is nil or false are always preserved.
  # We need to do that for both the scheduler key and the enqueue concurrency key.
  def filter_by_unique_not_nil_hash_key(bulk_insert_attributes, key_name)
    # This is not as nice as a combo of partition/unique_by and whatnot but it is linear time, so there.
    seen = Set.new
    bulk_insert_attributes.filter do |item|
      maybe_key = item.fetch(key_name)
      if maybe_key && seen.include?(maybe_key)
        false
      elsif maybe_key
        seen << maybe_key
        true
      else
        true
      end
    end
  end
end
data/lib/gouda/bulk.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  # Buffers all jobs which get enqueued inside the block via an adapter extended with
  # BulkAdapterExtension, and enqueues them in bulk once the block completes.
  # The buffer is stored in a thread-local variable. Nested calls share the buffer of
  # the outermost call, so the jobs get enqueued when the outermost block completes.
  #
  # If the block raises, the buffered jobs are discarded and the exception propagates.
  # The buffer is always cleared - otherwise the thread would stay in "bulk mode" and
  # all jobs it enqueues later would be buffered forever instead of being inserted.
  #
  # @yield the block inside of which enqueues get buffered
  # @return [Object] the return value of the block
  def self.in_bulk(&blk)
    # There already is an open bulk
    return yield unless Thread.current[:gouda_bulk_buffer].nil?

    Thread.current[:gouda_bulk_buffer] = []
    begin
      retval = yield
      buffered_jobs = Thread.current[:gouda_bulk_buffer]
    ensure
      # Needs to happen before the actual enqueue too, so that `enqueue_all` of the
      # adapter performs the insert instead of buffering the jobs again
      Thread.current[:gouda_bulk_buffer] = nil
    end
    enqueue_jobs_via_their_adapters(buffered_jobs)
    retval
  end

  # Groups the jobs by the queue adapter of their class and hands every group to its adapter,
  # using `enqueue_all` if the adapter responds to it and `enqueue` per job otherwise.
  # Nil entries in the given array are skipped.
  #
  # This method exists in edge Rails so probably can be replaced later:
  # https://github.com/rails/rails/commit/9b62f88a2fde0d2bf8c4f6e3bcd06ecba7ca9d8d
  #
  # @param active_jobs [Array] the jobs to enqueue
  def self.enqueue_jobs_via_their_adapters(active_jobs)
    jobs_per_adapter = active_jobs.compact.group_by { |aj| aj.class.queue_adapter }
    jobs_per_adapter.each_pair do |adapter, jobs_of_adapter|
      if adapter.respond_to?(:enqueue_all)
        adapter.enqueue_all(jobs_of_adapter)
      else
        jobs_of_adapter.each { |aj| adapter.enqueue(aj) }
      end
    end
  end

  # Gets prepended to an adapter. While a bulk is open on the current thread the jobs
  # are appended to the buffer (and the given jobs are returned) instead of being enqueued;
  # otherwise the call goes to the `enqueue_all` of the adapter itself.
  module BulkAdapterExtension
    def enqueue_all(active_jobs)
      if Thread.current[:gouda_bulk_buffer]
        Thread.current[:gouda_bulk_buffer].append(*active_jobs)
        active_jobs
      else
        super
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
# ActiveRecord model stored in the "gouda_job_fuses" table. The
# active_job_class_name column is used as the primary key instead of an id.
class Gouda::JobFuse < ActiveRecord::Base
  self.table_name = "gouda_job_fuses"
  self.primary_key = :active_job_class_name
end
@@ -0,0 +1,5 @@
1
# Migration template: the table definitions themselves live in
# Gouda.create_tables, which receives the migration instance.
class CreateGoudaTables < ActiveRecord::Migration<%= migration_version %>
  def change
    Gouda.create_tables(self)
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Gouda
  # Constraint which matches jobs in any queue
  module AnyQueue
    # @return [String] an SQL condition which is always true
    def self.to_sql
      "1=1"
    end
  end

  # Constraint which only matches jobs in the listed queues
  class OnlyQueuesConstraint < Struct.new(:queue_names)
    # @return [String] a sanitized SQL condition on the queue_name column
    def to_sql
      # "IN ()" is not valid SQL. No queues allowed means nothing can match.
      return "1=0" if queue_names.empty?

      placeholders = (["?"] * queue_names.length).join(",")
      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
        queue_name IN (#{placeholders})
      SQL
    end
  end

  # Constraint which matches jobs in all queues except the listed ones
  class ExceptQueueConstraint < Struct.new(:queue_names)
    # @return [String] a sanitized SQL condition on the queue_name column
    def to_sql
      # "NOT IN ()" is not valid SQL. No queues excluded means everything matches.
      return "1=1" if queue_names.empty?

      placeholders = (["?"] * queue_names.length).join(",")
      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
        queue_name NOT IN (#{placeholders})
      SQL
    end
  end

  # Converts a queue string into a constraint object which responds to +to_sql+.
  #
  # @param constraint_str_from_envvar [String, nil] the queue string, see {queue_parser}
  # @return [OnlyQueuesConstraint, ExceptQueueConstraint, AnyQueue]
  def self.parse_queue_constraint(constraint_str_from_envvar)
    parsed = queue_parser(constraint_str_from_envvar)
    if parsed[:include]
      OnlyQueuesConstraint.new(parsed[:include])
    elsif parsed[:exclude]
      ExceptQueueConstraint.new(parsed[:exclude])
    else
      AnyQueue
    end
  end

  # Parse a string representing a group of queues into a more readable data
  # structure. The queue names are separated by commas. A leading "-" turns the
  # list into an exclusion list, a leading "+" is accepted and ignored. A nil or
  # blank string is treated as "*".
  # @param string [String, nil] Queue string
  # @return [Hash]
  #   How to match a given queue. It can have the following keys and values:
  #   - +{ all: true }+ indicates that all queues match.
  #   - +{ exclude: Array<String> }+ indicates the listed queue names should
  #     not match.
  #   - +{ include: Array<String> }+ indicates the listed queue names should
  #     match.
  # @example
  #   Gouda.queue_parser('-queue1,queue2')
  #   => { exclude: [ 'queue1', 'queue2' ] }
  def self.queue_parser(string)
    # Same as `string.presence || "*"` but without requiring ActiveSupport
    string = "*" if string.nil? || string.match?(/\A[[:space:]]*\z/)

    case string[0]
    when "-"
      exclude_queues = true
      string = string[1..]
    when "+"
      string = string[1..]
    end

    queues = string.split(",").map(&:strip)

    if queues.include?("*")
      {all: true}
    elsif exclude_queues
      {exclude: queues}
    else
      {include: queues}
    end
  end
end