gouda 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +36 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +10 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +6 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +17 -0
- data/Rakefile +19 -0
- data/gouda.gemspec +32 -0
- data/lib/active_job/queue_adapters/gouda_adapter.rb +8 -0
- data/lib/generators/gouda/install_generator.rb +29 -0
- data/lib/generators/gouda/templates/install/migrations/create_gouda_tables.rb.erb +7 -0
- data/lib/gouda/active_job_extensions/concurrency.rb +70 -0
- data/lib/gouda/active_job_extensions/interrupts.rb +46 -0
- data/lib/gouda/adapter.rb +183 -0
- data/lib/gouda/bulk.rb +39 -0
- data/lib/gouda/job_fuse.rb +6 -0
- data/lib/gouda/migrations/create_gouda_tables.rb.erb +5 -0
- data/lib/gouda/queue_constraints.rb +73 -0
- data/lib/gouda/railtie.rb +57 -0
- data/lib/gouda/scheduler.rb +108 -0
- data/lib/gouda/version.rb +5 -0
- data/lib/gouda/worker.rb +188 -0
- data/lib/gouda/workload.rb +214 -0
- data/lib/gouda.rb +116 -0
- metadata +186 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f36a7c7b361cb8008f34b4a178c6bf1eb2a358e95502d0550fc14fef78ef5ed7
|
4
|
+
data.tar.gz: 6d2bc5778d6284f212189f4311e575393c5d9c1da13e4618f0b9a5d6ace9e954
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 71a324a3bae3ee17c2ed547915b1cc841442059c30f0c85138f1b220a7c9dc2b307e6d0363fba12069be0b893ce74e1cefe81f5d3fc1ceec427b56a7331be256
|
7
|
+
data.tar.gz: 8e9d521dc92ccd14175611e21535ed9e5b71610e74c22fd4038e11a16c104b9fc73d93431e1e9aae38bb37e739d91447f3739ddd8099083e4d6e821db1426ea5
|
@@ -0,0 +1,36 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
- push
|
5
|
+
|
6
|
+
env:
|
7
|
+
BUNDLE_PATH: vendor/bundle
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
name: Tests
|
12
|
+
runs-on: ubuntu-latest
|
13
|
+
services:
|
14
|
+
postgres:
|
15
|
+
image: postgres:15-alpine
|
16
|
+
env:
|
17
|
+
POSTGRES_PASSWORD: postgres
|
18
|
+
ports:
|
19
|
+
- 5432:5432
|
20
|
+
options: >-
|
21
|
+
--health-cmd pg_isready
|
22
|
+
--health-interval 100ms
|
23
|
+
--health-timeout 1s
|
24
|
+
--health-retries 100
|
25
|
+
|
26
|
+
if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
|
27
|
+
steps:
|
28
|
+
- name: Checkout
|
29
|
+
uses: actions/checkout@v4
|
30
|
+
- name: Setup Ruby
|
31
|
+
uses: ruby/setup-ruby@v1
|
32
|
+
with:
|
33
|
+
ruby-version: '3.2'
|
34
|
+
bundler-cache: true
|
35
|
+
- name: "Tests and Lint"
|
36
|
+
run: bundle exec rake
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.2.2
|
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Cheddar Payments BV
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
Gouda is an ActiveJob adapter used at Cheddar. It requires PostgreSQL and a recent version of Rails.
|
2
|
+
|
3
|
+
⚠️ At the moment Gouda is only used internally at Cheddar. We do not provide support for it, nor do we accept
|
4
|
+
issues or feature requests. This is likely to change in the future.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
```
|
9
|
+
$ bundle add gouda
|
10
|
+
$ bundle install
|
11
|
+
$ bin/rails g gouda:install
|
12
|
+
```
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
At the moment the Gouda UI is proprietary, so this gem only provides a "headless" implementation. We expect this to change in the future.
|
17
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler/gem_tasks"
|
4
|
+
require "rake/testtask"
|
5
|
+
|
6
|
+
Rake::TestTask.new(:test) do |t|
|
7
|
+
t.libs << "test"
|
8
|
+
t.libs << "lib"
|
9
|
+
|
10
|
+
file_name = ARGV[1]
|
11
|
+
|
12
|
+
t.test_files = if file_name
|
13
|
+
[file_name]
|
14
|
+
else
|
15
|
+
FileList["test/**/*_test.rb"]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
task default: :test
|
data/gouda.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
require_relative "lib/gouda/version"
|
2
|
+
|
3
|
+
# Gem specification for Gouda. The version is read from Gouda::VERSION and the
# packaged file list is taken from `git ls-files`, minus test/spec/features files.
Gem::Specification.new do |spec|
  spec.name = "gouda"
  spec.version = Gouda::VERSION
  spec.summary = "Job Scheduler"
  spec.description = "Job Scheduler for Rails"
  spec.authors = ["Sebastian van Hesteren", "Julik Tarkhanov"]
  spec.email = ["sebastian@cheddar.me", "me@julik.nl"]
  spec.homepage = "https://rubygems.org/gems/gouda"
  spec.license = "MIT"
  # The library code uses shorthand hash literals (`{key:}`), which only parse on Ruby 3.1 and newer.
  spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
  spec.require_paths = ["lib"]

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/cheddar-me/gouda"
  # GitHub serves repository files under /blob/<branch>/.
  # NOTE(review): assumes the default branch is `main` - confirm.
  spec.metadata["changelog_uri"] = "https://github.com/cheddar-me/gouda/blob/main/CHANGELOG.md"

  # Package everything tracked by git except the test suites.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
  end

  spec.add_dependency "activerecord", "~> 7"
  spec.add_dependency "activesupport", "~> 7"
  spec.add_dependency "railties", "~> 7"
  spec.add_dependency "activejob", "~> 7"
  spec.add_dependency "fugit", "~> 1.10.1"

  spec.add_development_dependency "pg"
  spec.add_development_dependency "debug"
  spec.add_development_dependency "pry"
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rails/generators"
|
4
|
+
require "rails/generators/active_record"
|
5
|
+
|
6
|
+
module Gouda
|
7
|
+
# Rails generator used for setting up Gouda in a Rails application.
|
8
|
+
# Run it with +bin/rails g gouda:install+ in your console.
|
9
|
+
|
10
|
+
class InstallGenerator < Rails::Generators::Base
|
11
|
+
include ActiveRecord::Generators::Migration
|
12
|
+
|
13
|
+
TEMPLATES = File.join(File.dirname(__FILE__), "templates/install")
|
14
|
+
source_paths << TEMPLATES
|
15
|
+
|
16
|
+
class_option :database, type: :string, aliases: %i[--db], desc: "The database for your migration. By default, the current environment's primary database is used."
|
17
|
+
|
18
|
+
# Generates monolithic migration file that contains all database changes.
|
19
|
+
def create_migration_file
|
20
|
+
migration_template "migrations/create_gouda_tables.rb.erb", File.join(db_migrate_path, "create_gouda_tables.rb")
|
21
|
+
end
|
22
|
+
|
23
|
+
private
|
24
|
+
|
25
|
+
def migration_version
|
26
|
+
"[#{ActiveRecord::VERSION::STRING.to_f}]"
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gouda
  module ActiveJobExtensions
    # Mixin for ActiveJob classes which lets a job declare concurrency limits.
    # It stores the configuration on the job class and exposes the two key methods
    # (+enqueue_concurrency_key+ and +execution_concurrency_key+) which the Gouda adapter tries.
    module Concurrency
      extend ActiveSupport::Concern

      # Types which a user-supplied concurrency key (or the result of a callable key) may have.
      VALID_TYPES = [String, Symbol, Numeric, Date, Time, TrueClass, FalseClass, NilClass].freeze

      included do
        class_attribute :gouda_concurrency_config, instance_accessor: false, default: {}
      end

      class_methods do
        # Configures the concurrency limits for the job class.
        #
        # @param total_limit [Integer, nil] when given, used for both the perform and the enqueue limit
        # @param perform_limit [Integer, nil] limit applied when performing
        # @param enqueue_limit [Integer, nil] limit applied when enqueueing
        # @param key [Object, #call, nil] the concurrency key, or a callable evaluated in the context
        #   of the job instance. When blank, a key gets derived from the job class and arguments.
        # @raise [ArgumentError] if no limit is given, or if an Integer limit other than 1 is given
        def gouda_control_concurrency_with(total_limit: nil, perform_limit: nil, enqueue_limit: nil, key: nil)
          raise ArgumentError, "Need one of total_limit, perform_limit, enqueue_limit" if [total_limit, perform_limit, enqueue_limit].all?(&:blank?)
          raise ArgumentError, "The only available limit is 1" if [total_limit, perform_limit, enqueue_limit].any? { |v| v.is_a?(Integer) && v != 1 }

          # total_limit is a shortcut which overrides both of the specific limits
          if total_limit
            perform_limit = total_limit
            enqueue_limit = total_limit
          end

          self.gouda_concurrency_config = {perform_limit:, enqueue_limit:, key:}
        end
      end

      # This method will be tried by the Gouda adapter.
      #
      # @return [Object, nil] the concurrency key, or nil if the job class has no enqueue limit configured
      def enqueue_concurrency_key
        job_config = self.class.try(:gouda_concurrency_config)
        return unless job_config
        return unless job_config[:enqueue_limit]

        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
      end

      # This method will be tried by the Gouda adapter.
      #
      # @return [Object, nil] the concurrency key, or nil if the job class has no perform limit configured
      def execution_concurrency_key
        job_config = self.class.try(:gouda_concurrency_config)
        return unless job_config
        return unless job_config[:perform_limit]

        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
      end

      # Generates an automatic key: the SHA1 hex digest of the JSON dump of selected serialized job attributes.
      #
      # @return [String]
      def _gouda_concurrency_extension_automatic_key_from_class_and_args
        # To have a stable serialization of an ActiveJob we can re-use the method defined by
        # ActiveJob itself. We need to have the job class name and all the arguments, and for arguments
        # which are ActiveRecords or derivatives - we want them converted into global IDs. This also avoids
        # having attributes of the argument ActiveModels contribute to the concurrency key.
        # Add "cursor_position" from job-iteration so that different offsets of the same job can run
        # concurrently.
        pertinent_job_attributes = serialize.slice("job_class", "arguments", "priority", "cursor_position")
        Digest::SHA1.hexdigest(JSON.dump(pertinent_job_attributes))
      end

      # Generates the concurrency key from the configuration. A callable key gets evaluated
      # in the context of the job instance.
      #
      # @return [Object, nil] the configured key, or nil if no key has been configured
      # @raise [TypeError] if the resulting key is not an instance of one of VALID_TYPES
      def _gouda_concurrency_extension_key_via_config
        key = self.class.gouda_concurrency_config[:key]
        return if key.blank?

        key = key.respond_to?(:call) ? instance_exec(&key) : key
        # The message lists every accepted type - the check is not limited to Strings
        raise TypeError, "Concurrency key must be one of #{VALID_TYPES.join(", ")}; was a #{key.class}" unless VALID_TYPES.any? { |type| key.is_a?(type) }

        key
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gouda
|
4
|
+
module ActiveJobExtensions
|
5
|
+
module Interrupts
|
6
|
+
extend ActiveSupport::Concern
|
7
|
+
|
8
|
+
included do
|
9
|
+
around_perform do |job, block|
|
10
|
+
# The @gouda_workload_interrupted_at ivar gets set on the job when the Workload
|
11
|
+
# gets reaped as a zombie. It contains the last known heartbeat of the job, assuming
|
12
|
+
# that it got interrupted around that particular time. The ivar gets persisted not
|
13
|
+
# into the original Workload (which gets marked "finished") but into the new Workload
|
14
|
+
# which the reap_zombie_workloads method enqueues.
|
15
|
+
if job.interrupted_at
|
16
|
+
Gouda.logger.warn { "Job: #{job.class.name} #{job.job_id} was previously interrupted" }
|
17
|
+
# The job is going to be re-enqueued if InterruptError is marked as retriable. We need
|
18
|
+
# to remove `interrupted_at` otherwise it will get raised again once that new job
|
19
|
+
# starts executing - which is not what we want
|
20
|
+
interrupted_error_time = job.interrupted_at
|
21
|
+
job.interrupted_at = nil
|
22
|
+
|
23
|
+
raise Gouda::InterruptError, "Job was interrupted around #{interrupted_error_time}"
|
24
|
+
end
|
25
|
+
block.call
|
26
|
+
end
|
27
|
+
|
28
|
+
# This overrides ActiveJob::Base to also set the "interrupted_at" value, which Gouda
|
29
|
+
# supplies in the active_job_data hash. The value is needed so that the job can correctly
|
30
|
+
# raise an InterruptError after an interruption, and we have to do it here so that we can
|
31
|
+
# still use ActiveJob::Base.execute, which Appsignal overloads.
|
32
|
+
# We also need to retain the scheduler_key value so that retries which ActiveJob does for us
|
33
|
+
# preserve that value when remarshaling the job
|
34
|
+
def self.deserialize(active_job_data)
|
35
|
+
super.tap do |job|
|
36
|
+
job.interrupted_at = active_job_data["interrupted_at"]
|
37
|
+
job.scheduler_key = active_job_data["scheduler_key"]
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
attr_accessor :interrupted_at
|
42
|
+
attr_accessor :scheduler_key
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Acts as an ActiveJob adapter
|
4
|
+
|
5
|
+
class Gouda::Adapter
|
6
|
+
prepend Gouda::BulkAdapterExtension
|
7
|
+
|
8
|
+
ENQUEUE_ERROR_MESSAGE = <<~ERR
|
9
|
+
The job has been rejected due to a matching enqueue concurrency key
|
10
|
+
ERR
|
11
|
+
|
12
|
+
# Enqueues the ActiveJob job to be performed.
|
13
|
+
# For use by Rails; you should generally not call this directly.
|
14
|
+
# @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
|
15
|
+
# @return [String, nil] the ID of the inserted workload or nil if the insert did not go through (due to concurrency)
|
16
|
+
def enqueue(active_job)
|
17
|
+
# This is the method that gets called by ActiveJob internally (from inside the ActiveJob::Base instance
|
18
|
+
# method). This is also when ActiveJob runs the enqueue callbacks. After this method returns
|
19
|
+
# ActiveJob will set @successfully_enqueued inside the job to `true` as long as no
|
20
|
+
# EnqueueError has been raised. This is, of course, incompatible with bulk-enqueueing (which we want)
|
21
|
+
# to use by default. What we can do is verify the value of the property set by our `enqueue_all` method,
|
22
|
+
# and raise the exception based on that.
|
23
|
+
enqueue_all([active_job])
|
24
|
+
if active_job.enqueue_error
|
25
|
+
Gouda.logger.warn { "Error #{active_job.enqueue_error.inspect} for Gouda workload (#{active_job.job_id})" }
|
26
|
+
raise active_job.enqueue_error
|
27
|
+
end
|
28
|
+
active_job.provider_job_id
|
29
|
+
end
|
30
|
+
|
31
|
+
# Enqueues an ActiveJob job to be run at a specific time.
|
32
|
+
# For use by Rails; you should generally not call this directly.
|
33
|
+
# @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
|
34
|
+
# @param timestamp_int [Numeric] the epoch time (in seconds) at which to perform the job
|
35
|
+
# @return [String, nil] the ID of the inserted workload or nil if the insert did not go through (due to concurrency)
|
36
|
+
def enqueue_at(active_job, timestamp_int)
|
37
|
+
active_job.scheduled_at = Time.at(timestamp_int).utc
|
38
|
+
enqueue_all([active_job])
|
39
|
+
if active_job.enqueue_error
|
40
|
+
Gouda.logger.warn { "Error #{active_job.enqueue_error.inspect} for Gouda workload (#{active_job.job_id})" }
|
41
|
+
raise active_job.enqueue_error
|
42
|
+
end
|
43
|
+
active_job.provider_job_id
|
44
|
+
end
|
45
|
+
|
46
|
+
# Enqueues multiple ActiveJobs.
|
47
|
+
# For use by Rails; you should generally not call this directly.
|
48
|
+
# @param active_jobs [Array<ActiveJob::Base>] the jobs to be enqueued
|
49
|
+
# Jobs which did not get inserted have their +enqueue_error+ set and +successfully_enqueued+ set to false.
|
50
|
+
# @return [Integer] the number of jobs which were successfully sent to the queue
|
51
|
+
def enqueue_all(active_jobs)
|
52
|
+
t_now = Time.now.utc
|
53
|
+
bulk_insert_attributes = active_jobs.map.with_index do |active_job, i|
|
54
|
+
# We can't pregenerate an ID because we want to INSERT .. ON CONFLICT DO NOTHING
|
55
|
+
# and we want Postgres to use _all_ unique indexes for it, which would include a conflict of IDs -
|
56
|
+
# so some jobs could get silently rejected because of a duplicate ID. However unlikely this can better be prevented.
|
57
|
+
# We can't tell Postgres to ignore conflicts on _both_ the scheduler key and the enqueue concurrency key but not on
|
58
|
+
# the ID - it is either "all indexes" or "just one", but never "this index and that index". MERGE https://www.postgresql.org/docs/current/sql-merge.html
|
59
|
+
# is in theory capable of solving this but let's not complicate things all too hastily, the hour is getting late
|
60
|
+
{
|
61
|
+
active_job_id: active_job.job_id, # Multiple jobs can have the same ID due to retries, job-iteration etc.
|
62
|
+
scheduled_at: active_job.scheduled_at || t_now,
|
63
|
+
scheduler_key: active_job.scheduler_key, # So that the scheduler_key gets retained between retries
|
64
|
+
priority: active_job.priority,
|
65
|
+
execution_concurrency_key: extract_execution_concurrency_key(active_job),
|
66
|
+
enqueue_concurrency_key: extract_enqueue_concurrency_key(active_job),
|
67
|
+
queue_name: active_job.queue_name || "default",
|
68
|
+
active_job_class_name: active_job.class.to_s,
|
69
|
+
serialized_params: active_job.serialize.except("provider_job_id"), # For when a job which gets retried
|
70
|
+
interrupted_at: active_job.interrupted_at, # So that an exception can be raised when this job gets executed
|
71
|
+
position_in_bulk: i,
|
72
|
+
state: "enqueued"
|
73
|
+
}
|
74
|
+
end
|
75
|
+
|
76
|
+
# Filter out all the jobs with the same (and present) concurrency key and scheduler key
|
77
|
+
bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :enqueue_concurrency_key)
|
78
|
+
bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :scheduler_key)
|
79
|
+
|
80
|
+
# Do a bulk insert. For jobs with an enqueue concurrency key there will be no enqueue
|
81
|
+
# as the default for insert_all is to DO NOTHING. An exception would be nice but we are after performance here.
|
82
|
+
# Use batches of 500 so that we do not exceed the maximum statement size or do not create a transaction for the
|
83
|
+
# insert which times out
|
84
|
+
inserted_ids_and_positions = bulk_insert_attributes.each_slice(500).flat_map do |chunk|
|
85
|
+
ActiveSupport::Notifications.instrument("insert_all.gouda", {n_rows: chunk.size}) do |payload|
|
86
|
+
rows = Gouda::Workload.insert_all(chunk, returning: [:id, :position_in_bulk])
|
87
|
+
payload[:inserted_jobs] = rows.length
|
88
|
+
payload[:rejected_jobs] = chunk.size - rows.length
|
89
|
+
rows
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Mark all the jobs we ended up not enqueuing as such. If these jobs are getting enqueued "one by one"
|
94
|
+
# then their callbacks have already run, and they are already set to `successfully_enqueued = true`. If
|
95
|
+
# they are enqueued using `enqueue_all` directly there are no guarantees, as `enqueue_all` is a fairly new
|
96
|
+
# Rails feature. Now is the moment we need to "fish out" our bulk enqueue position and use it to detect
|
97
|
+
# which jobs did get enqueued and which didn't. Yes, this is a bit roundabout - but otherwise we could
|
98
|
+
# have a unique index and DO NOTHING just on the enqueue concurrency key
|
99
|
+
inserted_ids_and_positions.each do |row|
|
100
|
+
i = row.fetch("position_in_bulk")
|
101
|
+
active_jobs[i].provider_job_id = row.fetch("id")
|
102
|
+
active_jobs[i].successfully_enqueued = true
|
103
|
+
end
|
104
|
+
_, failed_enqueue = active_jobs.partition(&:successfully_enqueued?)
|
105
|
+
failed_enqueue.each do |active_job|
|
106
|
+
active_job.successfully_enqueued = false
|
107
|
+
active_job.enqueue_error = ActiveJob::EnqueueError.new(ENQUEUE_ERROR_MESSAGE)
|
108
|
+
end
|
109
|
+
|
110
|
+
# And return how many jobs we _did_ enqueue
|
111
|
+
inserted_ids_and_positions.length
|
112
|
+
end
|
113
|
+
|
114
|
+
# The whole point of Gouda is actually co-committing jobs with the business objects they use. The
|
115
|
+
# changes in Rails are directed towards shifting the job enqueues into an after_commit hook, so
|
116
|
+
# that the jobs - when they start executing - will always find the committed business-objects in
|
117
|
+
# the database. It is their attempt at ensuring read-after-write consistency in the face of two
|
118
|
+
# separate data stores. However, with a DB-based job queue which is using the same database
|
119
|
+
# as the rest of the application, we actually want the opposite - if a transaction commits,
|
120
|
+
# we want it to commit both the jobs to be done on the business objects and the business objects
|
121
|
+
# themselves. Folding the job enqueues into the same transaction can also be a great improvement
|
122
|
+
# to performance. Some of our jobs also imply that a job was generated as a result of a business
|
123
|
+
# model change. With after_commit, there is a subtle race condition where your application may
|
124
|
+
# crash between you doing the COMMIT on your transaction and the after_commit hooks executing.
|
125
|
+
# We want to avoid this in Gouda and always have a guarantee that if our main models committed,
|
126
|
+
# so did the jobs that use them.
|
127
|
+
# So: tell ActiveJob that we prefer the jobs to be co-committed.
|
128
|
+
#
|
129
|
+
# See https://github.com/rails/rails/pull/51426
|
130
|
+
def enqueue_after_transaction_commit?
|
131
|
+
false
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
def combine_enqueue_concurrency_key(enqueue_concurrency_key, scheduler_key, cursor_position)
|
137
|
+
# We also include the scheduler key into the enqueue key. This is done for the following reasons:
|
138
|
+
# Our scheduler always schedules "next subsequent" job once a job completes or fails. If we already have
|
139
|
+
# a job scheduled for execution way in the future (say - next month), and the enqueue concurrency key is set,
|
140
|
+
# we will need to manually remove it from the queue if we want to run its instance sooner. We could define a
|
141
|
+
# unique index on (enqueue_concurrency_key, scheduler_key) - but that would make our enqueue concurrency keys ineffective,
|
142
|
+
# because NULLs in the scheduler_key are not considered equal to each other. We could modify our index statement
|
143
|
+
# with NULLS NOT DISTINCT - see https://www.postgresql.org/docs/current/indexes-unique.html - but that would
|
144
|
+
# create another problem. We want NULLs to _be_ distinct for the enqueue_concurrency_key column, but we want them
|
145
|
+
# to _not_ be distinct for the scheduler_key column (one off-scheduler job enqueued at most for the same
|
146
|
+
# scheduler_key value). Postgres does not give us this ability, sadly. So the way to go about it is to
|
147
|
+
# mix the scheduler key (name of the scheduled task + cron pattern and whatnot) into the enqueue_concurrency_key
|
148
|
+
# value itself - this provides us with all the necessary properties.
|
149
|
+
# For job-iteration we need to do the same so that we can have multiple jobs enqueued with the same key but
|
150
|
+
# different cursor positions
|
151
|
+
[enqueue_concurrency_key, scheduler_key, cursor_position].compact.join(":")
|
152
|
+
end
|
153
|
+
|
154
|
+
def extract_enqueue_concurrency_key(active_job)
|
155
|
+
ck_value = active_job.try(:enqueue_concurrency_key)
|
156
|
+
return unless ck_value.present?
|
157
|
+
|
158
|
+
enqueueing_as = active_job.try(:scheduler_key).present? ? "scheduled" : "immediate"
|
159
|
+
combine_enqueue_concurrency_key(ck_value, enqueueing_as, active_job.try(:cursor_position))
|
160
|
+
end
|
161
|
+
|
162
|
+
def extract_execution_concurrency_key(active_job)
|
163
|
+
active_job.try(:execution_concurrency_key)
|
164
|
+
end
|
165
|
+
|
166
|
+
# Finds all hashes in the given attributes which have the same value of the given attribute and preserves just one
|
167
|
+
# in the returned array. We need to do that for both the scheduler key and the enqueue concurrency key.
|
168
|
+
def filter_by_unique_not_nil_hash_key(bulk_insert_attributes, key_name)
|
169
|
+
# This is not as nice as a combo of partition/unique_by and whatnot but it is linear time, so there.
|
170
|
+
seen = Set.new
|
171
|
+
bulk_insert_attributes.filter do |item|
|
172
|
+
maybe_key = item.fetch(key_name)
|
173
|
+
if maybe_key && seen.include?(maybe_key)
|
174
|
+
false
|
175
|
+
elsif maybe_key
|
176
|
+
seen << maybe_key
|
177
|
+
true
|
178
|
+
else
|
179
|
+
true
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
data/lib/gouda/bulk.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gouda
  # Runs the given block and buffers all the jobs which get enqueued (on the current thread)
  # through adapters that prepend BulkAdapterExtension. Once the block returns, the buffered
  # jobs get enqueued in bulk via their adapters. Nested calls share the buffer of the
  # outermost call, which is the one that flushes.
  #
  # If the block raises, the buffered jobs are discarded and the buffer is reset, so that
  # subsequent enqueues on this thread do not end up in a stale buffer which never gets flushed.
  #
  # @yield the block inside of which enqueues get buffered
  # @return [Object] the return value of the block
  def self.in_bulk(&blk)
    # There already is an open bulk - the outermost call will do the flushing
    return yield unless Thread.current[:gouda_bulk_buffer].nil?

    Thread.current[:gouda_bulk_buffer] = []
    begin
      retval = yield
      buffered_jobs = Thread.current[:gouda_bulk_buffer]
    ensure
      # Must be reset before flushing (so that the adapters actually enqueue instead
      # of buffering again) and also when the block raises.
      Thread.current[:gouda_bulk_buffer] = nil
    end

    enqueue_jobs_via_their_adapters(buffered_jobs)
    retval
  end

  # Groups the jobs by the queue adapter of their class and hands every group to its adapter,
  # using +enqueue_all+ where the adapter responds to it and one-by-one +enqueue+ otherwise.
  # nil entries are skipped.
  #
  # This method exists in edge Rails so probably can be replaced later:
  # https://github.com/rails/rails/commit/9b62f88a2fde0d2bf8c4f6e3bcd06ecba7ca9d8d
  #
  # @param active_jobs [Array] the jobs to enqueue
  def self.enqueue_jobs_via_their_adapters(active_jobs)
    jobs_per_adapter = active_jobs.compact.group_by { |aj| aj.class.queue_adapter }
    jobs_per_adapter.each_pair do |adapter, jobs_for_adapter|
      if adapter.respond_to?(:enqueue_all)
        adapter.enqueue_all(jobs_for_adapter)
      else
        jobs_for_adapter.each { |aj| adapter.enqueue(aj) }
      end
    end
  end

  # Gets prepended to an adapter. While a bulk is open on the current thread (see Gouda.in_bulk)
  # the jobs are appended to the bulk buffer instead of being enqueued.
  module BulkAdapterExtension
    # @param active_jobs [Array] the jobs to enqueue
    # @return the given jobs when they got buffered, otherwise whatever the adapter's own +enqueue_all+ returns
    def enqueue_all(active_jobs)
      if Thread.current[:gouda_bulk_buffer]
        Thread.current[:gouda_bulk_buffer].append(*active_jobs)
        active_jobs
      else
        super
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Gouda
  # Constraint which matches workloads on any queue.
  module AnyQueue
    # @return [String] an SQL condition which is always true
    def self.to_sql
      "1=1"
    end
  end

  # Constraint which only matches workloads on the given queues.
  class OnlyQueuesConstraint < Struct.new(:queue_names)
    # @return [String] an SQL condition, with the queue names sanitized via ActiveRecord
    def to_sql
      placeholders = (["?"] * queue_names.length).join(",")
      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
        queue_name IN (#{placeholders})
      SQL
    end
  end

  # Constraint which matches workloads on all queues except the given ones.
  class ExceptQueueConstraint < Struct.new(:queue_names)
    # @return [String] an SQL condition, with the queue names sanitized via ActiveRecord
    def to_sql
      placeholders = (["?"] * queue_names.length).join(",")
      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
        queue_name NOT IN (#{placeholders})
      SQL
    end
  end

  # Converts a queue constraint string into an object responding to +to_sql+.
  #
  # @param constraint_str_from_envvar [String, nil] a queue string, see {queue_parser}
  # @return [OnlyQueuesConstraint, ExceptQueueConstraint, AnyQueue]
  def self.parse_queue_constraint(constraint_str_from_envvar)
    parsed = queue_parser(constraint_str_from_envvar)
    if parsed[:include]
      OnlyQueuesConstraint.new(parsed[:include])
    elsif parsed[:exclude]
      ExceptQueueConstraint.new(parsed[:exclude])
    else
      AnyQueue
    end
  end

  # Parse a string representing a group of queues into a more readable data
  # structure. A nil or blank string means "all queues". Empty queue names (as in "a,,b")
  # are dropped, and a string which names no queues at all (such as "-" or "+") also means
  # "all queues" - otherwise the constraints would generate an empty, invalid SQL list.
  # @param string [String, nil] Queue string
  # @return [Hash]
  #   How to match a given queue. It can have the following keys and values:
  #   - +{ all: true }+ indicates that all queues match.
  #   - +{ exclude: Array<String> }+ indicates the listed queue names should
  #     not match.
  #   - +{ include: Array<String> }+ indicates the listed queue names should
  #     match.
  # @example
  #   Gouda.queue_parser('-queue1,queue2')
  #   => { exclude: [ 'queue1', 'queue2' ] }
  def self.queue_parser(string)
    string = string.to_s
    string = "*" if string.match?(/\A[[:space:]]*\z/)

    # A leading "-" turns the list into an exclusion list, a leading "+" is an explicit inclusion
    exclude_queues = false
    case string[0]
    when "-"
      exclude_queues = true
      string = string[1..]
    when "+"
      string = string[1..]
    end

    queues = string.split(",").map(&:strip).reject(&:empty?)

    if queues.empty? || queues.include?("*")
      {all: true}
    elsif exclude_queues
      {exclude: queues}
    else
      {include: queues}
    end
  end
end
|