gouda 0.1.0
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +36 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +10 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +6 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +17 -0
- data/Rakefile +19 -0
- data/gouda.gemspec +32 -0
- data/lib/active_job/queue_adapters/gouda_adapter.rb +8 -0
- data/lib/generators/gouda/install_generator.rb +29 -0
- data/lib/generators/gouda/templates/install/migrations/create_gouda_tables.rb.erb +7 -0
- data/lib/gouda/active_job_extensions/concurrency.rb +70 -0
- data/lib/gouda/active_job_extensions/interrupts.rb +46 -0
- data/lib/gouda/adapter.rb +183 -0
- data/lib/gouda/bulk.rb +39 -0
- data/lib/gouda/job_fuse.rb +6 -0
- data/lib/gouda/migrations/create_gouda_tables.rb.erb +5 -0
- data/lib/gouda/queue_constraints.rb +73 -0
- data/lib/gouda/railtie.rb +57 -0
- data/lib/gouda/scheduler.rb +108 -0
- data/lib/gouda/version.rb +5 -0
- data/lib/gouda/worker.rb +188 -0
- data/lib/gouda/workload.rb +214 -0
- data/lib/gouda.rb +116 -0
- metadata +186 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: f36a7c7b361cb8008f34b4a178c6bf1eb2a358e95502d0550fc14fef78ef5ed7
+  data.tar.gz: 6d2bc5778d6284f212189f4311e575393c5d9c1da13e4618f0b9a5d6ace9e954
+SHA512:
+  metadata.gz: 71a324a3bae3ee17c2ed547915b1cc841442059c30f0c85138f1b220a7c9dc2b307e6d0363fba12069be0b893ce74e1cefe81f5d3fc1ceec427b56a7331be256
+  data.tar.gz: 8e9d521dc92ccd14175611e21535ed9e5b71610e74c22fd4038e11a16c104b9fc73d93431e1e9aae38bb37e739d91447f3739ddd8099083e4d6e821db1426ea5
data/.github/workflows/ci.yml
ADDED
@@ -0,0 +1,36 @@
+name: CI
+
+on:
+  - push
+
+env:
+  BUNDLE_PATH: vendor/bundle
+
+jobs:
+  test:
+    name: Tests
+    runs-on: ubuntu-latest
+    services:
+      postgres:
+        image: postgres:15-alpine
+        env:
+          POSTGRES_PASSWORD: postgres
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 100ms
+          --health-timeout 1s
+          --health-retries 100
+
+    if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup Ruby
+        uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '3.2'
+          bundler-cache: true
+      - name: "Tests and Lint"
+        run: bundle exec rake
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
+3.2.2
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2024 Cheddar Payments BV
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,17 @@
+Gouda is an ActiveJob adapter used at Cheddar. It requires PostgreSQL and a recent version of Rails.
+
+⚠️ At the moment Gouda is only used internally at Cheddar. We do not provide support for it, nor do we accept
+issues or feature requests. This is likely to change in the future.
+
+## Installation
+
+```
+$ bundle add gouda
+$ bundle install
+$ bin/rails g gouda:install
+```
+
+## Usage
+
+At the moment the Gouda UI is proprietary, so this gem only provides a "headless" implementation. We expect this to change in the future.
+
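Not part of the gem's files, just an illustrative sketch of how the adapter would typically be selected once installed. The `:gouda` adapter name is an assumption based on the presence of `lib/active_job/queue_adapters/gouda_adapter.rb`, which follows the standard ActiveJob naming convention.

```ruby
# Sketch only: assumes the conventional ActiveJob adapter lookup resolves
# :gouda to the GoudaAdapter class shipped with the gem.
# config/application.rb
config.active_job.queue_adapter = :gouda

# Any job enqueued afterwards goes through the Gouda adapter:
class WelcomeEmailJob < ApplicationJob
  queue_as :default

  def perform(user_id)
    # ...deliver the email...
  end
end

WelcomeEmailJob.perform_later(42)
```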
data/Rakefile
ADDED
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+require "bundler/gem_tasks"
+require "rake/testtask"
+
+Rake::TestTask.new(:test) do |t|
+  t.libs << "test"
+  t.libs << "lib"
+
+  file_name = ARGV[1]
+
+  t.test_files = if file_name
+    [file_name]
+  else
+    FileList["test/**/*_test.rb"]
+  end
+end
+
+task default: :test
data/gouda.gemspec
ADDED
@@ -0,0 +1,32 @@
+require_relative "lib/gouda/version"
+
+Gem::Specification.new do |spec|
+  spec.name = "gouda"
+  spec.version = Gouda::VERSION
+  spec.summary = "Job Scheduler"
+  spec.description = "Job Scheduler for Rails"
+  spec.authors = ["Sebastian van Hesteren", "Julik Tarkhanov"]
+  spec.email = ["sebastian@cheddar.me", "me@julik.nl"]
+  spec.homepage = "https://rubygems.org/gems/gouda"
+  spec.license = "MIT"
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
+  spec.require_paths = ["lib"]
+
+  spec.metadata["homepage_uri"] = spec.homepage
+  spec.metadata["source_code_uri"] = "https://github.com/cheddar-me/gouda"
+  spec.metadata["changelog_uri"] = "https://github.com/cheddar-me/gouda/CHANGELOG.md"
+
+  spec.files = Dir.chdir(File.expand_path(__dir__)) do
+    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{\A(?:test|spec|features)/}) }
+  end
+
+  spec.add_dependency "activerecord", "~> 7"
+  spec.add_dependency "activesupport", "~> 7"
+  spec.add_dependency "railties", "~> 7"
+  spec.add_dependency "activejob", "~> 7"
+  spec.add_dependency "fugit", "~> 1.10.1"
+
+  spec.add_development_dependency "pg"
+  spec.add_development_dependency "debug"
+  spec.add_development_dependency "pry"
+end
data/lib/generators/gouda/install_generator.rb
ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require "rails/generators"
+require "rails/generators/active_record"
+
+module Gouda
+  # Rails generator used for setting up Gouda in a Rails application.
+  # Run it with +bin/rails g gouda:install+ in your console.
+
+  class InstallGenerator < Rails::Generators::Base
+    include ActiveRecord::Generators::Migration
+
+    TEMPLATES = File.join(File.dirname(__FILE__), "templates/install")
+    source_paths << TEMPLATES
+
+    class_option :database, type: :string, aliases: %i[--db], desc: "The database for your migration. By default, the current environment's primary database is used."
+
+    # Generates monolithic migration file that contains all database changes.
+    def create_migration_file
+      migration_template "migrations/create_gouda_tables.rb.erb", File.join(db_migrate_path, "create_gouda_tables.rb")
+    end
+
+    private
+
+    def migration_version
+      "[#{ActiveRecord::VERSION::STRING.to_f}]"
+    end
+  end
+end
data/lib/gouda/active_job_extensions/concurrency.rb
ADDED
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gouda
+  module ActiveJobExtensions
+    module Concurrency
+      extend ActiveSupport::Concern
+
+      VALID_TYPES = [String, Symbol, Numeric, Date, Time, TrueClass, FalseClass, NilClass].freeze
+
+      included do
+        class_attribute :gouda_concurrency_config, instance_accessor: false, default: {}
+      end
+
+      class_methods do
+        def gouda_control_concurrency_with(total_limit: nil, perform_limit: nil, enqueue_limit: nil, key: nil)
+          raise ArgumentError, "Need one of total_limit, perform_limit, enqueue_limit" if [total_limit, perform_limit, enqueue_limit].all?(&:blank?)
+          raise ArgumentError, "The only available limit is 1" if [total_limit, perform_limit, enqueue_limit].any? { |v| v.is_a?(Integer) && v != 1 }
+
+          if total_limit
+            perform_limit = total_limit
+            enqueue_limit = total_limit
+          end
+
+          self.gouda_concurrency_config = {perform_limit:, enqueue_limit:, key:}
+        end
+      end
+
+      # This method will be tried by the Gouda adapter
+      def enqueue_concurrency_key
+        job_config = self.class.try(:gouda_concurrency_config)
+        return unless job_config
+        return unless job_config[:enqueue_limit]
+
+        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
+      end
+
+      # This method will be tried by the Gouda adapter
+      def execution_concurrency_key
+        job_config = self.class.try(:gouda_concurrency_config)
+        return unless job_config
+        return unless job_config[:perform_limit]
+
+        _gouda_concurrency_extension_key_via_config || _gouda_concurrency_extension_automatic_key_from_class_and_args
+      end
+
+      # Generates automatic serialized sha1 key
+      def _gouda_concurrency_extension_automatic_key_from_class_and_args
+        # To have a stable serialization of an ActiveJob we can re-use the method defined by
+        # ActiveJob itself. We need to have the job class name and all the arguments, and for arguments
+        # which are ActiveRecords or derivatives - we want them converted into global IDs. This also avoids
+        # having attributes of the argument ActiveModels contribute to the concurrency key.
+        # Add "cursor_position" from job-iteration so that different offsets of the same job can run
+        # concurrently.
+        pertinent_job_attributes = serialize.slice("job_class", "arguments", "priority", "cursor_position")
+        Digest::SHA1.hexdigest(JSON.dump(pertinent_job_attributes))
+      end
+
+      # Generates the concurrency key from the configuration
+      def _gouda_concurrency_extension_key_via_config
+        key = self.class.gouda_concurrency_config[:key]
+        return if key.blank?
+
+        key = key.respond_to?(:call) ? instance_exec(&key) : key
+        raise TypeError, "Concurrency key must be a String; was a #{key.class}" unless VALID_TYPES.any? { |type| key.is_a?(type) }
+
+        key
+      end
+    end
+  end
+end
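The extension above is where `gouda_control_concurrency_with` comes from. A minimal usage sketch follows; the job class is hypothetical, and whether the module must be included by hand or is already mixed into ActiveJob by the gem's Railtie is an assumption here.

```ruby
# Sketch only. SyncAccountJob is hypothetical; the explicit include may be
# unnecessary if the gem's Railtie already mixes the extension into ActiveJob::Base.
class SyncAccountJob < ApplicationJob
  include Gouda::ActiveJobExtensions::Concurrency

  # total_limit: 1 sets both enqueue_limit and perform_limit, so at most one
  # workload per account can be enqueued or executing at any time. The only
  # supported limit value is 1 (see the ArgumentError in the source above).
  gouda_control_concurrency_with(total_limit: 1, key: -> { "sync-account-#{arguments.first}" })

  def perform(account_id)
    # ...synchronise the account...
  end
end
```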
data/lib/gouda/active_job_extensions/interrupts.rb
ADDED
@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+
+module Gouda
+  module ActiveJobExtensions
+    module Interrupts
+      extend ActiveSupport::Concern
+
+      included do
+        around_perform do |job, block|
+          # The @gouda_workload_interrupted_at ivar gets set on the job when the Workload
+          # gets reaped as a zombie. It contains the last known heartbeat of the job, assuming
+          # that it got interrupted around that particular time. The ivar gets persisted not
+          # into the original Workload (which gets marked "finished") but into the new Workload
+          # which the reap_zombie_workloads method enqueues.
+          if job.interrupted_at
+            Gouda.logger.warn { "Job: #{job.class.name} #{job.job_id} was previously interrupted" }
+            # The job is going to be re-enqueued if InterruptError is marked as retriable. We need
+            # to remove `interrupted_at` otherwise it will get raised again once that new job
+            # starts executing - which is not what we want
+            interrupted_error_time = job.interrupted_at
+            job.interrupted_at = nil
+
+            raise Gouda::InterruptError, "Job was interrupted around #{interrupted_error_time}"
+          end
+          block.call
+        end
+
+        # This overrides ActiveJob::Base to also set the "interrupted_at" value, which Gouda
+        # supplies in the active_job_data hash. The value is needed so that the job can correctly
+        # raise an InterruptError after an interruption, and we have to do it here so that we can
+        # still use ActiveJob::Base.execute, which Appsignal overloads.
+        # We also need to retain the scheduler_key value so that retries which ActiveJob does for us
+        # preserve that value when remarshaling the job
+        def self.deserialize(active_job_data)
+          super.tap do |job|
+            job.interrupted_at = active_job_data["interrupted_at"]
+            job.scheduler_key = active_job_data["scheduler_key"]
+          end
+        end
+
+        attr_accessor :interrupted_at
+        attr_accessor :scheduler_key
+      end
+    end
+  end
+end
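Because a reaped workload re-raises `Gouda::InterruptError` the next time it runs, a job that should survive worker restarts can declare the error retriable with the standard ActiveJob `retry_on`. A sketch, using a hypothetical job class:

```ruby
# Sketch only. If the workload was reaped as a zombie, its re-enqueued copy
# raises Gouda::InterruptError on start; retry it instead of discarding it.
class LongImportJob < ApplicationJob
  retry_on Gouda::InterruptError, wait: 5.seconds, attempts: 5

  def perform(import_id)
    # ...long-running work that may be cut short by a deploy or restart...
  end
end
```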
data/lib/gouda/adapter.rb
ADDED
@@ -0,0 +1,183 @@
+# frozen_string_literal: true
+
+# Acts as an ActiveJob adapter
+
+class Gouda::Adapter
+  prepend Gouda::BulkAdapterExtension
+
+  ENQUEUE_ERROR_MESSAGE = <<~ERR
+    The job has been rejected due to a matching enqueue concurrency key
+  ERR
+
+  # Enqueues the ActiveJob job to be performed.
+  # For use by Rails; you should generally not call this directly.
+  # @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
+  # @return [String, nil] the ID of the inserted workload or nil if the insert did not go through (due to concurrency)
+  def enqueue(active_job)
+    # This is the method that gets called by ActiveJob internally (from inside the ActiveJob::Base instance
+    # method). This is also when ActiveJob runs the enqueue callbacks. After this method returns
+    # ActiveJob will set @successfully_enqueued inside the job to `true` as long as no
+    # EnqueueError has been raised. This is, of course, incompatible with bulk-enqueueing (which we want)
+    # to use by default. What we can do is verify the value of the property set by our `enqueue_all` method,
+    # and raise the exception based on that.
+    enqueue_all([active_job])
+    if active_job.enqueue_error
+      Gouda.logger.warn { "Error #{active_job.enqueue_error.inspect} for Gouda workload (#{active_job.job_id})" }
+      raise active_job.enqueue_error
+    end
+    active_job.provider_job_id
+  end
+
+  # Enqueues an ActiveJob job to be run at a specific time.
+  # For use by Rails; you should generally not call this directly.
+  # @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
+  # @param timestamp [Integer, nil] the epoch time to perform the job
+  # @return [String, nil] the ID of the inserted Gouda or nil if the insert did not go through (due to concurrency)
+  def enqueue_at(active_job, timestamp_int)
+    active_job.scheduled_at = Time.at(timestamp_int).utc
+    enqueue_all([active_job])
+    if active_job.enqueue_error
+      Gouda.logger.warn { "Error #{active_job.enqueue_error.inspect} for Gouda workload (#{active_job.job_id})" }
+      raise active_job.enqueue_error
+    end
+    active_job.provider_job_id
+  end
+
+  # Enqueues multiple ActiveJobs.
+  # For use by Rails; you should generally not call this directly.
+  # @param active_job [ActiveJob::Base] the job to be enqueued from +#perform_later+
+  # @param timestamp [Integer, nil] the epoch time to perform the job
+  # @return [Integer] the number of jobs which were successfully sent to the queue
+  def enqueue_all(active_jobs)
+    t_now = Time.now.utc
+    bulk_insert_attributes = active_jobs.map.with_index do |active_job, i|
+      # We can't pregenerate an ID because we want to INSERT .. ON CONFLICT DO NOTHING
+      # and we want Postgres to use _all_ unique indexes for it, which would include a conflict of IDs -
+      # so some jobs could get silently rejected because of a duplicate ID. However unlikely this can better be prevented.
+      # We can't tell Postgres to ignore conflicts on _both_ the scheduler key and the enqueue concurrency key but not on
+      # the ID - it is either "all indexes" or "just one", but never "this index and that index". MERGE https://www.postgresql.org/docs/current/sql-merge.html
+      # is in theory capable of solving this but let's not complicate things all too hastily, the hour is getting late
+      {
+        active_job_id: active_job.job_id, # Multiple jobs can have the same ID due to retries, job-iteration etc.
+        scheduled_at: active_job.scheduled_at || t_now,
+        scheduler_key: active_job.scheduler_key, # So that the scheduler_key gets retained between retries
+        priority: active_job.priority,
+        execution_concurrency_key: extract_execution_concurrency_key(active_job),
+        enqueue_concurrency_key: extract_enqueue_concurrency_key(active_job),
+        queue_name: active_job.queue_name || "default",
+        active_job_class_name: active_job.class.to_s,
+        serialized_params: active_job.serialize.except("provider_job_id"), # For when a job which gets retried
+        interrupted_at: active_job.interrupted_at, # So that an exception can be raised when this job gets executed
+        position_in_bulk: i,
+        state: "enqueued"
+      }
+    end
+
+    # Filter out all the jobs with the same (and present) concurrency key and scheduler key
+    bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :enqueue_concurrency_key)
+    bulk_insert_attributes = filter_by_unique_not_nil_hash_key(bulk_insert_attributes, :scheduler_key)
+
+    # Do a bulk insert. For jobs with an enqueue concurrency key there will be no enqueue
+    # as the default for insert_all is to DO NOTHING. An exception would be nice but we are after performance here.
+    # Use batches of 500 so that we do not exceed the maximum statement size or do not create a transaction for the
+    # insert which times out
+    inserted_ids_and_positions = bulk_insert_attributes.each_slice(500).flat_map do |chunk|
+      ActiveSupport::Notifications.instrument("insert_all.gouda", {n_rows: chunk.size}) do |payload|
+        rows = Gouda::Workload.insert_all(chunk, returning: [:id, :position_in_bulk])
+        payload[:inserted_jobs] = rows.length
+        payload[:rejected_jobs] = chunk.size - rows.length
+        rows
+      end
+    end
+
+    # Mark all the jobs we ended up not enqueuing as such. If these jobs are getting enqueued "one by one"
+    # then their callbacks have already run, and they are already set to `successfully_enqueued = true`. If
+    # they are enqueued using `enqueue_all` directly there are no guarantees, as `enqueue_all` is a fairly new
+    # Rails feature. Now is the moment we need to "fish out" our bulk enqueue position and use it to detect
+    # which jobs did get enqueued and which didn't. Yes, this is a bit roundabout - but otherwise we could
+    # have a unique index and DO NOTHING just on the enqueue concurrency key
+    inserted_ids_and_positions.each do |row|
+      i = row.fetch("position_in_bulk")
+      active_jobs[i].provider_job_id = row.fetch("id")
+      active_jobs[i].successfully_enqueued = true
+    end
+    _, failed_enqueue = active_jobs.partition(&:successfully_enqueued?)
+    failed_enqueue.each do |active_job|
+      active_job.successfully_enqueued = false
+      active_job.enqueue_error = ActiveJob::EnqueueError.new(ENQUEUE_ERROR_MESSAGE)
+    end
+
+    # And return how many jobs we _did_ enqueue
+    inserted_ids_and_positions.length
+  end
+
+  # The whole point of Gouda is actually co-committing jobs with the business objects they use. The
+  # changes in Rails are directed towards shifting the job enqueues into an after_commit hook, so
+  # that the jobs - when they start executing - will always find the committed business-objects in
+  # the database. It is their attempt at ensuring read-after-write consistency in the face of two
+  # separate data stores. However, with a DB-based job queue which is using the same database
+  # as the rest of the application, we actually want the opposite - if a transaction commits,
+  # we want it to commit both the jobs to be done on the business objects and the business objects
+  # themselves. Folding the job enqueues into the same transaction can also be a great improvement
+  # to performance. Some of our jobs also imply that a job was generated as a result of a business
+  # model change. With after_commit, there is a subtle race condition where your application may
+  # crash between you doing the COMMIT on your transaction and the after_commit hooks executing.
+  # We want to avoid this in Gouda and always have a guarantee that if our main models committed,
+  # so did the jobs that use them.
+  # So: tell ActiveJob that we prefer the jobs to be co-committed.
+  #
+  # See https://github.com/rails/rails/pull/51426
+  def enqueue_after_transaction_commit?
+    false
+  end
+
+  private
+
+  def combine_enqueue_concurrency_key(enqueue_concurrency_key, scheduler_key, cursor_position)
+    # We also include the scheduler key into the enqueue key. This is done for the following reasons:
+    # Our scheduler always schedules "next subsequent" job once a job completes or fails. If we already have
+    # a job scheduled for execution way in the future (say - next month), and the enqueue concurrency key is set,
+    # we will need to manually remove it from the queue if we want to run its instance sooner. We could define a
+    # unique index on (enqueue_concurrency_key, scheduler_key) - but that would make our enqueue concurrency keys
+    # because NULLs in the scheduler_key are not considered equal to each other. We could modify our index statement
+    # with NULLS NOT DISTINCT - see https://www.postgresql.org/docs/current/indexes-unique.html - but that would
+    # create another problem. We want NULLs to _be_ distinct for the enqueue_concurrency_key column, but we want them
+    # to _not_ be distinct for the scheduler_key column (one off-scheduler job enqueued at most for the same
+    # scheduler_key value). Postgres does not give us this ability, sadly. So the way to go about it is to
+    # mix the scheduler key (name of the scheduled task + cron pattern and whatnot) into the enqueue_concurrency_key
+    # value itself - this provides us with all the necessary properties.
+    # For job-iteration we need to do the same so that we can have multiple jobs enqueued with the same key but
+    # different cursor positions
+    [enqueue_concurrency_key, scheduler_key, cursor_position].compact.join(":")
+  end
+
+  def extract_enqueue_concurrency_key(active_job)
+    ck_value = active_job.try(:enqueue_concurrency_key)
+    return unless ck_value.present?
+
+    enqueueing_as = active_job.try(:scheduler_key).present? ? "scheduled" : "immediate"
+    combine_enqueue_concurrency_key(ck_value, enqueueing_as, active_job.try(:cursor_position))
+  end
+
+  def extract_execution_concurrency_key(active_job)
+    active_job.try(:execution_concurrency_key)
+  end
+
+  # Finds all hashes in the given attributes which have the same value of the given attribute and preserves just one
+  # in the returned array. We need to do that for both the scheduler key and the enqueue concurrency key.
+  def filter_by_unique_not_nil_hash_key(bulk_insert_attributes, key_name)
+    # This is not as nice as a combo of partition/unique_by and whatnot but it is linear time, so there.
+    seen = Set.new
+    bulk_insert_attributes.filter do |item|
+      maybe_key = item.fetch(key_name)
+      if maybe_key && seen.include?(maybe_key)
+        false
+      elsif maybe_key
+        seen << maybe_key
+        true
+      else
+        true
+      end
+    end
+  end
+end
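Since `enqueue_after_transaction_commit?` returns false, jobs enqueued inside an open transaction are inserted as part of that same transaction. A sketch of the co-commit pattern this enables; the model and job names are made up:

```ruby
# Sketch only. Works because Gouda workloads live in the same Postgres
# database as the application's own tables.
ActiveRecord::Base.transaction do
  invoice = Invoice.create!(amount_cents: 10_00)
  ChargeInvoiceJob.perform_later(invoice.id)
  # If the transaction rolls back, neither the invoice nor the workload exists;
  # if it commits, both do.
end
```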
data/lib/gouda/bulk.rb
ADDED
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gouda
+  def self.in_bulk(&blk)
+    if Thread.current[:gouda_bulk_buffer].nil?
+      Thread.current[:gouda_bulk_buffer] = []
+      retval = yield
+      buf, Thread.current[:gouda_bulk_buffer] = Thread.current[:gouda_bulk_buffer], nil
+      enqueue_jobs_via_their_adapters(buf)
+      retval
+    else # There already is an open bulk
+      yield
+    end
+  end
+
+  # This method exists in edge Rails so probably can be replaced later:
+  # https://github.com/rails/rails/commit/9b62f88a2fde0d2bf8c4f6e3bcd06ecba7ca9d8d
+  def self.enqueue_jobs_via_their_adapters(active_jobs)
+    jobs_per_adapter = active_jobs.compact.group_by { |aj| aj.class.queue_adapter }
+    jobs_per_adapter.each_pair do |adapter, active_jobs|
+      if adapter.respond_to?(:enqueue_all)
+        adapter.enqueue_all(active_jobs)
+      else
+        active_jobs.each { |aj| adapter.enqueue(aj) }
+      end
+    end
+  end
+
+  module BulkAdapterExtension
+    def enqueue_all(active_jobs)
+      if Thread.current[:gouda_bulk_buffer]
+        Thread.current[:gouda_bulk_buffer].append(*active_jobs)
+        active_jobs
+      else
+        super
+      end
+    end
+  end
+end
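`Gouda.in_bulk` buffers every job enqueued inside the block on `Thread.current[:gouda_bulk_buffer]` and flushes the buffer through one `enqueue_all` call per adapter when the block returns. A short usage sketch with hypothetical job classes:

```ruby
# Sketch only. Both jobs are collected and inserted in bulk after the block,
# instead of one INSERT per perform_later call.
Gouda.in_bulk do
  users.each do |user|
    WelcomeEmailJob.perform_later(user.id)
    ProvisionAccountJob.perform_later(user.id)
  end
end
```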
data/lib/gouda/queue_constraints.rb
ADDED
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module Gouda
+  module AnyQueue
+    def self.to_sql
+      "1=1"
+    end
+  end
+
+  class OnlyQueuesConstraint < Struct.new(:queue_names)
+    def to_sql
+      placeholders = (["?"] * queue_names.length).join(",")
+      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
+        queue_name IN (#{placeholders})
+      SQL
+    end
+  end
+
+  class ExceptQueueConstraint < Struct.new(:queue_names)
+    def to_sql
+      placeholders = (["?"] * queue_names.length).join(",")
+      ActiveRecord::Base.sanitize_sql_array([<<~SQL, *queue_names])
+        queue_name NOT IN (#{placeholders})
+      SQL
+    end
+  end
+
+  def self.parse_queue_constraint(constraint_str_from_envvar)
+    parsed = queue_parser(constraint_str_from_envvar)
+    if parsed[:include]
+      OnlyQueuesConstraint.new(parsed[:include])
+    elsif parsed[:exclude]
+      ExceptQueueConstraint.new(parsed[:exclude])
+    else
+      AnyQueue
+    end
+  end
+
+  # Parse a string representing a group of queues into a more readable data
+  # structure.
+  # @param string [String] Queue string
+  # @return [Hash]
+  #   How to match a given queue. It can have the following keys and values:
+  #   - +{ all: true }+ indicates that all queues match.
+  #   - +{ exclude: Array<String> }+ indicates the listed queue names should
+  #     not match.
+  #   - +{ include: Array<String> }+ indicates the listed queue names should
+  #     match.
+  # @example
+  #   Gouda::QueueConstraints.queue_parser('-queue1,queue2')
+  #   => { exclude: [ 'queue1', 'queue2' ] }
+  def self.queue_parser(string)
+    string = string.presence || "*"
+
+    case string.first
+    when "-"
+      exclude_queues = true
+      string = string[1..]
+    when "+"
+      string = string[1..]
+    end
+
+    queues = string.split(",").map(&:strip)
+
+    if queues.include?("*")
+      {all: true}
+    elsif exclude_queues
+      {exclude: queues}
+    else
+      {include: queues}
+    end
+  end
+end
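`Gouda.parse_queue_constraint` turns a queue-list string (typically supplied via an environment variable) into a constraint object whose `to_sql` produces a WHERE fragment for workload selection. A sketch of the expected results; the exact quoting and trailing whitespace come from `sanitize_sql_array` and the heredocs above, so treat the values shown as approximate:

```ruby
# Sketch only; return values in the comments are approximations.
Gouda.parse_queue_constraint("mice,cheese").to_sql # => roughly "queue_name IN ('mice','cheese')"
Gouda.parse_queue_constraint("-mice").to_sql       # => roughly "queue_name NOT IN ('mice')"
Gouda.parse_queue_constraint("*")                  # => Gouda::AnyQueue (matches any queue, SQL "1=1")
Gouda.parse_queue_constraint("").to_sql            # => "1=1" (a blank string is treated as "*")
```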