gouda 0.1.11 → 0.1.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9b4b5340ec99f15fe7a62bc9a63642862219dc149d38d2e7e519f2ed655747e0
4
- data.tar.gz: 817fe1a0ef4a2b107ad07ce28c3e627e79ffe3cf5fb1be06089165e07e432ec5
3
+ metadata.gz: eb8694c3440600f405fc75ab09fa91b735d9ddaf2268ce3da37197f2cc21cd37
4
+ data.tar.gz: adb08446c066d226e45bddfb17274cd13279661eba771dcb3acf14f72b8f0702
5
5
  SHA512:
6
- metadata.gz: 0aa57b5dedc0a7fe3126d965fe377ebb00dc8f407d7a01877886b57c2f35f45bd2c254d370949a65ec44f3704353b2741509ff099acc0c285aa752ab500805a1
7
- data.tar.gz: 29f6d58da6179f0af817c541fdf9e0ca2fbf00fa83cab2d69c780a221761b26d08679a02419afbba93070d7d19babed436727a2cf994471fa94aabf7c359a7b8
6
+ metadata.gz: b9c17fa5785b888213ad8ad83190c2728de7569bd9be81e4e220d43f5ad74a2e109680da713a0e90780b2a2244bf410fcc9d175cd01b5335d5881abebc9cac6f
7
+ data.tar.gz: 683e667a73971a47043374e01ed2fb8e06f157b98e010a3cdf7ca4f153c7383ca37535f3533a034dee08dbc2779b2def8b4828aa1f76c3708838fd0811e2c991
@@ -14,8 +14,7 @@ jobs:
14
14
  strategy:
15
15
  matrix:
16
16
  ruby:
17
- - '2.7'
18
- - '3.3'
17
+ - '3.1'
19
18
  services:
20
19
  postgres:
21
20
  image: postgres
data/.standard.yml ADDED
@@ -0,0 +1 @@
1
+ ruby_version: 3.1
data/CHANGELOG.md CHANGED
@@ -1,54 +1,64 @@
1
1
  ## [Unreleased]
2
2
 
3
- ## [0.1.0] - 2024-06-10
4
3
 
5
- - Initial release
4
+ ## [0.1.13] - 2024-09-03
6
5
 
7
- ## [0.1.1] - 2024-06-10
6
+ - Ensure we won't execute workloads which were scheduled but are no longer present in the cron table entries.
8
7
 
9
- - Fix support for older ruby versions until 2.7
8
+ ## [0.1.12] - 2024-07-03
10
9
 
11
- ## [0.1.2] - 2024-06-11
10
+ - When doing polling, suppress DEBUG-level messages. This will stop Gouda spamming the logs with SQL in dev/test environments.
12
11
 
13
- - Updated readme and method renaming in Scheduler
12
+ ## [0.1.11] - 2024-07-03
14
13
 
15
- ## [0.1.3] - 2024-06-11
14
+ - Fix: make sure the Gouda logger config does not get used during Rails initialization
16
15
 
17
- - Allow the Rails app to boot even if there is no database yet
16
+ ## [0.1.10] - 2024-07-03
18
17
 
19
- ## [0.1.4] - 2024-06-14
18
+ - Fix: remove logger overrides that Gouda should install, as this causes problems for Rails apps hosting Gouda
20
19
 
21
- - Rescue NoDatabaseError at scheduler update.
22
- - Include tests in gem, for sake of easier debugging.
23
- - Reduce logging in local test runs.
24
- - Bump local ruby version to 3.3.3
20
+ ## [0.1.9] - 2024-06-26
25
21
 
26
- ## [0.1.5] - 2024-06-18
22
+ - Fix: cleanup_preserved_jobs_before in Gouda::Workload.prune now points to Gouda.config
27
23
 
28
- - Update documentation
29
- - Don't pass on scheduler keys to retries
24
+ ## [0.1.8] - 2024-06-21
25
+
26
+ - Move some missed instrumentations to Gouda.instrument
27
+
28
+ ## [0.1.7] - 2024-06-21
29
+
30
+ - Separate all instrumentation to use ActiveSupport::Notification
30
31
 
31
32
  ## [0.1.6] - 2024-06-18
32
33
 
33
34
  - Fix: don't upsert workloads twice when starting Gouda.
34
35
  - Add back in Appsignal calls
35
36
 
36
- ## [0.1.7] - 2024-06-21
37
+ ## [0.1.5] - 2024-06-18
37
38
 
38
- - Separate all instrumentation to use ActiveSupport::Notification
39
+ - Update documentation
40
+ - Don't pass on scheduler keys to retries
39
41
 
40
- ## [0.1.8] - 2024-06-21
42
+ ## [0.1.4] - 2024-06-14
41
43
 
42
- - Move some missed instrumentations to Gouda.instrument
44
+ - Rescue NoDatabaseError at scheduler update.
45
+ - Include tests in gem, for sake of easier debugging.
46
+ - Reduce logging in local test runs.
47
+ - Bump local ruby version to 3.3.3
43
48
 
44
- ## [0.1.9] - 2024-06-26
49
+ ## [0.1.3] - 2024-06-11
45
50
 
46
- - Fix: cleanup_preserved_jobs_before in Gouda::Workload.prune now points to Gouda.config
51
+ - Allow the Rails app to boot even if there is no database yet
47
52
 
48
- ## [0.1.10] - 2024-07-03
53
+ ## [0.1.2] - 2024-06-11
49
54
 
50
- - Fix: remove logger overrides that Gouda should install, as this causes problems for Rails apps hosting Gouda
55
+ - Updated readme and method renaming in Scheduler
51
56
 
52
- ## [0.1.11] - 2024-07-03
57
+ ## [0.1.1] - 2024-06-10
58
+
59
+ - Fix support for older ruby versions until 2.7
60
+
61
+ ## [0.1.0] - 2024-06-10
62
+
63
+ - Initial release
53
64
 
54
- - Fix: make sure the Gouda logger config does not get used during Rails initialization
data/gouda.gemspec CHANGED
@@ -9,10 +9,10 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ["sebastian@cheddar.me", "me@julik.nl"]
10
10
  spec.homepage = "https://github.com/cheddar-me/gouda"
11
11
  spec.license = "MIT"
12
- spec.required_ruby_version = Gem::Requirement.new(">= 2.7.0")
12
+ spec.required_ruby_version = Gem::Requirement.new(">= 3.1.0")
13
13
  spec.require_paths = ["lib"]
14
14
 
15
- spec.metadata["homepage_uri"] =
15
+ spec.metadata["homepage_uri"] = spec.homepage
16
16
  spec.metadata["source_code_uri"] = spec.homepage
17
17
  spec.metadata["changelog_uri"] = "https://github.com/cheddar-me/gouda/CHANGELOG.md"
18
18
 
data/lib/gouda/adapter.rb CHANGED
@@ -57,7 +57,7 @@ class Gouda::Adapter
57
57
  # We can't tell Postgres to ignore conflicts on _both_ the scheduler key and the enqueue concurrency key but not on
58
58
  # the ID - it is either "all indexes" or "just one", but never "this index and that index". MERGE https://www.postgresql.org/docs/current/sql-merge.html
59
59
  # is in theory capable of solving this but let's not complicate things all too hastily, the hour is getting late
60
- scheduler_key = active_job.try(:executions) == 0 ? active_job.scheduler_key : nil # only enforce scheduler key on first workload
60
+ scheduler_key = (active_job.try(:executions) == 0) ? active_job.scheduler_key : nil # only enforce scheduler key on first workload
61
61
  {
62
62
  active_job_id: active_job.job_id, # Multiple jobs can have the same ID due to retries, job-iteration etc.
63
63
  scheduled_at: active_job.scheduled_at || t_now,
@@ -87,7 +87,7 @@ module Gouda::Scheduler
87
87
  # @return Array[Entry]
88
88
  def self.build_scheduler_entries_list!(cron_table_hash = nil)
89
89
  Gouda.logger.info "Updating scheduled workload entries..."
90
- if cron_table_hash.blank?
90
+ if cron_table_hash.nil? # An empty hash indicates that an empty crontab will be loaded
91
91
  config_from_rails = Rails.application.config.try(:gouda)
92
92
 
93
93
  cron_table_hash = if config_from_rails.present?
@@ -106,6 +106,9 @@ module Gouda::Scheduler
106
106
  params_with_defaults = defaults.merge(cron_entry_params)
107
107
  Entry.new(name: name, **params_with_defaults)
108
108
  end
109
+ @known_scheduler_keys = Set.new(@cron_table.map(&:scheduler_key))
110
+
111
+ @cron_table
109
112
  end
110
113
 
111
114
  # Once a workload has finished (doesn't matter whether it raised an exception
@@ -132,6 +135,14 @@ module Gouda::Scheduler
132
135
  @cron_table || []
133
136
  end
134
137
 
138
+ # Returns the set of known scheduler keys that may be present in the workloads table and are defined
139
+ # by the current entries.
140
+ #
141
+ # @return Set[String]
142
+ def self.known_scheduler_keys
143
+ @known_scheduler_keys || Set.new
144
+ end
145
+
135
146
  # Will upsert (`INSERT ... ON CONFLICT UPDATE`) workloads for all entries which are in the scheduler entries
136
147
  # table (the table needs to be read or hydrated first using `build_scheduler_entries_list!`). This is done
137
148
  # in a transaction. Any workloads which have been previously inserted from the scheduled entries, but no
@@ -143,9 +154,11 @@ module Gouda::Scheduler
143
154
  def self.upsert_workloads_from_entries_list!
144
155
  table_entries = @cron_table || []
145
156
 
146
- # Remove any cron keyed workloads which no longer match config-wise
157
+ # Remove any cron keyed workloads which no longer match config-wise.
158
+ # We do this to keep things clean (but it is not enough, an extra guard is needed in Workload checkout)
147
159
  known_keys = table_entries.map(&:scheduler_key).uniq
148
160
  Gouda::Workload.transaction do
161
+ # We do this to keep things a bit clean
149
162
  Gouda::Workload.where.not(scheduler_key: known_keys).delete_all
150
163
 
151
164
  # Insert the next iteration for every "next" entry in the crontab.
data/lib/gouda/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Gouda
4
- VERSION = "0.1.11"
4
+ VERSION = "0.1.13"
5
5
  end
data/lib/gouda/worker.rb CHANGED
@@ -83,12 +83,13 @@ module Gouda
83
83
  end
84
84
 
85
85
  def call
86
- # return false unless Rails.application # Rails is still booting and there is no application defined
87
-
88
86
  Gouda.config.app_executor.wrap do
89
- Gouda::Workload.waiting_to_start(queue_constraint: @queue_constraint).none?
87
+ Gouda.suppressing_sql_logs { Gouda::Workload.waiting_to_start(queue_constraint: @queue_constraint).none? }
90
88
  end
91
- rescue # If the DB connection cannot be checked out etc
89
+ rescue
90
+ # It is possible that in this scenario we do not have a database set up yet, for example,
91
+ # or we are unable to connect to the DB for whatever reason. In that case we should
92
+ # return `false` so that the worker can poll again later.
92
93
  false
93
94
  end
94
95
  end
@@ -158,13 +159,14 @@ module Gouda
158
159
  # a stale timestamp can indicate to us that the job was orphaned and is marked as "executing"
159
160
  # even though the worker it was running on has failed for whatever reason.
160
161
  # Later on we can figure out what to do with those jobs (re-enqueue them or toss them)
161
- Gouda::Workload.where(id: executing_workload_ids.to_a, state: "executing").update_all(executing_on: worker_id, last_execution_heartbeat_at: Time.now.utc)
162
+ Gouda.suppressing_sql_logs do # these updates will also be very frequent with long-running jobs
163
+ Gouda::Workload.where(id: executing_workload_ids.to_a, state: "executing").update_all(executing_on: worker_id, last_execution_heartbeat_at: Time.now.utc)
164
+ end
162
165
 
163
166
  # Find jobs which just hung and clean them up (mark them as "finished" and enqueue replacement workloads if possible)
164
167
  Gouda::Workload.reap_zombie_workloads
165
168
  rescue => e
166
169
  Gouda.instrument(:exception, {exception: e})
167
-
168
170
  warn "Uncaught exception during housekeeping (#{e.class} - #{e}"
169
171
  end
170
172
 
@@ -95,14 +95,14 @@ class Gouda::Workload < ActiveRecord::Base
95
95
  AND NOT EXISTS (
96
96
  SELECT NULL
97
97
  FROM #{quoted_table_name} AS concurrent
98
- WHERE concurrent.state = 'executing'
98
+ WHERE concurrent.state = 'executing'
99
99
  AND concurrent.execution_concurrency_key = workloads.execution_concurrency_key
100
100
  )
101
101
  AND workloads.scheduled_at <= clock_timestamp()
102
102
  SQL
103
103
  # Enter a txn just to mark this job as being executed "by us". This allows us to avoid any
104
104
  # locks during execution itself, including advisory locks
105
- jobs = Gouda::Workload
105
+ workloads = Gouda::Workload
106
106
  .select("workloads.*")
107
107
  .from("#{quoted_table_name} AS workloads")
108
108
  .where(where_query)
@@ -111,12 +111,40 @@ class Gouda::Workload < ActiveRecord::Base
111
111
  .limit(1)
112
112
 
113
113
  _first_available_workload = ActiveSupport::Notifications.instrument(:checkout_and_lock_one, {queue_constraint: queue_constraint.to_sql}) do |payload|
114
- payload[:condition_sql] = jobs.to_sql
114
+ payload[:condition_sql] = workloads.to_sql
115
115
  payload[:retried_checkouts_due_to_concurrent_exec] = 0
116
116
  uncached do # Necessary because we SELECT with a clock_timestamp() which otherwise gets cached by ActiveRecord query cache
117
117
  transaction do
118
- jobs.first.tap do |job|
119
- job&.update!(state: "executing", executing_on: executing_on, last_execution_heartbeat_at: Time.now.utc, execution_started_at: Time.now.utc)
118
+ workload = Gouda.suppressing_sql_logs { workloads.first } # Silence SQL output as this gets called very frequently
119
+ return nil unless workload
120
+
121
+ if workload.scheduler_key && !Gouda::Scheduler.known_scheduler_keys.include?(workload.scheduler_key)
122
+ # Check whether this workload was enqueued with a scheduler key, but no longer is in the cron table.
123
+ # If that is the case (we are trying to execute a workload which has a scheduler key, but the scheduler
124
+ # does not know about that key) it means that the workload has been removed from the cron table and must not run.
125
+ # Moreover: running it can be dangerous because it was likely removed from the table for a reason.
126
+ # Should that be the case, mark the job "finished" and return `nil` to get to the next poll. If the deployed worker still has
127
+ # the workload in its scheduler table, but a new deploy removed it - this is a race condition, but we are willing to accept it.
128
+ # Note that we are already "just not enqueueing" that job when the cron table gets loaded - this already happens.
129
+ #
130
+ # Removing jobs from the queue forcibly when we load the cron table is nice, but not enough, because our system can be in a state
131
+ # of partial deployment:
132
+ #
133
+ # [ release 1 does have some_job_hourly crontab entry ]
134
+ # [ release 2 no longer does ]
135
+ # ^ --- race conditions possible here --^
136
+ #
137
+ # So even if we remove the crontabled workloads during app boot, it does not give us a guarantee that release 1 won't reinsert them.
138
+ # This is why this safeguard is needed.
139
+ error = {class_name: "WorkloadSkippedError", message: "Skipped as scheduler_key was no longer in the cron table"}
140
+ workload.update!(state: "finished", error:)
141
+ # And return nil. This will cause a brief "sleep" in the polling routine since the caller may think there are no more workloads
142
+ # in the queue, but only for a brief moment.
143
+ nil
144
+ else
145
+ # Once we have verified this job is OK to execute
146
+ workload.update!(state: "executing", executing_on: executing_on, last_execution_heartbeat_at: Time.now.utc, execution_started_at: Time.now.utc)
147
+ workload
120
148
  end
121
149
  rescue ActiveRecord::RecordNotUnique
122
150
  # It can happen that due to a race the `execution_concurrency_key NOT IN` does not capture
data/lib/gouda.rb CHANGED
@@ -64,10 +64,8 @@ module Gouda
64
64
  def self.logger
65
65
  # By default, return a logger that sends data nowhere. The `Rails.logger` method
66
66
  # only becomes available later in the Rails lifecycle.
67
- @fallback_gouda_logger ||= begin
68
- ActiveSupport::Logger.new($stdout).tap do |logger|
69
- logger.level = Logger::WARN
70
- end
67
+ @fallback_gouda_logger ||= ActiveSupport::Logger.new($stdout).tap do |logger|
68
+ logger.level = Logger::WARN
71
69
  end
72
70
 
73
71
  # We want the Rails-configured loggers to take precedence over ours, since Gouda
@@ -81,8 +79,22 @@ module Gouda
81
79
  Rails.try(:logger) || ActiveJob::Base.try(:logger) || @fallback_gouda_logger
82
80
  end
83
81
 
84
- def self.instrument(channel, options, &block)
85
- ActiveSupport::Notifications.instrument("#{channel}.gouda", options, &block)
82
+ def self.suppressing_sql_logs(&)
83
+ # This is used for frequently-called methods that poll the DB. If logging is done at a low level (DEBUG)
84
+ # those methods print a lot of SQL into the logs, on every poll. While that is useful if
85
+ # you collect SQL queries from the logs, in most cases - especially if this is used
86
+ # in a side-thread inside Puma - the output might be quite annoying. So silence the
87
+ # logger when we poll, but just to INFO. Omitting DEBUG-level messages gets rid of the SQL.
88
+ if Gouda::Workload.logger
89
+ Gouda::Workload.logger.silence(Logger::INFO, &)
90
+ else
91
+ # In tests (and at earlier stages of the Rails boot cycle) the global ActiveRecord logger may be nil
92
+ yield
93
+ end
94
+ end
95
+
96
+ def self.instrument(channel, options, &)
97
+ ActiveSupport::Notifications.instrument("#{channel}.gouda", options, &)
86
98
  end
87
99
 
88
100
  def self.create_tables(active_record_schema)
@@ -142,6 +142,45 @@ class GoudaSchedulerTest < ActiveSupport::TestCase
142
142
  assert_equal [nil, nil], Gouda::Workload.first.serialized_params["arguments"]
143
143
  end
144
144
 
145
+ test "ensures a job that was scheduled but no longer present in the cron table gets force-finished without executing" do
146
+ tab = {
147
+ first_hourly: {
148
+ cron: "@hourly",
149
+ class: "GoudaSchedulerTest::TestJob",
150
+ args: [nil, nil]
151
+ }
152
+ }
153
+
154
+ assert_nothing_raised do
155
+ Gouda::Scheduler.build_scheduler_entries_list!(tab)
156
+ end
157
+
158
+ Gouda::Workload.delete_all
159
+ assert_changes_by(-> { Gouda::Workload.count }, exactly: 1) do
160
+ Gouda::Scheduler.upsert_workloads_from_entries_list!
161
+ end
162
+
163
+ # Update all workloads so that it is already time for them to be executed (since we use clock_timestamp(),
164
+ # time travel is not possible in these tests)
165
+ Gouda::Workload.update_all(scheduled_at: Time.now - 2.minutes)
166
+
167
+ workload = Gouda::Workload.checkout_and_lock_one(executing_on: "test")
168
+ assert workload # Now this workload does get selected for execution
169
+ workload.update(state: "enqueued") # Return it to the queue
170
+
171
+ # Erase the crontab.
172
+ # No need to enqueue next jobs in this test as there would not be jobs enqueued anyway
173
+ assert_nothing_raised do
174
+ Gouda::Scheduler.build_scheduler_entries_list!({})
175
+ end
176
+
177
+ assert_nil Gouda::Workload.checkout_and_lock_one(executing_on: "test"), "The workload should not be picked for execution now"
178
+ just_finished_workload = Gouda::Workload.where(state: "finished").first!
179
+ assert_equal "finished", just_finished_workload.state
180
+ assert just_finished_workload.error
181
+ assert_match(/scheduler/, just_finished_workload.error.fetch("message"))
182
+ end
183
+
145
184
  test "is able to accept a crontab" do
146
185
  tab = {
147
186
  first_hourly: {
@@ -56,27 +56,16 @@ class ActiveSupport::TestCase
56
56
  ActiveRecord::Base.connection.execute("TRUNCATE TABLE gouda_job_fuses")
57
57
  end
58
58
 
59
- def test_create_tables
60
- ActiveRecord::Base.transaction do
61
- ActiveRecord::Base.connection.execute("DROP TABLE gouda_workloads")
62
- ActiveRecord::Base.connection.execute("DROP TABLE gouda_job_fuses")
63
- # The adapter has to be in a variable as the schema definition is scoped to the migrator, not self
64
- ActiveRecord::Schema.define(version: 1) do |via_definer|
65
- Gouda.create_tables(via_definer)
66
- end
67
- end
68
- end
69
-
70
59
  def subscribed_notification_for(notification)
71
60
  payload = nil
72
- subscription = ActiveSupport::Notifications.subscribe notification do |name, start, finish, id, _payload|
73
- payload = _payload
61
+ subscription = ActiveSupport::Notifications.subscribe notification do |name, start, finish, id, local_payload|
62
+ payload = local_payload
74
63
  end
75
64
 
76
65
  yield
77
66
 
78
67
  ActiveSupport::Notifications.unsubscribe(subscription)
79
68
 
80
- return payload
69
+ payload
81
70
  end
82
71
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gouda
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.11
4
+ version: 0.1.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sebastian van Hesteren
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2024-07-03 00:00:00.000000000 Z
12
+ date: 2024-09-04 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: activerecord
@@ -135,6 +135,7 @@ files:
135
135
  - ".gitignore"
136
136
  - ".rubocop.yml"
137
137
  - ".ruby-version"
138
+ - ".standard.yml"
138
139
  - CHANGELOG.md
139
140
  - Gemfile
140
141
  - LICENSE.txt
@@ -170,8 +171,8 @@ homepage: https://github.com/cheddar-me/gouda
170
171
  licenses:
171
172
  - MIT
172
173
  metadata:
173
- source_code_uri: https://github.com/cheddar-me/gouda
174
174
  homepage_uri: https://github.com/cheddar-me/gouda
175
+ source_code_uri: https://github.com/cheddar-me/gouda
175
176
  changelog_uri: https://github.com/cheddar-me/gouda/CHANGELOG.md
176
177
  post_install_message:
177
178
  rdoc_options: []
@@ -181,7 +182,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
181
182
  requirements:
182
183
  - - ">="
183
184
  - !ruby/object:Gem::Version
184
- version: 2.7.0
185
+ version: 3.1.0
185
186
  required_rubygems_version: !ruby/object:Gem::Requirement
186
187
  requirements:
187
188
  - - ">="