inst-jobs 2.3.3 → 2.4.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/db/migrate/20101216224513_create_delayed_jobs.rb +9 -7
  3. data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +8 -13
  4. data/db/migrate/20110610213249_optimize_delayed_jobs.rb +8 -8
  5. data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +25 -25
  6. data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +4 -8
  7. data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +1 -3
  8. data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +11 -15
  9. data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +1 -1
  10. data/db/migrate/20120608191051_add_jobs_run_at_index.rb +2 -2
  11. data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +1 -1
  12. data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -3
  13. data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +9 -13
  14. data/db/migrate/20151210162949_improve_max_concurrent.rb +4 -8
  15. data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +3 -2
  16. data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +13 -17
  17. data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +8 -8
  18. data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +72 -77
  19. data/db/migrate/20200825011002_add_strand_order_override.rb +93 -97
  20. data/db/migrate/20210809145804_add_n_strand_index.rb +3 -3
  21. data/db/migrate/20210812210128_add_singleton_column.rb +200 -0
  22. data/db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb +27 -0
  23. data/db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb +56 -0
  24. data/exe/inst_jobs +3 -2
  25. data/lib/delayed/backend/active_record.rb +204 -150
  26. data/lib/delayed/backend/base.rb +106 -82
  27. data/lib/delayed/batch.rb +11 -9
  28. data/lib/delayed/cli.rb +98 -84
  29. data/lib/delayed/core_ext/kernel.rb +4 -2
  30. data/lib/delayed/daemon.rb +70 -74
  31. data/lib/delayed/job_tracking.rb +26 -25
  32. data/lib/delayed/lifecycle.rb +27 -24
  33. data/lib/delayed/log_tailer.rb +17 -17
  34. data/lib/delayed/logging.rb +13 -16
  35. data/lib/delayed/message_sending.rb +43 -52
  36. data/lib/delayed/performable_method.rb +6 -8
  37. data/lib/delayed/periodic.rb +72 -65
  38. data/lib/delayed/plugin.rb +2 -4
  39. data/lib/delayed/pool.rb +198 -193
  40. data/lib/delayed/server/helpers.rb +6 -6
  41. data/lib/delayed/server.rb +51 -54
  42. data/lib/delayed/settings.rb +93 -81
  43. data/lib/delayed/testing.rb +21 -22
  44. data/lib/delayed/version.rb +1 -1
  45. data/lib/delayed/work_queue/in_process.rb +21 -18
  46. data/lib/delayed/work_queue/parent_process/client.rb +54 -55
  47. data/lib/delayed/work_queue/parent_process/server.rb +219 -208
  48. data/lib/delayed/work_queue/parent_process.rb +52 -53
  49. data/lib/delayed/worker/consul_health_check.rb +21 -19
  50. data/lib/delayed/worker/health_check.rb +29 -22
  51. data/lib/delayed/worker/null_health_check.rb +3 -1
  52. data/lib/delayed/worker/process_helper.rb +8 -9
  53. data/lib/delayed/worker.rb +271 -265
  54. data/lib/delayed/yaml_extensions.rb +12 -10
  55. data/lib/delayed_job.rb +37 -38
  56. data/lib/inst-jobs.rb +1 -1
  57. data/spec/active_record_job_spec.rb +129 -136
  58. data/spec/delayed/cli_spec.rb +7 -7
  59. data/spec/delayed/daemon_spec.rb +10 -9
  60. data/spec/delayed/message_sending_spec.rb +16 -9
  61. data/spec/delayed/periodic_spec.rb +13 -12
  62. data/spec/delayed/server_spec.rb +38 -38
  63. data/spec/delayed/settings_spec.rb +26 -25
  64. data/spec/delayed/work_queue/in_process_spec.rb +7 -8
  65. data/spec/delayed/work_queue/parent_process/client_spec.rb +17 -12
  66. data/spec/delayed/work_queue/parent_process/server_spec.rb +70 -41
  67. data/spec/delayed/work_queue/parent_process_spec.rb +21 -23
  68. data/spec/delayed/worker/consul_health_check_spec.rb +22 -22
  69. data/spec/delayed/worker/health_check_spec.rb +60 -52
  70. data/spec/delayed/worker_spec.rb +28 -25
  71. data/spec/sample_jobs.rb +45 -15
  72. data/spec/shared/delayed_batch.rb +74 -67
  73. data/spec/shared/delayed_method.rb +143 -102
  74. data/spec/shared/performable_method.rb +39 -38
  75. data/spec/shared/shared_backend.rb +547 -441
  76. data/spec/shared/testing.rb +14 -14
  77. data/spec/shared/worker.rb +155 -147
  78. data/spec/shared_jobs_specs.rb +13 -13
  79. data/spec/spec_helper.rb +46 -41
  80. metadata +79 -55
  81. data/lib/delayed/backend/redis/bulk_update.lua +0 -50
  82. data/lib/delayed/backend/redis/destroy_job.lua +0 -2
  83. data/lib/delayed/backend/redis/enqueue.lua +0 -29
  84. data/lib/delayed/backend/redis/fail_job.lua +0 -5
  85. data/lib/delayed/backend/redis/find_available.lua +0 -3
  86. data/lib/delayed/backend/redis/functions.rb +0 -59
  87. data/lib/delayed/backend/redis/get_and_lock_next_available.lua +0 -17
  88. data/lib/delayed/backend/redis/includes/jobs_common.lua +0 -203
  89. data/lib/delayed/backend/redis/job.rb +0 -528
  90. data/lib/delayed/backend/redis/set_running.lua +0 -5
  91. data/lib/delayed/backend/redis/tickle_strand.lua +0 -2
  92. data/spec/gemfiles/52.gemfile +0 -7
  93. data/spec/gemfiles/60.gemfile +0 -7
  94. data/spec/gemfiles/60.gemfile.lock +0 -246
  95. data/spec/gemfiles/61.gemfile +0 -7
  96. data/spec/redis_job_spec.rb +0 -148
@@ -1,23 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'health_check'
4
- require_relative 'process_helper'
5
- require 'socket'
3
+ require_relative "health_check"
4
+ require_relative "process_helper"
5
+ require "socket"
6
6
 
7
7
  module Delayed
8
8
  class Worker
9
9
  class ConsulHealthCheck < HealthCheck
10
10
  self.type_name = :consul
11
11
 
12
- CONSUL_CONFIG_KEYS = %w{url acl_token}.map(&:freeze).freeze
13
- DEFAULT_SERVICE_NAME = 'inst-jobs_worker'.freeze
12
+ CONSUL_CONFIG_KEYS = %w[url acl_token].map(&:freeze).freeze
13
+ DEFAULT_SERVICE_NAME = "inst-jobs_worker"
14
14
  attr_reader :service_client, :health_client
15
15
 
16
16
  def initialize(*, **)
17
17
  super
18
18
  # Because we don't want the consul client to be a hard dependency we're
19
19
  # only requiring it once it's absolutely needed
20
- require 'diplomat'
20
+ require "diplomat"
21
21
 
22
22
  if config.keys.any? { |k| CONSUL_CONFIG_KEYS.include?(k) }
23
23
  consul_config = Diplomat::Configuration.new.tap do |conf|
@@ -35,10 +35,10 @@ module Delayed
35
35
 
36
36
  def start
37
37
  @service_client.register({
38
- id: worker_name,
39
- name: service_name,
40
- check: check_attributes
41
- })
38
+ id: worker_name,
39
+ name: service_name,
40
+ check: check_attributes
41
+ })
42
42
  end
43
43
 
44
44
  def stop
@@ -48,27 +48,28 @@ module Delayed
48
48
  def live_workers
49
49
  # Filter out critical workers (probably nodes failing their serf health check)
50
50
  live_nodes = @health_client.service(service_name, {
51
- filter: 'not Checks.Status == critical'
52
- })
51
+ filter: "not Checks.Status == critical"
52
+ })
53
53
 
54
- live_nodes.map { |n| n.Service['ID']}
54
+ live_nodes.map { |n| n.Service["ID"] }
55
55
  end
56
56
 
57
57
  private
58
58
 
59
59
  def check_attributes
60
60
  {
61
- args: ['bash', '-c', check_script],
62
- status: 'passing',
63
- interval: @config.fetch(:check_interval, '5m'),
64
- deregister_critical_service_after: @config.fetch(:deregister_service_delay, '10m'),
61
+ args: ["bash", "-c", check_script],
62
+ status: "passing",
63
+ interval: @config.fetch(:check_interval, "5m"),
64
+ deregister_critical_service_after: @config.fetch(:deregister_service_delay, "10m")
65
65
  }.tap do |h|
66
- h[:docker_container_id] = docker_container_id if @config['docker']
66
+ h[:docker_container_id] = docker_container_id if @config["docker"]
67
67
  end
68
68
  end
69
69
 
70
70
  def check_script
71
71
  return @check_script if @check_script
72
+
72
73
  mtime = ProcessHelper.mtime(Process.pid)
73
74
  @check_script = ProcessHelper.check_script(Process.pid, mtime)
74
75
  end
@@ -77,12 +78,13 @@ module Delayed
77
78
  # cgroups for part of its magic and also uses the container id as the cgroup name
78
79
  def docker_container_id
79
80
  return @docker_container_id if @docker_container_id
81
+
80
82
  content = File.read("/proc/1/cgroup").split("\n")
81
83
  @docker_container_id = content.last.split("/").last
82
84
  end
83
85
 
84
86
  def service_name
85
- @service_name ||= @config.fetch('service_name', DEFAULT_SERVICE_NAME)
87
+ @service_name ||= @config.fetch("service_name", DEFAULT_SERVICE_NAME)
86
88
  end
87
89
  end
88
90
  end
@@ -11,58 +11,65 @@ module Delayed
11
11
 
12
12
  def inherited(subclass)
13
13
  @subclasses << subclass
14
+ super
14
15
  end
15
16
 
16
17
  def build(type:, worker_name:, config: {})
17
18
  type = type.to_sym
18
19
  klass = @subclasses.find { |sc| sc.type_name == type }
19
20
  raise ArgumentError, "Unable to build a HealthCheck for type #{type}" unless klass
21
+
20
22
  klass.new(worker_name: worker_name, config: config)
21
23
  end
22
24
 
23
25
  def reschedule_abandoned_jobs
24
26
  return if Settings.worker_health_check_type == :none
27
+
25
28
  Delayed::Job.transaction do
26
29
  # this action is a special case, and SHOULD NOT be a periodic job
27
30
  # because if it gets wiped out suddenly during execution
28
- # it can't go clean up it's abandoned self. Therefore,
31
+ # it can't go clean up its abandoned self. Therefore,
29
32
  # we expect it to get run from it's own process forked from the job pool
30
33
  # and we try to get an advisory lock when it runs. If we succeed,
31
34
  # no other worker is trying to do this right now (and if we abandon the
32
35
  # operation, the transaction will end, releasing the advisory lock).
33
- result = attempt_advisory_lock
36
+ result = Delayed::Job.attempt_advisory_lock("Delayed::Worker::HealthCheck#reschedule_abandoned_jobs")
34
37
  return unless result
38
+
39
+ horizon = 5.minutes.ago
40
+
35
41
  checker = Worker::HealthCheck.build(
36
42
  type: Settings.worker_health_check_type,
37
43
  config: Settings.worker_health_check_config,
38
- worker_name: 'cleanup-crew'
44
+ worker_name: "cleanup-crew"
39
45
  )
40
46
  live_workers = checker.live_workers
41
47
 
42
- Delayed::Job.running_jobs.each do |job|
43
- # prefetched jobs have their own way of automatically unlocking themselves
44
- next if job.locked_by.start_with?("prefetch:")
45
- unless live_workers.include?(job.locked_by)
46
- begin
47
- Delayed::Job.transaction do
48
- # double check that the job is still there. locked_by will immediately be reset
49
- # to nil in this transaction by Job#reschedule
50
- next unless Delayed::Job.where(id: job, locked_by: job.locked_by).update_all(locked_by: "abandoned job cleanup") == 1
51
- job.reschedule
52
- end
53
- rescue
54
- ::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
48
+ loop do
49
+ batch = Delayed::Job.running_jobs
50
+ .where("locked_at<?", horizon)
51
+ .where.not("locked_by LIKE 'prefetch:%'")
52
+ .where.not(locked_by: live_workers)
53
+ .limit(100)
54
+ .to_a
55
+ break if batch.empty?
56
+
57
+ batch.each do |job|
58
+ Delayed::Job.transaction do
59
+ # double check that the job is still there. locked_by will immediately be reset
60
+ # to nil in this transaction by Job#reschedule
61
+ next unless Delayed::Job.where(id: job,
62
+ locked_by: job.locked_by)
63
+ .update_all(locked_by: "abandoned job cleanup") == 1
64
+
65
+ job.reschedule
55
66
  end
56
67
  end
68
+ rescue
69
+ ::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
57
70
  end
58
71
  end
59
72
  end
60
-
61
- def attempt_advisory_lock
62
- lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
63
- conn = ActiveRecord::Base.connection
64
- conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
65
- end
66
73
  end
67
74
 
68
75
  attr_accessor :config, :worker_name
@@ -13,7 +13,9 @@ module Delayed
13
13
  true
14
14
  end
15
15
 
16
- def live_workers; []; end
16
+ def live_workers
17
+ []
18
+ end
17
19
  end
18
20
  end
19
21
  end
@@ -3,14 +3,13 @@
3
3
  module Delayed
4
4
  class Worker
5
5
  module ProcessHelper
6
-
7
- STAT_LINUX = 'stat --format=%%Y /proc/$WORKER_PID'
8
- STAT_MAC = 'ps -o lstart -p $WORKER_PID'
6
+ STAT_LINUX = "stat --format=%%Y /proc/$WORKER_PID"
7
+ STAT_MAC = "ps -o lstart -p $WORKER_PID"
9
8
  STAT = RUBY_PLATFORM =~ /darwin/ ? STAT_MAC : STAT_LINUX
10
9
  ALIVE_CHECK_LINUX = '[ -d "/proc/$WORKER_PID" ]'
11
- ALIVE_CHECK_MAC = 'ps -p $WORKER_PID > /dev/null'
10
+ ALIVE_CHECK_MAC = "ps -p $WORKER_PID > /dev/null"
12
11
  ALIVE_CHECK = RUBY_PLATFORM =~ /darwin/ ? ALIVE_CHECK_MAC : ALIVE_CHECK_LINUX
13
- SCRIPT_TEMPLATE = <<-BASH.freeze
12
+ SCRIPT_TEMPLATE = <<-BASH
14
13
  WORKER_PID="%<pid>d" # an example, filled from ruby when the check is created
15
14
  ORIGINAL_MTIME="%<mtime>s" # an example, filled from ruby when the check is created
16
15
 
@@ -31,19 +30,19 @@ module Delayed
31
30
 
32
31
  def self.mtime(pid)
33
32
  if RUBY_PLATFORM =~ /darwin/
34
- `ps -o lstart -p #{pid}`.sub(/\n$/, '').presence
33
+ `ps -o lstart -p #{pid}`.sub(/\n$/, "").presence
35
34
  else
36
35
  File::Stat.new("/proc/#{pid}").mtime.to_i.to_s rescue nil
37
36
  end
38
37
  end
39
38
 
40
39
  def self.check_script(pid, mtime)
41
- sprintf(SCRIPT_TEMPLATE, {pid: pid, mtime: mtime})
40
+ format(SCRIPT_TEMPLATE, { pid: pid, mtime: mtime })
42
41
  end
43
42
 
44
43
  def self.process_is_still_running?(pid, mtime)
45
- system(self.check_script(pid, mtime))
44
+ system(check_script(pid, mtime))
46
45
  end
47
46
  end
48
47
  end
49
- end
48
+ end