inst-jobs 2.3.3 → 2.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/db/migrate/20101216224513_create_delayed_jobs.rb +9 -7
  3. data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +8 -13
  4. data/db/migrate/20110610213249_optimize_delayed_jobs.rb +8 -8
  5. data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +25 -25
  6. data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +4 -8
  7. data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +1 -3
  8. data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +11 -15
  9. data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +1 -1
  10. data/db/migrate/20120608191051_add_jobs_run_at_index.rb +2 -2
  11. data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +1 -1
  12. data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -3
  13. data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +9 -13
  14. data/db/migrate/20151210162949_improve_max_concurrent.rb +4 -8
  15. data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +3 -2
  16. data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +13 -17
  17. data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +8 -8
  18. data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +72 -77
  19. data/db/migrate/20200825011002_add_strand_order_override.rb +93 -97
  20. data/db/migrate/20210809145804_add_n_strand_index.rb +3 -3
  21. data/db/migrate/20210812210128_add_singleton_column.rb +203 -0
  22. data/exe/inst_jobs +3 -2
  23. data/lib/delayed/backend/active_record.rb +182 -148
  24. data/lib/delayed/backend/base.rb +79 -74
  25. data/lib/delayed/batch.rb +11 -9
  26. data/lib/delayed/cli.rb +98 -84
  27. data/lib/delayed/core_ext/kernel.rb +4 -2
  28. data/lib/delayed/daemon.rb +70 -74
  29. data/lib/delayed/job_tracking.rb +26 -25
  30. data/lib/delayed/lifecycle.rb +27 -24
  31. data/lib/delayed/log_tailer.rb +17 -17
  32. data/lib/delayed/logging.rb +13 -16
  33. data/lib/delayed/message_sending.rb +42 -51
  34. data/lib/delayed/performable_method.rb +5 -7
  35. data/lib/delayed/periodic.rb +66 -65
  36. data/lib/delayed/plugin.rb +2 -4
  37. data/lib/delayed/pool.rb +198 -193
  38. data/lib/delayed/server/helpers.rb +6 -6
  39. data/lib/delayed/server.rb +51 -54
  40. data/lib/delayed/settings.rb +93 -81
  41. data/lib/delayed/testing.rb +21 -22
  42. data/lib/delayed/version.rb +1 -1
  43. data/lib/delayed/work_queue/in_process.rb +21 -18
  44. data/lib/delayed/work_queue/parent_process/client.rb +54 -55
  45. data/lib/delayed/work_queue/parent_process/server.rb +215 -209
  46. data/lib/delayed/work_queue/parent_process.rb +52 -53
  47. data/lib/delayed/worker/consul_health_check.rb +21 -19
  48. data/lib/delayed/worker/health_check.rb +21 -12
  49. data/lib/delayed/worker/null_health_check.rb +3 -1
  50. data/lib/delayed/worker/process_helper.rb +8 -9
  51. data/lib/delayed/worker.rb +271 -265
  52. data/lib/delayed/yaml_extensions.rb +12 -10
  53. data/lib/delayed_job.rb +37 -38
  54. data/lib/inst-jobs.rb +1 -1
  55. data/spec/active_record_job_spec.rb +128 -135
  56. data/spec/delayed/cli_spec.rb +7 -7
  57. data/spec/delayed/daemon_spec.rb +8 -8
  58. data/spec/delayed/message_sending_spec.rb +8 -9
  59. data/spec/delayed/periodic_spec.rb +13 -12
  60. data/spec/delayed/server_spec.rb +38 -38
  61. data/spec/delayed/settings_spec.rb +26 -25
  62. data/spec/delayed/work_queue/in_process_spec.rb +7 -7
  63. data/spec/delayed/work_queue/parent_process/client_spec.rb +15 -11
  64. data/spec/delayed/work_queue/parent_process/server_spec.rb +43 -40
  65. data/spec/delayed/work_queue/parent_process_spec.rb +21 -21
  66. data/spec/delayed/worker/consul_health_check_spec.rb +22 -22
  67. data/spec/delayed/worker/health_check_spec.rb +51 -49
  68. data/spec/delayed/worker_spec.rb +28 -25
  69. data/spec/gemfiles/52.gemfile +5 -3
  70. data/spec/gemfiles/52.gemfile.lock +240 -0
  71. data/spec/gemfiles/60.gemfile +5 -3
  72. data/spec/gemfiles/60.gemfile.lock +1 -1
  73. data/spec/gemfiles/61.gemfile +5 -3
  74. data/spec/sample_jobs.rb +45 -15
  75. data/spec/shared/delayed_batch.rb +74 -67
  76. data/spec/shared/delayed_method.rb +143 -102
  77. data/spec/shared/performable_method.rb +39 -38
  78. data/spec/shared/shared_backend.rb +517 -441
  79. data/spec/shared/testing.rb +14 -14
  80. data/spec/shared/worker.rb +155 -147
  81. data/spec/shared_jobs_specs.rb +13 -13
  82. data/spec/spec_helper.rb +43 -40
  83. metadata +74 -56
  84. data/lib/delayed/backend/redis/bulk_update.lua +0 -50
  85. data/lib/delayed/backend/redis/destroy_job.lua +0 -2
  86. data/lib/delayed/backend/redis/enqueue.lua +0 -29
  87. data/lib/delayed/backend/redis/fail_job.lua +0 -5
  88. data/lib/delayed/backend/redis/find_available.lua +0 -3
  89. data/lib/delayed/backend/redis/functions.rb +0 -59
  90. data/lib/delayed/backend/redis/get_and_lock_next_available.lua +0 -17
  91. data/lib/delayed/backend/redis/includes/jobs_common.lua +0 -203
  92. data/lib/delayed/backend/redis/job.rb +0 -528
  93. data/lib/delayed/backend/redis/set_running.lua +0 -5
  94. data/lib/delayed/backend/redis/tickle_strand.lua +0 -2
  95. data/spec/redis_job_spec.rb +0 -148
@@ -1,69 +1,68 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'pathname'
4
- require 'socket'
5
- require 'timeout'
3
+ require "pathname"
4
+ require "socket"
5
+ require "timeout"
6
6
 
7
- require_relative 'parent_process/client'
8
- require_relative 'parent_process/server'
7
+ require_relative "parent_process/client"
8
+ require_relative "parent_process/server"
9
9
 
10
10
  module Delayed
11
- module WorkQueue
12
- # ParentProcess is a WorkQueue implementation that spawns a separate worker
13
- # process for querying the queue. Each Worker child process sends requests to
14
- # the ParentProcess via IPC, and receives responses. This centralized queue
15
- # querying cuts down on db queries and lock contention, and allows the
16
- # possibility for other centralized logic such as notifications when all workers
17
- # are idle.
18
- #
19
- # The IPC implementation uses Unix stream sockets and Ruby's built-in Marshal
20
- # functionality. The ParentProcess creates a Unix socket on the filesystem in
21
- # the tmp directory, so that if a worker process dies and is restarted it can
22
- # reconnect to the socket.
23
- #
24
- # While Unix and IP sockets are API compatible, we take a lot of shortcuts
25
- # because we know it's just a local Unix socket. If we ever wanted to swap this
26
- # out for a TCP/IP socket and have the WorkQueue running on another host, we'd
27
- # want to be a lot more robust about partial reads/writes and timeouts.
28
- class ParentProcess
29
- class ProtocolError < RuntimeError
30
- end
11
+ module WorkQueue
12
+ # ParentProcess is a WorkQueue implementation that spawns a separate worker
13
+ # process for querying the queue. Each Worker child process sends requests to
14
+ # the ParentProcess via IPC, and receives responses. This centralized queue
15
+ # querying cuts down on db queries and lock contention, and allows the
16
+ # possibility for other centralized logic such as notifications when all workers
17
+ # are idle.
18
+ #
19
+ # The IPC implementation uses Unix stream sockets and Ruby's built-in Marshal
20
+ # functionality. The ParentProcess creates a Unix socket on the filesystem in
21
+ # the tmp directory, so that if a worker process dies and is restarted it can
22
+ # reconnect to the socket.
23
+ #
24
+ # While Unix and IP sockets are API compatible, we take a lot of shortcuts
25
+ # because we know it's just a local Unix socket. If we ever wanted to swap this
26
+ # out for a TCP/IP socket and have the WorkQueue running on another host, we'd
27
+ # want to be a lot more robust about partial reads/writes and timeouts.
28
+ class ParentProcess
29
+ class ProtocolError < RuntimeError
30
+ end
31
31
 
32
- attr_reader :server_address
32
+ attr_reader :server_address
33
33
 
34
- DEFAULT_SOCKET_NAME = 'inst-jobs.sock'.freeze
35
- private_constant :DEFAULT_SOCKET_NAME
34
+ DEFAULT_SOCKET_NAME = "inst-jobs.sock"
35
+ private_constant :DEFAULT_SOCKET_NAME
36
36
 
37
- def initialize(config = Settings.parent_process)
38
- @config = config
39
- @server_address = generate_socket_path(config['server_address'])
40
- end
37
+ def initialize(config = Settings.parent_process)
38
+ @config = config
39
+ @server_address = generate_socket_path(config["server_address"])
40
+ end
41
41
 
42
- def server(parent_pid: nil)
43
- # The unix_server_socket method takes care of cleaning up any existing
44
- # socket for us if the work queue process dies and is restarted.
45
- listen_socket = Socket.unix_server_socket(@server_address)
46
- Server.new(listen_socket, parent_pid: parent_pid, config: @config)
47
- end
42
+ def server(parent_pid: nil)
43
+ # The unix_server_socket method takes care of cleaning up any existing
44
+ # socket for us if the work queue process dies and is restarted.
45
+ listen_socket = Socket.unix_server_socket(@server_address)
46
+ Server.new(listen_socket, parent_pid: parent_pid, config: @config)
47
+ end
48
48
 
49
- def client
50
- Client.new(Addrinfo.unix(@server_address), config: @config)
51
- end
49
+ def client
50
+ Client.new(Addrinfo.unix(@server_address), config: @config)
51
+ end
52
52
 
53
- private
53
+ private
54
54
 
55
- def generate_socket_path(supplied_path)
56
- pathname = Pathname.new(supplied_path)
55
+ def generate_socket_path(supplied_path)
56
+ pathname = Pathname.new(supplied_path)
57
57
 
58
- if pathname.absolute? && pathname.directory?
59
- pathname.join(DEFAULT_SOCKET_NAME).to_s
60
- elsif pathname.absolute?
61
- supplied_path
62
- else
63
- generate_socket_path(Settings.expand_rails_path(supplied_path))
58
+ if pathname.absolute? && pathname.directory?
59
+ pathname.join(DEFAULT_SOCKET_NAME).to_s
60
+ elsif pathname.absolute?
61
+ supplied_path
62
+ else
63
+ generate_socket_path(Settings.expand_rails_path(supplied_path))
64
+ end
65
+ end
64
66
  end
65
67
  end
66
68
  end
67
- end
68
- end
69
-
@@ -1,23 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'health_check'
4
- require_relative 'process_helper'
5
- require 'socket'
3
+ require_relative "health_check"
4
+ require_relative "process_helper"
5
+ require "socket"
6
6
 
7
7
  module Delayed
8
8
  class Worker
9
9
  class ConsulHealthCheck < HealthCheck
10
10
  self.type_name = :consul
11
11
 
12
- CONSUL_CONFIG_KEYS = %w{url acl_token}.map(&:freeze).freeze
13
- DEFAULT_SERVICE_NAME = 'inst-jobs_worker'.freeze
12
+ CONSUL_CONFIG_KEYS = %w[url acl_token].map(&:freeze).freeze
13
+ DEFAULT_SERVICE_NAME = "inst-jobs_worker"
14
14
  attr_reader :service_client, :health_client
15
15
 
16
16
  def initialize(*, **)
17
17
  super
18
18
  # Because we don't want the consul client to be a hard dependency we're
19
19
  # only requiring it once it's absolutely needed
20
- require 'diplomat'
20
+ require "diplomat"
21
21
 
22
22
  if config.keys.any? { |k| CONSUL_CONFIG_KEYS.include?(k) }
23
23
  consul_config = Diplomat::Configuration.new.tap do |conf|
@@ -35,10 +35,10 @@ module Delayed
35
35
 
36
36
  def start
37
37
  @service_client.register({
38
- id: worker_name,
39
- name: service_name,
40
- check: check_attributes
41
- })
38
+ id: worker_name,
39
+ name: service_name,
40
+ check: check_attributes
41
+ })
42
42
  end
43
43
 
44
44
  def stop
@@ -48,27 +48,28 @@ module Delayed
48
48
  def live_workers
49
49
  # Filter out critical workers (probably nodes failing their serf health check)
50
50
  live_nodes = @health_client.service(service_name, {
51
- filter: 'not Checks.Status == critical'
52
- })
51
+ filter: "not Checks.Status == critical"
52
+ })
53
53
 
54
- live_nodes.map { |n| n.Service['ID']}
54
+ live_nodes.map { |n| n.Service["ID"] }
55
55
  end
56
56
 
57
57
  private
58
58
 
59
59
  def check_attributes
60
60
  {
61
- args: ['bash', '-c', check_script],
62
- status: 'passing',
63
- interval: @config.fetch(:check_interval, '5m'),
64
- deregister_critical_service_after: @config.fetch(:deregister_service_delay, '10m'),
61
+ args: ["bash", "-c", check_script],
62
+ status: "passing",
63
+ interval: @config.fetch(:check_interval, "5m"),
64
+ deregister_critical_service_after: @config.fetch(:deregister_service_delay, "10m")
65
65
  }.tap do |h|
66
- h[:docker_container_id] = docker_container_id if @config['docker']
66
+ h[:docker_container_id] = docker_container_id if @config["docker"]
67
67
  end
68
68
  end
69
69
 
70
70
  def check_script
71
71
  return @check_script if @check_script
72
+
72
73
  mtime = ProcessHelper.mtime(Process.pid)
73
74
  @check_script = ProcessHelper.check_script(Process.pid, mtime)
74
75
  end
@@ -77,12 +78,13 @@ module Delayed
77
78
  # cgroups for part of its magic and also uses the container id as the cgroup name
78
79
  def docker_container_id
79
80
  return @docker_container_id if @docker_container_id
81
+
80
82
  content = File.read("/proc/1/cgroup").split("\n")
81
83
  @docker_container_id = content.last.split("/").last
82
84
  end
83
85
 
84
86
  def service_name
85
- @service_name ||= @config.fetch('service_name', DEFAULT_SERVICE_NAME)
87
+ @service_name ||= @config.fetch("service_name", DEFAULT_SERVICE_NAME)
86
88
  end
87
89
  end
88
90
  end
@@ -11,17 +11,20 @@ module Delayed
11
11
 
12
12
  def inherited(subclass)
13
13
  @subclasses << subclass
14
+ super
14
15
  end
15
16
 
16
17
  def build(type:, worker_name:, config: {})
17
18
  type = type.to_sym
18
19
  klass = @subclasses.find { |sc| sc.type_name == type }
19
20
  raise ArgumentError, "Unable to build a HealthCheck for type #{type}" unless klass
21
+
20
22
  klass.new(worker_name: worker_name, config: config)
21
23
  end
22
24
 
23
25
  def reschedule_abandoned_jobs
24
26
  return if Settings.worker_health_check_type == :none
27
+
25
28
  Delayed::Job.transaction do
26
29
  # this action is a special case, and SHOULD NOT be a periodic job
27
30
  # because if it gets wiped out suddenly during execution
@@ -32,27 +35,32 @@ module Delayed
32
35
  # operation, the transaction will end, releasing the advisory lock).
33
36
  result = attempt_advisory_lock
34
37
  return unless result
38
+
35
39
  checker = Worker::HealthCheck.build(
36
40
  type: Settings.worker_health_check_type,
37
41
  config: Settings.worker_health_check_config,
38
- worker_name: 'cleanup-crew'
42
+ worker_name: "cleanup-crew"
39
43
  )
40
44
  live_workers = checker.live_workers
41
45
 
42
46
  Delayed::Job.running_jobs.each do |job|
43
47
  # prefetched jobs have their own way of automatically unlocking themselves
44
48
  next if job.locked_by.start_with?("prefetch:")
45
- unless live_workers.include?(job.locked_by)
46
- begin
47
- Delayed::Job.transaction do
48
- # double check that the job is still there. locked_by will immediately be reset
49
- # to nil in this transaction by Job#reschedule
50
- next unless Delayed::Job.where(id: job, locked_by: job.locked_by).update_all(locked_by: "abandoned job cleanup") == 1
51
- job.reschedule
52
- end
53
- rescue
54
- ::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
49
+
50
+ next if live_workers.include?(job.locked_by)
51
+
52
+ begin
53
+ Delayed::Job.transaction do
54
+ # double check that the job is still there. locked_by will immediately be reset
55
+ # to nil in this transaction by Job#reschedule
56
+ next unless Delayed::Job.where(id: job,
57
+ locked_by: job.locked_by)
58
+ .update_all(locked_by: "abandoned job cleanup") == 1
59
+
60
+ job.reschedule
55
61
  end
62
+ rescue
63
+ ::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
56
64
  end
57
65
  end
58
66
  end
@@ -61,7 +69,8 @@ module Delayed
61
69
  def attempt_advisory_lock
62
70
  lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
63
71
  conn = ActiveRecord::Base.connection
64
- conn.select_value("SELECT pg_try_advisory_xact_lock(#{conn.quote_table_name('half_md5_as_bigint')}('#{lock_name}'));")
72
+ fn_name = conn.quote_table_name("half_md5_as_bigint")
73
+ conn.select_value("SELECT pg_try_advisory_xact_lock(#{fn_name}('#{lock_name}'));")
65
74
  end
66
75
  end
67
76
 
@@ -13,7 +13,9 @@ module Delayed
13
13
  true
14
14
  end
15
15
 
16
- def live_workers; []; end
16
+ def live_workers
17
+ []
18
+ end
17
19
  end
18
20
  end
19
21
  end
@@ -3,14 +3,13 @@
3
3
  module Delayed
4
4
  class Worker
5
5
  module ProcessHelper
6
-
7
- STAT_LINUX = 'stat --format=%%Y /proc/$WORKER_PID'
8
- STAT_MAC = 'ps -o lstart -p $WORKER_PID'
6
+ STAT_LINUX = "stat --format=%%Y /proc/$WORKER_PID"
7
+ STAT_MAC = "ps -o lstart -p $WORKER_PID"
9
8
  STAT = RUBY_PLATFORM =~ /darwin/ ? STAT_MAC : STAT_LINUX
10
9
  ALIVE_CHECK_LINUX = '[ -d "/proc/$WORKER_PID" ]'
11
- ALIVE_CHECK_MAC = 'ps -p $WORKER_PID > /dev/null'
10
+ ALIVE_CHECK_MAC = "ps -p $WORKER_PID > /dev/null"
12
11
  ALIVE_CHECK = RUBY_PLATFORM =~ /darwin/ ? ALIVE_CHECK_MAC : ALIVE_CHECK_LINUX
13
- SCRIPT_TEMPLATE = <<-BASH.freeze
12
+ SCRIPT_TEMPLATE = <<-BASH
14
13
  WORKER_PID="%<pid>d" # an example, filled from ruby when the check is created
15
14
  ORIGINAL_MTIME="%<mtime>s" # an example, filled from ruby when the check is created
16
15
 
@@ -31,19 +30,19 @@ module Delayed
31
30
 
32
31
  def self.mtime(pid)
33
32
  if RUBY_PLATFORM =~ /darwin/
34
- `ps -o lstart -p #{pid}`.sub(/\n$/, '').presence
33
+ `ps -o lstart -p #{pid}`.sub(/\n$/, "").presence
35
34
  else
36
35
  File::Stat.new("/proc/#{pid}").mtime.to_i.to_s rescue nil
37
36
  end
38
37
  end
39
38
 
40
39
  def self.check_script(pid, mtime)
41
- sprintf(SCRIPT_TEMPLATE, {pid: pid, mtime: mtime})
40
+ format(SCRIPT_TEMPLATE, { pid: pid, mtime: mtime })
42
41
  end
43
42
 
44
43
  def self.process_is_still_running?(pid, mtime)
45
- system(self.check_script(pid, mtime))
44
+ system(check_script(pid, mtime))
46
45
  end
47
46
  end
48
47
  end
49
- end
48
+ end
@@ -1,261 +1,267 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Delayed
4
-
5
- class TimeoutError < RuntimeError; end
6
- class RetriableError < RuntimeError
7
- # this error is a special case. You _should_ raise
8
- # it from inside the rescue block for another error,
9
- # because it indicates: "something made this job fail
10
- # but we're pretty sure it's transient and it's safe to try again".
11
- # the workflow is still the same (retry will happen unless
12
- # retries are exhausted), but it won't call the :error
13
- # callback unless it can't retry anymore. It WILL call the
14
- # separate ":retry" callback, which is ONLY activated
15
- # for this kind of error.
16
- end
17
-
18
- require 'tmpdir'
19
- require 'set'
20
-
21
- class Worker
22
- include Delayed::Logging
23
- SIGNALS = %i{INT TERM QUIT}
24
-
25
- attr_reader :config, :queue_name, :min_priority, :max_priority, :work_queue
26
-
27
- # Callback to fire when a delayed job fails max_attempts times. If this
28
- # callback is defined, then the value of destroy_failed_jobs is ignored, and
29
- # the job is destroyed if this block returns true.
30
- #
31
- # This allows for destroying "uninteresting" failures, while keeping around
32
- # interesting failures to be investigated later.
33
- #
34
- # The block is called with args(job, last_exception)
35
- def self.on_max_failures=(block)
36
- @@on_max_failures = block
4
+ class TimeoutError < RuntimeError; end
5
+
6
+ class RetriableError < RuntimeError
7
+ # this error is a special case. You _should_ raise
8
+ # it from inside the rescue block for another error,
9
+ # because it indicates: "something made this job fail
10
+ # but we're pretty sure it's transient and it's safe to try again".
11
+ # the workflow is still the same (retry will happen unless
12
+ # retries are exhausted), but it won't call the :error
13
+ # callback unless it can't retry anymore. It WILL call the
14
+ # separate ":retry" callback, which is ONLY activated
15
+ # for this kind of error.
37
16
  end
38
- cattr_reader :on_max_failures
39
17
 
40
- cattr_accessor :plugins
41
- self.plugins = Set.new
18
+ require "tmpdir"
19
+ require "set"
20
+
21
+ class Worker
22
+ include Delayed::Logging
23
+ SIGNALS = %i[INT TERM QUIT].freeze
24
+
25
+ attr_reader :config, :queue_name, :min_priority, :max_priority, :work_queue
26
+
27
+ class << self
28
+ # Callback to fire when a delayed job fails max_attempts times. If this
29
+ # callback is defined, then the value of destroy_failed_jobs is ignored, and
30
+ # the job is destroyed if this block returns true.
31
+ #
32
+ # This allows for destroying "uninteresting" failures, while keeping around
33
+ # interesting failures to be investigated later.
34
+ #
35
+ # The block is called with args(job, last_exception)
36
+ attr_accessor :on_max_failures
37
+ end
42
38
 
43
- def self.lifecycle
44
- @lifecycle ||= Delayed::Lifecycle.new
45
- end
39
+ cattr_accessor :plugins
40
+ self.plugins = Set.new
46
41
 
47
- def self.current_job
48
- Thread.current[:running_delayed_job]
49
- end
42
+ def self.lifecycle
43
+ @lifecycle ||= Delayed::Lifecycle.new
44
+ end
50
45
 
51
- def self.running_job(job)
52
- Thread.current[:running_delayed_job] = job
53
- yield
54
- ensure
55
- Thread.current[:running_delayed_job] = nil
56
- end
46
+ def self.current_job
47
+ Thread.current[:running_delayed_job]
48
+ end
57
49
 
58
- def initialize(options = {})
59
- @exit = false
60
- @parent_pid = options[:parent_pid]
61
- @queue_name = options[:queue] ||= Settings.queue
62
- @min_priority = options[:min_priority]
63
- @max_priority = options[:max_priority]
64
- @max_job_count = options[:worker_max_job_count].to_i
65
- @max_memory_usage = options[:worker_max_memory_usage].to_i
66
- @work_queue = options.delete(:work_queue) || WorkQueue::InProcess.new
67
- @health_check_type = Settings.worker_health_check_type
68
- @health_check_config = Settings.worker_health_check_config
69
- @config = options
70
- @job_count = 0
71
-
72
- @signal_queue = []
73
-
74
- app = Rails.application
75
- if app && !app.config.cache_classes
76
- Delayed::Worker.lifecycle.around(:perform) do |worker, job, &block|
77
- reload = app.config.reload_classes_only_on_change != true || app.reloaders.map(&:updated?).any?
78
-
79
- if reload
80
- if defined?(ActiveSupport::Reloader)
81
- Rails.application.reloader.reload!
82
- else
83
- ActionDispatch::Reloader.prepare!
50
+ def self.running_job(job)
51
+ Thread.current[:running_delayed_job] = job
52
+ yield
53
+ ensure
54
+ Thread.current[:running_delayed_job] = nil
55
+ end
56
+
57
+ def initialize(options = {})
58
+ @exit = false
59
+ @parent_pid = options[:parent_pid]
60
+ @queue_name = options[:queue] ||= Settings.queue
61
+ @min_priority = options[:min_priority]
62
+ @max_priority = options[:max_priority]
63
+ @max_job_count = options[:worker_max_job_count].to_i
64
+ @max_memory_usage = options[:worker_max_memory_usage].to_i
65
+ @work_queue = options.delete(:work_queue) || WorkQueue::InProcess.new
66
+ @health_check_type = Settings.worker_health_check_type
67
+ @health_check_config = Settings.worker_health_check_config
68
+ @config = options
69
+ @job_count = 0
70
+
71
+ @signal_queue = []
72
+
73
+ app = Rails.application
74
+ if app && !app.config.cache_classes
75
+ Delayed::Worker.lifecycle.around(:perform) do |worker, job, &block|
76
+ reload = app.config.reload_classes_only_on_change != true || app.reloaders.map(&:updated?).any?
77
+
78
+ if reload
79
+ if defined?(ActiveSupport::Reloader)
80
+ Rails.application.reloader.reload!
81
+ else
82
+ ActionDispatch::Reloader.prepare!
83
+ end
84
84
  end
85
- end
86
85
 
87
- begin
88
- block.call(worker, job)
89
- ensure
90
- ActionDispatch::Reloader.cleanup! if reload && !defined?(ActiveSupport::Reloader)
86
+ begin
87
+ block.call(worker, job)
88
+ ensure
89
+ ActionDispatch::Reloader.cleanup! if reload && !defined?(ActiveSupport::Reloader)
90
+ end
91
91
  end
92
92
  end
93
- end
94
-
95
- plugins.each { |plugin| plugin.inject! }
96
- end
97
93
 
98
- def name
99
- @name ||= "#{Socket.gethostname rescue "X"}:#{self.id}"
100
- end
101
-
102
- def set_process_name(new_name)
103
- $0 = "delayed:#{new_name}"
104
- end
94
+ plugins.each(&:inject!)
95
+ end
105
96
 
106
- def exit?
107
- !!@exit || parent_exited?
108
- end
97
+ def name
98
+ @name ||= "#{Socket.gethostname rescue 'X'}:#{id}"
99
+ end
109
100
 
110
- def parent_exited?
111
- @parent_pid && @parent_pid != Process.ppid
112
- end
101
+ def process_name=(new_name)
102
+ $0 = "delayed:#{new_name}"
103
+ end
113
104
 
114
- def wake_up
115
- @self_pipe[1].write_nonblock('.', exception: false)
116
- work_queue.wake_up
117
- end
105
+ def exit?
106
+ !!@exit || parent_exited?
107
+ end
118
108
 
119
- def start
120
- logger.info "Starting worker"
121
- set_process_name("start:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}")
122
- @self_pipe = IO.pipe
123
- work_queue.init
109
+ def parent_exited?
110
+ @parent_pid && @parent_pid != Process.ppid
111
+ end
124
112
 
125
- work_thread = Thread.current
126
- SIGNALS.each do |sig|
127
- trap(sig) { @signal_queue << sig; wake_up }
113
+ def wake_up
114
+ @self_pipe[1].write_nonblock(".", exception: false)
115
+ work_queue.wake_up
128
116
  end
129
117
 
130
- raise 'Could not register health_check' unless health_check.start
131
-
132
- signal_processor = Thread.new do
133
- loop do
134
- @self_pipe[0].read(1)
135
- case @signal_queue.pop
136
- when :INT, :TERM
137
- @exit = true # get the main thread to bail early if it's waiting for a job
138
- work_thread.raise(SystemExit) # Force the main thread to bail out of the current job
139
- cleanup! # we're going to get SIGKILL'd in a moment, so clean up asap
140
- break
141
- when :QUIT
142
- @exit = true
143
- else
144
- logger.error "Unknown signal '#{sig}' received"
118
+ def start
119
+ logger.info "Starting worker"
120
+ self.process_name =
121
+ "start:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}"
122
+ @self_pipe = IO.pipe
123
+ work_queue.init
124
+
125
+ work_thread = Thread.current
126
+ SIGNALS.each do |sig|
127
+ trap(sig) do
128
+ @signal_queue << sig
129
+ wake_up
145
130
  end
146
131
  end
147
- end
148
132
 
149
- self.class.lifecycle.run_callbacks(:execute, self) do
150
- until exit? do
151
- run
133
+ raise "Could not register health_check" unless health_check.start
134
+
135
+ signal_processor = Thread.new do
136
+ loop do
137
+ @self_pipe[0].read(1)
138
+ case @signal_queue.pop
139
+ when :INT, :TERM
140
+ @exit = true # get the main thread to bail early if it's waiting for a job
141
+ work_thread.raise(SystemExit) # Force the main thread to bail out of the current job
142
+ cleanup! # we're going to get SIGKILL'd in a moment, so clean up asap
143
+ break
144
+ when :QUIT
145
+ @exit = true
146
+ else
147
+ logger.error "Unknown signal '#{sig}' received"
148
+ end
149
+ end
152
150
  end
153
- end
154
151
 
155
- logger.info "Stopping worker"
156
- rescue => e
157
- Rails.logger.fatal("Child process died: #{e.inspect}") rescue nil
158
- self.class.lifecycle.run_callbacks(:exceptional_exit, self, e) { }
159
- ensure
160
- cleanup!
152
+ self.class.lifecycle.run_callbacks(:execute, self) do
153
+ run until exit?
154
+ end
161
155
 
162
- if signal_processor
163
- signal_processor.kill
164
- signal_processor.join
165
- end
156
+ logger.info "Stopping worker"
157
+ rescue => e
158
+ Rails.logger.fatal("Child process died: #{e.inspect}") rescue nil
159
+ self.class.lifecycle.run_callbacks(:exceptional_exit, self, e) { nil }
160
+ ensure
161
+ cleanup!
166
162
 
167
- @self_pipe&.each(&:close)
168
- @self_pipe = nil
169
- end
163
+ if signal_processor
164
+ signal_processor.kill
165
+ signal_processor.join
166
+ end
170
167
 
171
- def cleanup!
172
- return if cleaned?
168
+ @self_pipe&.each(&:close)
169
+ @self_pipe = nil
170
+ end
173
171
 
174
- health_check.stop
175
- work_queue.close
176
- Delayed::Job.clear_locks!(name)
172
+ def cleanup!
173
+ return if cleaned?
177
174
 
178
- @cleaned = true
179
- end
175
+ health_check.stop
176
+ work_queue.close
177
+ Delayed::Job.clear_locks!(name)
180
178
 
181
- def cleaned?
182
- @cleaned
183
- end
179
+ @cleaned = true
180
+ end
184
181
 
185
- def run
186
- return if exit?
187
- self.class.lifecycle.run_callbacks(:loop, self) do
188
- set_process_name("pop:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}")
189
- job = self.class.lifecycle.run_callbacks(:pop, self) do
190
- work_queue.get_and_lock_next_available(name, config)
191
- end
182
+ def cleaned?
183
+ @cleaned
184
+ end
192
185
 
193
- if job
194
- configure_for_job(job) do
195
- @job_count += perform(job)
186
+ def run
187
+ return if exit?
196
188
 
197
- if @max_job_count > 0 && @job_count >= @max_job_count
198
- logger.debug "Max job count of #{@max_job_count} exceeded, dying"
199
- @exit = true
200
- end
189
+ self.class.lifecycle.run_callbacks(:loop, self) do
190
+ self.process_name =
191
+ "pop:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}"
192
+ job = self.class.lifecycle.run_callbacks(:pop, self) do
193
+ work_queue.get_and_lock_next_available(name, config)
194
+ end
201
195
 
202
- if @max_memory_usage > 0
203
- memory = sample_memory
204
- if memory > @max_memory_usage
205
- logger.debug "Memory usage of #{memory} exceeds max of #{@max_memory_usage}, dying"
196
+ if job
197
+ configure_for_job(job) do
198
+ @job_count += perform(job)
199
+
200
+ if @max_job_count.positive? && @job_count >= @max_job_count
201
+ logger.debug "Max job count of #{@max_job_count} exceeded, dying"
206
202
  @exit = true
207
- else
208
- logger.debug "Memory usage: #{memory}"
203
+ end
204
+
205
+ if @max_memory_usage.positive?
206
+ memory = sample_memory
207
+ if memory > @max_memory_usage
208
+ logger.debug "Memory usage of #{memory} exceeds max of #{@max_memory_usage}, dying"
209
+ @exit = true
210
+ else
211
+ logger.debug "Memory usage: #{memory}"
212
+ end
209
213
  end
210
214
  end
215
+ else
216
+ self.process_name =
217
+ "wait:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}"
218
+ sleep(Settings.sleep_delay + (rand * Settings.sleep_delay_stagger)) unless exit?
211
219
  end
212
- else
213
- set_process_name("wait:#{Settings.worker_procname_prefix}#{@queue_name}:#{min_priority || 0}:#{max_priority || 'max'}")
214
- sleep(Settings.sleep_delay + (rand * Settings.sleep_delay_stagger)) unless exit?
215
220
  end
216
221
  end
217
- end
218
222
 
219
- def perform(job)
220
- begin
221
- count = 1
222
- raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
223
- self.class.lifecycle.run_callbacks(:perform, self, job) do
224
- set_process_name("run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}")
225
- logger.info("Processing #{log_job(job, :long)}")
226
- runtime = Benchmark.realtime do
227
- if job.batch?
228
- # each job in the batch will have perform called on it, so we don't
229
- # need a timeout around this
230
- count = perform_batch(job)
231
- else
232
- job.invoke_job
223
+ def perform(job)
224
+ begin
225
+ count = 1
226
+ raise Delayed::Backend::JobExpired, "job expired at #{job.expires_at}" if job.expired?
227
+
228
+ self.class.lifecycle.run_callbacks(:perform, self, job) do
229
+ self.process_name = "run:#{Settings.worker_procname_prefix}#{job.id}:#{job.name}"
230
+ logger.info("Processing #{log_job(job, :long)}")
231
+ runtime = Benchmark.realtime do
232
+ if job.batch?
233
+ # each job in the batch will have perform called on it, so we don't
234
+ # need a timeout around this
235
+ count = perform_batch(job)
236
+ else
237
+ job.invoke_job
238
+ end
239
+ job.destroy
233
240
  end
234
- job.destroy
241
+ logger.info("Completed #{log_job(job)} #{format('%.0fms', (runtime * 1000))}")
242
+ end
243
+ rescue ::Delayed::RetriableError => e
244
+ can_retry = job.attempts + 1 < job.inferred_max_attempts
245
+ callback_type = can_retry ? :retry : :error
246
+ self.class.lifecycle.run_callbacks(callback_type, self, job, e) do
247
+ handle_failed_job(job, e)
248
+ end
249
+ rescue SystemExit => e
250
+ # There wasn't really a failure here so no callbacks and whatnot needed,
251
+ # still reschedule the job though.
252
+ job.reschedule(e)
253
+ rescue Exception => e # rubocop:disable Lint/RescueException
254
+ self.class.lifecycle.run_callbacks(:error, self, job, e) do
255
+ handle_failed_job(job, e)
235
256
  end
236
- logger.info("Completed #{log_job(job)} #{"%.0fms" % (runtime * 1000)}")
237
- end
238
- rescue ::Delayed::RetriableError => re
239
- can_retry = job.attempts + 1 < job.inferred_max_attempts
240
- callback_type = can_retry ? :retry : :error
241
- self.class.lifecycle.run_callbacks(callback_type, self, job, re) do
242
- handle_failed_job(job, re)
243
- end
244
- rescue SystemExit => se
245
- # There wasn't really a failure here so no callbacks and whatnot needed,
246
- # still reschedule the job though.
247
- job.reschedule(se)
248
- rescue Exception => e
249
- self.class.lifecycle.run_callbacks(:error, self, job, e) do
250
- handle_failed_job(job, e)
251
257
  end
258
+ count
252
259
  end
253
- count
254
- end
255
260
 
256
- def perform_batch(parent_job)
257
- batch = parent_job.payload_object
258
- if batch.mode == :serial
261
+ def perform_batch(parent_job)
262
+ batch = parent_job.payload_object
263
+ return unless batch.mode == :serial
264
+
259
265
  batch.jobs.each do |job|
260
266
  job.source = parent_job.source
261
267
  job.create_and_lock!(name)
@@ -265,72 +271,72 @@ class Worker
265
271
  end
266
272
  batch.items.size
267
273
  end
268
- end
269
274
 
270
- def handle_failed_job(job, error)
271
- job.last_error = "#{error.message}\n#{error.backtrace.join("\n")}"
272
- logger.error("Failed with #{error.class} [#{error.message}] (#{job.attempts} attempts)")
273
- job.reschedule(error)
274
- end
275
+ def handle_failed_job(job, error)
276
+ job.last_error = "#{error.message}\n#{error.backtrace.join("\n")}"
277
+ logger.error("Failed with #{error.class} [#{error.message}] (#{job.attempts} attempts)")
278
+ job.reschedule(error)
279
+ end
275
280
 
276
- def id
277
- Process.pid
278
- end
281
+ def id
282
+ Process.pid
283
+ end
279
284
 
280
- def log_job(job, format = :short)
281
- case format
282
- when :long
283
- "#{job.full_name} #{ Settings.job_detailed_log_format.call(job) }"
284
- else
285
- job.full_name
285
+ def log_job(job, format = :short)
286
+ case format
287
+ when :long
288
+ "#{job.full_name} #{Settings.job_detailed_log_format.call(job)}"
289
+ else
290
+ job.full_name
291
+ end
286
292
  end
287
- end
288
293
 
289
- # set up the session context information, so that it gets logged with the job log lines
290
- # also set up a unique tmpdir, which will get removed at the end of the job.
291
- def configure_for_job(job)
292
- previous_tmpdir = ENV['TMPDIR']
294
+ # set up the session context information, so that it gets logged with the job log lines
295
+ # also set up a unique tmpdir, which will get removed at the end of the job.
296
+ def configure_for_job(job)
297
+ previous_tmpdir = ENV["TMPDIR"]
293
298
 
294
- self.class.running_job(job) do
295
- dir = Dir.mktmpdir("job-#{job.id}-#{self.name.gsub(/[^\w\.]/, '.')}-")
296
- begin
297
- ENV['TMPDIR'] = dir
298
- yield
299
- ensure
300
- FileUtils.remove_entry(dir, true)
299
+ self.class.running_job(job) do
300
+ dir = Dir.mktmpdir("job-#{job.id}-#{name.gsub(/[^\w.]/, '.')}-")
301
+ begin
302
+ ENV["TMPDIR"] = dir
303
+ yield
304
+ ensure
305
+ FileUtils.remove_entry(dir, true)
306
+ end
301
307
  end
308
+ ensure
309
+ ENV["TMPDIR"] = previous_tmpdir
302
310
  end
303
- ensure
304
- ENV['TMPDIR'] = previous_tmpdir
305
- end
306
-
307
- def health_check
308
- @health_check ||= HealthCheck.build(
309
- type: @health_check_type,
310
- worker_name: name,
311
- config: @health_check_config
312
- )
313
- end
314
311
 
315
- # `sample` reports KB, not B
316
- if File.directory?("/proc")
317
- # linux w/ proc fs
318
- LINUX_PAGE_SIZE = (size = `getconf PAGESIZE`.to_i; size > 0 ? size : 4096)
319
- def sample_memory
320
- s = File.read("/proc/#{Process.pid}/statm").to_i rescue 0
321
- s * LINUX_PAGE_SIZE / 1024
312
+ def health_check
313
+ @health_check ||= HealthCheck.build(
314
+ type: @health_check_type,
315
+ worker_name: name,
316
+ config: @health_check_config
317
+ )
322
318
  end
323
- else
324
- # generic unix solution
325
- def sample_memory
326
- if Rails.env.test?
327
- 0
328
- else
329
- # hmm this is actually resident set size, doesn't include swapped-to-disk
330
- # memory.
331
- `ps -o rss= -p #{Process.pid}`.to_i
319
+
320
+ # `sample` reports KB, not B
321
+ if File.directory?("/proc")
322
+ # linux w/ proc fs
323
+ LINUX_PAGE_SIZE = (size = `getconf PAGESIZE`.to_i
324
+ size.positive? ? size : 4096)
325
+ def sample_memory
326
+ s = File.read("/proc/#{Process.pid}/statm").to_i rescue 0
327
+ s * LINUX_PAGE_SIZE / 1024
328
+ end
329
+ else
330
+ # generic unix solution
331
+ def sample_memory
332
+ if Rails.env.test?
333
+ 0
334
+ else
335
+ # hmm this is actually resident set size, doesn't include swapped-to-disk
336
+ # memory.
337
+ `ps -o rss= -p #{Process.pid}`.to_i
338
+ end
332
339
  end
333
340
  end
334
341
  end
335
342
  end
336
- end