inst-jobs 2.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/db/migrate/20101216224513_create_delayed_jobs.rb +9 -7
- data/db/migrate/20110531144916_cleanup_delayed_jobs_indexes.rb +8 -13
- data/db/migrate/20110610213249_optimize_delayed_jobs.rb +8 -8
- data/db/migrate/20110831210257_add_delayed_jobs_next_in_strand.rb +25 -25
- data/db/migrate/20120510004759_delayed_jobs_delete_trigger_lock_for_update.rb +4 -8
- data/db/migrate/20120531150712_drop_psql_jobs_pop_fn.rb +1 -3
- data/db/migrate/20120607164022_delayed_jobs_use_advisory_locks.rb +11 -15
- data/db/migrate/20120607181141_index_jobs_on_locked_by.rb +1 -1
- data/db/migrate/20120608191051_add_jobs_run_at_index.rb +2 -2
- data/db/migrate/20120927184213_change_delayed_jobs_handler_to_text.rb +1 -1
- data/db/migrate/20140505215510_copy_failed_jobs_original_id.rb +2 -3
- data/db/migrate/20150807133223_add_max_concurrent_to_jobs.rb +9 -13
- data/db/migrate/20151210162949_improve_max_concurrent.rb +4 -8
- data/db/migrate/20161206323555_add_back_default_string_limits_jobs.rb +3 -2
- data/db/migrate/20181217155351_speed_up_max_concurrent_triggers.rb +13 -17
- data/db/migrate/20200330230722_add_id_to_get_delayed_jobs_index.rb +8 -8
- data/db/migrate/20200824222232_speed_up_max_concurrent_delete_trigger.rb +72 -77
- data/db/migrate/20200825011002_add_strand_order_override.rb +93 -97
- data/db/migrate/20210809145804_add_n_strand_index.rb +12 -0
- data/db/migrate/20210812210128_add_singleton_column.rb +200 -0
- data/db/migrate/20210917232626_add_delete_conflicting_singletons_before_unlock_trigger.rb +27 -0
- data/db/migrate/20210928174754_fix_singleton_condition_in_before_insert.rb +56 -0
- data/db/migrate/20210929204903_update_conflicting_singleton_function_to_use_index.rb +27 -0
- data/db/migrate/20211101190934_update_after_delete_trigger_for_singleton_index.rb +137 -0
- data/db/migrate/20211207094200_update_after_delete_trigger_for_singleton_transition_cases.rb +171 -0
- data/db/migrate/20211220112800_fix_singleton_race_condition_insert.rb +59 -0
- data/db/migrate/20211220113000_fix_singleton_race_condition_delete.rb +207 -0
- data/db/migrate/20220127091200_fix_singleton_unique_constraint.rb +31 -0
- data/db/migrate/20220128084800_update_insert_trigger_for_singleton_unique_constraint_change.rb +60 -0
- data/db/migrate/20220128084900_update_delete_trigger_for_singleton_unique_constraint_change.rb +209 -0
- data/db/migrate/20220203063200_remove_old_singleton_index.rb +31 -0
- data/db/migrate/20220328152900_add_failed_jobs_indicies.rb +12 -0
- data/exe/inst_jobs +3 -2
- data/lib/delayed/backend/active_record.rb +226 -168
- data/lib/delayed/backend/base.rb +119 -72
- data/lib/delayed/batch.rb +11 -9
- data/lib/delayed/cli.rb +98 -84
- data/lib/delayed/core_ext/kernel.rb +4 -2
- data/lib/delayed/daemon.rb +70 -74
- data/lib/delayed/job_tracking.rb +26 -25
- data/lib/delayed/lifecycle.rb +28 -23
- data/lib/delayed/log_tailer.rb +17 -17
- data/lib/delayed/logging.rb +13 -16
- data/lib/delayed/message_sending.rb +43 -52
- data/lib/delayed/performable_method.rb +6 -8
- data/lib/delayed/periodic.rb +72 -68
- data/lib/delayed/plugin.rb +2 -4
- data/lib/delayed/pool.rb +205 -168
- data/lib/delayed/rails_reloader_plugin.rb +30 -0
- data/lib/delayed/server/helpers.rb +6 -6
- data/lib/delayed/server.rb +51 -54
- data/lib/delayed/settings.rb +96 -81
- data/lib/delayed/testing.rb +21 -22
- data/lib/delayed/version.rb +1 -1
- data/lib/delayed/work_queue/in_process.rb +21 -17
- data/lib/delayed/work_queue/parent_process/client.rb +55 -53
- data/lib/delayed/work_queue/parent_process/server.rb +245 -207
- data/lib/delayed/work_queue/parent_process.rb +52 -53
- data/lib/delayed/worker/consul_health_check.rb +32 -33
- data/lib/delayed/worker/health_check.rb +35 -27
- data/lib/delayed/worker/null_health_check.rb +3 -1
- data/lib/delayed/worker/process_helper.rb +11 -12
- data/lib/delayed/worker.rb +257 -244
- data/lib/delayed/yaml_extensions.rb +12 -10
- data/lib/delayed_job.rb +37 -37
- data/lib/inst-jobs.rb +1 -1
- data/spec/active_record_job_spec.rb +152 -139
- data/spec/delayed/cli_spec.rb +7 -7
- data/spec/delayed/daemon_spec.rb +10 -9
- data/spec/delayed/message_sending_spec.rb +16 -9
- data/spec/delayed/periodic_spec.rb +14 -21
- data/spec/delayed/server_spec.rb +38 -38
- data/spec/delayed/settings_spec.rb +26 -25
- data/spec/delayed/work_queue/in_process_spec.rb +8 -9
- data/spec/delayed/work_queue/parent_process/client_spec.rb +17 -12
- data/spec/delayed/work_queue/parent_process/server_spec.rb +118 -42
- data/spec/delayed/work_queue/parent_process_spec.rb +21 -23
- data/spec/delayed/worker/consul_health_check_spec.rb +37 -50
- data/spec/delayed/worker/health_check_spec.rb +60 -52
- data/spec/delayed/worker_spec.rb +53 -24
- data/spec/sample_jobs.rb +45 -15
- data/spec/shared/delayed_batch.rb +74 -67
- data/spec/shared/delayed_method.rb +143 -102
- data/spec/shared/performable_method.rb +39 -38
- data/spec/shared/shared_backend.rb +801 -440
- data/spec/shared/testing.rb +14 -14
- data/spec/shared/worker.rb +157 -149
- data/spec/shared_jobs_specs.rb +13 -13
- data/spec/spec_helper.rb +57 -56
- metadata +183 -103
- data/lib/delayed/backend/redis/bulk_update.lua +0 -50
- data/lib/delayed/backend/redis/destroy_job.lua +0 -2
- data/lib/delayed/backend/redis/enqueue.lua +0 -29
- data/lib/delayed/backend/redis/fail_job.lua +0 -5
- data/lib/delayed/backend/redis/find_available.lua +0 -3
- data/lib/delayed/backend/redis/functions.rb +0 -59
- data/lib/delayed/backend/redis/get_and_lock_next_available.lua +0 -17
- data/lib/delayed/backend/redis/includes/jobs_common.lua +0 -203
- data/lib/delayed/backend/redis/job.rb +0 -535
- data/lib/delayed/backend/redis/set_running.lua +0 -5
- data/lib/delayed/backend/redis/tickle_strand.lua +0 -2
- data/spec/gemfiles/42.gemfile +0 -7
- data/spec/gemfiles/50.gemfile +0 -7
- data/spec/gemfiles/51.gemfile +0 -7
- data/spec/gemfiles/52.gemfile +0 -7
- data/spec/gemfiles/60.gemfile +0 -7
- data/spec/redis_job_spec.rb +0 -148
| @@ -1,77 +1,75 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 | 
            -
            require_relative  | 
| 4 | 
            -
            require_relative  | 
| 5 | 
            -
            require  | 
| 3 | 
            +
            require_relative "health_check"
         | 
| 4 | 
            +
            require_relative "process_helper"
         | 
| 5 | 
            +
            require "socket"
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Delayed
         | 
| 8 8 | 
             
              class Worker
         | 
| 9 9 | 
             
                class ConsulHealthCheck < HealthCheck
         | 
| 10 10 | 
             
                  self.type_name = :consul
         | 
| 11 11 |  | 
| 12 | 
            -
                  CONSUL_CONFIG_KEYS = %w | 
| 13 | 
            -
                  DEFAULT_SERVICE_NAME =  | 
| 14 | 
            -
                  attr_reader : | 
| 12 | 
            +
                  CONSUL_CONFIG_KEYS = %w[url acl_token].map(&:freeze).freeze
         | 
| 13 | 
            +
                  DEFAULT_SERVICE_NAME = "inst-jobs_worker"
         | 
| 14 | 
            +
                  attr_reader :service_client, :health_client
         | 
| 15 15 |  | 
| 16 16 | 
             
                  def initialize(*, **)
         | 
| 17 17 | 
             
                    super
         | 
| 18 18 | 
             
                    # Because we don't want the consul client to be a hard dependency we're
         | 
| 19 19 | 
             
                    # only requiring it once it's absolutely needed
         | 
| 20 | 
            -
                    require  | 
| 20 | 
            +
                    require "diplomat"
         | 
| 21 21 |  | 
| 22 22 | 
             
                    if config.keys.any? { |k| CONSUL_CONFIG_KEYS.include?(k) }
         | 
| 23 | 
            -
                      consul_config =  | 
| 23 | 
            +
                      consul_config = Diplomat::Configuration.new.tap do |conf|
         | 
| 24 24 | 
             
                        CONSUL_CONFIG_KEYS.each do |key|
         | 
| 25 25 | 
             
                          conf.send("#{key}=", config[key]) if config[key]
         | 
| 26 26 | 
             
                        end
         | 
| 27 27 | 
             
                      end
         | 
| 28 | 
            -
                      @ | 
| 29 | 
            -
                      @ | 
| 28 | 
            +
                      @service_client = Diplomat::Service.new(configuration: consul_config)
         | 
| 29 | 
            +
                      @health_client = Diplomat::Health.new(configuration: consul_config)
         | 
| 30 30 | 
             
                    else
         | 
| 31 | 
            -
                      @ | 
| 32 | 
            -
                      @ | 
| 31 | 
            +
                      @service_client = Diplomat::Service.new
         | 
| 32 | 
            +
                      @health_client = Diplomat::Health.new
         | 
| 33 33 | 
             
                    end
         | 
| 34 34 | 
             
                  end
         | 
| 35 35 |  | 
| 36 36 | 
             
                  def start
         | 
| 37 | 
            -
                     | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
                    response = @agent_client.register_service(service)
         | 
| 43 | 
            -
                    response.ok?
         | 
| 37 | 
            +
                    @service_client.register({
         | 
| 38 | 
            +
                                               id: worker_name,
         | 
| 39 | 
            +
                                               name: service_name,
         | 
| 40 | 
            +
                                               check: check_attributes
         | 
| 41 | 
            +
                                             })
         | 
| 44 42 | 
             
                  end
         | 
| 45 43 |  | 
| 46 44 | 
             
                  def stop
         | 
| 47 | 
            -
                     | 
| 48 | 
            -
                    response.ok? || response.not_found?
         | 
| 45 | 
            +
                    @service_client.deregister(worker_name)
         | 
| 49 46 | 
             
                  end
         | 
| 50 47 |  | 
| 51 48 | 
             
                  def live_workers
         | 
| 52 | 
            -
                     | 
| 53 | 
            -
                     | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
                     | 
| 49 | 
            +
                    # Filter out critical workers (probably nodes failing their serf health check)
         | 
| 50 | 
            +
                    live_nodes = @health_client.service(service_name, {
         | 
| 51 | 
            +
                                                          filter: "not Checks.Status == critical"
         | 
| 52 | 
            +
                                                        })
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                    live_nodes.map { |n| n.Service["ID"] }
         | 
| 58 55 | 
             
                  end
         | 
| 59 56 |  | 
| 60 57 | 
             
                  private
         | 
| 61 58 |  | 
| 62 59 | 
             
                  def check_attributes
         | 
| 63 60 | 
             
                    {
         | 
| 64 | 
            -
                      args: [ | 
| 65 | 
            -
                      status:  | 
| 66 | 
            -
                      interval: @config.fetch(:check_interval,  | 
| 67 | 
            -
                      deregister_critical_service_after: @config.fetch(:deregister_service_delay,  | 
| 61 | 
            +
                      args: ["bash", "-c", check_script],
         | 
| 62 | 
            +
                      status: "passing",
         | 
| 63 | 
            +
                      interval: @config.fetch(:check_interval, "5m"),
         | 
| 64 | 
            +
                      deregister_critical_service_after: @config.fetch(:deregister_service_delay, "10m")
         | 
| 68 65 | 
             
                    }.tap do |h|
         | 
| 69 | 
            -
                      h[:docker_container_id] = docker_container_id if @config[ | 
| 66 | 
            +
                      h[:docker_container_id] = docker_container_id if @config["docker"]
         | 
| 70 67 | 
             
                    end
         | 
| 71 68 | 
             
                  end
         | 
| 72 69 |  | 
| 73 70 | 
             
                  def check_script
         | 
| 74 71 | 
             
                    return @check_script if @check_script
         | 
| 72 | 
            +
             | 
| 75 73 | 
             
                    mtime = ProcessHelper.mtime(Process.pid)
         | 
| 76 74 | 
             
                    @check_script = ProcessHelper.check_script(Process.pid, mtime)
         | 
| 77 75 | 
             
                  end
         | 
| @@ -80,12 +78,13 @@ module Delayed | |
| 80 78 | 
             
                  # cgroups for part of its magic and also uses the container id as the cgroup name
         | 
| 81 79 | 
             
                  def docker_container_id
         | 
| 82 80 | 
             
                    return @docker_container_id if @docker_container_id
         | 
| 81 | 
            +
             | 
| 83 82 | 
             
                    content = File.read("/proc/1/cgroup").split("\n")
         | 
| 84 83 | 
             
                    @docker_container_id = content.last.split("/").last
         | 
| 85 84 | 
             
                  end
         | 
| 86 85 |  | 
| 87 86 | 
             
                  def service_name
         | 
| 88 | 
            -
                    @service_name ||= @config.fetch( | 
| 87 | 
            +
                    @service_name ||= @config.fetch("service_name", DEFAULT_SERVICE_NAME)
         | 
| 89 88 | 
             
                  end
         | 
| 90 89 | 
             
                end
         | 
| 91 90 | 
             
              end
         | 
| @@ -11,57 +11,65 @@ module Delayed | |
| 11 11 |  | 
| 12 12 | 
             
                    def inherited(subclass)
         | 
| 13 13 | 
             
                      @subclasses << subclass
         | 
| 14 | 
            +
                      super
         | 
| 14 15 | 
             
                    end
         | 
| 15 16 |  | 
| 16 17 | 
             
                    def build(type:, worker_name:, config: {})
         | 
| 17 18 | 
             
                      type = type.to_sym
         | 
| 18 19 | 
             
                      klass = @subclasses.find { |sc| sc.type_name == type }
         | 
| 19 20 | 
             
                      raise ArgumentError, "Unable to build a HealthCheck for type #{type}" unless klass
         | 
| 21 | 
            +
             | 
| 20 22 | 
             
                      klass.new(worker_name: worker_name, config: config)
         | 
| 21 23 | 
             
                    end
         | 
| 22 24 |  | 
| 23 25 | 
             
                    def reschedule_abandoned_jobs
         | 
| 24 26 | 
             
                      return if Settings.worker_health_check_type == :none
         | 
| 27 | 
            +
             | 
| 25 28 | 
             
                      Delayed::Job.transaction do
         | 
| 26 | 
            -
                        # this  | 
| 29 | 
            +
                        # this action is a special case, and SHOULD NOT be a periodic job
         | 
| 27 30 | 
             
                        # because if it gets wiped out suddenly during execution
         | 
| 28 | 
            -
                        # it can't go clean up  | 
| 29 | 
            -
                        # we  | 
| 30 | 
            -
                        #  | 
| 31 | 
            -
                        #  | 
| 32 | 
            -
                         | 
| 33 | 
            -
                         | 
| 31 | 
            +
                        # it can't go clean up its abandoned self.  Therefore,
         | 
| 32 | 
            +
                        # we expect it to get run from its own process forked from the job pool
         | 
| 33 | 
            +
                        # and we try to get an advisory lock when it runs.  If we succeed,
         | 
| 34 | 
            +
                        # no other worker is trying to do this right now (and if we abandon the
         | 
| 35 | 
            +
                        # operation, the transaction will end, releasing the advisory lock).
         | 
| 36 | 
            +
                        result = Delayed::Job.attempt_advisory_lock("Delayed::Worker::HealthCheck#reschedule_abandoned_jobs")
         | 
| 37 | 
            +
                        next unless result
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                        horizon = 5.minutes.ago
         | 
| 40 | 
            +
             | 
| 34 41 | 
             
                        checker = Worker::HealthCheck.build(
         | 
| 35 42 | 
             
                          type: Settings.worker_health_check_type,
         | 
| 36 43 | 
             
                          config: Settings.worker_health_check_config,
         | 
| 37 | 
            -
                          worker_name:  | 
| 44 | 
            +
                          worker_name: "cleanup-crew"
         | 
| 38 45 | 
             
                        )
         | 
| 39 46 | 
             
                        live_workers = checker.live_workers
         | 
| 40 47 |  | 
| 41 | 
            -
                         | 
| 42 | 
            -
                           | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
                               | 
| 48 | 
            +
                        loop do
         | 
| 49 | 
            +
                          batch = Delayed::Job.running_jobs
         | 
| 50 | 
            +
                                              .where("locked_at<?", horizon)
         | 
| 51 | 
            +
                                              .where.not("locked_by LIKE 'prefetch:%'")
         | 
| 52 | 
            +
                                              .where.not(locked_by: live_workers)
         | 
| 53 | 
            +
                                              .limit(100)
         | 
| 54 | 
            +
                                              .to_a
         | 
| 55 | 
            +
                          break if batch.empty?
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                          batch.each do |job|
         | 
| 58 | 
            +
                            Delayed::Job.transaction do
         | 
| 59 | 
            +
                              # double check that the job is still there. locked_by will immediately be reset
         | 
| 60 | 
            +
                              # to nil in this transaction by Job#reschedule
         | 
| 61 | 
            +
                              next unless Delayed::Job.where(id: job,
         | 
| 62 | 
            +
                                                             locked_by: job.locked_by)
         | 
| 63 | 
            +
                                                      .update_all(locked_by: "abandoned job cleanup") == 1
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                              job.reschedule
         | 
| 54 66 | 
             
                            end
         | 
| 55 67 | 
             
                          end
         | 
| 68 | 
            +
                        rescue
         | 
| 69 | 
            +
                          ::Rails.logger.error "Failure rescheduling abandoned job #{job.id} #{$!.inspect}"
         | 
| 56 70 | 
             
                        end
         | 
| 57 71 | 
             
                      end
         | 
| 58 72 | 
             
                    end
         | 
| 59 | 
            -
             | 
| 60 | 
            -
                    def attempt_advisory_lock
         | 
| 61 | 
            -
                      lock_name = "Delayed::Worker::HealthCheck#reschedule_abandoned_jobs"
         | 
| 62 | 
            -
                      output = ActiveRecord::Base.connection.execute("SELECT pg_try_advisory_xact_lock(half_md5_as_bigint('#{lock_name}'));")
         | 
| 63 | 
            -
                      output.getvalue(0, 0)
         | 
| 64 | 
            -
                    end
         | 
| 65 73 | 
             
                  end
         | 
| 66 74 |  | 
| 67 75 | 
             
                  attr_accessor :config, :worker_name
         | 
| @@ -3,14 +3,13 @@ | |
| 3 3 | 
             
            module Delayed
         | 
| 4 4 | 
             
              class Worker
         | 
| 5 5 | 
             
                module ProcessHelper
         | 
| 6 | 
            -
             | 
| 7 | 
            -
                   | 
| 8 | 
            -
                   | 
| 9 | 
            -
                  STAT = RUBY_PLATFORM =~ /darwin/ ? STAT_MAC : STAT_LINUX
         | 
| 6 | 
            +
                  STAT_LINUX = "stat --format=%%Y /proc/$WORKER_PID"
         | 
| 7 | 
            +
                  STAT_MAC = "ps -o lstart -p $WORKER_PID"
         | 
| 8 | 
            +
                  STAT = RUBY_PLATFORM.include?("darwin") ? STAT_MAC : STAT_LINUX
         | 
| 10 9 | 
             
                  ALIVE_CHECK_LINUX = '[ -d "/proc/$WORKER_PID" ]'
         | 
| 11 | 
            -
                  ALIVE_CHECK_MAC =  | 
| 12 | 
            -
                  ALIVE_CHECK = RUBY_PLATFORM | 
| 13 | 
            -
                  SCRIPT_TEMPLATE = <<-BASH | 
| 10 | 
            +
                  ALIVE_CHECK_MAC = "ps -p $WORKER_PID > /dev/null"
         | 
| 11 | 
            +
                  ALIVE_CHECK = RUBY_PLATFORM.include?("darwin") ? ALIVE_CHECK_MAC : ALIVE_CHECK_LINUX
         | 
| 12 | 
            +
                  SCRIPT_TEMPLATE = <<-BASH
         | 
| 14 13 | 
             
                    WORKER_PID="%<pid>d" # an example, filled from ruby when the check is created
         | 
| 15 14 | 
             
                    ORIGINAL_MTIME="%<mtime>s" # an example, filled from ruby when the check is created
         | 
| 16 15 |  | 
| @@ -30,20 +29,20 @@ module Delayed | |
| 30 29 | 
             
                  BASH
         | 
| 31 30 |  | 
| 32 31 | 
             
                  def self.mtime(pid)
         | 
| 33 | 
            -
                    if RUBY_PLATFORM | 
| 34 | 
            -
                      `ps -o lstart -p #{pid}`.sub(/\n$/,  | 
| 32 | 
            +
                    if RUBY_PLATFORM.include?("darwin")
         | 
| 33 | 
            +
                      `ps -o lstart -p #{pid}`.sub(/\n$/, "").presence
         | 
| 35 34 | 
             
                    else
         | 
| 36 35 | 
             
                      File::Stat.new("/proc/#{pid}").mtime.to_i.to_s rescue nil
         | 
| 37 36 | 
             
                    end
         | 
| 38 37 | 
             
                  end
         | 
| 39 38 |  | 
| 40 39 | 
             
                  def self.check_script(pid, mtime)
         | 
| 41 | 
            -
                     | 
| 40 | 
            +
                    format(SCRIPT_TEMPLATE, { pid: pid, mtime: mtime })
         | 
| 42 41 | 
             
                  end
         | 
| 43 42 |  | 
| 44 43 | 
             
                  def self.process_is_still_running?(pid, mtime)
         | 
| 45 | 
            -
                    system( | 
| 44 | 
            +
                    system(check_script(pid, mtime))
         | 
| 46 45 | 
             
                  end
         | 
| 47 46 | 
             
                end
         | 
| 48 47 | 
             
              end
         | 
| 49 | 
            -
            end
         | 
| 48 | 
            +
            end
         |