gitlab-sidekiq-fetcher 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,73 @@
1
+ require 'spec_helper'
2
+ require 'fetch_shared_examples'
3
+ require 'sidekiq/base_reliable_fetch'
4
+ require 'sidekiq/reliable_fetch'
5
+ require 'sidekiq/semi_reliable_fetch'
6
+
7
# Specs for the behaviour shared by the reliable fetchers: the UnitOfWork
# wrapper, bulk requeueing, and the heartbeat started by setup_reliable_fetch!.
describe Sidekiq::BaseReliableFetch do
  # Every example starts from an empty Redis database.
  before { Sidekiq.redis(&:flushdb) }

  describe 'UnitOfWork' do
    let(:fetcher) { Sidekiq::ReliableFetch.new(queues: ['foo']) }

    describe '#requeue' do
      it 'requeues job' do
        Sidekiq.redis { |conn| conn.rpush('queue:foo', 'msg') }

        uow = fetcher.retrieve_work

        uow.requeue

        # The job is back on the regular queue and gone from the working queue.
        expect(Sidekiq::Queue.new('foo').size).to eq 1
        expect(working_queue_size('foo')).to eq 0
      end
    end

    describe '#acknowledge' do
      it 'acknowledges job' do
        Sidekiq.redis { |conn| conn.rpush('queue:foo', 'msg') }

        uow = fetcher.retrieve_work

        expect { uow.acknowledge }
          .to change { working_queue_size('foo') }.by(-1)

        expect(Sidekiq::Queue.new('foo').size).to eq 0
      end
    end
  end

  describe '.bulk_requeue' do
    it 'requeues the bulk' do
      queue1 = Sidekiq::Queue.new('foo')
      queue2 = Sidekiq::Queue.new('bar')

      expect(queue1.size).to eq 0
      expect(queue2.size).to eq 0

      uow = described_class::UnitOfWork
      jobs = [
        uow.new('queue:foo', 'bob'),
        uow.new('queue:foo', 'bar'),
        uow.new('queue:bar', 'widget')
      ]
      described_class.bulk_requeue(jobs, queues: [])

      expect(queue1.size).to eq 2
      expect(queue2.size).to eq 1
    end
  end

  it 'sets heartbeat' do
    config = double(:sidekiq_config, options: {})

    heartbeat_thread = described_class.setup_reliable_fetch!(config)

    begin
      # Give the heartbeat thread time to make a loop. Sleeping before the
      # Redis block avoids holding a pooled connection while idle.
      sleep 0.2

      Sidekiq.redis do |conn|
        heartbeat_key = described_class.heartbeat_key(Socket.gethostname, ::Process.pid)
        heartbeat = conn.get(heartbeat_key)

        expect(heartbeat).not_to be_nil
      end
    ensure
      # Always stop the background thread, even when the expectation above
      # fails, so it cannot leak into the examples that run afterwards.
      heartbeat_thread.kill
    end
  end
end
@@ -0,0 +1,118 @@
1
# Shared examples describing how a fetcher's #retrieve_work is expected to
# behave. The including group supplies the fetcher class via `described_class`.
shared_examples 'a Sidekiq fetcher' do
  let(:queues) { %w[assigned] }

  # Start every example with an empty Redis database.
  before { Sidekiq.redis(&:flushdb) }

  describe '#retrieve_work' do
    let(:fetcher) { described_class.new(queues: %w[assigned]) }

    it 'retrieves the job and puts it to working queue' do
      Sidekiq.redis { |redis| redis.rpush('queue:assigned', 'msg') }

      unit = fetcher.retrieve_work

      expect(working_queue_size('assigned')).to eq 1
      expect(unit.queue_name).to eq 'assigned'
      expect(unit.job).to eq 'msg'
      expect(Sidekiq::Queue.new('assigned').size).to eq 0
    end

    it 'does not retrieve a job from foreign queue' do
      Sidekiq.redis { |redis| redis.rpush('queue:not_assigned', 'msg') }

      expect(fetcher.retrieve_work).to be_nil
    end

    it 'requeues jobs from dead working queue' do
      dead_queue = other_process_working_queue_name('assigned')
      Sidekiq.redis { |redis| redis.rpush(dead_queue, 'msg') }

      unit = fetcher.retrieve_work

      expect(unit.job).to eq 'msg'

      Sidekiq.redis do |redis|
        expect(redis.llen(other_process_working_queue_name('assigned'))).to eq 0
      end
    end

    it 'does not requeue jobs from live working queue' do
      # The helper also registers a heartbeat key for the other process.
      live_queue = live_other_process_working_queue_name('assigned')

      Sidekiq.redis { |redis| redis.rpush(live_queue, 'msg') }

      unit = fetcher.retrieve_work

      expect(unit).to be_nil

      Sidekiq.redis { |redis| expect(redis.llen(live_queue)).to eq 1 }
    end

    it 'does not clean up orphaned jobs more than once per cleanup interval' do
      # A pool of 10 connections so each of the 10 threads below can hold one.
      Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)

      expect_any_instance_of(described_class)
        .to receive(:clean_working_queues!).once

      workers = Array.new(10) do
        Thread.new { described_class.new(queues: ['assigned']).retrieve_work }
      end

      workers.each(&:join)
    end

    it 'retrieves by order when strictly order is enabled' do
      strict_fetcher = described_class.new(strict: true, queues: %w[first second])

      Sidekiq.redis do |redis|
        redis.rpush('queue:first', %w[msg3 msg2 msg1])
        redis.rpush('queue:second', 'msg4')
      end

      fetched = Array.new(4) { strict_fetcher.retrieve_work.job }

      expect(fetched).to eq %w[msg1 msg2 msg3 msg4]
    end

    it 'does not starve any queue when queues are not strictly ordered' do
      weighted_fetcher = described_class.new(queues: %w[first second])

      Sidekiq.redis do |redis|
        redis.rpush('queue:first', 1.upto(200).map { |number| "msg#{number}" })
        redis.rpush('queue:second', 'this_job_should_not_stuck')
      end

      fetched = Array.new(100) { weighted_fetcher.retrieve_work.job }

      expect(fetched).to include 'this_job_should_not_stuck'
    end
  end
end
98
+
99
# Returns the Redis list length of the working queue that
# Sidekiq::BaseReliableFetch.working_queue_name derives for +queue_name+
# (given without the "queue:" prefix).
def working_queue_size(queue_name)
  working_queue = Sidekiq::BaseReliableFetch.working_queue_name("queue:#{queue_name}")

  Sidekiq.redis { |conn| conn.llen(working_queue) }
end
104
+
105
# Builds a working-queue key for +queue+ that belongs to a different process
# on this host (current PID + 1). No heartbeat is written for that PID here.
def other_process_working_queue_name(queue)
  foreign_pid = ::Process.pid + 1

  [
    Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX,
    'queue',
    queue,
    Socket.gethostname,
    foreign_pid
  ].join(':')
end
108
+
109
# Like other_process_working_queue_name, but first stores a heartbeat key for
# the other process (current PID + 1 on this host) in Redis, then returns the
# working-queue key for +queue+ owned by that process.
def live_other_process_working_queue_name(queue)
  hostname = Socket.gethostname
  pid = ::Process.pid + 1
  heartbeat = Sidekiq::BaseReliableFetch.heartbeat_key(hostname, pid)

  Sidekiq.redis { |conn| conn.set(heartbeat, 1) }

  [Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX, 'queue', queue, hostname, pid].join(':')
end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+ require 'fetch_shared_examples'
3
+ require 'sidekiq/reliable_fetch'
4
+
5
# Runs the shared fetcher examples against Sidekiq::ReliableFetch.
RSpec.describe(Sidekiq::ReliableFetch) do
  include_examples 'a Sidekiq fetcher'
end
@@ -0,0 +1,7 @@
1
+ require 'spec_helper'
2
+ require 'fetch_shared_examples'
3
+ require 'sidekiq/semi_reliable_fetch'
4
+
5
# Runs the shared fetcher examples against Sidekiq::SemiReliableFetch.
RSpec.describe(Sidekiq::SemiReliableFetch) do
  include_examples 'a Sidekiq fetcher'
end
@@ -0,0 +1,115 @@
1
+ require 'sidekiq'
2
+ require 'sidekiq/util'
3
+ require 'sidekiq/api'
4
+ require 'pry'
5
+ require 'simplecov'
6
+
7
# Start coverage tracking.
SimpleCov.start

# Redis used by the specs; override with the REDIS_URL environment variable.
REDIS_URL = ENV.fetch('REDIS_URL', 'redis://localhost:6379/10')

Sidekiq.configure_client { |config| config.redis = { url: REDIS_URL } }

# Only log errors.
Sidekiq.logger.level = Logger::ERROR
16
+ # This file was generated by the `rspec --init` command. Conventionally, all
17
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
18
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
19
+ # this file to always be loaded, without a need to explicitly require it in any
20
+ # files.
21
+ #
22
+ # Given that it is always loaded, you are encouraged to keep this file as
23
+ # light-weight as possible. Requiring heavyweight dependencies from this file
24
+ # will add to the boot time of your test suite on EVERY test run, even for an
25
+ # individual file that may not need all of that loaded. Instead, consider making
26
+ # a separate helper file that requires the additional dependencies and performs
27
+ # the additional setup, and require it from the spec files that actually need
28
+ # it.
29
+ #
30
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
31
RSpec.configure do |config|
  # rspec-expectations: include `chain` clauses in the description and
  # failure message of custom matchers, e.g.
  #   be_bigger_than(2).and_smaller_than(4).description
  #   # => "be bigger than 2 and smaller than 4"  (rather than "be bigger than 2")
  # This will default to `true` in RSpec 4.
  config.expect_with(:rspec) do |expectations|
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks: refuse to mock or stub a method that does not exist on a
  # real object. This will default to `true` in RSpec 4.
  config.mock_with(:rspec) do |mocks|
    mocks.verify_partial_doubles = true
  end

  # Shared context metadata is inherited by the metadata hash of host groups
  # and examples instead of triggering implicit auto-inclusion in groups with
  # matching metadata. This will be the only behaviour in RSpec 4.
  config.shared_context_metadata_behavior = :apply_to_host_groups

  # The settings below are the optional suggestions generated by
  # `rspec --init`; they are disabled by the =begin/=end comment block.
=begin
  # This allows you to limit a spec run to individual examples or groups
  # you care about by tagging them with `:focus` metadata. When nothing
  # is tagged with `:focus`, all examples get run. RSpec also provides
  # aliases for `it`, `describe`, and `context` that include `:focus`
  # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
  config.filter_run_when_matching :focus

  # Allows RSpec to persist some state between runs in order to support
  # the `--only-failures` and `--next-failure` CLI options. We recommend
  # you configure your source control system to ignore this file.
  config.example_status_persistence_file_path = "spec/examples.txt"

  # Limits the available syntax to the non-monkey patched syntax that is
  # recommended. For more details, see:
  #   - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
  #   - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Many RSpec users commonly either run the entire suite or an individual
  # file, and it's useful to allow more verbose output when running an
  # individual spec file.
  if config.files_to_run.one?
    # Use the documentation formatter for detailed output,
    # unless a formatter has already been configured
    # (e.g. via a command-line flag).
    config.default_formatter = "doc"
  end

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = :random

  # Seed global randomization in this process using the `--seed` CLI option.
  # Setting this allows you to use `--seed` to deterministically reproduce
  # test failures related to randomization by passing the same `--seed` value
  # as the one that triggered the failure.
  Kernel.srand config.seed
=end
end
@@ -0,0 +1,34 @@
1
+ # How to run
2
+
3
+ ```
4
+ cd test
5
+ bundle exec ruby reliability_test.rb
6
+ ```
7
+
8
+ You can adjust some parameters of the test in the `config.rb`
9
+
10
+
11
+ # How it works
12
+
13
+ This tool spawns the configured number of Sidekiq workers and, when the number of processed jobs reaches about half of the original
14
+ number, it kills half of the workers with `kill -9` and stops the other half with `TERM`; it then spawns new workers again until all the jobs are processed. To track the progress and counters we use Redis keys/counters.
15
+
16
+ # How to run tests
17
+
18
+ To run rspec:
19
+
20
+ ```
21
+ bundle exec rspec
22
+ ```
23
+
24
+ To run performance tests:
25
+
26
+ ```
27
+ cd test
28
+ JOB_FETCHER=semi bundle exec ruby reliability_test.rb
29
+ ```
30
+
31
+ JOB_FETCHER can be set to one of these values: `semi`, `reliable`, `basic`
32
+
33
+ To run both kinds of tests you need to have a Redis server running on the default port `6379`. To use another port, you can define
34
+ the `REDIS_URL` environment variable with the port you need (example: `REDIS_URL="redis://localhost:9999"`).
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../lib/sidekiq/base_reliable_fetch'
4
+ require_relative '../lib/sidekiq/reliable_fetch'
5
+ require_relative '../lib/sidekiq/semi_reliable_fetch'
6
+ require_relative 'worker'
7
+
8
# Redis list name shared with the reliability test script, which removes
# queued job ids from it and counts what is left over as duplicates.
REDIS_FINISHED_LIST = 'reliable-fetcher-finished-jids'

# Tunables, overridable through environment variables. ENV values are always
# Strings, so they are coerced with Integer(): callers use `.times` and `/` on
# them. A non-numeric value raises ArgumentError instead of failing later.
NUMBER_OF_WORKERS = Integer(ENV.fetch('NUMBER_OF_WORKERS', 10))
NUMBER_OF_JOBS = Integer(ENV.fetch('NUMBER_OF_JOBS', 1000))
JOB_FETCHER = ENV.fetch('JOB_FETCHER', 'semi').to_sym # :basic, :semi, :reliable
TEST_CLEANUP_INTERVAL = 20
TEST_LEASE_INTERVAL = 5
15
# How long the reliability test sleeps before stopping the second batch of
# workers: the sum of the cleanup interval, lease interval and heartbeat lifespan.
WAIT_CLEANUP = TEST_CLEANUP_INTERVAL + TEST_LEASE_INTERVAL + Sidekiq::ReliableFetch::HEARTBEAT_LIFESPAN

Sidekiq.configure_server do |config|
  # Nothing to set up for any other fetcher (e.g. :basic).
  next unless %i[semi reliable].include?(JOB_FETCHER)

  config.options[:semi_reliable_fetch] = JOB_FETCHER == :semi

  # We need to override these parameters to not wait too long
  # The default values are good for production use only
  config.options[:cleanup_interval] = TEST_CLEANUP_INTERVAL
  config.options[:lease_interval] = TEST_LEASE_INTERVAL

  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
end
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'sidekiq'
4
+ require 'sidekiq/util'
5
+ require 'sidekiq/cli'
6
+ require_relative 'config'
7
+
8
# Starts the workers, waits until fewer than half of the jobs remain in the
# queue, then sends KILL to the first half of the workers and TERM to the rest.
def spawn_workers_and_stop_them_on_a_half_way
  worker_pids = spawn_workers

  wait_until { |queue_size| queue_size < NUMBER_OF_JOBS / 2 }

  to_kill, to_stop = split_array(worker_pids)

  puts 'Killing half of the workers...'
  signal_to_workers('KILL', to_kill)

  puts 'Stopping another half of the workers...'
  signal_to_workers('TERM', to_stop)
end
23
+
24
# Starts a fresh set of workers, waits for the queue to drain, then sends
# TERM to all of them. For the :semi and :reliable fetchers it first sleeps
# WAIT_CLEANUP seconds.
def spawn_workers_and_let_them_finish
  puts 'Spawn workers and let them finish...'

  worker_pids = spawn_workers

  wait_until(&:zero?)

  uses_reliable_fetcher = %i[semi reliable].include?(JOB_FETCHER)
  if uses_reliable_fetcher
    puts 'Waiting for clean up process that will requeue dead jobs...'
    sleep WAIT_CLEANUP
  end

  signal_to_workers('TERM', worker_pids)
end
40
+
41
# Polls the queue size until the given block returns a truthy value.
#
# On every iteration it sleeps for +interval+ seconds, reads the current
# queue size, prints it, and yields it to the block. The sleep happens before
# the first check as well.
#
# @param interval [Numeric] seconds to sleep between polls (defaults to 3)
# @yieldparam queue_size [Integer] the value returned by current_queue_size
# @yieldreturn [Boolean] truthy to stop waiting
# @return [nil]
def wait_until(interval: 3)
  loop do
    sleep interval

    queue_size = current_queue_size
    puts "Jobs in the queue:#{queue_size}"

    break if yield(queue_size)
  end
end
51
+
52
# Sends +signal+ to every process in +pids+, then waits for each of them to
# exit. All signals are delivered before the first wait. Returns +pids+.
def signal_to_workers(signal, pids)
  pids.each do |worker_pid|
    Process.kill(signal, worker_pid)
  end

  pids.each do |worker_pid|
    Process.wait(worker_pid)
  end
end
56
+
57
# Launches NUMBER_OF_WORKERS Sidekiq processes loading ./config.rb and returns
# their PIDs.
def spawn_workers
  NUMBER_OF_WORKERS.times.map { spawn('sidekiq -r ./config.rb') }
end
65
+
66
# Number of entries currently in the Redis list 'queue:default'.
def current_queue_size
  Sidekiq.redis do |conn|
    conn.llen('queue:default')
  end
end
69
+
70
# Length of the REDIS_FINISHED_LIST Redis list. The script calls this after
# removing one entry per queued job, so what is left is reported as duplicates.
def duplicates
  Sidekiq.redis do |conn|
    conn.llen(REDIS_FINISHED_LIST)
  end
end
73
+
74
# Splits an array into two halves by position.
#
# The first half holds the first `arr.size / 2` elements; the second half
# holds the rest, so it gets the extra element when the size is odd.
# `drop` is used rather than `arr - first_half` because Array#- is a set
# difference and would also remove later elements equal to ones in the first half.
#
# @param arr [Array]
# @return [Array(Array, Array)] the first and second halves
def split_array(arr)
  middle = arr.size / 2

  [arr.take(middle), arr.drop(middle)]
end
80
+
81
+ ##########################################################
82
+
83
# Script body: queue the jobs, run the two worker phases, then check that
# every queued job id was recorded exactly once.
separator = '########################################'
puts separator, "Mode: #{JOB_FETCHER}", separator

Sidekiq.redis(&:flushdb)

jobs = NUMBER_OF_JOBS.times.map { TestWorker.perform_async }

puts "Queued #{NUMBER_OF_JOBS} jobs"

spawn_workers_and_stop_them_on_a_half_way
spawn_workers_and_let_them_finish

# Remove one finished entry per queued job; a job with no entry to remove
# counts as lost.
jobs_lost = Sidekiq.redis do |redis|
  jobs.count { |jid| redis.lrem(REDIS_FINISHED_LIST, 1, jid) != 1 }
end

puts "Remaining unprocessed: #{jobs_lost}"
puts "Duplicates found: #{duplicates}"

# Exit 0 only when nothing was lost and nothing was processed twice.
exit(jobs_lost.zero? && duplicates.zero? ? 0 : 1)