gitlab-sidekiq-fetcher 0.7.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: efdc9461358b538f2b0692cc179639b802a6f0bf4959b73e1d0821a4c40f0713
4
- data.tar.gz: ccbe252394f22e6fafb198ddba9481e23776e7cfa2848807e1e5c34a5269c76d
3
+ metadata.gz: 7f264e7d628cba58e996b5e2428e81a6e2d6a4921738bd5ade6fd9c3637fa8ba
4
+ data.tar.gz: 588f7a6e9a5d24d229bc6dbc7d0a6ca4cb9ec96a517bd9015659ed5b3c67eb8e
5
5
  SHA512:
6
- metadata.gz: 010da1750797c367b01cd8e1d8df281fa70759e9035f0ebaea71f53349f3ebbe54bb04dea74694d50bb595938b049733119ffd5ee4c85d5fd42d719e71a07809
7
- data.tar.gz: d9f89a5dc1e6d6117df1f3704159ea79160a038d8a7a47939d1c772f07c4f3be2426389cd63d14a0ecf469035cc583ba3ea943cf5a14c2232ee3cb44bf09b21c
6
+ metadata.gz: 955716fcccc6be01e941e1e366268c78d8179577d007e324bb68a56c7741ee6045e3fc7e66dab1b58d794f5c22ffc2c2af400c95dbe61ad697e8e5685a753eae
7
+ data.tar.gz: df0fd2c4c72a8401e2f0ac7f8eb3e7f5f47d1fa75da4904e0a0b67c4d80a57a47334e4bdf72728e830f756bbbb6b004412067ad36e8a66a9605471ddc010898f
data/.gitlab-ci.yml CHANGED
@@ -1,4 +1,8 @@
1
- image: "ruby:2.5"
1
+ variables:
2
+ RUBY_VERSION: "2.7"
3
+
4
+ default:
5
+ image: ruby:${RUBY_VERSION}
2
6
 
3
7
  before_script:
4
8
  - ruby -v
@@ -21,6 +25,10 @@ rspec:
21
25
  when: always
22
26
  paths:
23
27
  - coverage/
28
+ parallel:
29
+ matrix:
30
+ - RUBY_VERSION: "2.7"
31
+ - RUBY_VERSION: "3.0"
24
32
 
25
33
  .integration:
26
34
  stage: test
@@ -29,6 +37,10 @@ rspec:
29
37
  - bundle exec ruby reliability_test.rb
30
38
  services:
31
39
  - redis:alpine
40
+ parallel:
41
+ matrix:
42
+ - RUBY_VERSION: "2.7"
43
+ - RUBY_VERSION: "3.0"
32
44
 
33
45
  integration_semi:
34
46
  extends: .integration
@@ -53,6 +65,10 @@ kill_interruption:
53
65
  - bundle exec ruby test_kill_signal.rb
54
66
  services:
55
67
  - redis:alpine
68
+ parallel:
69
+ matrix:
70
+ - RUBY_VERSION: "2.7"
71
+ - RUBY_VERSION: "3.0"
56
72
 
57
73
  term_interruption:
58
74
  stage: test
@@ -61,6 +77,10 @@ term_interruption:
61
77
  - bundle exec ruby test_term_signal.rb
62
78
  services:
63
79
  - redis:alpine
80
+ parallel:
81
+ matrix:
82
+ - RUBY_VERSION: "2.7"
83
+ - RUBY_VERSION: "3.0"
64
84
 
65
85
  # rubocop:
66
86
  # script:
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,41 @@
1
+ ## Developer Certificate of Origin and License
2
+
3
+ By contributing to GitLab B.V., you accept and agree to the following terms and
4
+ conditions for your present and future contributions submitted to GitLab B.V.
5
+ Except for the license granted herein to GitLab B.V. and recipients of software
6
+ distributed by GitLab B.V., you reserve all right, title, and interest in and to
7
+ your Contributions.
8
+
9
+ All contributions are subject to the Developer Certificate of Origin and license set out at [docs.gitlab.com/ce/legal/developer_certificate_of_origin](https://docs.gitlab.com/ce/legal/developer_certificate_of_origin).
10
+
11
+ _This notice should stay as the first item in the CONTRIBUTING.md file._
12
+
13
+ ## Code of conduct
14
+
15
+ As contributors and maintainers of this project, we pledge to respect all people
16
+ who contribute through reporting issues, posting feature requests, updating
17
+ documentation, submitting pull requests or patches, and other activities.
18
+
19
+ We are committed to making participation in this project a harassment-free
20
+ experience for everyone, regardless of level of experience, gender, gender
21
+ identity and expression, sexual orientation, disability, personal appearance,
22
+ body size, race, ethnicity, age, or religion.
23
+
24
+ Examples of unacceptable behavior by participants include the use of sexual
25
+ language or imagery, derogatory comments or personal attacks, trolling, public
26
+ or private harassment, insults, or other unprofessional conduct.
27
+
28
+ Project maintainers have the right and responsibility to remove, edit, or reject
29
+ comments, commits, code, wiki edits, issues, and other contributions that are
30
+ not aligned to this Code of Conduct. Project maintainers who do not follow the
31
+ Code of Conduct may be removed from the project team.
32
+
33
+ This code of conduct applies both within project spaces and in public spaces
34
+ when an individual is representing the project or its community.
35
+
36
+ Instances of abusive, harassing, or otherwise unacceptable behavior can be
37
+ reported by emailing contact@gitlab.com.
38
+
39
+ This Code of Conduct is adapted from the [Contributor Covenant](https://contributor-covenant.org), version 1.1.0,
40
+ available at [https://contributor-covenant.org/version/1/1/0/](https://contributor-covenant.org/version/1/1/0/).
41
+
data/Gemfile CHANGED
@@ -4,9 +4,11 @@ source "https://rubygems.org"
4
4
 
5
5
  git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
+ gemspec
8
+
7
9
  group :test do
8
10
  gem "rspec", '~> 3'
9
11
  gem "pry"
10
- gem "sidekiq", '~> 6.1'
11
12
  gem 'simplecov', require: false
13
+ gem 'stub_env', '~> 1.0'
12
14
  end
data/Gemfile.lock CHANGED
@@ -1,17 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ gitlab-sidekiq-fetcher (0.8.0)
5
+ json (>= 2.5)
6
+ sidekiq (~> 6.1)
7
+
1
8
  GEM
2
9
  remote: https://rubygems.org/
3
10
  specs:
4
11
  coderay (1.1.2)
5
- connection_pool (2.2.3)
12
+ connection_pool (2.3.0)
6
13
  diff-lcs (1.3)
7
14
  docile (1.3.1)
8
- json (2.1.0)
15
+ json (2.5.1)
9
16
  method_source (0.9.0)
10
17
  pry (0.11.3)
11
18
  coderay (~> 1.1.0)
12
19
  method_source (~> 0.9.0)
13
- rack (2.2.3)
14
- redis (4.2.1)
20
+ rack (2.2.4)
21
+ redis (4.8.0)
15
22
  rspec (3.8.0)
16
23
  rspec-core (~> 3.8.0)
17
24
  rspec-expectations (~> 3.8.0)
@@ -25,24 +32,27 @@ GEM
25
32
  diff-lcs (>= 1.2.0, < 2.0)
26
33
  rspec-support (~> 3.8.0)
27
34
  rspec-support (3.8.0)
28
- sidekiq (6.1.0)
29
- connection_pool (>= 2.2.2)
35
+ sidekiq (6.5.7)
36
+ connection_pool (>= 2.2.5)
30
37
  rack (~> 2.0)
31
- redis (>= 4.2.0)
38
+ redis (>= 4.5.0, < 5)
32
39
  simplecov (0.16.1)
33
40
  docile (~> 1.1)
34
41
  json (>= 1.8, < 3)
35
42
  simplecov-html (~> 0.10.0)
36
43
  simplecov-html (0.10.2)
44
+ stub_env (1.0.4)
45
+ rspec (>= 2.0, < 4.0)
37
46
 
38
47
  PLATFORMS
39
48
  ruby
40
49
 
41
50
  DEPENDENCIES
51
+ gitlab-sidekiq-fetcher!
42
52
  pry
43
53
  rspec (~> 3)
44
- sidekiq (~> 6.1)
45
54
  simplecov
55
+ stub_env (~> 1.0)
46
56
 
47
57
  BUNDLED WITH
48
- 1.17.2
58
+ 2.3.24
data/README.md CHANGED
@@ -8,6 +8,8 @@ It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
8
8
 
9
9
  **IMPORTANT NOTE:** Since version `0.7.0` this gem works only with `sidekiq >= 6.1` (which introduced Fetch API breaking changes). Please use version `~> 0.5` if you use an older version of `sidekiq`.
10
10
 
11
+ **UPGRADE NOTE:** If upgrading from 0.7.0, strongly consider fully deploying 0.7.1 as an intermediate step before moving to 0.8.0; 0.7.1 fixes a bug in the queue name validation that is triggered when sidekiq nodes running 0.7.0 see working queues named by 0.8.0. See https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/-/merge_requests/22
12
+
11
13
  There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
12
14
  semi-reliable fetch that uses regular `brpop` and `lpush` to pick the job and put it to working queue. The main benefit of "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
13
15
  However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
@@ -44,11 +46,11 @@ Sidekiq.configure_server do |config|
44
46
  end
45
47
  ```
46
48
 
47
- There is an additional parameter `config.options[:semi_reliable_fetch]` you can use to switch between two strategies:
49
+ There is an additional parameter `config[:semi_reliable_fetch]` you can use to switch between two strategies:
48
50
 
49
51
  ```ruby
50
52
  Sidekiq.configure_server do |config|
51
- config.options[:semi_reliable_fetch] = true # Default value is false
53
+ config[:semi_reliable_fetch] = true # Default value is false
52
54
 
53
55
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
54
56
  end
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'gitlab-sidekiq-fetcher'
3
- s.version = '0.7.1'
3
+ s.version = '0.9.0'
4
4
  s.authors = ['TEA', 'GitLab']
5
5
  s.email = 'valery@gitlab.com'
6
6
  s.license = 'LGPL-3.0'
@@ -11,4 +11,5 @@ Gem::Specification.new do |s|
11
11
  s.files = `git ls-files`.split($\)
12
12
  s.test_files = []
13
13
  s.add_dependency 'sidekiq', '~> 6.1'
14
+ s.add_runtime_dependency 'json', '>= 2.5'
14
15
  end
@@ -21,6 +21,10 @@ module Sidekiq
21
21
  # How many times a job can be interrupted
22
22
  DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION = 3
23
23
 
24
+ # Regexes for matching working queue keys
25
+ WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
26
+ LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze
27
+
24
28
  UnitOfWork = Struct.new(:queue, :job) do
25
29
  def acknowledge
26
30
  Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
@@ -41,13 +45,15 @@ module Sidekiq
41
45
  end
42
46
 
43
47
  def self.setup_reliable_fetch!(config)
44
- fetch_strategy = if config.options[:semi_reliable_fetch]
48
+ config = config.options unless config.respond_to?(:[])
49
+
50
+ fetch_strategy = if config[:semi_reliable_fetch]
45
51
  Sidekiq::SemiReliableFetch
46
52
  else
47
53
  Sidekiq::ReliableFetch
48
54
  end
49
55
 
50
- config.options[:fetch] = fetch_strategy.new(config.options)
56
+ config[:fetch] = fetch_strategy.new(config)
51
57
 
52
58
  Sidekiq.logger.info('GitLab reliable fetch activated!')
53
59
 
@@ -70,32 +76,36 @@ module Sidekiq
70
76
  end
71
77
  end
72
78
 
73
- def self.pid
74
- @pid ||= ::Process.pid
79
+ def self.hostname
80
+ Socket.gethostname
81
+ end
82
+
83
+ def self.process_nonce
84
+ @@process_nonce ||= SecureRandom.hex(6)
75
85
  end
76
86
 
77
- def self.hostname
78
- @hostname ||= Socket.gethostname
87
+ def self.identity
88
+ @@identity ||= "#{hostname}:#{$$}:#{process_nonce}"
79
89
  end
80
90
 
81
91
  def self.heartbeat
82
92
  Sidekiq.redis do |conn|
83
- conn.set(heartbeat_key(hostname, pid), 1, ex: HEARTBEAT_LIFESPAN)
93
+ conn.set(heartbeat_key(identity), 1, ex: HEARTBEAT_LIFESPAN)
84
94
  end
85
95
 
86
- Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
96
+ Sidekiq.logger.debug("Heartbeat for #{identity}")
87
97
  end
88
98
 
89
- def self.worker_dead?(hostname, pid, conn)
90
- !conn.get(heartbeat_key(hostname, pid))
99
+ def self.worker_dead?(identity, conn)
100
+ !conn.get(heartbeat_key(identity))
91
101
  end
92
102
 
93
- def self.heartbeat_key(hostname, pid)
94
- "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
103
+ def self.heartbeat_key(identity)
104
+ "reliable-fetcher-heartbeat-#{identity.gsub(':', '-')}"
95
105
  end
96
106
 
97
107
  def self.working_queue_name(queue)
98
- "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
108
+ "#{WORKING_QUEUE_PREFIX}:#{queue}:#{identity}"
99
109
  end
100
110
 
101
111
  attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
@@ -105,6 +115,8 @@ module Sidekiq
105
115
  def initialize(options)
106
116
  raise ArgumentError, 'missing queue list' unless options[:queues]
107
117
 
118
+ @config = options
119
+ @interrupted_set = Sidekiq::InterruptedSet.new
108
120
  @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
109
121
  @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
110
122
  @last_try_to_take_lease_at = 0
@@ -166,6 +178,19 @@ module Sidekiq
166
178
  )
167
179
  end
168
180
 
181
+ def extract_queue_and_identity(key)
182
+ # New identity format is "{hostname}:{pid}:{randomhex}"
183
+ # Old identity format is "{hostname}:{pid}"
184
+ # Queue names may also have colons (namespaced).
185
+ # Expressing this in a single regex is unreadable
186
+
187
+ # Test the newer expected format first, only checking the older if necessary
188
+ original_queue, identity = key.scan(WORKING_QUEUE_REGEX).flatten
189
+ return original_queue, identity unless original_queue.nil? || identity.nil?
190
+
191
+ key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten
192
+ end
193
+
169
194
  # Detect "old" jobs and requeue them because the worker they were assigned
170
195
  # to probably failed miserably.
171
196
  def clean_working_queues!
@@ -173,19 +198,16 @@ module Sidekiq
173
198
 
174
199
  Sidekiq.redis do |conn|
175
200
  conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
176
- # Example: "working:name_of_the_job:queue:{hostname}:{PID}"
177
- hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten
201
+ original_queue, identity = extract_queue_and_identity(key)
178
202
 
179
- next if hostname.nil? || pid.nil?
203
+ next if original_queue.nil? || identity.nil?
180
204
 
181
- clean_working_queue!(key) if self.class.worker_dead?(hostname, pid, conn)
205
+ clean_working_queue!(original_queue, key) if self.class.worker_dead?(identity, conn)
182
206
  end
183
207
  end
184
208
  end
185
209
 
186
- def clean_working_queue!(working_queue)
187
- original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
188
-
210
+ def clean_working_queue!(original_queue, working_queue)
189
211
  Sidekiq.redis do |conn|
190
212
  while job = conn.rpop(working_queue)
191
213
  preprocess_interrupted_job(job, original_queue)
@@ -207,7 +229,7 @@ module Sidekiq
207
229
  rescue NameError
208
230
  end
209
231
 
210
- max_retries_after_interruption ||= Sidekiq.options[:max_retries_after_interruption]
232
+ max_retries_after_interruption ||= @config[:max_retries_after_interruption]
211
233
  max_retries_after_interruption ||= DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION
212
234
  max_retries_after_interruption
213
235
  end
@@ -220,7 +242,7 @@ module Sidekiq
220
242
  )
221
243
 
222
244
  job = Sidekiq.dump_json(msg)
223
- Sidekiq::InterruptedSet.new.put(job, connection: multi_connection)
245
+ @interrupted_set.put(job, connection: multi_connection)
224
246
  end
225
247
 
226
248
  # Yield block with an existing connection or creates another one
@@ -37,11 +37,15 @@ module Sidekiq
37
37
  end
38
38
 
39
39
  def self.max_jobs
40
- Sidekiq.options[:interrupted_max_jobs] || DEFAULT_MAX_CAPACITY
40
+ options[:interrupted_max_jobs] || DEFAULT_MAX_CAPACITY
41
41
  end
42
42
 
43
43
  def self.timeout
44
- Sidekiq.options[:interrupted_timeout_in_seconds] || DEFAULT_MAX_TIMEOUT
44
+ options[:interrupted_timeout_in_seconds] || DEFAULT_MAX_TIMEOUT
45
+ end
46
+
47
+ def self.options
48
+ Sidekiq.respond_to?(:[]) ? Sidekiq : Sidekiq.options
45
49
  end
46
50
  end
47
51
  end
@@ -5,14 +5,14 @@ module Sidekiq
5
5
  # We want the fetch operation to timeout every few seconds so the thread
6
6
  # can check if the process is shutting down. This constant is only used
7
7
  # for semi-reliable fetch.
8
- SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds
8
+ DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds
9
9
 
10
10
  def initialize(options)
11
11
  super
12
12
 
13
13
  if strictly_ordered_queues
14
14
  @queues = @queues.uniq
15
- @queues << SEMI_RELIABLE_FETCH_TIMEOUT
15
+ @queues << { timeout: semi_reliable_fetch_timeout }
16
16
  end
17
17
  end
18
18
 
@@ -36,9 +36,13 @@ module Sidekiq
36
36
  @queues
37
37
  else
38
38
  queues = @queues.shuffle.uniq
39
- queues << SEMI_RELIABLE_FETCH_TIMEOUT
39
+ queues << { timeout: semi_reliable_fetch_timeout }
40
40
  queues
41
41
  end
42
42
  end
43
+
44
+ def semi_reliable_fetch_timeout
45
+ @semi_reliable_fetch_timeout ||= ENV['SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT']&.to_i || DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT
46
+ end
43
47
  end
44
48
  end
@@ -65,7 +65,7 @@ describe Sidekiq::BaseReliableFetch do
65
65
  end
66
66
 
67
67
  it 'does not put jobs into interrupted queue if it is disabled' do
68
- Sidekiq.options[:max_retries_after_interruption] = -1
68
+ options[:max_retries_after_interruption] = -1
69
69
 
70
70
  uow = described_class::UnitOfWork
71
71
  interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3)
@@ -75,8 +75,6 @@ describe Sidekiq::BaseReliableFetch do
75
75
  expect(queue1.size).to eq 2
76
76
  expect(queue2.size).to eq 1
77
77
  expect(Sidekiq::InterruptedSet.new.size).to eq 0
78
-
79
- Sidekiq.options[:max_retries_after_interruption] = 3
80
78
  end
81
79
  end
82
80
 
@@ -88,7 +86,7 @@ describe Sidekiq::BaseReliableFetch do
88
86
  Sidekiq.redis do |conn|
89
87
  sleep 0.2 # Give the time to heartbeat thread to make a loop
90
88
 
91
- heartbeat_key = described_class.heartbeat_key(Socket.gethostname, ::Process.pid)
89
+ heartbeat_key = described_class.heartbeat_key(described_class.identity)
92
90
  heartbeat = conn.get(heartbeat_key)
93
91
 
94
92
  expect(heartbeat).not_to be_nil
@@ -7,111 +7,163 @@ shared_examples 'a Sidekiq fetcher' do
7
7
  let(:job) { Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo']) }
8
8
  let(:fetcher) { described_class.new(queues: queues) }
9
9
 
10
- it 'retrieves the job and puts it to working queue' do
11
- Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
12
-
13
- uow = fetcher.retrieve_work
10
+ it 'does not clean up orphaned jobs more than once per cleanup interval' do
11
+ Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
14
12
 
15
- expect(working_queue_size('assigned')).to eq 1
16
- expect(uow.queue_name).to eq 'assigned'
17
- expect(uow.job).to eq job
18
- expect(Sidekiq::Queue.new('assigned').size).to eq 0
19
- end
13
+ expect(fetcher).to receive(:clean_working_queues!).once
20
14
 
21
- it 'does not retrieve a job from foreign queue' do
22
- Sidekiq.redis { |conn| conn.rpush('queue:not_assigned', job) }
15
+ threads = 10.times.map do
16
+ Thread.new do
17
+ fetcher.retrieve_work
18
+ end
19
+ end
23
20
 
24
- expect(fetcher.retrieve_work).to be_nil
21
+ threads.map(&:join)
25
22
  end
26
23
 
27
- it 'requeues jobs from dead working queue with incremented interrupted_count' do
24
+ it 'retrieves by order when strictly order is enabled' do
25
+ fetcher = described_class.new(strict: true, queues: ['first', 'second'])
26
+
28
27
  Sidekiq.redis do |conn|
29
- conn.rpush(other_process_working_queue_name('assigned'), job)
28
+ conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
29
+ conn.rpush('queue:second', 'msg4')
30
30
  end
31
31
 
32
- expected_job = Sidekiq.load_json(job)
33
- expected_job['interrupted_count'] = 1
34
- expected_job = Sidekiq.dump_json(expected_job)
32
+ jobs = (1..4).map { fetcher.retrieve_work.job }
35
33
 
36
- uow = fetcher.retrieve_work
34
+ expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
35
+ end
37
36
 
38
- expect(uow.job).to eq expected_job
37
+ it 'does not starve any queue when queues are not strictly ordered' do
38
+ fetcher = described_class.new(queues: ['first', 'second'])
39
39
 
40
40
  Sidekiq.redis do |conn|
41
- expect(conn.llen(other_process_working_queue_name('assigned'))).to eq 0
41
+ conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
42
+ conn.rpush('queue:second', 'this_job_should_not_stuck')
42
43
  end
44
+
45
+ jobs = (1..100).map { fetcher.retrieve_work.job }
46
+
47
+ expect(jobs).to include 'this_job_should_not_stuck'
43
48
  end
44
49
 
45
- it 'ignores working queue keys in unknown formats' do
46
- # Add a spurious non-numeric char segment at the end; this simulates any other
47
- # incorrect form in general
48
- malformed_key = "#{other_process_working_queue_name('assigned')}:X"
49
- Sidekiq.redis do |conn|
50
- conn.rpush(malformed_key, job)
51
- end
50
+ shared_examples "basic queue handling" do |queue|
51
+ let (:fetcher) { described_class.new(queues: [queue]) }
52
52
 
53
- uow = fetcher.retrieve_work
53
+ it 'retrieves the job and puts it to working queue' do
54
+ Sidekiq.redis { |conn| conn.rpush("queue:#{queue}", job) }
54
55
 
55
- Sidekiq.redis do |conn|
56
- expect(conn.llen(malformed_key)).to eq 1
56
+ uow = fetcher.retrieve_work
57
+
58
+ expect(working_queue_size(queue)).to eq 1
59
+ expect(uow.queue_name).to eq queue
60
+ expect(uow.job).to eq job
61
+ expect(Sidekiq::Queue.new(queue).size).to eq 0
57
62
  end
58
- end
59
63
 
64
+ it 'does not retrieve a job from foreign queue' do
65
+ Sidekiq.redis { |conn| conn.rpush("'queue:#{queue}:not", job) }
66
+ expect(fetcher.retrieve_work).to be_nil
60
67
 
61
- it 'does not requeue jobs from live working queue' do
62
- working_queue = live_other_process_working_queue_name('assigned')
68
+ Sidekiq.redis { |conn| conn.rpush("'queue:not_#{queue}", job) }
69
+ expect(fetcher.retrieve_work).to be_nil
63
70
 
64
- Sidekiq.redis do |conn|
65
- conn.rpush(working_queue, job)
71
+ Sidekiq.redis { |conn| conn.rpush("'queue:random_name", job) }
72
+ expect(fetcher.retrieve_work).to be_nil
66
73
  end
67
74
 
68
- uow = fetcher.retrieve_work
75
+ it 'requeues jobs from legacy dead working queue with incremented interrupted_count' do
76
+ Sidekiq.redis do |conn|
77
+ conn.rpush(legacy_other_process_working_queue_name(queue), job)
78
+ end
69
79
 
70
- expect(uow).to be_nil
80
+ expected_job = Sidekiq.load_json(job)
81
+ expected_job['interrupted_count'] = 1
82
+ expected_job = Sidekiq.dump_json(expected_job)
71
83
 
72
- Sidekiq.redis do |conn|
73
- expect(conn.llen(working_queue)).to eq 1
84
+ uow = fetcher.retrieve_work
85
+
86
+ expect(uow).to_not be_nil
87
+ expect(uow.job).to eq expected_job
88
+
89
+ Sidekiq.redis do |conn|
90
+ expect(conn.llen(legacy_other_process_working_queue_name(queue))).to eq 0
91
+ end
74
92
  end
75
- end
76
93
 
77
- it 'does not clean up orphaned jobs more than once per cleanup interval' do
78
- Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
94
+ it 'ignores working queue keys in unknown formats' do
95
+ # Add a spurious non-numeric char segment at the end; this simulates any other
96
+ # incorrect form in general
97
+ malformed_key = "#{other_process_working_queue_name(queue)}:X"
98
+ Sidekiq.redis do |conn|
99
+ conn.rpush(malformed_key, job)
100
+ end
79
101
 
80
- expect(fetcher).to receive(:clean_working_queues!).once
102
+ uow = fetcher.retrieve_work
81
103
 
82
- threads = 10.times.map do
83
- Thread.new do
84
- fetcher.retrieve_work
104
+ Sidekiq.redis do |conn|
105
+ expect(conn.llen(malformed_key)).to eq 1
85
106
  end
86
107
  end
87
108
 
88
- threads.map(&:join)
89
- end
109
+ it 'requeues jobs from dead working queue with incremented interrupted_count' do
110
+ Sidekiq.redis do |conn|
111
+ conn.rpush(other_process_working_queue_name(queue), job)
112
+ end
90
113
 
91
- it 'retrieves by order when strictly order is enabled' do
92
- fetcher = described_class.new(strict: true, queues: ['first', 'second'])
114
+ expected_job = Sidekiq.load_json(job)
115
+ expected_job['interrupted_count'] = 1
116
+ expected_job = Sidekiq.dump_json(expected_job)
93
117
 
94
- Sidekiq.redis do |conn|
95
- conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
96
- conn.rpush('queue:second', 'msg4')
118
+ uow = fetcher.retrieve_work
119
+
120
+ expect(uow).to_not be_nil
121
+ expect(uow.job).to eq expected_job
122
+
123
+ Sidekiq.redis do |conn|
124
+ expect(conn.llen(other_process_working_queue_name(queue))).to eq 0
125
+ end
97
126
  end
98
127
 
99
- jobs = (1..4).map { fetcher.retrieve_work.job }
128
+ it 'does not requeue jobs from live working queue' do
129
+ working_queue = live_other_process_working_queue_name(queue)
100
130
 
101
- expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
102
- end
131
+ Sidekiq.redis do |conn|
132
+ conn.rpush(working_queue, job)
133
+ end
103
134
 
104
- it 'does not starve any queue when queues are not strictly ordered' do
105
- fetcher = described_class.new(queues: ['first', 'second'])
135
+ uow = fetcher.retrieve_work
106
136
 
107
- Sidekiq.redis do |conn|
108
- conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
109
- conn.rpush('queue:second', 'this_job_should_not_stuck')
137
+ expect(uow).to be_nil
138
+
139
+ Sidekiq.redis do |conn|
140
+ expect(conn.llen(working_queue)).to eq 1
141
+ end
110
142
  end
143
+ end
111
144
 
112
- jobs = (1..100).map { fetcher.retrieve_work.job }
145
+ context 'with various queues' do
146
+ %w[assigned namespace:assigned namespace:deeper:assigned].each do |queue|
147
+ it_behaves_like "basic queue handling", queue
148
+ end
149
+ end
113
150
 
114
- expect(jobs).to include 'this_job_should_not_stuck'
151
+ context 'with short cleanup interval' do
152
+ let(:short_interval) { 1 }
153
+ let(:fetcher) { described_class.new(queues: queues, lease_interval: short_interval, cleanup_interval: short_interval) }
154
+
155
+ it 'requeues when there is no heartbeat' do
156
+ Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
157
+ # Use of retrieve_work twice with a sleep ensures we have exercised the
158
+ # `identity` method to create the working queue key name and that it
159
+ # matches the patterns used in the cleanup
160
+ uow = fetcher.retrieve_work
161
+ sleep(short_interval + 1)
162
+ uow = fetcher.retrieve_work
163
+
164
+ # Will only receive a UnitOfWork if the job was detected as failed and requeued
165
+ expect(uow).to_not be_nil
166
+ end
115
167
  end
116
168
  end
117
169
  end
@@ -122,17 +174,22 @@ def working_queue_size(queue_name)
122
174
  end
123
175
  end
124
176
 
125
- def other_process_working_queue_name(queue)
177
+ def legacy_other_process_working_queue_name(queue)
126
178
  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}"
127
179
  end
128
180
 
181
+ def other_process_working_queue_name(queue)
182
+ "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}:#{::SecureRandom.hex(6)}"
183
+ end
184
+
129
185
  def live_other_process_working_queue_name(queue)
130
186
  pid = ::Process.pid + 1
131
187
  hostname = Socket.gethostname
188
+ nonce = SecureRandom.hex(6)
132
189
 
133
190
  Sidekiq.redis do |conn|
134
- conn.set(Sidekiq::BaseReliableFetch.heartbeat_key(hostname, pid), 1)
191
+ conn.set(Sidekiq::BaseReliableFetch.heartbeat_key("#{hostname}-#{pid}-#{nonce}"), 1)
135
192
  end
136
193
 
137
- "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}"
194
+ "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}:#{nonce}"
138
195
  end
@@ -5,4 +5,39 @@ require 'sidekiq/semi_reliable_fetch'
5
5
 
6
6
  describe Sidekiq::SemiReliableFetch do
7
7
  include_examples 'a Sidekiq fetcher'
8
+
9
+ describe '#retrieve_work' do
10
+ context 'timeout config' do
11
+ let(:queues) { ['stuff_to_do'] }
12
+ let(:fetcher) { described_class.new(queues: queues) }
13
+
14
+ before do
15
+ stub_env('SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT', timeout)
16
+ end
17
+
18
+ context 'when the timeout is not configured' do
19
+ let(:timeout) { nil }
20
+
21
+ it 'brpops with the default timeout timeout' do
22
+ Sidekiq.redis do |connection|
23
+ expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 2 }).once.and_call_original
24
+
25
+ fetcher.retrieve_work
26
+ end
27
+ end
28
+ end
29
+
30
+ context 'when the timeout is set in the env' do
31
+ let(:timeout) { '5' }
32
+
33
+ it 'brpops with the default timeout timeout' do
34
+ Sidekiq.redis do |connection|
35
+ expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 5 }).once.and_call_original
36
+
37
+ fetcher.retrieve_work
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
8
43
  end
data/spec/spec_helper.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  require 'sidekiq'
2
- require 'sidekiq/util'
3
2
  require 'sidekiq/api'
4
3
  require 'pry'
5
4
  require 'simplecov'
5
+ require 'stub_env'
6
6
 
7
7
  SimpleCov.start
8
8
 
@@ -29,6 +29,7 @@ Sidekiq.logger.level = Logger::ERROR
29
29
  #
30
30
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
31
31
  RSpec.configure do |config|
32
+ config.include StubEnv::Helpers
32
33
  # rspec-expectations config goes here. You can use an alternate
33
34
  # assertion/expectation library such as wrong or the stdlib/minitest
34
35
  # assertions if you prefer.
data/tests/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # How to run reliability tests
2
2
 
3
3
  ```
4
- cd reliability_test
4
+ cd tests/reliability
5
5
  bundle exec ruby reliability_test.rb
6
6
  ```
7
7
 
@@ -7,13 +7,13 @@ TEST_CLEANUP_INTERVAL = 20
7
7
  TEST_LEASE_INTERVAL = 5
8
8
 
9
9
  Sidekiq.configure_server do |config|
10
- config.options[:semi_reliable_fetch] = true
10
+ config[:semi_reliable_fetch] = true
11
11
 
12
12
  # We need to override these parameters to not wait too long
13
13
  # The default values are good for production use only
14
14
  # These will be ignored for :basic
15
- config.options[:cleanup_interval] = TEST_CLEANUP_INTERVAL
16
- config.options[:lease_interval] = TEST_LEASE_INTERVAL
15
+ config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
16
+ config[:lease_interval] = TEST_LEASE_INTERVAL
17
17
 
18
18
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
19
19
  end
@@ -16,13 +16,13 @@ WAIT_CLEANUP = TEST_CLEANUP_INTERVAL +
16
16
 
17
17
  Sidekiq.configure_server do |config|
18
18
  if %i[semi reliable].include?(JOB_FETCHER)
19
- config.options[:semi_reliable_fetch] = (JOB_FETCHER == :semi)
19
+ config[:semi_reliable_fetch] = (JOB_FETCHER == :semi)
20
20
 
21
21
  # We need to override these parameters to not wait too long
22
22
  # The default values are good for production use only
23
23
  # These will be ignored for :basic
24
- config.options[:cleanup_interval] = TEST_CLEANUP_INTERVAL
25
- config.options[:lease_interval] = TEST_LEASE_INTERVAL
24
+ config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
25
+ config[:lease_interval] = TEST_LEASE_INTERVAL
26
26
 
27
27
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
28
28
  end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'sidekiq'
4
- require 'sidekiq/util'
5
4
  require 'sidekiq/cli'
6
5
  require_relative 'config'
7
6
 
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-sidekiq-fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TEA
8
8
  - GitLab
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-18 00:00:00.000000000 Z
12
+ date: 2022-11-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sidekiq
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - "~>"
26
26
  - !ruby/object:Gem::Version
27
27
  version: '6.1'
28
+ - !ruby/object:Gem::Dependency
29
+ name: json
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '2.5'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '2.5'
28
42
  description: Redis reliable queue pattern implemented in Sidekiq
29
43
  email: valery@gitlab.com
30
44
  executables: []
@@ -34,6 +48,7 @@ files:
34
48
  - ".gitignore"
35
49
  - ".gitlab-ci.yml"
36
50
  - ".rspec"
51
+ - CONTRIBUTING.md
37
52
  - Gemfile
38
53
  - Gemfile.lock
39
54
  - LICENSE
@@ -63,7 +78,7 @@ homepage: https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/
63
78
  licenses:
64
79
  - LGPL-3.0
65
80
  metadata: {}
66
- post_install_message:
81
+ post_install_message:
67
82
  rdoc_options: []
68
83
  require_paths:
69
84
  - lib
@@ -78,8 +93,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
93
  - !ruby/object:Gem::Version
79
94
  version: '0'
80
95
  requirements: []
81
- rubygems_version: 3.1.4
82
- signing_key:
96
+ rubygems_version: 3.2.22
97
+ signing_key:
83
98
  specification_version: 4
84
99
  summary: Reliable fetch extension for Sidekiq
85
100
  test_files: []