gitlab-sidekiq-fetcher 0.7.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: efdc9461358b538f2b0692cc179639b802a6f0bf4959b73e1d0821a4c40f0713
4
- data.tar.gz: ccbe252394f22e6fafb198ddba9481e23776e7cfa2848807e1e5c34a5269c76d
3
+ metadata.gz: 7f264e7d628cba58e996b5e2428e81a6e2d6a4921738bd5ade6fd9c3637fa8ba
4
+ data.tar.gz: 588f7a6e9a5d24d229bc6dbc7d0a6ca4cb9ec96a517bd9015659ed5b3c67eb8e
5
5
  SHA512:
6
- metadata.gz: 010da1750797c367b01cd8e1d8df281fa70759e9035f0ebaea71f53349f3ebbe54bb04dea74694d50bb595938b049733119ffd5ee4c85d5fd42d719e71a07809
7
- data.tar.gz: d9f89a5dc1e6d6117df1f3704159ea79160a038d8a7a47939d1c772f07c4f3be2426389cd63d14a0ecf469035cc583ba3ea943cf5a14c2232ee3cb44bf09b21c
6
+ metadata.gz: 955716fcccc6be01e941e1e366268c78d8179577d007e324bb68a56c7741ee6045e3fc7e66dab1b58d794f5c22ffc2c2af400c95dbe61ad697e8e5685a753eae
7
+ data.tar.gz: df0fd2c4c72a8401e2f0ac7f8eb3e7f5f47d1fa75da4904e0a0b67c4d80a57a47334e4bdf72728e830f756bbbb6b004412067ad36e8a66a9605471ddc010898f
data/.gitlab-ci.yml CHANGED
@@ -1,4 +1,8 @@
1
- image: "ruby:2.5"
1
+ variables:
2
+ RUBY_VERSION: "2.7"
3
+
4
+ default:
5
+ image: ruby:${RUBY_VERSION}
2
6
 
3
7
  before_script:
4
8
  - ruby -v
@@ -21,6 +25,10 @@ rspec:
21
25
  when: always
22
26
  paths:
23
27
  - coverage/
28
+ parallel:
29
+ matrix:
30
+ - RUBY_VERSION: "2.7"
31
+ - RUBY_VERSION: "3.0"
24
32
 
25
33
  .integration:
26
34
  stage: test
@@ -29,6 +37,10 @@ rspec:
29
37
  - bundle exec ruby reliability_test.rb
30
38
  services:
31
39
  - redis:alpine
40
+ parallel:
41
+ matrix:
42
+ - RUBY_VERSION: "2.7"
43
+ - RUBY_VERSION: "3.0"
32
44
 
33
45
  integration_semi:
34
46
  extends: .integration
@@ -53,6 +65,10 @@ kill_interruption:
53
65
  - bundle exec ruby test_kill_signal.rb
54
66
  services:
55
67
  - redis:alpine
68
+ parallel:
69
+ matrix:
70
+ - RUBY_VERSION: "2.7"
71
+ - RUBY_VERSION: "3.0"
56
72
 
57
73
  term_interruption:
58
74
  stage: test
@@ -61,6 +77,10 @@ term_interruption:
61
77
  - bundle exec ruby test_term_signal.rb
62
78
  services:
63
79
  - redis:alpine
80
+ parallel:
81
+ matrix:
82
+ - RUBY_VERSION: "2.7"
83
+ - RUBY_VERSION: "3.0"
64
84
 
65
85
  # rubocop:
66
86
  # script:
data/CONTRIBUTING.md ADDED
@@ -0,0 +1,41 @@
1
+ ## Developer Certificate of Origin and License
2
+
3
+ By contributing to GitLab B.V., you accept and agree to the following terms and
4
+ conditions for your present and future contributions submitted to GitLab B.V.
5
+ Except for the license granted herein to GitLab B.V. and recipients of software
6
+ distributed by GitLab B.V., you reserve all right, title, and interest in and to
7
+ your Contributions.
8
+
9
+ All contributions are subject to the Developer Certificate of Origin and license set out at [docs.gitlab.com/ce/legal/developer_certificate_of_origin](https://docs.gitlab.com/ce/legal/developer_certificate_of_origin).
10
+
11
+ _This notice should stay as the first item in the CONTRIBUTING.md file._
12
+
13
+ ## Code of conduct
14
+
15
+ As contributors and maintainers of this project, we pledge to respect all people
16
+ who contribute through reporting issues, posting feature requests, updating
17
+ documentation, submitting pull requests or patches, and other activities.
18
+
19
+ We are committed to making participation in this project a harassment-free
20
+ experience for everyone, regardless of level of experience, gender, gender
21
+ identity and expression, sexual orientation, disability, personal appearance,
22
+ body size, race, ethnicity, age, or religion.
23
+
24
+ Examples of unacceptable behavior by participants include the use of sexual
25
+ language or imagery, derogatory comments or personal attacks, trolling, public
26
+ or private harassment, insults, or other unprofessional conduct.
27
+
28
+ Project maintainers have the right and responsibility to remove, edit, or reject
29
+ comments, commits, code, wiki edits, issues, and other contributions that are
30
+ not aligned to this Code of Conduct. Project maintainers who do not follow the
31
+ Code of Conduct may be removed from the project team.
32
+
33
+ This code of conduct applies both within project spaces and in public spaces
34
+ when an individual is representing the project or its community.
35
+
36
+ Instances of abusive, harassing, or otherwise unacceptable behavior can be
37
+ reported by emailing contact@gitlab.com.
38
+
39
+ This Code of Conduct is adapted from the [Contributor Covenant](https://contributor-covenant.org), version 1.1.0,
40
+ available at [https://contributor-covenant.org/version/1/1/0/](https://contributor-covenant.org/version/1/1/0/).
41
+
data/Gemfile CHANGED
@@ -4,9 +4,11 @@ source "https://rubygems.org"
4
4
 
5
5
  git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
6
 
7
+ gemspec
8
+
7
9
  group :test do
8
10
  gem "rspec", '~> 3'
9
11
  gem "pry"
10
- gem "sidekiq", '~> 6.1'
11
12
  gem 'simplecov', require: false
13
+ gem 'stub_env', '~> 1.0'
12
14
  end
data/Gemfile.lock CHANGED
@@ -1,17 +1,24 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ gitlab-sidekiq-fetcher (0.8.0)
5
+ json (>= 2.5)
6
+ sidekiq (~> 6.1)
7
+
1
8
  GEM
2
9
  remote: https://rubygems.org/
3
10
  specs:
4
11
  coderay (1.1.2)
5
- connection_pool (2.2.3)
12
+ connection_pool (2.3.0)
6
13
  diff-lcs (1.3)
7
14
  docile (1.3.1)
8
- json (2.1.0)
15
+ json (2.5.1)
9
16
  method_source (0.9.0)
10
17
  pry (0.11.3)
11
18
  coderay (~> 1.1.0)
12
19
  method_source (~> 0.9.0)
13
- rack (2.2.3)
14
- redis (4.2.1)
20
+ rack (2.2.4)
21
+ redis (4.8.0)
15
22
  rspec (3.8.0)
16
23
  rspec-core (~> 3.8.0)
17
24
  rspec-expectations (~> 3.8.0)
@@ -25,24 +32,27 @@ GEM
25
32
  diff-lcs (>= 1.2.0, < 2.0)
26
33
  rspec-support (~> 3.8.0)
27
34
  rspec-support (3.8.0)
28
- sidekiq (6.1.0)
29
- connection_pool (>= 2.2.2)
35
+ sidekiq (6.5.7)
36
+ connection_pool (>= 2.2.5)
30
37
  rack (~> 2.0)
31
- redis (>= 4.2.0)
38
+ redis (>= 4.5.0, < 5)
32
39
  simplecov (0.16.1)
33
40
  docile (~> 1.1)
34
41
  json (>= 1.8, < 3)
35
42
  simplecov-html (~> 0.10.0)
36
43
  simplecov-html (0.10.2)
44
+ stub_env (1.0.4)
45
+ rspec (>= 2.0, < 4.0)
37
46
 
38
47
  PLATFORMS
39
48
  ruby
40
49
 
41
50
  DEPENDENCIES
51
+ gitlab-sidekiq-fetcher!
42
52
  pry
43
53
  rspec (~> 3)
44
- sidekiq (~> 6.1)
45
54
  simplecov
55
+ stub_env (~> 1.0)
46
56
 
47
57
  BUNDLED WITH
48
- 1.17.2
58
+ 2.3.24
data/README.md CHANGED
@@ -8,6 +8,8 @@ It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
8
8
 
9
9
  **IMPORTANT NOTE:** Since version `0.7.0` this gem works only with `sidekiq >= 6.1` (which introduced Fetch API breaking changes). Please use version `~> 0.5` if you use an older version of `sidekiq`.
10
10
 
11
+ **UPGRADE NOTE:** If upgrading from 0.7.0, strongly consider fully deploying 0.7.1 as an intermediate step before moving to 0.8.0; 0.7.1 fixes a bug in the queue name validation that is triggered when sidekiq nodes running 0.7.0 see working queues named by 0.8.0. See https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/-/merge_requests/22
12
+
11
13
  There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
12
14
  semi-reliable fetch that uses regular `brpop` and `lpush` to pick the job and put it to working queue. The main benefit of "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
13
15
  However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
@@ -44,11 +46,11 @@ Sidekiq.configure_server do |config|
44
46
  end
45
47
  ```
46
48
 
47
- There is an additional parameter `config.options[:semi_reliable_fetch]` you can use to switch between two strategies:
49
+ There is an additional parameter `config[:semi_reliable_fetch]` you can use to switch between two strategies:
48
50
 
49
51
  ```ruby
50
52
  Sidekiq.configure_server do |config|
51
- config.options[:semi_reliable_fetch] = true # Default value is false
53
+ config[:semi_reliable_fetch] = true # Default value is false
52
54
 
53
55
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
54
56
  end
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'gitlab-sidekiq-fetcher'
3
- s.version = '0.7.1'
3
+ s.version = '0.9.0'
4
4
  s.authors = ['TEA', 'GitLab']
5
5
  s.email = 'valery@gitlab.com'
6
6
  s.license = 'LGPL-3.0'
@@ -11,4 +11,5 @@ Gem::Specification.new do |s|
11
11
  s.files = `git ls-files`.split($\)
12
12
  s.test_files = []
13
13
  s.add_dependency 'sidekiq', '~> 6.1'
14
+ s.add_runtime_dependency 'json', '>= 2.5'
14
15
  end
@@ -21,6 +21,10 @@ module Sidekiq
21
21
  # How many times a job can be interrupted
22
22
  DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION = 3
23
23
 
24
+ # Regexes for matching working queue keys
25
+ WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
26
+ LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze
27
+
24
28
  UnitOfWork = Struct.new(:queue, :job) do
25
29
  def acknowledge
26
30
  Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
@@ -41,13 +45,15 @@ module Sidekiq
41
45
  end
42
46
 
43
47
  def self.setup_reliable_fetch!(config)
44
- fetch_strategy = if config.options[:semi_reliable_fetch]
48
+ config = config.options unless config.respond_to?(:[])
49
+
50
+ fetch_strategy = if config[:semi_reliable_fetch]
45
51
  Sidekiq::SemiReliableFetch
46
52
  else
47
53
  Sidekiq::ReliableFetch
48
54
  end
49
55
 
50
- config.options[:fetch] = fetch_strategy.new(config.options)
56
+ config[:fetch] = fetch_strategy.new(config)
51
57
 
52
58
  Sidekiq.logger.info('GitLab reliable fetch activated!')
53
59
 
@@ -70,32 +76,36 @@ module Sidekiq
70
76
  end
71
77
  end
72
78
 
73
- def self.pid
74
- @pid ||= ::Process.pid
79
+ def self.hostname
80
+ Socket.gethostname
81
+ end
82
+
83
+ def self.process_nonce
84
+ @@process_nonce ||= SecureRandom.hex(6)
75
85
  end
76
86
 
77
- def self.hostname
78
- @hostname ||= Socket.gethostname
87
+ def self.identity
88
+ @@identity ||= "#{hostname}:#{$$}:#{process_nonce}"
79
89
  end
80
90
 
81
91
  def self.heartbeat
82
92
  Sidekiq.redis do |conn|
83
- conn.set(heartbeat_key(hostname, pid), 1, ex: HEARTBEAT_LIFESPAN)
93
+ conn.set(heartbeat_key(identity), 1, ex: HEARTBEAT_LIFESPAN)
84
94
  end
85
95
 
86
- Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
96
+ Sidekiq.logger.debug("Heartbeat for #{identity}")
87
97
  end
88
98
 
89
- def self.worker_dead?(hostname, pid, conn)
90
- !conn.get(heartbeat_key(hostname, pid))
99
+ def self.worker_dead?(identity, conn)
100
+ !conn.get(heartbeat_key(identity))
91
101
  end
92
102
 
93
- def self.heartbeat_key(hostname, pid)
94
- "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
103
+ def self.heartbeat_key(identity)
104
+ "reliable-fetcher-heartbeat-#{identity.gsub(':', '-')}"
95
105
  end
96
106
 
97
107
  def self.working_queue_name(queue)
98
- "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
108
+ "#{WORKING_QUEUE_PREFIX}:#{queue}:#{identity}"
99
109
  end
100
110
 
101
111
  attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
@@ -105,6 +115,8 @@ module Sidekiq
105
115
  def initialize(options)
106
116
  raise ArgumentError, 'missing queue list' unless options[:queues]
107
117
 
118
+ @config = options
119
+ @interrupted_set = Sidekiq::InterruptedSet.new
108
120
  @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
109
121
  @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
110
122
  @last_try_to_take_lease_at = 0
@@ -166,6 +178,19 @@ module Sidekiq
166
178
  )
167
179
  end
168
180
 
181
+ def extract_queue_and_identity(key)
182
+ # New identity format is "{hostname}:{pid}:{randomhex}"
183
+ # Old identity format is "{hostname}:{pid}"
184
+ # Queue names may also have colons (namespaced).
185
+ # Expressing this in a single regex is unreadable
186
+
187
+ # Test the newer expected format first, only checking the older if necessary
188
+ original_queue, identity = key.scan(WORKING_QUEUE_REGEX).flatten
189
+ return original_queue, identity unless original_queue.nil? || identity.nil?
190
+
191
+ key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten
192
+ end
193
+
169
194
  # Detect "old" jobs and requeue them because the worker they were assigned
170
195
  # to probably failed miserably.
171
196
  def clean_working_queues!
@@ -173,19 +198,16 @@ module Sidekiq
173
198
 
174
199
  Sidekiq.redis do |conn|
175
200
  conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
176
- # Example: "working:name_of_the_job:queue:{hostname}:{PID}"
177
- hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten
201
+ original_queue, identity = extract_queue_and_identity(key)
178
202
 
179
- next if hostname.nil? || pid.nil?
203
+ next if original_queue.nil? || identity.nil?
180
204
 
181
- clean_working_queue!(key) if self.class.worker_dead?(hostname, pid, conn)
205
+ clean_working_queue!(original_queue, key) if self.class.worker_dead?(identity, conn)
182
206
  end
183
207
  end
184
208
  end
185
209
 
186
- def clean_working_queue!(working_queue)
187
- original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
188
-
210
+ def clean_working_queue!(original_queue, working_queue)
189
211
  Sidekiq.redis do |conn|
190
212
  while job = conn.rpop(working_queue)
191
213
  preprocess_interrupted_job(job, original_queue)
@@ -207,7 +229,7 @@ module Sidekiq
207
229
  rescue NameError
208
230
  end
209
231
 
210
- max_retries_after_interruption ||= Sidekiq.options[:max_retries_after_interruption]
232
+ max_retries_after_interruption ||= @config[:max_retries_after_interruption]
211
233
  max_retries_after_interruption ||= DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION
212
234
  max_retries_after_interruption
213
235
  end
@@ -220,7 +242,7 @@ module Sidekiq
220
242
  )
221
243
 
222
244
  job = Sidekiq.dump_json(msg)
223
- Sidekiq::InterruptedSet.new.put(job, connection: multi_connection)
245
+ @interrupted_set.put(job, connection: multi_connection)
224
246
  end
225
247
 
226
248
  # Yield block with an existing connection or creates another one
@@ -37,11 +37,15 @@ module Sidekiq
37
37
  end
38
38
 
39
39
  def self.max_jobs
40
- Sidekiq.options[:interrupted_max_jobs] || DEFAULT_MAX_CAPACITY
40
+ options[:interrupted_max_jobs] || DEFAULT_MAX_CAPACITY
41
41
  end
42
42
 
43
43
  def self.timeout
44
- Sidekiq.options[:interrupted_timeout_in_seconds] || DEFAULT_MAX_TIMEOUT
44
+ options[:interrupted_timeout_in_seconds] || DEFAULT_MAX_TIMEOUT
45
+ end
46
+
47
+ def self.options
48
+ Sidekiq.respond_to?(:[]) ? Sidekiq : Sidekiq.options
45
49
  end
46
50
  end
47
51
  end
@@ -5,14 +5,14 @@ module Sidekiq
5
5
  # We want the fetch operation to timeout every few seconds so the thread
6
6
  # can check if the process is shutting down. This constant is only used
7
7
  # for semi-reliable fetch.
8
- SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds
8
+ DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds
9
9
 
10
10
  def initialize(options)
11
11
  super
12
12
 
13
13
  if strictly_ordered_queues
14
14
  @queues = @queues.uniq
15
- @queues << SEMI_RELIABLE_FETCH_TIMEOUT
15
+ @queues << { timeout: semi_reliable_fetch_timeout }
16
16
  end
17
17
  end
18
18
 
@@ -36,9 +36,13 @@ module Sidekiq
36
36
  @queues
37
37
  else
38
38
  queues = @queues.shuffle.uniq
39
- queues << SEMI_RELIABLE_FETCH_TIMEOUT
39
+ queues << { timeout: semi_reliable_fetch_timeout }
40
40
  queues
41
41
  end
42
42
  end
43
+
44
+ def semi_reliable_fetch_timeout
45
+ @semi_reliable_fetch_timeout ||= ENV['SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT']&.to_i || DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT
46
+ end
43
47
  end
44
48
  end
@@ -65,7 +65,7 @@ describe Sidekiq::BaseReliableFetch do
65
65
  end
66
66
 
67
67
  it 'does not put jobs into interrupted queue if it is disabled' do
68
- Sidekiq.options[:max_retries_after_interruption] = -1
68
+ options[:max_retries_after_interruption] = -1
69
69
 
70
70
  uow = described_class::UnitOfWork
71
71
  interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3)
@@ -75,8 +75,6 @@ describe Sidekiq::BaseReliableFetch do
75
75
  expect(queue1.size).to eq 2
76
76
  expect(queue2.size).to eq 1
77
77
  expect(Sidekiq::InterruptedSet.new.size).to eq 0
78
-
79
- Sidekiq.options[:max_retries_after_interruption] = 3
80
78
  end
81
79
  end
82
80
 
@@ -88,7 +86,7 @@ describe Sidekiq::BaseReliableFetch do
88
86
  Sidekiq.redis do |conn|
89
87
  sleep 0.2 # Give the heartbeat thread time to make a loop
90
88
 
91
- heartbeat_key = described_class.heartbeat_key(Socket.gethostname, ::Process.pid)
89
+ heartbeat_key = described_class.heartbeat_key(described_class.identity)
92
90
  heartbeat = conn.get(heartbeat_key)
93
91
 
94
92
  expect(heartbeat).not_to be_nil
@@ -7,111 +7,163 @@ shared_examples 'a Sidekiq fetcher' do
7
7
  let(:job) { Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo']) }
8
8
  let(:fetcher) { described_class.new(queues: queues) }
9
9
 
10
- it 'retrieves the job and puts it to working queue' do
11
- Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
12
-
13
- uow = fetcher.retrieve_work
10
+ it 'does not clean up orphaned jobs more than once per cleanup interval' do
11
+ Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
14
12
 
15
- expect(working_queue_size('assigned')).to eq 1
16
- expect(uow.queue_name).to eq 'assigned'
17
- expect(uow.job).to eq job
18
- expect(Sidekiq::Queue.new('assigned').size).to eq 0
19
- end
13
+ expect(fetcher).to receive(:clean_working_queues!).once
20
14
 
21
- it 'does not retrieve a job from foreign queue' do
22
- Sidekiq.redis { |conn| conn.rpush('queue:not_assigned', job) }
15
+ threads = 10.times.map do
16
+ Thread.new do
17
+ fetcher.retrieve_work
18
+ end
19
+ end
23
20
 
24
- expect(fetcher.retrieve_work).to be_nil
21
+ threads.map(&:join)
25
22
  end
26
23
 
27
- it 'requeues jobs from dead working queue with incremented interrupted_count' do
24
+ it 'retrieves by order when strictly order is enabled' do
25
+ fetcher = described_class.new(strict: true, queues: ['first', 'second'])
26
+
28
27
  Sidekiq.redis do |conn|
29
- conn.rpush(other_process_working_queue_name('assigned'), job)
28
+ conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
29
+ conn.rpush('queue:second', 'msg4')
30
30
  end
31
31
 
32
- expected_job = Sidekiq.load_json(job)
33
- expected_job['interrupted_count'] = 1
34
- expected_job = Sidekiq.dump_json(expected_job)
32
+ jobs = (1..4).map { fetcher.retrieve_work.job }
35
33
 
36
- uow = fetcher.retrieve_work
34
+ expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
35
+ end
37
36
 
38
- expect(uow.job).to eq expected_job
37
+ it 'does not starve any queue when queues are not strictly ordered' do
38
+ fetcher = described_class.new(queues: ['first', 'second'])
39
39
 
40
40
  Sidekiq.redis do |conn|
41
- expect(conn.llen(other_process_working_queue_name('assigned'))).to eq 0
41
+ conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
42
+ conn.rpush('queue:second', 'this_job_should_not_stuck')
42
43
  end
44
+
45
+ jobs = (1..100).map { fetcher.retrieve_work.job }
46
+
47
+ expect(jobs).to include 'this_job_should_not_stuck'
43
48
  end
44
49
 
45
- it 'ignores working queue keys in unknown formats' do
46
- # Add a spurious non-numeric char segment at the end; this simulates any other
47
- # incorrect form in general
48
- malformed_key = "#{other_process_working_queue_name('assigned')}:X"
49
- Sidekiq.redis do |conn|
50
- conn.rpush(malformed_key, job)
51
- end
50
+ shared_examples "basic queue handling" do |queue|
51
+ let (:fetcher) { described_class.new(queues: [queue]) }
52
52
 
53
- uow = fetcher.retrieve_work
53
+ it 'retrieves the job and puts it to working queue' do
54
+ Sidekiq.redis { |conn| conn.rpush("queue:#{queue}", job) }
54
55
 
55
- Sidekiq.redis do |conn|
56
- expect(conn.llen(malformed_key)).to eq 1
56
+ uow = fetcher.retrieve_work
57
+
58
+ expect(working_queue_size(queue)).to eq 1
59
+ expect(uow.queue_name).to eq queue
60
+ expect(uow.job).to eq job
61
+ expect(Sidekiq::Queue.new(queue).size).to eq 0
57
62
  end
58
- end
59
63
 
64
+ it 'does not retrieve a job from foreign queue' do
65
+ Sidekiq.redis { |conn| conn.rpush("'queue:#{queue}:not", job) }
66
+ expect(fetcher.retrieve_work).to be_nil
60
67
 
61
- it 'does not requeue jobs from live working queue' do
62
- working_queue = live_other_process_working_queue_name('assigned')
68
+ Sidekiq.redis { |conn| conn.rpush("'queue:not_#{queue}", job) }
69
+ expect(fetcher.retrieve_work).to be_nil
63
70
 
64
- Sidekiq.redis do |conn|
65
- conn.rpush(working_queue, job)
71
+ Sidekiq.redis { |conn| conn.rpush("'queue:random_name", job) }
72
+ expect(fetcher.retrieve_work).to be_nil
66
73
  end
67
74
 
68
- uow = fetcher.retrieve_work
75
+ it 'requeues jobs from legacy dead working queue with incremented interrupted_count' do
76
+ Sidekiq.redis do |conn|
77
+ conn.rpush(legacy_other_process_working_queue_name(queue), job)
78
+ end
69
79
 
70
- expect(uow).to be_nil
80
+ expected_job = Sidekiq.load_json(job)
81
+ expected_job['interrupted_count'] = 1
82
+ expected_job = Sidekiq.dump_json(expected_job)
71
83
 
72
- Sidekiq.redis do |conn|
73
- expect(conn.llen(working_queue)).to eq 1
84
+ uow = fetcher.retrieve_work
85
+
86
+ expect(uow).to_not be_nil
87
+ expect(uow.job).to eq expected_job
88
+
89
+ Sidekiq.redis do |conn|
90
+ expect(conn.llen(legacy_other_process_working_queue_name(queue))).to eq 0
91
+ end
74
92
  end
75
- end
76
93
 
77
- it 'does not clean up orphaned jobs more than once per cleanup interval' do
78
- Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
94
+ it 'ignores working queue keys in unknown formats' do
95
+ # Add a spurious non-numeric char segment at the end; this simulates any other
96
+ # incorrect form in general
97
+ malformed_key = "#{other_process_working_queue_name(queue)}:X"
98
+ Sidekiq.redis do |conn|
99
+ conn.rpush(malformed_key, job)
100
+ end
79
101
 
80
- expect(fetcher).to receive(:clean_working_queues!).once
102
+ uow = fetcher.retrieve_work
81
103
 
82
- threads = 10.times.map do
83
- Thread.new do
84
- fetcher.retrieve_work
104
+ Sidekiq.redis do |conn|
105
+ expect(conn.llen(malformed_key)).to eq 1
85
106
  end
86
107
  end
87
108
 
88
- threads.map(&:join)
89
- end
109
+ it 'requeues jobs from dead working queue with incremented interrupted_count' do
110
+ Sidekiq.redis do |conn|
111
+ conn.rpush(other_process_working_queue_name(queue), job)
112
+ end
90
113
 
91
- it 'retrieves by order when strictly order is enabled' do
92
- fetcher = described_class.new(strict: true, queues: ['first', 'second'])
114
+ expected_job = Sidekiq.load_json(job)
115
+ expected_job['interrupted_count'] = 1
116
+ expected_job = Sidekiq.dump_json(expected_job)
93
117
 
94
- Sidekiq.redis do |conn|
95
- conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
96
- conn.rpush('queue:second', 'msg4')
118
+ uow = fetcher.retrieve_work
119
+
120
+ expect(uow).to_not be_nil
121
+ expect(uow.job).to eq expected_job
122
+
123
+ Sidekiq.redis do |conn|
124
+ expect(conn.llen(other_process_working_queue_name(queue))).to eq 0
125
+ end
97
126
  end
98
127
 
99
- jobs = (1..4).map { fetcher.retrieve_work.job }
128
+ it 'does not requeue jobs from live working queue' do
129
+ working_queue = live_other_process_working_queue_name(queue)
100
130
 
101
- expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
102
- end
131
+ Sidekiq.redis do |conn|
132
+ conn.rpush(working_queue, job)
133
+ end
103
134
 
104
- it 'does not starve any queue when queues are not strictly ordered' do
105
- fetcher = described_class.new(queues: ['first', 'second'])
135
+ uow = fetcher.retrieve_work
106
136
 
107
- Sidekiq.redis do |conn|
108
- conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
109
- conn.rpush('queue:second', 'this_job_should_not_stuck')
137
+ expect(uow).to be_nil
138
+
139
+ Sidekiq.redis do |conn|
140
+ expect(conn.llen(working_queue)).to eq 1
141
+ end
110
142
  end
143
+ end
111
144
 
112
- jobs = (1..100).map { fetcher.retrieve_work.job }
145
+ context 'with various queues' do
146
+ %w[assigned namespace:assigned namespace:deeper:assigned].each do |queue|
147
+ it_behaves_like "basic queue handling", queue
148
+ end
149
+ end
113
150
 
114
- expect(jobs).to include 'this_job_should_not_stuck'
151
+ context 'with short cleanup interval' do
152
+ let(:short_interval) { 1 }
153
+ let(:fetcher) { described_class.new(queues: queues, lease_interval: short_interval, cleanup_interval: short_interval) }
154
+
155
+ it 'requeues when there is no heartbeat' do
156
+ Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
157
+ # Use of retrieve_work twice with a sleep ensures we have exercised the
158
+ # `identity` method to create the working queue key name and that it
159
+ # matches the patterns used in the cleanup
160
+ uow = fetcher.retrieve_work
161
+ sleep(short_interval + 1)
162
+ uow = fetcher.retrieve_work
163
+
164
+ # Will only receive a UnitOfWork if the job was detected as failed and requeued
165
+ expect(uow).to_not be_nil
166
+ end
115
167
  end
116
168
  end
117
169
  end
@@ -122,17 +174,22 @@ def working_queue_size(queue_name)
122
174
  end
123
175
  end
124
176
 
125
- def other_process_working_queue_name(queue)
177
+ def legacy_other_process_working_queue_name(queue)
126
178
  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}"
127
179
  end
128
180
 
181
+ def other_process_working_queue_name(queue)
182
+ "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}:#{::SecureRandom.hex(6)}"
183
+ end
184
+
129
185
  def live_other_process_working_queue_name(queue)
130
186
  pid = ::Process.pid + 1
131
187
  hostname = Socket.gethostname
188
+ nonce = SecureRandom.hex(6)
132
189
 
133
190
  Sidekiq.redis do |conn|
134
- conn.set(Sidekiq::BaseReliableFetch.heartbeat_key(hostname, pid), 1)
191
+ conn.set(Sidekiq::BaseReliableFetch.heartbeat_key("#{hostname}-#{pid}-#{nonce}"), 1)
135
192
  end
136
193
 
137
- "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}"
194
+ "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}:#{nonce}"
138
195
  end
@@ -5,4 +5,39 @@ require 'sidekiq/semi_reliable_fetch'
5
5
 
6
6
  describe Sidekiq::SemiReliableFetch do
7
7
  include_examples 'a Sidekiq fetcher'
8
+
9
+ describe '#retrieve_work' do
10
+ context 'timeout config' do
11
+ let(:queues) { ['stuff_to_do'] }
12
+ let(:fetcher) { described_class.new(queues: queues) }
13
+
14
+ before do
15
+ stub_env('SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT', timeout)
16
+ end
17
+
18
+ context 'when the timeout is not configured' do
19
+ let(:timeout) { nil }
20
+
21
+ it 'brpops with the default timeout timeout' do
22
+ Sidekiq.redis do |connection|
23
+ expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 2 }).once.and_call_original
24
+
25
+ fetcher.retrieve_work
26
+ end
27
+ end
28
+ end
29
+
30
+ context 'when the timeout is set in the env' do
31
+ let(:timeout) { '5' }
32
+
33
+ it 'brpops with the default timeout timeout' do
34
+ Sidekiq.redis do |connection|
35
+ expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 5 }).once.and_call_original
36
+
37
+ fetcher.retrieve_work
38
+ end
39
+ end
40
+ end
41
+ end
42
+ end
8
43
  end
data/spec/spec_helper.rb CHANGED
@@ -1,8 +1,8 @@
1
1
  require 'sidekiq'
2
- require 'sidekiq/util'
3
2
  require 'sidekiq/api'
4
3
  require 'pry'
5
4
  require 'simplecov'
5
+ require 'stub_env'
6
6
 
7
7
  SimpleCov.start
8
8
 
@@ -29,6 +29,7 @@ Sidekiq.logger.level = Logger::ERROR
29
29
  #
30
30
  # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
31
31
  RSpec.configure do |config|
32
+ config.include StubEnv::Helpers
32
33
  # rspec-expectations config goes here. You can use an alternate
33
34
  # assertion/expectation library such as wrong or the stdlib/minitest
34
35
  # assertions if you prefer.
data/tests/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # How to run reliability tests
2
2
 
3
3
  ```
4
- cd reliability_test
4
+ cd tests/reliability
5
5
  bundle exec ruby reliability_test.rb
6
6
  ```
7
7
 
@@ -7,13 +7,13 @@ TEST_CLEANUP_INTERVAL = 20
7
7
  TEST_LEASE_INTERVAL = 5
8
8
 
9
9
  Sidekiq.configure_server do |config|
10
- config.options[:semi_reliable_fetch] = true
10
+ config[:semi_reliable_fetch] = true
11
11
 
12
12
  # We need to override these parameters to not wait too long
13
13
  # The default values are good for production use only
14
14
  # These will be ignored for :basic
15
- config.options[:cleanup_interval] = TEST_CLEANUP_INTERVAL
16
- config.options[:lease_interval] = TEST_LEASE_INTERVAL
15
+ config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
16
+ config[:lease_interval] = TEST_LEASE_INTERVAL
17
17
 
18
18
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
19
19
  end
@@ -16,13 +16,13 @@ WAIT_CLEANUP = TEST_CLEANUP_INTERVAL +
16
16
 
17
17
  Sidekiq.configure_server do |config|
18
18
  if %i[semi reliable].include?(JOB_FETCHER)
19
- config.options[:semi_reliable_fetch] = (JOB_FETCHER == :semi)
19
+ config[:semi_reliable_fetch] = (JOB_FETCHER == :semi)
20
20
 
21
21
  # We need to override these parameters to not wait too long
22
22
  # The default values are good for production use only
23
23
  # These will be ignored for :basic
24
- config.options[:cleanup_interval] = TEST_CLEANUP_INTERVAL
25
- config.options[:lease_interval] = TEST_LEASE_INTERVAL
24
+ config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
25
+ config[:lease_interval] = TEST_LEASE_INTERVAL
26
26
 
27
27
  Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
28
28
  end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'sidekiq'
4
- require 'sidekiq/util'
5
4
  require 'sidekiq/cli'
6
5
  require_relative 'config'
7
6
 
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-sidekiq-fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TEA
8
8
  - GitLab
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-02-18 00:00:00.000000000 Z
12
+ date: 2022-11-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sidekiq
@@ -25,6 +25,20 @@ dependencies:
25
25
  - - "~>"
26
26
  - !ruby/object:Gem::Version
27
27
  version: '6.1'
28
+ - !ruby/object:Gem::Dependency
29
+ name: json
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '2.5'
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '2.5'
28
42
  description: Redis reliable queue pattern implemented in Sidekiq
29
43
  email: valery@gitlab.com
30
44
  executables: []
@@ -34,6 +48,7 @@ files:
34
48
  - ".gitignore"
35
49
  - ".gitlab-ci.yml"
36
50
  - ".rspec"
51
+ - CONTRIBUTING.md
37
52
  - Gemfile
38
53
  - Gemfile.lock
39
54
  - LICENSE
@@ -63,7 +78,7 @@ homepage: https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/
63
78
  licenses:
64
79
  - LGPL-3.0
65
80
  metadata: {}
66
- post_install_message:
81
+ post_install_message:
67
82
  rdoc_options: []
68
83
  require_paths:
69
84
  - lib
@@ -78,8 +93,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
78
93
  - !ruby/object:Gem::Version
79
94
  version: '0'
80
95
  requirements: []
81
- rubygems_version: 3.1.4
82
- signing_key:
96
+ rubygems_version: 3.2.22
97
+ signing_key:
83
98
  specification_version: 4
84
99
  summary: Reliable fetch extension for Sidekiq
85
100
  test_files: []