gitlab-sidekiq-fetcher 0.7.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitlab-ci.yml +21 -1
- data/CONTRIBUTING.md +41 -0
- data/Gemfile +3 -1
- data/Gemfile.lock +19 -9
- data/README.md +4 -2
- data/gitlab-sidekiq-fetcher.gemspec +2 -1
- data/lib/sidekiq/base_reliable_fetch.rb +44 -22
- data/lib/sidekiq/interrupted_set.rb +6 -2
- data/lib/sidekiq/semi_reliable_fetch.rb +7 -3
- data/spec/base_reliable_fetch_spec.rb +2 -4
- data/spec/fetch_shared_examples.rb +123 -66
- data/spec/semi_reliable_fetch_spec.rb +35 -0
- data/spec/spec_helper.rb +2 -1
- data/tests/README.md +1 -1
- data/tests/interruption/config.rb +3 -3
- data/tests/reliability/config.rb +3 -3
- data/tests/reliability/reliability_test.rb +0 -1
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f264e7d628cba58e996b5e2428e81a6e2d6a4921738bd5ade6fd9c3637fa8ba
|
4
|
+
data.tar.gz: 588f7a6e9a5d24d229bc6dbc7d0a6ca4cb9ec96a517bd9015659ed5b3c67eb8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 955716fcccc6be01e941e1e366268c78d8179577d007e324bb68a56c7741ee6045e3fc7e66dab1b58d794f5c22ffc2c2af400c95dbe61ad697e8e5685a753eae
|
7
|
+
data.tar.gz: df0fd2c4c72a8401e2f0ac7f8eb3e7f5f47d1fa75da4904e0a0b67c4d80a57a47334e4bdf72728e830f756bbbb6b004412067ad36e8a66a9605471ddc010898f
|
data/.gitlab-ci.yml
CHANGED
@@ -1,4 +1,8 @@
|
|
1
|
-
|
1
|
+
variables:
|
2
|
+
RUBY_VERSION: "2.7"
|
3
|
+
|
4
|
+
default:
|
5
|
+
image: ruby:${RUBY_VERSION}
|
2
6
|
|
3
7
|
before_script:
|
4
8
|
- ruby -v
|
@@ -21,6 +25,10 @@ rspec:
|
|
21
25
|
when: always
|
22
26
|
paths:
|
23
27
|
- coverage/
|
28
|
+
parallel:
|
29
|
+
matrix:
|
30
|
+
- RUBY_VERSION: "2.7"
|
31
|
+
- RUBY_VERSION: "3.0"
|
24
32
|
|
25
33
|
.integration:
|
26
34
|
stage: test
|
@@ -29,6 +37,10 @@ rspec:
|
|
29
37
|
- bundle exec ruby reliability_test.rb
|
30
38
|
services:
|
31
39
|
- redis:alpine
|
40
|
+
parallel:
|
41
|
+
matrix:
|
42
|
+
- RUBY_VERSION: "2.7"
|
43
|
+
- RUBY_VERSION: "3.0"
|
32
44
|
|
33
45
|
integration_semi:
|
34
46
|
extends: .integration
|
@@ -53,6 +65,10 @@ kill_interruption:
|
|
53
65
|
- bundle exec ruby test_kill_signal.rb
|
54
66
|
services:
|
55
67
|
- redis:alpine
|
68
|
+
parallel:
|
69
|
+
matrix:
|
70
|
+
- RUBY_VERSION: "2.7"
|
71
|
+
- RUBY_VERSION: "3.0"
|
56
72
|
|
57
73
|
term_interruption:
|
58
74
|
stage: test
|
@@ -61,6 +77,10 @@ term_interruption:
|
|
61
77
|
- bundle exec ruby test_term_signal.rb
|
62
78
|
services:
|
63
79
|
- redis:alpine
|
80
|
+
parallel:
|
81
|
+
matrix:
|
82
|
+
- RUBY_VERSION: "2.7"
|
83
|
+
- RUBY_VERSION: "3.0"
|
64
84
|
|
65
85
|
# rubocop:
|
66
86
|
# script:
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
## Developer Certificate of Origin and License
|
2
|
+
|
3
|
+
By contributing to GitLab B.V., you accept and agree to the following terms and
|
4
|
+
conditions for your present and future contributions submitted to GitLab B.V.
|
5
|
+
Except for the license granted herein to GitLab B.V. and recipients of software
|
6
|
+
distributed by GitLab B.V., you reserve all right, title, and interest in and to
|
7
|
+
your Contributions.
|
8
|
+
|
9
|
+
All contributions are subject to the Developer Certificate of Origin and license set out at [docs.gitlab.com/ce/legal/developer_certificate_of_origin](https://docs.gitlab.com/ce/legal/developer_certificate_of_origin).
|
10
|
+
|
11
|
+
_This notice should stay as the first item in the CONTRIBUTING.md file._
|
12
|
+
|
13
|
+
## Code of conduct
|
14
|
+
|
15
|
+
As contributors and maintainers of this project, we pledge to respect all people
|
16
|
+
who contribute through reporting issues, posting feature requests, updating
|
17
|
+
documentation, submitting pull requests or patches, and other activities.
|
18
|
+
|
19
|
+
We are committed to making participation in this project a harassment-free
|
20
|
+
experience for everyone, regardless of level of experience, gender, gender
|
21
|
+
identity and expression, sexual orientation, disability, personal appearance,
|
22
|
+
body size, race, ethnicity, age, or religion.
|
23
|
+
|
24
|
+
Examples of unacceptable behavior by participants include the use of sexual
|
25
|
+
language or imagery, derogatory comments or personal attacks, trolling, public
|
26
|
+
or private harassment, insults, or other unprofessional conduct.
|
27
|
+
|
28
|
+
Project maintainers have the right and responsibility to remove, edit, or reject
|
29
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
30
|
+
not aligned to this Code of Conduct. Project maintainers who do not follow the
|
31
|
+
Code of Conduct may be removed from the project team.
|
32
|
+
|
33
|
+
This code of conduct applies both within project spaces and in public spaces
|
34
|
+
when an individual is representing the project or its community.
|
35
|
+
|
36
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior can be
|
37
|
+
reported by emailing contact@gitlab.com.
|
38
|
+
|
39
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://contributor-covenant.org), version 1.1.0,
|
40
|
+
available at [https://contributor-covenant.org/version/1/1/0/](https://contributor-covenant.org/version/1/1/0/).
|
41
|
+
|
data/Gemfile
CHANGED
@@ -4,9 +4,11 @@ source "https://rubygems.org"
|
|
4
4
|
|
5
5
|
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
6
6
|
|
7
|
+
gemspec
|
8
|
+
|
7
9
|
group :test do
|
8
10
|
gem "rspec", '~> 3'
|
9
11
|
gem "pry"
|
10
|
-
gem "sidekiq", '~> 6.1'
|
11
12
|
gem 'simplecov', require: false
|
13
|
+
gem 'stub_env', '~> 1.0'
|
12
14
|
end
|
data/Gemfile.lock
CHANGED
@@ -1,17 +1,24 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
gitlab-sidekiq-fetcher (0.8.0)
|
5
|
+
json (>= 2.5)
|
6
|
+
sidekiq (~> 6.1)
|
7
|
+
|
1
8
|
GEM
|
2
9
|
remote: https://rubygems.org/
|
3
10
|
specs:
|
4
11
|
coderay (1.1.2)
|
5
|
-
connection_pool (2.
|
12
|
+
connection_pool (2.3.0)
|
6
13
|
diff-lcs (1.3)
|
7
14
|
docile (1.3.1)
|
8
|
-
json (2.1
|
15
|
+
json (2.5.1)
|
9
16
|
method_source (0.9.0)
|
10
17
|
pry (0.11.3)
|
11
18
|
coderay (~> 1.1.0)
|
12
19
|
method_source (~> 0.9.0)
|
13
|
-
rack (2.2.
|
14
|
-
redis (4.
|
20
|
+
rack (2.2.4)
|
21
|
+
redis (4.8.0)
|
15
22
|
rspec (3.8.0)
|
16
23
|
rspec-core (~> 3.8.0)
|
17
24
|
rspec-expectations (~> 3.8.0)
|
@@ -25,24 +32,27 @@ GEM
|
|
25
32
|
diff-lcs (>= 1.2.0, < 2.0)
|
26
33
|
rspec-support (~> 3.8.0)
|
27
34
|
rspec-support (3.8.0)
|
28
|
-
sidekiq (6.
|
29
|
-
connection_pool (>= 2.2.
|
35
|
+
sidekiq (6.5.7)
|
36
|
+
connection_pool (>= 2.2.5)
|
30
37
|
rack (~> 2.0)
|
31
|
-
redis (>= 4.
|
38
|
+
redis (>= 4.5.0, < 5)
|
32
39
|
simplecov (0.16.1)
|
33
40
|
docile (~> 1.1)
|
34
41
|
json (>= 1.8, < 3)
|
35
42
|
simplecov-html (~> 0.10.0)
|
36
43
|
simplecov-html (0.10.2)
|
44
|
+
stub_env (1.0.4)
|
45
|
+
rspec (>= 2.0, < 4.0)
|
37
46
|
|
38
47
|
PLATFORMS
|
39
48
|
ruby
|
40
49
|
|
41
50
|
DEPENDENCIES
|
51
|
+
gitlab-sidekiq-fetcher!
|
42
52
|
pry
|
43
53
|
rspec (~> 3)
|
44
|
-
sidekiq (~> 6.1)
|
45
54
|
simplecov
|
55
|
+
stub_env (~> 1.0)
|
46
56
|
|
47
57
|
BUNDLED WITH
|
48
|
-
|
58
|
+
2.3.24
|
data/README.md
CHANGED
@@ -8,6 +8,8 @@ It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
|
|
8
8
|
|
9
9
|
**IMPORTANT NOTE:** Since version `0.7.0` this gem works only with `sidekiq >= 6.1` (which introduced breaking changes to the Fetch API). Please use version `~> 0.5` if you use an older version of `sidekiq`.
|
10
10
|
|
11
|
+
**UPGRADE NOTE:** If upgrading from 0.7.0, strongly consider fully deploying 0.7.1 as an intermediate step before moving to 0.8.0; 0.7.1 fixes a bug in the queue name validation that is triggered when Sidekiq nodes running 0.7.0 see working queues named by 0.8.0. See https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/-/merge_requests/22
|
12
|
+
|
11
13
|
There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using the `rpoplpush` command and
|
12
14
|
semi-reliable fetch that uses regular `brpop` and `lpush` to pick the job and put it into the working queue. The main benefit of the "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
|
13
15
|
However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
|
@@ -44,11 +46,11 @@ Sidekiq.configure_server do |config|
|
|
44
46
|
end
|
45
47
|
```
|
46
48
|
|
47
|
-
There is an additional parameter `config
|
49
|
+
There is an additional parameter `config[:semi_reliable_fetch]` you can use to switch between two strategies:
|
48
50
|
|
49
51
|
```ruby
|
50
52
|
Sidekiq.configure_server do |config|
|
51
|
-
config
|
53
|
+
config[:semi_reliable_fetch] = true # Default value is false
|
52
54
|
|
53
55
|
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
54
56
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'gitlab-sidekiq-fetcher'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.9.0'
|
4
4
|
s.authors = ['TEA', 'GitLab']
|
5
5
|
s.email = 'valery@gitlab.com'
|
6
6
|
s.license = 'LGPL-3.0'
|
@@ -11,4 +11,5 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.files = `git ls-files`.split($\)
|
12
12
|
s.test_files = []
|
13
13
|
s.add_dependency 'sidekiq', '~> 6.1'
|
14
|
+
s.add_runtime_dependency 'json', '>= 2.5'
|
14
15
|
end
|
@@ -21,6 +21,10 @@ module Sidekiq
|
|
21
21
|
# How many times a job can be interrupted
|
22
22
|
DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION = 3
|
23
23
|
|
24
|
+
# Regexes for matching working queue keys
|
25
|
+
WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
|
26
|
+
LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze
|
27
|
+
|
24
28
|
UnitOfWork = Struct.new(:queue, :job) do
|
25
29
|
def acknowledge
|
26
30
|
Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
|
@@ -41,13 +45,15 @@ module Sidekiq
|
|
41
45
|
end
|
42
46
|
|
43
47
|
def self.setup_reliable_fetch!(config)
|
44
|
-
|
48
|
+
config = config.options unless config.respond_to?(:[])
|
49
|
+
|
50
|
+
fetch_strategy = if config[:semi_reliable_fetch]
|
45
51
|
Sidekiq::SemiReliableFetch
|
46
52
|
else
|
47
53
|
Sidekiq::ReliableFetch
|
48
54
|
end
|
49
55
|
|
50
|
-
config
|
56
|
+
config[:fetch] = fetch_strategy.new(config)
|
51
57
|
|
52
58
|
Sidekiq.logger.info('GitLab reliable fetch activated!')
|
53
59
|
|
@@ -70,32 +76,36 @@ module Sidekiq
|
|
70
76
|
end
|
71
77
|
end
|
72
78
|
|
73
|
-
def self.
|
74
|
-
|
79
|
+
def self.hostname
|
80
|
+
Socket.gethostname
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.process_nonce
|
84
|
+
@@process_nonce ||= SecureRandom.hex(6)
|
75
85
|
end
|
76
86
|
|
77
|
-
def self.
|
78
|
-
|
87
|
+
def self.identity
|
88
|
+
@@identity ||= "#{hostname}:#{$$}:#{process_nonce}"
|
79
89
|
end
|
80
90
|
|
81
91
|
def self.heartbeat
|
82
92
|
Sidekiq.redis do |conn|
|
83
|
-
conn.set(heartbeat_key(
|
93
|
+
conn.set(heartbeat_key(identity), 1, ex: HEARTBEAT_LIFESPAN)
|
84
94
|
end
|
85
95
|
|
86
|
-
Sidekiq.logger.debug("Heartbeat for
|
96
|
+
Sidekiq.logger.debug("Heartbeat for #{identity}")
|
87
97
|
end
|
88
98
|
|
89
|
-
def self.worker_dead?(
|
90
|
-
!conn.get(heartbeat_key(
|
99
|
+
def self.worker_dead?(identity, conn)
|
100
|
+
!conn.get(heartbeat_key(identity))
|
91
101
|
end
|
92
102
|
|
93
|
-
def self.heartbeat_key(
|
94
|
-
"reliable-fetcher-heartbeat-#{
|
103
|
+
def self.heartbeat_key(identity)
|
104
|
+
"reliable-fetcher-heartbeat-#{identity.gsub(':', '-')}"
|
95
105
|
end
|
96
106
|
|
97
107
|
def self.working_queue_name(queue)
|
98
|
-
"#{WORKING_QUEUE_PREFIX}:#{queue}:#{
|
108
|
+
"#{WORKING_QUEUE_PREFIX}:#{queue}:#{identity}"
|
99
109
|
end
|
100
110
|
|
101
111
|
attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
|
@@ -105,6 +115,8 @@ module Sidekiq
|
|
105
115
|
def initialize(options)
|
106
116
|
raise ArgumentError, 'missing queue list' unless options[:queues]
|
107
117
|
|
118
|
+
@config = options
|
119
|
+
@interrupted_set = Sidekiq::InterruptedSet.new
|
108
120
|
@cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
|
109
121
|
@lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
|
110
122
|
@last_try_to_take_lease_at = 0
|
@@ -166,6 +178,19 @@ module Sidekiq
|
|
166
178
|
)
|
167
179
|
end
|
168
180
|
|
181
|
+
def extract_queue_and_identity(key)
|
182
|
+
# New identity format is "{hostname}:{pid}:{randomhex}"
|
183
|
+
# Old identity format is "{hostname}:{pid}"
|
184
|
+
# Queue names may also have colons (namespaced).
|
185
|
+
# Expressing this in a single regex is unreadable
|
186
|
+
|
187
|
+
# Test the newer expected format first, only checking the older if necessary
|
188
|
+
original_queue, identity = key.scan(WORKING_QUEUE_REGEX).flatten
|
189
|
+
return original_queue, identity unless original_queue.nil? || identity.nil?
|
190
|
+
|
191
|
+
key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten
|
192
|
+
end
|
193
|
+
|
169
194
|
# Detect "old" jobs and requeue them because the worker they were assigned
|
170
195
|
# to probably failed miserably.
|
171
196
|
def clean_working_queues!
|
@@ -173,19 +198,16 @@ module Sidekiq
|
|
173
198
|
|
174
199
|
Sidekiq.redis do |conn|
|
175
200
|
conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
|
176
|
-
|
177
|
-
hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten
|
201
|
+
original_queue, identity = extract_queue_and_identity(key)
|
178
202
|
|
179
|
-
next if
|
203
|
+
next if original_queue.nil? || identity.nil?
|
180
204
|
|
181
|
-
clean_working_queue!(key) if self.class.worker_dead?(
|
205
|
+
clean_working_queue!(original_queue, key) if self.class.worker_dead?(identity, conn)
|
182
206
|
end
|
183
207
|
end
|
184
208
|
end
|
185
209
|
|
186
|
-
def clean_working_queue!(working_queue)
|
187
|
-
original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
|
188
|
-
|
210
|
+
def clean_working_queue!(original_queue, working_queue)
|
189
211
|
Sidekiq.redis do |conn|
|
190
212
|
while job = conn.rpop(working_queue)
|
191
213
|
preprocess_interrupted_job(job, original_queue)
|
@@ -207,7 +229,7 @@ module Sidekiq
|
|
207
229
|
rescue NameError
|
208
230
|
end
|
209
231
|
|
210
|
-
max_retries_after_interruption ||=
|
232
|
+
max_retries_after_interruption ||= @config[:max_retries_after_interruption]
|
211
233
|
max_retries_after_interruption ||= DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION
|
212
234
|
max_retries_after_interruption
|
213
235
|
end
|
@@ -220,7 +242,7 @@ module Sidekiq
|
|
220
242
|
)
|
221
243
|
|
222
244
|
job = Sidekiq.dump_json(msg)
|
223
|
-
|
245
|
+
@interrupted_set.put(job, connection: multi_connection)
|
224
246
|
end
|
225
247
|
|
226
248
|
# Yield block with an existing connection or creates another one
|
@@ -37,11 +37,15 @@ module Sidekiq
|
|
37
37
|
end
|
38
38
|
|
39
39
|
def self.max_jobs
|
40
|
-
|
40
|
+
options[:interrupted_max_jobs] || DEFAULT_MAX_CAPACITY
|
41
41
|
end
|
42
42
|
|
43
43
|
def self.timeout
|
44
|
-
|
44
|
+
options[:interrupted_timeout_in_seconds] || DEFAULT_MAX_TIMEOUT
|
45
|
+
end
|
46
|
+
|
47
|
+
def self.options
|
48
|
+
Sidekiq.respond_to?(:[]) ? Sidekiq : Sidekiq.options
|
45
49
|
end
|
46
50
|
end
|
47
51
|
end
|
@@ -5,14 +5,14 @@ module Sidekiq
|
|
5
5
|
# We want the fetch operation to timeout every few seconds so the thread
|
6
6
|
# can check if the process is shutting down. This constant is only used
|
7
7
|
# for semi-reliable fetch.
|
8
|
-
|
8
|
+
DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds
|
9
9
|
|
10
10
|
def initialize(options)
|
11
11
|
super
|
12
12
|
|
13
13
|
if strictly_ordered_queues
|
14
14
|
@queues = @queues.uniq
|
15
|
-
@queues <<
|
15
|
+
@queues << { timeout: semi_reliable_fetch_timeout }
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -36,9 +36,13 @@ module Sidekiq
|
|
36
36
|
@queues
|
37
37
|
else
|
38
38
|
queues = @queues.shuffle.uniq
|
39
|
-
queues <<
|
39
|
+
queues << { timeout: semi_reliable_fetch_timeout }
|
40
40
|
queues
|
41
41
|
end
|
42
42
|
end
|
43
|
+
|
44
|
+
def semi_reliable_fetch_timeout
|
45
|
+
@semi_reliable_fetch_timeout ||= ENV['SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT']&.to_i || DEFAULT_SEMI_RELIABLE_FETCH_TIMEOUT
|
46
|
+
end
|
43
47
|
end
|
44
48
|
end
|
@@ -65,7 +65,7 @@ describe Sidekiq::BaseReliableFetch do
|
|
65
65
|
end
|
66
66
|
|
67
67
|
it 'does not put jobs into interrupted queue if it is disabled' do
|
68
|
-
|
68
|
+
options[:max_retries_after_interruption] = -1
|
69
69
|
|
70
70
|
uow = described_class::UnitOfWork
|
71
71
|
interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3)
|
@@ -75,8 +75,6 @@ describe Sidekiq::BaseReliableFetch do
|
|
75
75
|
expect(queue1.size).to eq 2
|
76
76
|
expect(queue2.size).to eq 1
|
77
77
|
expect(Sidekiq::InterruptedSet.new.size).to eq 0
|
78
|
-
|
79
|
-
Sidekiq.options[:max_retries_after_interruption] = 3
|
80
78
|
end
|
81
79
|
end
|
82
80
|
|
@@ -88,7 +86,7 @@ describe Sidekiq::BaseReliableFetch do
|
|
88
86
|
Sidekiq.redis do |conn|
|
89
87
|
sleep 0.2 # Give the time to heartbeat thread to make a loop
|
90
88
|
|
91
|
-
heartbeat_key = described_class.heartbeat_key(
|
89
|
+
heartbeat_key = described_class.heartbeat_key(described_class.identity)
|
92
90
|
heartbeat = conn.get(heartbeat_key)
|
93
91
|
|
94
92
|
expect(heartbeat).not_to be_nil
|
@@ -7,111 +7,163 @@ shared_examples 'a Sidekiq fetcher' do
|
|
7
7
|
let(:job) { Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo']) }
|
8
8
|
let(:fetcher) { described_class.new(queues: queues) }
|
9
9
|
|
10
|
-
it '
|
11
|
-
Sidekiq.redis
|
12
|
-
|
13
|
-
uow = fetcher.retrieve_work
|
10
|
+
it 'does not clean up orphaned jobs more than once per cleanup interval' do
|
11
|
+
Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
|
14
12
|
|
15
|
-
expect(
|
16
|
-
expect(uow.queue_name).to eq 'assigned'
|
17
|
-
expect(uow.job).to eq job
|
18
|
-
expect(Sidekiq::Queue.new('assigned').size).to eq 0
|
19
|
-
end
|
13
|
+
expect(fetcher).to receive(:clean_working_queues!).once
|
20
14
|
|
21
|
-
|
22
|
-
|
15
|
+
threads = 10.times.map do
|
16
|
+
Thread.new do
|
17
|
+
fetcher.retrieve_work
|
18
|
+
end
|
19
|
+
end
|
23
20
|
|
24
|
-
|
21
|
+
threads.map(&:join)
|
25
22
|
end
|
26
23
|
|
27
|
-
it '
|
24
|
+
it 'retrieves by order when strictly order is enabled' do
|
25
|
+
fetcher = described_class.new(strict: true, queues: ['first', 'second'])
|
26
|
+
|
28
27
|
Sidekiq.redis do |conn|
|
29
|
-
conn.rpush(
|
28
|
+
conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
|
29
|
+
conn.rpush('queue:second', 'msg4')
|
30
30
|
end
|
31
31
|
|
32
|
-
|
33
|
-
expected_job['interrupted_count'] = 1
|
34
|
-
expected_job = Sidekiq.dump_json(expected_job)
|
32
|
+
jobs = (1..4).map { fetcher.retrieve_work.job }
|
35
33
|
|
36
|
-
|
34
|
+
expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
|
35
|
+
end
|
37
36
|
|
38
|
-
|
37
|
+
it 'does not starve any queue when queues are not strictly ordered' do
|
38
|
+
fetcher = described_class.new(queues: ['first', 'second'])
|
39
39
|
|
40
40
|
Sidekiq.redis do |conn|
|
41
|
-
|
41
|
+
conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
|
42
|
+
conn.rpush('queue:second', 'this_job_should_not_stuck')
|
42
43
|
end
|
44
|
+
|
45
|
+
jobs = (1..100).map { fetcher.retrieve_work.job }
|
46
|
+
|
47
|
+
expect(jobs).to include 'this_job_should_not_stuck'
|
43
48
|
end
|
44
49
|
|
45
|
-
|
46
|
-
|
47
|
-
# incorrect form in general
|
48
|
-
malformed_key = "#{other_process_working_queue_name('assigned')}:X"
|
49
|
-
Sidekiq.redis do |conn|
|
50
|
-
conn.rpush(malformed_key, job)
|
51
|
-
end
|
50
|
+
shared_examples "basic queue handling" do |queue|
|
51
|
+
let (:fetcher) { described_class.new(queues: [queue]) }
|
52
52
|
|
53
|
-
|
53
|
+
it 'retrieves the job and puts it to working queue' do
|
54
|
+
Sidekiq.redis { |conn| conn.rpush("queue:#{queue}", job) }
|
54
55
|
|
55
|
-
|
56
|
-
|
56
|
+
uow = fetcher.retrieve_work
|
57
|
+
|
58
|
+
expect(working_queue_size(queue)).to eq 1
|
59
|
+
expect(uow.queue_name).to eq queue
|
60
|
+
expect(uow.job).to eq job
|
61
|
+
expect(Sidekiq::Queue.new(queue).size).to eq 0
|
57
62
|
end
|
58
|
-
end
|
59
63
|
|
64
|
+
it 'does not retrieve a job from foreign queue' do
|
65
|
+
Sidekiq.redis { |conn| conn.rpush("'queue:#{queue}:not", job) }
|
66
|
+
expect(fetcher.retrieve_work).to be_nil
|
60
67
|
|
61
|
-
|
62
|
-
|
68
|
+
Sidekiq.redis { |conn| conn.rpush("'queue:not_#{queue}", job) }
|
69
|
+
expect(fetcher.retrieve_work).to be_nil
|
63
70
|
|
64
|
-
|
65
|
-
|
71
|
+
Sidekiq.redis { |conn| conn.rpush("'queue:random_name", job) }
|
72
|
+
expect(fetcher.retrieve_work).to be_nil
|
66
73
|
end
|
67
74
|
|
68
|
-
|
75
|
+
it 'requeues jobs from legacy dead working queue with incremented interrupted_count' do
|
76
|
+
Sidekiq.redis do |conn|
|
77
|
+
conn.rpush(legacy_other_process_working_queue_name(queue), job)
|
78
|
+
end
|
69
79
|
|
70
|
-
|
80
|
+
expected_job = Sidekiq.load_json(job)
|
81
|
+
expected_job['interrupted_count'] = 1
|
82
|
+
expected_job = Sidekiq.dump_json(expected_job)
|
71
83
|
|
72
|
-
|
73
|
-
|
84
|
+
uow = fetcher.retrieve_work
|
85
|
+
|
86
|
+
expect(uow).to_not be_nil
|
87
|
+
expect(uow.job).to eq expected_job
|
88
|
+
|
89
|
+
Sidekiq.redis do |conn|
|
90
|
+
expect(conn.llen(legacy_other_process_working_queue_name(queue))).to eq 0
|
91
|
+
end
|
74
92
|
end
|
75
|
-
end
|
76
93
|
|
77
|
-
|
78
|
-
|
94
|
+
it 'ignores working queue keys in unknown formats' do
|
95
|
+
# Add a spurious non-numeric char segment at the end; this simulates any other
|
96
|
+
# incorrect form in general
|
97
|
+
malformed_key = "#{other_process_working_queue_name(queue)}:X"
|
98
|
+
Sidekiq.redis do |conn|
|
99
|
+
conn.rpush(malformed_key, job)
|
100
|
+
end
|
79
101
|
|
80
|
-
|
102
|
+
uow = fetcher.retrieve_work
|
81
103
|
|
82
|
-
|
83
|
-
|
84
|
-
fetcher.retrieve_work
|
104
|
+
Sidekiq.redis do |conn|
|
105
|
+
expect(conn.llen(malformed_key)).to eq 1
|
85
106
|
end
|
86
107
|
end
|
87
108
|
|
88
|
-
|
89
|
-
|
109
|
+
it 'requeues jobs from dead working queue with incremented interrupted_count' do
|
110
|
+
Sidekiq.redis do |conn|
|
111
|
+
conn.rpush(other_process_working_queue_name(queue), job)
|
112
|
+
end
|
90
113
|
|
91
|
-
|
92
|
-
|
114
|
+
expected_job = Sidekiq.load_json(job)
|
115
|
+
expected_job['interrupted_count'] = 1
|
116
|
+
expected_job = Sidekiq.dump_json(expected_job)
|
93
117
|
|
94
|
-
|
95
|
-
|
96
|
-
|
118
|
+
uow = fetcher.retrieve_work
|
119
|
+
|
120
|
+
expect(uow).to_not be_nil
|
121
|
+
expect(uow.job).to eq expected_job
|
122
|
+
|
123
|
+
Sidekiq.redis do |conn|
|
124
|
+
expect(conn.llen(other_process_working_queue_name(queue))).to eq 0
|
125
|
+
end
|
97
126
|
end
|
98
127
|
|
99
|
-
jobs
|
128
|
+
it 'does not requeue jobs from live working queue' do
|
129
|
+
working_queue = live_other_process_working_queue_name(queue)
|
100
130
|
|
101
|
-
|
102
|
-
|
131
|
+
Sidekiq.redis do |conn|
|
132
|
+
conn.rpush(working_queue, job)
|
133
|
+
end
|
103
134
|
|
104
|
-
|
105
|
-
fetcher = described_class.new(queues: ['first', 'second'])
|
135
|
+
uow = fetcher.retrieve_work
|
106
136
|
|
107
|
-
|
108
|
-
|
109
|
-
|
137
|
+
expect(uow).to be_nil
|
138
|
+
|
139
|
+
Sidekiq.redis do |conn|
|
140
|
+
expect(conn.llen(working_queue)).to eq 1
|
141
|
+
end
|
110
142
|
end
|
143
|
+
end
|
111
144
|
|
112
|
-
|
145
|
+
context 'with various queues' do
|
146
|
+
%w[assigned namespace:assigned namespace:deeper:assigned].each do |queue|
|
147
|
+
it_behaves_like "basic queue handling", queue
|
148
|
+
end
|
149
|
+
end
|
113
150
|
|
114
|
-
|
151
|
+
context 'with short cleanup interval' do
|
152
|
+
let(:short_interval) { 1 }
|
153
|
+
let(:fetcher) { described_class.new(queues: queues, lease_interval: short_interval, cleanup_interval: short_interval) }
|
154
|
+
|
155
|
+
it 'requeues when there is no heartbeat' do
|
156
|
+
Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
|
157
|
+
# Use of retrieve_work twice with a sleep ensures we have exercised the
|
158
|
+
# `identity` method to create the working queue key name and that it
|
159
|
+
# matches the patterns used in the cleanup
|
160
|
+
uow = fetcher.retrieve_work
|
161
|
+
sleep(short_interval + 1)
|
162
|
+
uow = fetcher.retrieve_work
|
163
|
+
|
164
|
+
# Will only receive a UnitOfWork if the job was detected as failed and requeued
|
165
|
+
expect(uow).to_not be_nil
|
166
|
+
end
|
115
167
|
end
|
116
168
|
end
|
117
169
|
end
|
@@ -122,17 +174,22 @@ def working_queue_size(queue_name)
|
|
122
174
|
end
|
123
175
|
end
|
124
176
|
|
125
|
-
def
|
177
|
+
def legacy_other_process_working_queue_name(queue)
|
126
178
|
"#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}"
|
127
179
|
end
|
128
180
|
|
181
|
+
def other_process_working_queue_name(queue)
|
182
|
+
"#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}:#{::SecureRandom.hex(6)}"
|
183
|
+
end
|
184
|
+
|
129
185
|
def live_other_process_working_queue_name(queue)
|
130
186
|
pid = ::Process.pid + 1
|
131
187
|
hostname = Socket.gethostname
|
188
|
+
nonce = SecureRandom.hex(6)
|
132
189
|
|
133
190
|
Sidekiq.redis do |conn|
|
134
|
-
conn.set(Sidekiq::BaseReliableFetch.heartbeat_key(hostname
|
191
|
+
conn.set(Sidekiq::BaseReliableFetch.heartbeat_key("#{hostname}-#{pid}-#{nonce}"), 1)
|
135
192
|
end
|
136
193
|
|
137
|
-
"#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}"
|
194
|
+
"#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}:#{nonce}"
|
138
195
|
end
|
@@ -5,4 +5,39 @@ require 'sidekiq/semi_reliable_fetch'
|
|
5
5
|
|
6
6
|
describe Sidekiq::SemiReliableFetch do
|
7
7
|
include_examples 'a Sidekiq fetcher'
|
8
|
+
|
9
|
+
describe '#retrieve_work' do
|
10
|
+
context 'timeout config' do
|
11
|
+
let(:queues) { ['stuff_to_do'] }
|
12
|
+
let(:fetcher) { described_class.new(queues: queues) }
|
13
|
+
|
14
|
+
before do
|
15
|
+
stub_env('SIDEKIQ_SEMI_RELIABLE_FETCH_TIMEOUT', timeout)
|
16
|
+
end
|
17
|
+
|
18
|
+
context 'when the timeout is not configured' do
|
19
|
+
let(:timeout) { nil }
|
20
|
+
|
21
|
+
it 'brpops with the default timeout timeout' do
|
22
|
+
Sidekiq.redis do |connection|
|
23
|
+
expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 2 }).once.and_call_original
|
24
|
+
|
25
|
+
fetcher.retrieve_work
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
context 'when the timeout is set in the env' do
|
31
|
+
let(:timeout) { '5' }
|
32
|
+
|
33
|
+
it 'brpops with the default timeout timeout' do
|
34
|
+
Sidekiq.redis do |connection|
|
35
|
+
expect(connection).to receive(:brpop).with("queue:stuff_to_do", { timeout: 5 }).once.and_call_original
|
36
|
+
|
37
|
+
fetcher.retrieve_work
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
8
43
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require 'sidekiq'
|
2
|
-
require 'sidekiq/util'
|
3
2
|
require 'sidekiq/api'
|
4
3
|
require 'pry'
|
5
4
|
require 'simplecov'
|
5
|
+
require 'stub_env'
|
6
6
|
|
7
7
|
SimpleCov.start
|
8
8
|
|
@@ -29,6 +29,7 @@ Sidekiq.logger.level = Logger::ERROR
|
|
29
29
|
#
|
30
30
|
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
31
31
|
RSpec.configure do |config|
|
32
|
+
config.include StubEnv::Helpers
|
32
33
|
# rspec-expectations config goes here. You can use an alternate
|
33
34
|
# assertion/expectation library such as wrong or the stdlib/minitest
|
34
35
|
# assertions if you prefer.
|
data/tests/README.md
CHANGED
@@ -7,13 +7,13 @@ TEST_CLEANUP_INTERVAL = 20
|
|
7
7
|
TEST_LEASE_INTERVAL = 5
|
8
8
|
|
9
9
|
Sidekiq.configure_server do |config|
|
10
|
-
config
|
10
|
+
config[:semi_reliable_fetch] = true
|
11
11
|
|
12
12
|
# We need to override these parameters to not wait too long
|
13
13
|
# The default values are good for production use only
|
14
14
|
# These will be ignored for :basic
|
15
|
-
config
|
16
|
-
config
|
15
|
+
config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
|
16
|
+
config[:lease_interval] = TEST_LEASE_INTERVAL
|
17
17
|
|
18
18
|
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
19
19
|
end
|
data/tests/reliability/config.rb
CHANGED
@@ -16,13 +16,13 @@ WAIT_CLEANUP = TEST_CLEANUP_INTERVAL +
|
|
16
16
|
|
17
17
|
Sidekiq.configure_server do |config|
|
18
18
|
if %i[semi reliable].include?(JOB_FETCHER)
|
19
|
-
config
|
19
|
+
config[:semi_reliable_fetch] = (JOB_FETCHER == :semi)
|
20
20
|
|
21
21
|
# We need to override these parameters to not wait too long
|
22
22
|
# The default values are good for production use only
|
23
23
|
# These will be ignored for :basic
|
24
|
-
config
|
25
|
-
config
|
24
|
+
config[:cleanup_interval] = TEST_CLEANUP_INTERVAL
|
25
|
+
config[:lease_interval] = TEST_LEASE_INTERVAL
|
26
26
|
|
27
27
|
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
28
28
|
end
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: gitlab-sidekiq-fetcher
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TEA
|
8
8
|
- GitLab
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-11-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: sidekiq
|
@@ -25,6 +25,20 @@ dependencies:
|
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '6.1'
|
28
|
+
- !ruby/object:Gem::Dependency
|
29
|
+
name: json
|
30
|
+
requirement: !ruby/object:Gem::Requirement
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: '2.5'
|
35
|
+
type: :runtime
|
36
|
+
prerelease: false
|
37
|
+
version_requirements: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '2.5'
|
28
42
|
description: Redis reliable queue pattern implemented in Sidekiq
|
29
43
|
email: valery@gitlab.com
|
30
44
|
executables: []
|
@@ -34,6 +48,7 @@ files:
|
|
34
48
|
- ".gitignore"
|
35
49
|
- ".gitlab-ci.yml"
|
36
50
|
- ".rspec"
|
51
|
+
- CONTRIBUTING.md
|
37
52
|
- Gemfile
|
38
53
|
- Gemfile.lock
|
39
54
|
- LICENSE
|
@@ -63,7 +78,7 @@ homepage: https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/
|
|
63
78
|
licenses:
|
64
79
|
- LGPL-3.0
|
65
80
|
metadata: {}
|
66
|
-
post_install_message:
|
81
|
+
post_install_message:
|
67
82
|
rdoc_options: []
|
68
83
|
require_paths:
|
69
84
|
- lib
|
@@ -78,8 +93,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
78
93
|
- !ruby/object:Gem::Version
|
79
94
|
version: '0'
|
80
95
|
requirements: []
|
81
|
-
rubygems_version: 3.
|
82
|
-
signing_key:
|
96
|
+
rubygems_version: 3.2.22
|
97
|
+
signing_key:
|
83
98
|
specification_version: 4
|
84
99
|
summary: Reliable fetch extension for Sidekiq
|
85
100
|
test_files: []
|