gitlab-sidekiq-fetcher 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/gitlab-sidekiq-fetcher.gemspec +1 -1
- data/lib/sidekiq/base_reliable_fetch.rb +36 -18
- data/spec/base_reliable_fetch_spec.rb +1 -1
- data/spec/fetch_shared_examples.rb +123 -66
- metadata +5 -5
checksums.yaml
CHANGED

@@ -1,7 +1,7 @@
---
SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c7be23d59956ffa44288a1c870bcca66fd0119682f810325d71a3ebaa8b76e80
+  data.tar.gz: 013a7124f61044572ad93335e95c18357c60804dd89024d987485b2d87775787
SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 92653bc5f9b5729f4dd50a8243a869c20d9621a1a9d25c46d729e735895e0f2d4d940c5a766803f1a5fd908ab0d9340f27d255dbd99f31bab4923e2f539c1882
+  data.tar.gz: d763b8b0ee3c2522752130fac86b83e67e8513faf919dd361aad9896aac684809650959b1627dd38e0440343c063f95b807b67fc31a10217ce1f15c428759803

data/README.md
CHANGED

@@ -8,6 +8,8 @@ It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.

**IMPORTANT NOTE:** Since version `0.7.0` this gem works only with `sidekiq >= 6.1` (which introduced Fetch API breaking changes). Please use version `~> 0.5` if you use older version of the `sidekiq` .

+**UPGRADE NOTE:** If upgrading from 0.7.0, strongly consider a full deployed step on 0.7.1 before 0.8.0; that fixes a bug in the queue name validation that will hit if sidekiq nodes running 0.7.0 see working queues named by 0.8.0. See https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/-/merge_requests/22
+
There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
semi-reliable fetch that uses regular `brpop` and `lpush` to pick the job and put it to working queue. The main benefit of "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.

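For illustration only (this is not code from the gem or from this diff), a minimal sketch of the two fetch patterns described in that paragraph, assuming the `redis` gem and a single pre-built working queue key:

require 'redis'

conn = Redis.new
working_queue = 'working:queue:default:worker01:1234:0a1b2c'

# Reliable fetch: RPOPLPUSH atomically moves a job into the working queue,
# but it can only watch one list, so every configured queue is polled in turn.
job = conn.rpoplpush('queue:default', working_queue)

# Semi-reliable fetch: BRPOP blocks on many queues at once; the job is then
# pushed to the working queue in a second, non-atomic step.
queue, job = conn.brpop('queue:default', 'queue:mailers', timeout: 2)
conn.lpush(working_queue, job) if job

The atomic RPOPLPUSH move is what keeps jobs from being lost between the pop and the push, at the cost of polling each queue separately.
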
data/lib/sidekiq/base_reliable_fetch.rb
CHANGED

@@ -21,6 +21,10 @@ module Sidekiq
    # How much time a job can be interrupted
    DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION = 3

+    # Regexes for matching working queue keys
+    WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
+    LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze
+
    UnitOfWork = Struct.new(:queue, :job) do
      def acknowledge
        Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }

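A small standalone check of what each of the two new patterns accepts; the `'working'` value for `WORKING_QUEUE_PREFIX` and the sample host/pid/nonce are assumptions made for the example, not values taken from this diff:

WORKING_QUEUE_PREFIX = 'working'
WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze

new_key    = 'working:queue:mailers:worker01:1234:0a1b2c'  # hostname:pid:nonce identity
legacy_key = 'working:queue:mailers:worker01:1234'         # hostname:pid identity

p new_key.scan(WORKING_QUEUE_REGEX).flatten           # => ["queue:mailers", "worker01:1234:0a1b2c"]
p legacy_key.scan(WORKING_QUEUE_REGEX).flatten        # => [] (no trailing hex nonce segment)
p legacy_key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten # => ["queue:mailers", "worker01:1234"]
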
@@ -70,32 +74,36 @@ module Sidekiq
      end
    end

-    def self.
-
+    def self.hostname
+      Socket.gethostname
    end

-    def self.
-
+    def self.process_nonce
+      @@process_nonce ||= SecureRandom.hex(6)
+    end
+
+    def self.identity
+      @@identity ||= "#{hostname}:#{$$}:#{process_nonce}"
    end

    def self.heartbeat
      Sidekiq.redis do |conn|
-        conn.set(heartbeat_key(
+        conn.set(heartbeat_key(identity), 1, ex: HEARTBEAT_LIFESPAN)
      end

-      Sidekiq.logger.debug("Heartbeat for
+      Sidekiq.logger.debug("Heartbeat for #{identity}")
    end

-    def self.worker_dead?(
-      !conn.get(heartbeat_key(
+    def self.worker_dead?(identity, conn)
+      !conn.get(heartbeat_key(identity))
    end

-    def self.heartbeat_key(
-      "reliable-fetcher-heartbeat-#{
+    def self.heartbeat_key(identity)
+      "reliable-fetcher-heartbeat-#{identity.gsub(':', '-')}"
    end

    def self.working_queue_name(queue)
-      "#{WORKING_QUEUE_PREFIX}:#{queue}:#{
+      "#{WORKING_QUEUE_PREFIX}:#{queue}:#{identity}"
    end

    attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,

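Putting the new helpers together, a simplified standalone sketch (the `working` prefix, sample host name, and queue name are assumptions, not values from this diff) of how an identity propagates into the key names:

require 'socket'
require 'securerandom'

hostname = Socket.gethostname                     # e.g. "worker01"
nonce    = SecureRandom.hex(6)                    # e.g. "0a1b2c3d4e5f"
identity = "#{hostname}:#{Process.pid}:#{nonce}"  # e.g. "worker01:1234:0a1b2c3d4e5f"

# Working queue and heartbeat key names derived from that identity,
# mirroring working_queue_name and heartbeat_key above.
working_queue = "working:queue:default:#{identity}"
heartbeat_key = "reliable-fetcher-heartbeat-#{identity.gsub(':', '-')}"

puts working_queue  # e.g. working:queue:default:worker01:1234:0a1b2c3d4e5f
puts heartbeat_key  # e.g. reliable-fetcher-heartbeat-worker01-1234-0a1b2c3d4e5f
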
@@ -166,6 +174,19 @@ module Sidekiq
      )
    end

+    def extract_queue_and_identity(key)
+      # New identity format is "{hostname}:{pid}:{randomhex}
+      # Old identity format is "{hostname}:{pid}"
+      # Queue names may also have colons (namespaced).
+      # Expressing this in a single regex is unreadable
+
+      # Test the newer expected format first, only checking the older if necessary
+      original_queue, identity = key.scan(WORKING_QUEUE_REGEX).flatten
+      return original_queue, identity unless original_queue.nil? || identity.nil?
+
+      key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten
+    end
+
    # Detect "old" jobs and requeue them because the worker they were assigned
    # to probably failed miserably.
    def clean_working_queues!

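A standalone rehearsal of that fall-back (regex constants copied from the earlier hunk, `working` prefix assumed) shows a namespaced queue keeping its colons while the identity is still split off, and a legacy-format key being handled by the second pattern:

WORKING_QUEUE_PREFIX = 'working'
WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*:[0-9a-f]*)\z/.freeze
LEGACY_WORKING_QUEUE_REGEX = /#{WORKING_QUEUE_PREFIX}:(queue:.*):([^:]*:[0-9]*)\z/.freeze

def extract_queue_and_identity(key)
  # Try the new "{hostname}:{pid}:{randomhex}" identity first, then fall back
  # to the legacy "{hostname}:{pid}" format.
  original_queue, identity = key.scan(WORKING_QUEUE_REGEX).flatten
  return original_queue, identity unless original_queue.nil? || identity.nil?

  key.scan(LEGACY_WORKING_QUEUE_REGEX).flatten
end

p extract_queue_and_identity('working:queue:namespace:deeper:assigned:worker01:1234:0a1b2c')
# => ["queue:namespace:deeper:assigned", "worker01:1234:0a1b2c"]
p extract_queue_and_identity('working:queue:assigned:worker01:1234')
# => ["queue:assigned", "worker01:1234"]
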
@@ -173,19 +194,16 @@ module Sidekiq

      Sidekiq.redis do |conn|
        conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
-
-          hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten
+          original_queue, identity = extract_queue_and_identity(key)

-          next if
+          next if original_queue.nil? || identity.nil?

-          clean_working_queue!(key) if self.class.worker_dead?(
+          clean_working_queue!(original_queue, key) if self.class.worker_dead?(identity, conn)
        end
      end
    end

-    def clean_working_queue!(working_queue)
-      original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
-
+    def clean_working_queue!(original_queue, working_queue)
      Sidekiq.redis do |conn|
        while job = conn.rpop(working_queue)
          preprocess_interrupted_job(job, original_queue)

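Conceptually the cleanup pass boils down to the following rough sketch, using a plain `redis` client and made-up key names; the gem's real `preprocess_interrupted_job` also applies the interruption retry limit (`DEFAULT_MAX_RETRIES_AFTER_INTERRUPTION` above), which this sketch skips:

require 'json'
require 'redis'

conn = Redis.new
working_queue  = 'working:queue:assigned:worker01:1234:0a1b2c'
original_queue = 'queue:assigned'
heartbeat_key  = 'reliable-fetcher-heartbeat-worker01-1234-0a1b2c'

# No heartbeat key => the owning process is treated as dead, so its
# in-flight jobs are drained and pushed back onto the original queue
# with interrupted_count bumped.
if conn.get(heartbeat_key).nil?
  while (job = conn.rpop(working_queue))
    payload = JSON.parse(job)
    payload['interrupted_count'] = payload.fetch('interrupted_count', 0) + 1
    conn.lpush(original_queue, JSON.generate(payload))
  end
end
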
data/spec/base_reliable_fetch_spec.rb
CHANGED

@@ -88,7 +88,7 @@ describe Sidekiq::BaseReliableFetch do
      Sidekiq.redis do |conn|
        sleep 0.2 # Give the time to heartbeat thread to make a loop

-        heartbeat_key = described_class.heartbeat_key(
+        heartbeat_key = described_class.heartbeat_key(described_class.identity)
        heartbeat = conn.get(heartbeat_key)

        expect(heartbeat).not_to be_nil

data/spec/fetch_shared_examples.rb
CHANGED

@@ -7,111 +7,163 @@ shared_examples 'a Sidekiq fetcher' do
    let(:job) { Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo']) }
    let(:fetcher) { described_class.new(queues: queues) }

-    it '
-      Sidekiq.redis
-
-      uow = fetcher.retrieve_work
+    it 'does not clean up orphaned jobs more than once per cleanup interval' do
+      Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)

-      expect(
-      expect(uow.queue_name).to eq 'assigned'
-      expect(uow.job).to eq job
-      expect(Sidekiq::Queue.new('assigned').size).to eq 0
-    end
+      expect(fetcher).to receive(:clean_working_queues!).once

-
-
+      threads = 10.times.map do
+        Thread.new do
+          fetcher.retrieve_work
+        end
+      end

-
+      threads.map(&:join)
    end

-    it '
+    it 'retrieves by order when strictly order is enabled' do
+      fetcher = described_class.new(strict: true, queues: ['first', 'second'])
+
      Sidekiq.redis do |conn|
-        conn.rpush(
+        conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
+        conn.rpush('queue:second', 'msg4')
      end

-
-      expected_job['interrupted_count'] = 1
-      expected_job = Sidekiq.dump_json(expected_job)
+      jobs = (1..4).map { fetcher.retrieve_work.job }

-
+      expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
+    end

-
+    it 'does not starve any queue when queues are not strictly ordered' do
+      fetcher = described_class.new(queues: ['first', 'second'])

      Sidekiq.redis do |conn|
-
+        conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
+        conn.rpush('queue:second', 'this_job_should_not_stuck')
      end
+
+      jobs = (1..100).map { fetcher.retrieve_work.job }
+
+      expect(jobs).to include 'this_job_should_not_stuck'
    end

-
-
-      # incorrect form in general
-      malformed_key = "#{other_process_working_queue_name('assigned')}:X"
-      Sidekiq.redis do |conn|
-        conn.rpush(malformed_key, job)
-      end
+    shared_examples "basic queue handling" do |queue|
+      let (:fetcher) { described_class.new(queues: [queue]) }

-
+      it 'retrieves the job and puts it to working queue' do
+        Sidekiq.redis { |conn| conn.rpush("queue:#{queue}", job) }

-
-
+        uow = fetcher.retrieve_work
+
+        expect(working_queue_size(queue)).to eq 1
+        expect(uow.queue_name).to eq queue
+        expect(uow.job).to eq job
+        expect(Sidekiq::Queue.new(queue).size).to eq 0
      end
-    end

+      it 'does not retrieve a job from foreign queue' do
+        Sidekiq.redis { |conn| conn.rpush("'queue:#{queue}:not", job) }
+        expect(fetcher.retrieve_work).to be_nil

-
-
+        Sidekiq.redis { |conn| conn.rpush("'queue:not_#{queue}", job) }
+        expect(fetcher.retrieve_work).to be_nil

-
-
+        Sidekiq.redis { |conn| conn.rpush("'queue:random_name", job) }
+        expect(fetcher.retrieve_work).to be_nil
      end

-
+      it 'requeues jobs from legacy dead working queue with incremented interrupted_count' do
+        Sidekiq.redis do |conn|
+          conn.rpush(legacy_other_process_working_queue_name(queue), job)
+        end

-
+        expected_job = Sidekiq.load_json(job)
+        expected_job['interrupted_count'] = 1
+        expected_job = Sidekiq.dump_json(expected_job)

-
-
+        uow = fetcher.retrieve_work
+
+        expect(uow).to_not be_nil
+        expect(uow.job).to eq expected_job
+
+        Sidekiq.redis do |conn|
+          expect(conn.llen(legacy_other_process_working_queue_name(queue))).to eq 0
+        end
      end
-    end

-
-
+      it 'ignores working queue keys in unknown formats' do
+        # Add a spurious non-numeric char segment at the end; this simulates any other
+        # incorrect form in general
+        malformed_key = "#{other_process_working_queue_name(queue)}:X"
+        Sidekiq.redis do |conn|
+          conn.rpush(malformed_key, job)
+        end

-
+        uow = fetcher.retrieve_work

-
-
-      fetcher.retrieve_work
+        Sidekiq.redis do |conn|
+          expect(conn.llen(malformed_key)).to eq 1
        end
      end

-
-
+      it 'requeues jobs from dead working queue with incremented interrupted_count' do
+        Sidekiq.redis do |conn|
+          conn.rpush(other_process_working_queue_name(queue), job)
+        end

-
-
+        expected_job = Sidekiq.load_json(job)
+        expected_job['interrupted_count'] = 1
+        expected_job = Sidekiq.dump_json(expected_job)

-
-
-
+        uow = fetcher.retrieve_work
+
+        expect(uow).to_not be_nil
+        expect(uow.job).to eq expected_job
+
+        Sidekiq.redis do |conn|
+          expect(conn.llen(other_process_working_queue_name(queue))).to eq 0
+        end
      end

-      jobs
+      it 'does not requeue jobs from live working queue' do
+        working_queue = live_other_process_working_queue_name(queue)

-
-
+        Sidekiq.redis do |conn|
+          conn.rpush(working_queue, job)
+        end

-
-      fetcher = described_class.new(queues: ['first', 'second'])
+        uow = fetcher.retrieve_work

-
-
-
+        expect(uow).to be_nil
+
+        Sidekiq.redis do |conn|
+          expect(conn.llen(working_queue)).to eq 1
+        end
      end
+    end

-
+    context 'with various queues' do
+      %w[assigned namespace:assigned namespace:deeper:assigned].each do |queue|
+        it_behaves_like "basic queue handling", queue
+      end
+    end

-
+    context 'with short cleanup interval' do
+      let(:short_interval) { 1 }
+      let(:fetcher) { described_class.new(queues: queues, lease_interval: short_interval, cleanup_interval: short_interval) }
+
+      it 'requeues when there is no heartbeat' do
+        Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
+        # Use of retrieve_work twice with a sleep ensures we have exercised the
+        # `identity` method to create the working queue key name and that it
+        # matches the patterns used in the cleanup
+        uow = fetcher.retrieve_work
+        sleep(short_interval + 1)
+        uow = fetcher.retrieve_work
+
+        # Will only receive a UnitOfWork if the job was detected as failed and requeued
+        expect(uow).to_not be_nil
+      end
    end
  end
end

@@ -122,17 +174,22 @@ def working_queue_size(queue_name)
  end
end

-def
+def legacy_other_process_working_queue_name(queue)
  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}"
end

+def other_process_working_queue_name(queue)
+  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}:#{::SecureRandom.hex(6)}"
+end
+
def live_other_process_working_queue_name(queue)
  pid = ::Process.pid + 1
  hostname = Socket.gethostname
+  nonce = SecureRandom.hex(6)

  Sidekiq.redis do |conn|
-    conn.set(Sidekiq::BaseReliableFetch.heartbeat_key(hostname
+    conn.set(Sidekiq::BaseReliableFetch.heartbeat_key("#{hostname}-#{pid}-#{nonce}"), 1)
  end

-  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}"
+  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}:#{nonce}"
end

metadata
CHANGED

@@ -1,15 +1,15 @@
--- !ruby/object:Gem::Specification
name: gitlab-sidekiq-fetcher
version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.8.0
platform: ruby
authors:
- TEA
- GitLab
-autorequire:
+autorequire:
bindir: bin
cert_chain: []
-date: 2021-02
+date: 2021-03-02 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: sidekiq
@@ -63,7 +63,7 @@ homepage: https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/
licenses:
- LGPL-3.0
metadata: {}
-post_install_message:
+post_install_message:
rdoc_options: []
require_paths:
- lib
@@ -79,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
    version: '0'
requirements: []
rubygems_version: 3.1.4
-signing_key:
+signing_key:
specification_version: 4
summary: Reliable fetch extension for Sidekiq
test_files: []