gitlab-sidekiq-fetcher 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -1
- data/.gitlab-ci.yml +53 -0
- data/.rspec +1 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +50 -0
- data/README.md +15 -4
- data/{README-GITLAB.md → RELEASE-GITLAB.md} +0 -0
- data/gitlab-sidekiq-fetcher.gemspec +2 -4
- data/lib/sidekiq-reliable-fetch.rb +3 -2
- data/lib/sidekiq/base_reliable_fetch.rb +185 -0
- data/lib/sidekiq/reliable_fetch.rb +40 -0
- data/lib/sidekiq/semi_reliable_fetch.rb +44 -0
- data/spec/base_reliable_fetch_spec.rb +73 -0
- data/spec/fetch_shared_examples.rb +118 -0
- data/spec/reliable_fetch_spec.rb +7 -0
- data/spec/semi_reliable_fetch_spec.rb +7 -0
- data/spec/spec_helper.rb +115 -0
- data/test/README.md +34 -0
- data/test/config.rb +31 -0
- data/test/reliability_test.rb +116 -0
- data/test/worker.rb +26 -0
- metadata +21 -10
- data/lib/sidekiq-reliable-fetch/api.rb +0 -56
- data/lib/sidekiq-reliable-fetch/web.rb +0 -24
- data/lib/sidekiq/reliable_fetcher.rb +0 -143
- data/web/views/working_queue.erb +0 -25
- data/web/views/working_queues.erb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cc2e47cf7679deb6a6d526e199a09c20a671e3b55bad22d7c05ce17405eb6103
|
4
|
+
data.tar.gz: e71949587df8a635223ca8fa36339949df771f493e7edda0a4d9c34198600fb5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea7d6b7283354053a4f9fc24f419ab56da97efff3ce3ac3a2a10517ab2a1bd184a44b0ffb9d39b1dbdf27b23032c58b1139a18d8f2ec1bf5357deed65187e3ed
|
7
|
+
data.tar.gz: ae4c78eca271dc63abf98bc112a582a2f56fc4e1e7be843189c30193ff0dabd99232f5a019b9b69d45cd1f08ca7bd88afe0c4341e0256c88d352441474a08400
|
data/.gitignore
CHANGED
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
image: "ruby:2.5"
|
2
|
+
|
3
|
+
before_script:
|
4
|
+
- ruby -v
|
5
|
+
- which ruby
|
6
|
+
- gem install bundler --no-ri --no-rdoc
|
7
|
+
- bundle install --jobs $(nproc) "${FLAGS[@]}"
|
8
|
+
|
9
|
+
variables:
|
10
|
+
REDIS_URL: "redis://redis"
|
11
|
+
|
12
|
+
rspec:
|
13
|
+
stage: test
|
14
|
+
coverage: '/LOC \((\d+\.\d+%)\) covered.$/'
|
15
|
+
script:
|
16
|
+
- bundle exec rspec
|
17
|
+
services:
|
18
|
+
- redis:alpine
|
19
|
+
artifacts:
|
20
|
+
expire_in: 31d
|
21
|
+
when: always
|
22
|
+
paths:
|
23
|
+
- coverage/
|
24
|
+
|
25
|
+
.integration:
|
26
|
+
stage: test
|
27
|
+
script:
|
28
|
+
- cd test
|
29
|
+
- bundle exec ruby reliability_test.rb
|
30
|
+
services:
|
31
|
+
- redis:alpine
|
32
|
+
|
33
|
+
integration_semi:
|
34
|
+
extends: .integration
|
35
|
+
variables:
|
36
|
+
JOB_FETCHER: semi
|
37
|
+
|
38
|
+
integration_reliable:
|
39
|
+
extends: .integration
|
40
|
+
variables:
|
41
|
+
JOB_FETCHER: reliable
|
42
|
+
|
43
|
+
|
44
|
+
integration_basic:
|
45
|
+
extends: .integration
|
46
|
+
allow_failure: yes
|
47
|
+
variables:
|
48
|
+
JOB_FETCHER: basic
|
49
|
+
|
50
|
+
|
51
|
+
# rubocop:
|
52
|
+
# script:
|
53
|
+
# - bundle exec rubocop
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/Gemfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true

source 'https://rubygems.org'

# Resolve `github: "owner/repo"` shorthands over HTTPS.
git_source(:github) do |repo_name|
  "https://github.com/#{repo_name}"
end

# Dependencies declared for the :test group only.
group :test do
  gem 'rspec', '~> 3'
  gem 'pry'
  gem 'sidekiq', '~> 5.0'
  gem 'simplecov', require: false
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
coderay (1.1.2)
|
5
|
+
connection_pool (2.2.2)
|
6
|
+
diff-lcs (1.3)
|
7
|
+
docile (1.3.1)
|
8
|
+
json (2.1.0)
|
9
|
+
method_source (0.9.0)
|
10
|
+
pry (0.11.3)
|
11
|
+
coderay (~> 1.1.0)
|
12
|
+
method_source (~> 0.9.0)
|
13
|
+
rack (2.0.5)
|
14
|
+
rack-protection (2.0.4)
|
15
|
+
rack
|
16
|
+
redis (4.0.2)
|
17
|
+
rspec (3.8.0)
|
18
|
+
rspec-core (~> 3.8.0)
|
19
|
+
rspec-expectations (~> 3.8.0)
|
20
|
+
rspec-mocks (~> 3.8.0)
|
21
|
+
rspec-core (3.8.0)
|
22
|
+
rspec-support (~> 3.8.0)
|
23
|
+
rspec-expectations (3.8.1)
|
24
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
+
rspec-support (~> 3.8.0)
|
26
|
+
rspec-mocks (3.8.0)
|
27
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
28
|
+
rspec-support (~> 3.8.0)
|
29
|
+
rspec-support (3.8.0)
|
30
|
+
sidekiq (5.2.2)
|
31
|
+
connection_pool (~> 2.2, >= 2.2.2)
|
32
|
+
rack-protection (>= 1.5.0)
|
33
|
+
redis (>= 3.3.5, < 5)
|
34
|
+
simplecov (0.16.1)
|
35
|
+
docile (~> 1.1)
|
36
|
+
json (>= 1.8, < 3)
|
37
|
+
simplecov-html (~> 0.10.0)
|
38
|
+
simplecov-html (0.10.2)
|
39
|
+
|
40
|
+
PLATFORMS
|
41
|
+
ruby
|
42
|
+
|
43
|
+
DEPENDENCIES
|
44
|
+
pry
|
45
|
+
rspec (~> 3)
|
46
|
+
sidekiq (~> 5.0)
|
47
|
+
simplecov
|
48
|
+
|
49
|
+
BUNDLED WITH
|
50
|
+
1.17.1
|
data/README.md
CHANGED
@@ -5,10 +5,11 @@ gitlab-sidekiq-fetcher
|
|
5
5
|
fetches from Redis.
|
6
6
|
|
7
7
|
It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
|
8
|
-
At this time we only added Sidekiq 5+ support to it.
|
9
8
|
|
10
|
-
|
11
|
-
|
9
|
+
There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
|
10
|
+
semi-reliable fetch, which uses regular `brpop` and `lpush` to pick up the job and put it in the working queue. The main benefit of the "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
|
11
|
+
However, it comes at a cost: because `rpoplpush` can't watch multiple lists at the same time, we need to iterate over the entire queue list, which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is still much more reliable than the default Sidekiq fetcher and, compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
|
12
|
+
|
12
13
|
|
13
14
|
## Installation
|
14
15
|
|
@@ -24,12 +25,22 @@ Enable reliable fetches by calling this gem from your Sidekiq configuration:
|
|
24
25
|
|
25
26
|
```ruby
|
26
27
|
Sidekiq.configure_server do |config|
|
27
|
-
Sidekiq::
|
28
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
28
29
|
|
29
30
|
# …
|
30
31
|
end
|
31
32
|
```
|
32
33
|
|
34
|
+
There is an additional parameter `config.options[:semi_reliable_fetch]` you can use to switch between two strategies:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
Sidekiq.configure_server do |config|
|
38
|
+
config.options[:semi_reliable_fetch] = true # Default value is false
|
39
|
+
|
40
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
33
44
|
## License
|
34
45
|
|
35
46
|
LGPL-3.0, see the LICENSE file.
|
File without changes
|
@@ -1,16 +1,14 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'gitlab-sidekiq-fetcher'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.4.0'
|
4
4
|
s.authors = ['TEA', 'GitLab']
|
5
5
|
s.email = 'valery@gitlab.com'
|
6
6
|
s.license = 'LGPL-3.0'
|
7
|
-
s.homepage = 'https://
|
7
|
+
s.homepage = 'https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/'
|
8
8
|
s.summary = 'Reliable fetch extension for Sidekiq'
|
9
9
|
s.description = 'Redis reliable queue pattern implemented in Sidekiq'
|
10
10
|
s.require_paths = ['lib']
|
11
|
-
|
12
11
|
s.files = `git ls-files`.split($\)
|
13
12
|
s.test_files = []
|
14
|
-
|
15
13
|
s.add_dependency 'sidekiq', '~> 5'
|
16
14
|
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Common machinery shared by the reliable and semi-reliable fetch strategies.
  #
  # Every fetched job is copied into a per-process "working queue"
  # (see .working_queue_name). Each process periodically refreshes a
  # heartbeat key in Redis; when a process' heartbeat key is missing, any
  # fetcher holding the cleanup lease moves the jobs left in that process'
  # working queues back to their original queues.
  #
  # Subclasses must implement #retrieve_unit_of_work.
  class BaseReliableFetch
    DEFAULT_CLEANUP_INTERVAL = 60 * 60 # 1 hour
    HEARTBEAT_INTERVAL = 20 # seconds
    HEARTBEAT_LIFESPAN = 60 # seconds
    HEARTBEAT_RETRY_DELAY = 1 # seconds
    WORKING_QUEUE_PREFIX = 'working'

    # Defines how often we try to take a lease to not flood our
    # Redis server with SET requests
    DEFAULT_LEASE_INTERVAL = 2 * 60 # seconds
    LEASE_KEY = 'reliable-fetcher-cleanup-lock'

    # Defines the COUNT parameter that will be passed to Redis SCAN command
    SCAN_COUNT = 1000

    # A fetched job together with the Redis list ("queue:<name>") it came from.
    UnitOfWork = Struct.new(:queue, :job) do
      # Removes one occurrence of the job from this process' working queue.
      def acknowledge
        Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
      end

      # Queue name with everything up to and including "queue:" removed.
      def queue_name
        queue.sub(/.*queue:/, '')
      end

      # Pushes the job back to its original queue and removes it from the
      # working queue inside a single MULTI block.
      def requeue
        Sidekiq.redis do |conn|
          conn.multi do |multi|
            multi.lpush(queue, job)
            multi.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job)
          end
        end
      end
    end

    # Installs the fetch strategy on the given Sidekiq server configuration
    # and starts the heartbeat thread.
    #
    # config - object exposing an `options` hash; when
    #          `options[:semi_reliable_fetch]` is truthy SemiReliableFetch is
    #          selected, otherwise ReliableFetch.
    #
    # Returns the heartbeat Thread.
    def self.setup_reliable_fetch!(config)
      config.options[:fetch] = if config.options[:semi_reliable_fetch]
                                 Sidekiq::SemiReliableFetch
                               else
                                 Sidekiq::ReliableFetch
                               end

      Sidekiq.logger.info('GitLab reliable fetch activated!')

      start_heartbeat_thread
    end

    # Spawns a thread that refreshes the heartbeat key every
    # HEARTBEAT_INTERVAL seconds. Errors are logged and the heartbeat is
    # retried after HEARTBEAT_RETRY_DELAY seconds so the thread never dies on
    # a StandardError.
    def self.start_heartbeat_thread
      Thread.new do
        loop do
          begin
            heartbeat

            sleep HEARTBEAT_INTERVAL
          rescue => e
            Sidekiq.logger.error("Heartbeat thread error: #{e.message}")

            sleep HEARTBEAT_RETRY_DELAY
          end
        end
      end
    end

    # Memoized PID of the current process.
    def self.pid
      @pid ||= ::Process.pid
    end

    # Memoized hostname of the current machine.
    def self.hostname
      @hostname ||= Socket.gethostname
    end

    # Writes this process' heartbeat key with a HEARTBEAT_LIFESPAN expiry.
    def self.heartbeat
      Sidekiq.redis do |conn|
        conn.set(heartbeat_key(hostname, pid), 1, ex: HEARTBEAT_LIFESPAN)
      end

      Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
    end

    # Pushes every in-progress unit of work back to its original queue and
    # removes it from the working queue. Failures are logged as warnings
    # rather than raised.
    #
    # inprogress - collection of UnitOfWork-like objects (queue, job).
    # _options   - unused.
    def self.bulk_requeue(inprogress, _options)
      return if inprogress.empty?

      Sidekiq.logger.debug('Re-queueing terminated jobs')

      Sidekiq.redis do |conn|
        inprogress.each do |unit_of_work|
          conn.multi do |multi|
            multi.lpush(unit_of_work.queue, unit_of_work.job)
            multi.lrem(working_queue_name(unit_of_work.queue), 1, unit_of_work.job)
          end
        end
      end

      Sidekiq.logger.info("Pushed #{inprogress.size} jobs back to Redis")
    rescue => e
      Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{e.message}")
    end

    # Redis key holding the heartbeat of the process identified by
    # hostname and pid.
    def self.heartbeat_key(hostname, pid)
      "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
    end

    # Name of the current process' working queue for the given queue,
    # e.g. "working:queue:default:{hostname}:{PID}".
    def self.working_queue_name(queue)
      "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
    end

    attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
                :queues, :use_semi_reliable_fetch,
                :strictly_ordered_queues

    # options - Hash with:
    #           :queues           - list of queue names (required)
    #           :strict           - truthy to poll queues in strict order
    #           :cleanup_interval - lease expiry in seconds
    #                               (default DEFAULT_CLEANUP_INTERVAL)
    #           :lease_interval   - minimum seconds between lease attempts
    #                               (default DEFAULT_LEASE_INTERVAL)
    def initialize(options)
      @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
      @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
      @last_try_to_take_lease_at = 0
      @strictly_ordered_queues = !!options[:strict]
      @queues = options[:queues].map { |q| "queue:#{q}" }
    end

    # Runs the dead-worker cleanup when this fetcher obtains the lease, then
    # delegates to the strategy-specific #retrieve_unit_of_work.
    def retrieve_work
      clean_working_queues! if take_lease

      retrieve_unit_of_work
    end

    # Strategy hook; subclasses return a UnitOfWork or nil.
    def retrieve_unit_of_work
      raise NotImplementedError,
        "#{self.class} does not implement #{__method__}"
    end

    private

    # Moves every job left in the given working queue back to the queue it
    # was fetched from.
    #
    # working_queue - key of the form
    #                 "working:queue:{name}:{hostname}:{PID}".
    def clean_working_queue!(working_queue)
      # Strip the prefix only at the start of the key and the
      # ":{hostname}:{PID}" suffix only at the end. An unanchored removal of
      # "working:" would also eat the tail of queue names such as
      # "networking" and requeue jobs to the wrong list.
      original_queue = working_queue
                       .sub(/\A#{WORKING_QUEUE_PREFIX}:/, '')
                       .sub(/:[^:]*:[0-9]*\z/, '')

      Sidekiq.redis do |conn|
        count = 0

        while conn.rpoplpush(working_queue, original_queue) do
          count += 1
        end

        Sidekiq.logger.info("Requeued #{count} dead jobs to #{original_queue}")
      end
    end

    # Detect "old" jobs and requeue them because the worker they were assigned
    # to probably failed miserably.
    def clean_working_queues!
      Sidekiq.logger.info("Cleaning working queues")

      Sidekiq.redis do |conn|
        conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
          # Example: "working:queue:name_of_the_job:{hostname}:{PID}"
          hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten

          # Skip keys that do not carry a hostname/PID suffix instead of
          # aborting the whole cleanup pass.
          next if hostname.nil? || pid.nil?

          clean_working_queue!(key) if worker_dead?(hostname, pid)
        end
      end
    end

    # A worker is considered dead when its heartbeat key is absent.
    def worker_dead?(hostname, pid)
      Sidekiq.redis do |conn|
        !conn.get(self.class.heartbeat_key(hostname, pid))
      end
    end

    # Tries to set LEASE_KEY (NX, expiring after cleanup_interval), at most
    # once per lease_interval. Returns nil when throttled, otherwise the
    # result of the SET call (presumably truthy only when the key was
    # created - depends on the Redis client).
    def take_lease
      return unless allowed_to_take_a_lease?

      @last_try_to_take_lease_at = Time.now.to_f

      Sidekiq.redis do |conn|
        conn.set(LEASE_KEY, 1, nx: true, ex: cleanup_interval)
      end
    end

    # True when more than lease_interval seconds passed since the last attempt.
    def allowed_to_take_a_lease?
      Time.now.to_f - last_try_to_take_lease_at > lease_interval
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Fetch strategy that moves a job from its queue into this process'
  # working queue with a single RPOPLPUSH call per queue.
  class ReliableFetch < BaseReliableFetch
    # RPOPLPUSH is used in its non-blocking form here, so when every queue
    # turns out to be empty we pause with a plain sleep before returning.
    RELIABLE_FETCH_IDLE_TIMEOUT = 5 # seconds

    attr_reader :queues_iterator, :queues_size

    def initialize(options)
      super(options)

      @queues_iterator = queues.cycle
      @queues_size = queues.size
    end

    private

    # Polls each configured queue once, in iterator order, and returns a
    # UnitOfWork for the first job found. In strict mode the iterator is
    # rewound first so polling always starts from the first queue. Returns
    # nil after sleeping RELIABLE_FETCH_IDLE_TIMEOUT when nothing was found.
    def retrieve_unit_of_work
      queues_iterator.rewind if strictly_ordered_queues

      queues_size.times do
        source = queues_iterator.next
        payload = Sidekiq.redis do |redis|
          redis.rpoplpush(source, self.class.working_queue_name(source))
        end
        next unless payload

        return UnitOfWork.new(source, payload)
      end

      # Nothing to do in any queue: back off instead of spinning.
      sleep(RELIABLE_FETCH_IDLE_TIMEOUT)

      nil
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Fetch strategy that pops a job with BRPOP and then pushes it onto this
  # process' working queue with a separate LPUSH.
  class SemiReliableFetch < BaseReliableFetch
    # The blocking pop is given a short timeout so the fetching thread
    # regularly gets a chance to notice that the process is shutting down.
    # Only the semi-reliable strategy uses this constant.
    SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds

    def initialize(options)
      super(options)

      return unless strictly_ordered_queues

      # In strict mode the BRPOP argument list never changes, so build it
      # once: de-duplicated queues followed by the timeout.
      @queues = @queues.uniq << SEMI_RELIABLE_FETCH_TIMEOUT
    end

    private

    # Blocks on BRPOP across the configured queues; on success records the
    # job in the working queue and returns it as a UnitOfWork. Returns nil
    # when the pop yields nothing.
    def retrieve_unit_of_work
      popped = Sidekiq.redis { |redis| redis.brpop(*queues_cmd) }
      return unless popped

      UnitOfWork.new(*popped).tap do |unit|
        Sidekiq.redis do |redis|
          redis.lpush(self.class.working_queue_name(unit.queue), unit.job)
        end
      end
    end

    # Argument list for BRPOP: the queues followed by the timeout. Outside
    # strict mode the queues are reshuffled and de-duplicated on every call.
    def queues_cmd
      return @queues if strictly_ordered_queues

      @queues.shuffle.uniq << SEMI_RELIABLE_FETCH_TIMEOUT
    end
  end
end
|