gitlab-sidekiq-fetcher 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -1
- data/.gitlab-ci.yml +53 -0
- data/.rspec +1 -0
- data/Gemfile +12 -0
- data/Gemfile.lock +50 -0
- data/README.md +15 -4
- data/{README-GITLAB.md → RELEASE-GITLAB.md} +0 -0
- data/gitlab-sidekiq-fetcher.gemspec +2 -4
- data/lib/sidekiq-reliable-fetch.rb +3 -2
- data/lib/sidekiq/base_reliable_fetch.rb +185 -0
- data/lib/sidekiq/reliable_fetch.rb +40 -0
- data/lib/sidekiq/semi_reliable_fetch.rb +44 -0
- data/spec/base_reliable_fetch_spec.rb +73 -0
- data/spec/fetch_shared_examples.rb +118 -0
- data/spec/reliable_fetch_spec.rb +7 -0
- data/spec/semi_reliable_fetch_spec.rb +7 -0
- data/spec/spec_helper.rb +115 -0
- data/test/README.md +34 -0
- data/test/config.rb +31 -0
- data/test/reliability_test.rb +116 -0
- data/test/worker.rb +26 -0
- metadata +21 -10
- data/lib/sidekiq-reliable-fetch/api.rb +0 -56
- data/lib/sidekiq-reliable-fetch/web.rb +0 -24
- data/lib/sidekiq/reliable_fetcher.rb +0 -143
- data/web/views/working_queue.erb +0 -25
- data/web/views/working_queues.erb +0 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cc2e47cf7679deb6a6d526e199a09c20a671e3b55bad22d7c05ce17405eb6103
|
4
|
+
data.tar.gz: e71949587df8a635223ca8fa36339949df771f493e7edda0a4d9c34198600fb5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea7d6b7283354053a4f9fc24f419ab56da97efff3ce3ac3a2a10517ab2a1bd184a44b0ffb9d39b1dbdf27b23032c58b1139a18d8f2ec1bf5357deed65187e3ed
|
7
|
+
data.tar.gz: ae4c78eca271dc63abf98bc112a582a2f56fc4e1e7be843189c30193ff0dabd99232f5a019b9b69d45cd1f08ca7bd88afe0c4341e0256c88d352441474a08400
|
data/.gitignore
CHANGED
data/.gitlab-ci.yml
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
image: "ruby:2.5"
|
2
|
+
|
3
|
+
before_script:
|
4
|
+
- ruby -v
|
5
|
+
- which ruby
|
6
|
+
- gem install bundler --no-ri --no-rdoc
|
7
|
+
- bundle install --jobs $(nproc) "${FLAGS[@]}"
|
8
|
+
|
9
|
+
variables:
|
10
|
+
REDIS_URL: "redis://redis"
|
11
|
+
|
12
|
+
rspec:
|
13
|
+
stage: test
|
14
|
+
coverage: '/LOC \((\d+\.\d+%)\) covered.$/'
|
15
|
+
script:
|
16
|
+
- bundle exec rspec
|
17
|
+
services:
|
18
|
+
- redis:alpine
|
19
|
+
artifacts:
|
20
|
+
expire_in: 31d
|
21
|
+
when: always
|
22
|
+
paths:
|
23
|
+
- coverage/
|
24
|
+
|
25
|
+
.integration:
|
26
|
+
stage: test
|
27
|
+
script:
|
28
|
+
- cd test
|
29
|
+
- bundle exec ruby reliability_test.rb
|
30
|
+
services:
|
31
|
+
- redis:alpine
|
32
|
+
|
33
|
+
integration_semi:
|
34
|
+
extends: .integration
|
35
|
+
variables:
|
36
|
+
JOB_FETCHER: semi
|
37
|
+
|
38
|
+
integration_reliable:
|
39
|
+
extends: .integration
|
40
|
+
variables:
|
41
|
+
JOB_FETCHER: reliable
|
42
|
+
|
43
|
+
|
44
|
+
integration_basic:
|
45
|
+
extends: .integration
|
46
|
+
allow_failure: yes
|
47
|
+
variables:
|
48
|
+
JOB_FETCHER: basic
|
49
|
+
|
50
|
+
|
51
|
+
# rubocop:
|
52
|
+
# script:
|
53
|
+
# - bundle exec rubocop
|
data/.rspec
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--require spec_helper
|
data/Gemfile
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
6
|
+
|
7
|
+
group :test do
|
8
|
+
gem "rspec", '~> 3'
|
9
|
+
gem "pry"
|
10
|
+
gem "sidekiq", '~> 5.0'
|
11
|
+
gem 'simplecov', require: false
|
12
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
coderay (1.1.2)
|
5
|
+
connection_pool (2.2.2)
|
6
|
+
diff-lcs (1.3)
|
7
|
+
docile (1.3.1)
|
8
|
+
json (2.1.0)
|
9
|
+
method_source (0.9.0)
|
10
|
+
pry (0.11.3)
|
11
|
+
coderay (~> 1.1.0)
|
12
|
+
method_source (~> 0.9.0)
|
13
|
+
rack (2.0.5)
|
14
|
+
rack-protection (2.0.4)
|
15
|
+
rack
|
16
|
+
redis (4.0.2)
|
17
|
+
rspec (3.8.0)
|
18
|
+
rspec-core (~> 3.8.0)
|
19
|
+
rspec-expectations (~> 3.8.0)
|
20
|
+
rspec-mocks (~> 3.8.0)
|
21
|
+
rspec-core (3.8.0)
|
22
|
+
rspec-support (~> 3.8.0)
|
23
|
+
rspec-expectations (3.8.1)
|
24
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
+
rspec-support (~> 3.8.0)
|
26
|
+
rspec-mocks (3.8.0)
|
27
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
28
|
+
rspec-support (~> 3.8.0)
|
29
|
+
rspec-support (3.8.0)
|
30
|
+
sidekiq (5.2.2)
|
31
|
+
connection_pool (~> 2.2, >= 2.2.2)
|
32
|
+
rack-protection (>= 1.5.0)
|
33
|
+
redis (>= 3.3.5, < 5)
|
34
|
+
simplecov (0.16.1)
|
35
|
+
docile (~> 1.1)
|
36
|
+
json (>= 1.8, < 3)
|
37
|
+
simplecov-html (~> 0.10.0)
|
38
|
+
simplecov-html (0.10.2)
|
39
|
+
|
40
|
+
PLATFORMS
|
41
|
+
ruby
|
42
|
+
|
43
|
+
DEPENDENCIES
|
44
|
+
pry
|
45
|
+
rspec (~> 3)
|
46
|
+
sidekiq (~> 5.0)
|
47
|
+
simplecov
|
48
|
+
|
49
|
+
BUNDLED WITH
|
50
|
+
1.17.1
|
data/README.md
CHANGED
@@ -5,10 +5,11 @@ gitlab-sidekiq-fetcher
|
|
5
5
|
fetches from Redis.
|
6
6
|
|
7
7
|
It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
|
8
|
-
At this time we only added Sidekiq 5+ support to it.
|
9
8
|
|
10
|
-
|
11
|
-
|
9
|
+
There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
|
10
|
+
semi-reliable fetch that uses regular `brpop` and `lpush` to pick a job and push it onto the working queue. The main benefit of the "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
|
11
|
+
However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
|
12
|
+
|
12
13
|
|
13
14
|
## Installation
|
14
15
|
|
@@ -24,12 +25,22 @@ Enable reliable fetches by calling this gem from your Sidekiq configuration:
|
|
24
25
|
|
25
26
|
```ruby
|
26
27
|
Sidekiq.configure_server do |config|
|
27
|
-
Sidekiq::
|
28
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
28
29
|
|
29
30
|
# …
|
30
31
|
end
|
31
32
|
```
|
32
33
|
|
34
|
+
There is an additional parameter, `config.options[:semi_reliable_fetch]`, that you can use to switch between the two strategies:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
Sidekiq.configure_server do |config|
|
38
|
+
config.options[:semi_reliable_fetch] = true # Default value is false
|
39
|
+
|
40
|
+
Sidekiq::ReliableFetch.setup_reliable_fetch!(config)
|
41
|
+
end
|
42
|
+
```
|
43
|
+
|
33
44
|
## License
|
34
45
|
|
35
46
|
LGPL-3.0, see the LICENSE file.
|
File without changes
|
@@ -1,16 +1,14 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'gitlab-sidekiq-fetcher'
|
3
|
-
s.version = '0.
|
3
|
+
s.version = '0.4.0'
|
4
4
|
s.authors = ['TEA', 'GitLab']
|
5
5
|
s.email = 'valery@gitlab.com'
|
6
6
|
s.license = 'LGPL-3.0'
|
7
|
-
s.homepage = 'https://
|
7
|
+
s.homepage = 'https://gitlab.com/gitlab-org/sidekiq-reliable-fetch/'
|
8
8
|
s.summary = 'Reliable fetch extension for Sidekiq'
|
9
9
|
s.description = 'Redis reliable queue pattern implemented in Sidekiq'
|
10
10
|
s.require_paths = ['lib']
|
11
|
-
|
12
11
|
s.files = `git ls-files`.split($\)
|
13
12
|
s.test_files = []
|
14
|
-
|
15
13
|
s.add_dependency 'sidekiq', '~> 5'
|
16
14
|
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Common behaviour shared by the fetch strategies in this gem.
  #
  # Every fetched job is also stored in a per-process "working queue" named
  # "working:<queue>:<hostname>:<pid>". Each process periodically refreshes a
  # heartbeat key in Redis; when the heartbeat of a process is missing, the
  # jobs left in its working queues are pushed back to their original queues.
  #
  # Subclasses must implement #retrieve_unit_of_work.
  class BaseReliableFetch
    DEFAULT_CLEANUP_INTERVAL = 60 * 60 # 1 hour
    HEARTBEAT_INTERVAL = 20 # seconds
    HEARTBEAT_LIFESPAN = 60 # seconds
    HEARTBEAT_RETRY_DELAY = 1 # seconds
    WORKING_QUEUE_PREFIX = 'working'

    # Defines how often we try to take a lease to not flood our
    # Redis server with SET requests
    DEFAULT_LEASE_INTERVAL = 2 * 60 # seconds
    LEASE_KEY = 'reliable-fetcher-cleanup-lock'

    # Defines the COUNT parameter that will be passed to Redis SCAN command
    SCAN_COUNT = 1000

    # A fetched job together with the Redis list ("queue:<name>") it came from.
    UnitOfWork = Struct.new(:queue, :job) do
      # Removes one occurrence of the job from this process's working queue.
      def acknowledge
        Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
      end

      # Returns the queue name with everything up to and including "queue:"
      # stripped.
      def queue_name
        queue.sub(/.*queue:/, '')
      end

      # Pushes the job back onto its original queue and removes it from the
      # working queue inside a single MULTI block.
      def requeue
        Sidekiq.redis do |conn|
          conn.multi do |multi|
            multi.lpush(queue, job)
            multi.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job)
          end
        end
      end
    end

    # Installs the fetch strategy into the given Sidekiq configuration and
    # starts the heartbeat thread.
    #
    # Uses Sidekiq::SemiReliableFetch when config.options[:semi_reliable_fetch]
    # is truthy and Sidekiq::ReliableFetch otherwise.
    #
    # @param config the Sidekiq server configuration; must respond to #options
    # @return [Thread] the heartbeat thread
    def self.setup_reliable_fetch!(config)
      config.options[:fetch] = if config.options[:semi_reliable_fetch]
                                 Sidekiq::SemiReliableFetch
                               else
                                 Sidekiq::ReliableFetch
                               end

      Sidekiq.logger.info('GitLab reliable fetch activated!')

      start_heartbeat_thread
    end

    # Spawns a thread that refreshes the heartbeat key every
    # HEARTBEAT_INTERVAL seconds. On error it logs the message and retries
    # after HEARTBEAT_RETRY_DELAY seconds.
    #
    # @return [Thread]
    def self.start_heartbeat_thread
      Thread.new do
        loop do
          begin
            heartbeat

            sleep HEARTBEAT_INTERVAL
          rescue => e
            Sidekiq.logger.error("Heartbeat thread error: #{e.message}")

            sleep HEARTBEAT_RETRY_DELAY
          end
        end
      end
    end

    # Memoized id of the current process.
    def self.pid
      @pid ||= ::Process.pid
    end

    # Memoized host name of the current machine.
    def self.hostname
      @hostname ||= Socket.gethostname
    end

    # Writes the heartbeat key for this host/pid with a TTL of
    # HEARTBEAT_LIFESPAN seconds.
    def self.heartbeat
      Sidekiq.redis do |conn|
        conn.set(heartbeat_key(hostname, pid), 1, ex: HEARTBEAT_LIFESPAN)
      end

      Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
    end

    # Pushes every in-progress unit of work back onto its original queue and
    # removes it from the working queue. Errors are logged as warnings and
    # not re-raised.
    #
    # @param inprogress [Array<UnitOfWork>] jobs that were being processed
    # @param _options unused
    def self.bulk_requeue(inprogress, _options)
      return if inprogress.empty?

      Sidekiq.logger.debug('Re-queueing terminated jobs')

      Sidekiq.redis do |conn|
        inprogress.each do |unit_of_work|
          conn.multi do |multi|
            multi.lpush(unit_of_work.queue, unit_of_work.job)
            multi.lrem(working_queue_name(unit_of_work.queue), 1, unit_of_work.job)
          end
        end
      end

      Sidekiq.logger.info("Pushed #{inprogress.size} jobs back to Redis")
    rescue => e
      Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{e.message}")
    end

    # Name of the Redis key holding the heartbeat of the given host/pid.
    def self.heartbeat_key(hostname, pid)
      "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
    end

    # Name of this process's working queue for the given queue key.
    def self.working_queue_name(queue)
      "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
    end

    attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
                :queues, :use_semi_reliable_fetch,
                :strictly_ordered_queues

    # @param options [Hash] reads :cleanup_interval, :lease_interval, :strict
    #   and :queues (a list of queue names, each prefixed with "queue:")
    def initialize(options)
      @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
      @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
      @last_try_to_take_lease_at = 0
      @strictly_ordered_queues = !!options[:strict]
      @queues = options[:queues].map { |q| "queue:#{q}" }
    end

    # Cleans up working queues of dead workers when this process obtains the
    # cleanup lease, then delegates to #retrieve_unit_of_work.
    def retrieve_work
      clean_working_queues! if take_lease

      retrieve_unit_of_work
    end

    # Strategy hook; must be implemented by subclasses.
    #
    # @raise [NotImplementedError] always, in this base class
    def retrieve_unit_of_work
      raise NotImplementedError,
        "#{self.class} does not implement #{__method__}"
    end

    private

    # Moves every job from the given working queue back to its original queue.
    #
    # @param working_queue [String] key of the form
    #   "working:queue:<name>:<hostname>:<pid>"
    def clean_working_queue!(working_queue)
      # Strip the leading "working:" prefix and the trailing ":<hostname>:<pid>".
      # The prefix is anchored with \A so that a queue whose name itself
      # contains "working:" (e.g. "networking") is not mangled.
      original_queue = working_queue.gsub(/\A#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')

      Sidekiq.redis do |conn|
        count = 0

        while conn.rpoplpush(working_queue, original_queue) do
          count += 1
        end

        Sidekiq.logger.info("Requeued #{count} dead jobs to #{original_queue}")
      end
    end

    # Detect "old" jobs and requeue them because the worker they were assigned
    # to probably failed miserably.
    def clean_working_queues!
      Sidekiq.logger.info("Cleaning working queues")

      Sidekiq.redis do |conn|
        conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
          # Example: "working:queue:name_of_the_queue:{hostname}:{PID}"
          hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten

          # Skip keys without a ":{hostname}:{PID}" suffix instead of aborting
          # the whole cleanup pass.
          next if hostname.nil? || pid.nil?

          clean_working_queue!(key) if worker_dead?(hostname, pid)
        end
      end
    end

    # A worker is considered dead when its heartbeat key is absent from Redis.
    def worker_dead?(hostname, pid)
      Sidekiq.redis do |conn|
        !conn.get(self.class.heartbeat_key(hostname, pid))
      end
    end

    # Tries to acquire the cleanup lease (SET NX with a TTL of
    # cleanup_interval). Attempts are limited to one per lease_interval
    # seconds for this fetcher instance.
    #
    # @return the result of the SET call, or nil when the attempt was skipped
    def take_lease
      return unless allowed_to_take_a_lease?

      @last_try_to_take_lease_at = Time.now.to_f

      Sidekiq.redis do |conn|
        conn.set(LEASE_KEY, 1, nx: true, ex: cleanup_interval)
      end
    end

    # True when more than lease_interval seconds have passed since the last
    # attempt to take the lease.
    def allowed_to_take_a_lease?
      Time.now.to_f - last_try_to_take_lease_at > lease_interval
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Fetch strategy that moves a job from its queue to the working queue with
  # a single RPOPLPUSH call, trying the configured queues one after another.
  class ReliableFetch < BaseReliableFetch
    # RPOPLPUSH does not block, so when all queues turn out to be empty the
    # fetcher sleeps for this many seconds before returning.
    RELIABLE_FETCH_IDLE_TIMEOUT = 5 # seconds

    attr_reader :queues_iterator, :queues_size

    # Builds an endlessly cycling enumerator over the configured queues.
    def initialize(options)
      super(options)

      @queues_iterator = queues.cycle
      @queues_size = queues.size
    end

    private

    # Tries each configured queue once and returns a UnitOfWork for the first
    # job found, or nil (after sleeping) when every queue was empty.
    def retrieve_unit_of_work
      # With strict ordering every pass starts again from the first queue.
      queues_iterator.rewind if strictly_ordered_queues

      queues_size.times do
        source = queues_iterator.next
        destination = self.class.working_queue_name(source)
        payload = Sidekiq.redis { |conn| conn.rpoplpush(source, destination) }
        next unless payload

        return UnitOfWork.new(source, payload)
      end

      # Nothing found in any of the configured queues: pause briefly so the
      # loop does not burn CPU for no reason.
      sleep(RELIABLE_FETCH_IDLE_TIMEOUT)

      nil
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Sidekiq
  # Fetch strategy that pops a job with BRPOP and then pushes it onto the
  # working queue with a separate LPUSH.
  class SemiReliableFetch < BaseReliableFetch
    # BRPOP timeout. The fetch has to return every few seconds so the thread
    # can notice that the process is shutting down. Only the semi-reliable
    # strategy uses this constant.
    SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds

    # For strictly ordered queues the BRPOP argument list (unique queues
    # followed by the timeout) is built once, here.
    def initialize(options)
      super(options)

      return unless strictly_ordered_queues

      @queues = @queues.uniq.push(SEMI_RELIABLE_FETCH_TIMEOUT)
    end

    private

    # Pops a job, records it in the working queue and returns it as a
    # UnitOfWork; returns nil when BRPOP yielded nothing.
    def retrieve_unit_of_work
      popped = Sidekiq.redis { |conn| conn.brpop(*queues_cmd) }
      return unless popped

      UnitOfWork.new(*popped).tap do |unit|
        Sidekiq.redis do |conn|
          conn.lpush(self.class.working_queue_name(unit.queue), unit.job)
        end
      end
    end

    # Arguments for BRPOP: the queue keys followed by the timeout. Without
    # strict ordering the queues are shuffled on every call.
    def queues_cmd
      return @queues if strictly_ordered_queues

      @queues.shuffle.uniq << SEMI_RELIABLE_FETCH_TIMEOUT
    end
  end
end
|