gitlab-sidekiq-fetcher 0.6.1 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a71699d717aeb95cb406ed566e394a34e7978df8289db2fe89aff82046dd8e19
4
- data.tar.gz: 4390a2a95507b8a6c5c08a9d3dac384f58f0d11311afd1da5973e4c61240120a
3
+ metadata.gz: 6b9cf610a1915d63141331ba0e4820306235fc1c58e37e0124a9700d19005b99
4
+ data.tar.gz: 3690c4aaff9d47c8ec108ff264984343bc3a4412123ad56ac727cdf1ae3e8fd3
5
5
  SHA512:
6
- metadata.gz: fa95de2f9b33f01b45c547b23bfd4728ab88ed304ecbeaa2416e03b1a6d6bff678f097b37ae85679ef69f520a37a6f7115207170284229df706172f60d103b19
7
- data.tar.gz: d7981fd4afe0abf8454caffac4c1fa8e290dd8617ce28edf45b4a29a9deb9b3a9cd583f64038cc3808b814923ab74fc370d65d9a42257b4af9fa97ea48542959
6
+ metadata.gz: 6d5d61280c6db3b91c8107fca593fa12246db18c727389ef31cf8edf58e923bc78a2cbbe2be505962664ed56c8189513dc0ad0312e545988b70666839f191f66
7
+ data.tar.gz: cd1459179a3f97b3b3194a21e8335367e9021db590cb9441613d6d88030b1b86cc495b00e1ff37dc0cf7ea683d40e6b10d3c511c7d956a70e8c26c67a4bc0903
@@ -40,7 +40,6 @@ integration_reliable:
40
40
  variables:
41
41
  JOB_FETCHER: reliable
42
42
 
43
-
44
43
  integration_basic:
45
44
  extends: .integration
46
45
  allow_failure: yes
@@ -63,7 +62,6 @@ term_interruption:
63
62
  services:
64
63
  - redis:alpine
65
64
 
66
-
67
65
  # rubocop:
68
66
  # script:
69
67
  # - bundle exec rubocop
data/Gemfile CHANGED
@@ -7,6 +7,6 @@ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
7
7
  group :test do
8
8
  gem "rspec", '~> 3'
9
9
  gem "pry"
10
- gem "sidekiq", '~> 6.0.0'
10
+ gem "sidekiq", '~> 6.1'
11
11
  gem 'simplecov', require: false
12
12
  end
@@ -11,8 +11,6 @@ GEM
11
11
  coderay (~> 1.1.0)
12
12
  method_source (~> 0.9.0)
13
13
  rack (2.2.3)
14
- rack-protection (2.0.8.1)
15
- rack
16
14
  redis (4.2.1)
17
15
  rspec (3.8.0)
18
16
  rspec-core (~> 3.8.0)
@@ -27,11 +25,10 @@ GEM
27
25
  diff-lcs (>= 1.2.0, < 2.0)
28
26
  rspec-support (~> 3.8.0)
29
27
  rspec-support (3.8.0)
30
- sidekiq (6.0.7)
28
+ sidekiq (6.1.0)
31
29
  connection_pool (>= 2.2.2)
32
30
  rack (~> 2.0)
33
- rack-protection (>= 2.0.0)
34
- redis (>= 4.1.0)
31
+ redis (>= 4.2.0)
35
32
  simplecov (0.16.1)
36
33
  docile (~> 1.1)
37
34
  json (>= 1.8, < 3)
@@ -44,7 +41,7 @@ PLATFORMS
44
41
  DEPENDENCIES
45
42
  pry
46
43
  rspec (~> 3)
47
- sidekiq (~> 6.0.0)
44
+ sidekiq (~> 6.1)
48
45
  simplecov
49
46
 
50
47
  BUNDLED WITH
data/README.md CHANGED
@@ -6,6 +6,8 @@ fetches from Redis.
6
6
 
7
7
  It's based on https://github.com/TEA-ebook/sidekiq-reliable-fetch.
8
8
 
9
+ **IMPORTANT NOTE:** Since version `0.7.0`, this gem works only with `sidekiq >= 6.1` (which introduced breaking changes to the Fetch API). Please use version `~> 0.5` if you use an older version of `sidekiq`.
10
+
9
11
  There are two strategies implemented: [Reliable fetch](http://redis.io/commands/rpoplpush#pattern-reliable-queue) using `rpoplpush` command and
10
12
  semi-reliable fetch that uses regular `brpop` and `lpush` to pick the job and put it to working queue. The main benefit of "Reliable" strategy is that `rpoplpush` is atomic, eliminating a race condition in which jobs can be lost.
11
13
  However, it comes at a cost because `rpoplpush` can't watch multiple lists at the same time so we need to iterate over the entire queue list which significantly increases pressure on Redis when there are more than a few queues. The "semi-reliable" strategy is much more reliable than the default Sidekiq fetcher, though. Compared to the reliable fetch strategy, it does not increase pressure on Redis significantly.
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'gitlab-sidekiq-fetcher'
3
- s.version = '0.6.1'
3
+ s.version = '0.7.0'
4
4
  s.authors = ['TEA', 'GitLab']
5
5
  s.email = 'valery@gitlab.com'
6
6
  s.license = 'LGPL-3.0'
@@ -10,5 +10,5 @@ Gem::Specification.new do |s|
10
10
  s.require_paths = ['lib']
11
11
  s.files = `git ls-files`.split($\)
12
12
  s.test_files = []
13
- s.add_dependency 'sidekiq', '>= 5', '< 6.1'
13
+ s.add_dependency 'sidekiq', '~> 6.1'
14
14
  end
@@ -41,11 +41,13 @@ module Sidekiq
41
41
  end
42
42
 
43
43
  def self.setup_reliable_fetch!(config)
44
- config.options[:fetch] = if config.options[:semi_reliable_fetch]
45
- Sidekiq::SemiReliableFetch
46
- else
47
- Sidekiq::ReliableFetch
48
- end
44
+ fetch_strategy = if config.options[:semi_reliable_fetch]
45
+ Sidekiq::SemiReliableFetch
46
+ else
47
+ Sidekiq::ReliableFetch
48
+ end
49
+
50
+ config.options[:fetch] = fetch_strategy.new(config.options)
49
51
 
50
52
  Sidekiq.logger.info('GitLab reliable fetch activated!')
51
53
 
@@ -84,7 +86,44 @@ module Sidekiq
84
86
  Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
85
87
  end
86
88
 
87
- def self.bulk_requeue(inprogress, _options)
89
+ def self.worker_dead?(hostname, pid, conn)
90
+ !conn.get(heartbeat_key(hostname, pid))
91
+ end
92
+
93
+ def self.heartbeat_key(hostname, pid)
94
+ "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
95
+ end
96
+
97
+ def self.working_queue_name(queue)
98
+ "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
99
+ end
100
+
101
+ attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
102
+ :queues, :use_semi_reliable_fetch,
103
+ :strictly_ordered_queues
104
+
105
+ def initialize(options)
106
+ raise ArgumentError, 'missing queue list' unless options[:queues]
107
+
108
+ @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
109
+ @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
110
+ @last_try_to_take_lease_at = 0
111
+ @strictly_ordered_queues = !!options[:strict]
112
+ @queues = options[:queues].map { |q| "queue:#{q}" }
113
+ end
114
+
115
+ def retrieve_work
116
+ clean_working_queues! if take_lease
117
+
118
+ retrieve_unit_of_work
119
+ end
120
+
121
+ def retrieve_unit_of_work
122
+ raise NotImplementedError,
123
+ "#{self.class} does not implement #{__method__}"
124
+ end
125
+
126
+ def bulk_requeue(inprogress, _options)
88
127
  return if inprogress.empty?
89
128
 
90
129
  Sidekiq.redis do |conn|
@@ -92,7 +131,7 @@ module Sidekiq
92
131
  conn.multi do |multi|
93
132
  preprocess_interrupted_job(unit_of_work.job, unit_of_work.queue, multi)
94
133
 
95
- multi.lrem(working_queue_name(unit_of_work.queue), 1, unit_of_work.job)
134
+ multi.lrem(self.class.working_queue_name(unit_of_work.queue), 1, unit_of_work.job)
96
135
  end
97
136
  end
98
137
  end
@@ -100,17 +139,9 @@ module Sidekiq
100
139
  Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{e.message}")
101
140
  end
102
141
 
103
- def self.clean_working_queue!(working_queue)
104
- original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
105
-
106
- Sidekiq.redis do |conn|
107
- while job = conn.rpop(working_queue)
108
- preprocess_interrupted_job(job, original_queue)
109
- end
110
- end
111
- end
142
+ private
112
143
 
113
- def self.preprocess_interrupted_job(job, queue, conn = nil)
144
+ def preprocess_interrupted_job(job, queue, conn = nil)
114
145
  msg = Sidekiq.load_json(job)
115
146
  msg['interrupted_count'] = msg['interrupted_count'].to_i + 1
116
147
 
@@ -121,9 +152,23 @@ module Sidekiq
121
152
  end
122
153
  end
123
154
 
155
+ # If you want this method to be run in a scope of multi connection
156
+ # you need to pass it
157
+ def requeue_job(queue, msg, conn)
158
+ with_connection(conn) do |conn|
159
+ conn.lpush(queue, Sidekiq.dump_json(msg))
160
+ end
161
+
162
+ Sidekiq.logger.info(
163
+ message: "Pushed job #{msg['jid']} back to queue #{queue}",
164
+ jid: msg['jid'],
165
+ queue: queue
166
+ )
167
+ end
168
+
124
169
  # Detect "old" jobs and requeue them because the worker they were assigned
125
170
  # to probably failed miserably.
126
- def self.clean_working_queues!
171
+ def clean_working_queues!
127
172
  Sidekiq.logger.info('Cleaning working queues')
128
173
 
129
174
  Sidekiq.redis do |conn|
@@ -133,30 +178,28 @@ module Sidekiq
133
178
 
134
179
  continue if hostname.nil? || pid.nil?
135
180
 
136
- clean_working_queue!(key) if worker_dead?(hostname, pid, conn)
181
+ clean_working_queue!(key) if self.class.worker_dead?(hostname, pid, conn)
137
182
  end
138
183
  end
139
184
  end
140
185
 
141
- def self.worker_dead?(hostname, pid, conn)
142
- !conn.get(heartbeat_key(hostname, pid))
143
- end
144
-
145
- def self.heartbeat_key(hostname, pid)
146
- "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
147
- end
186
+ def clean_working_queue!(working_queue)
187
+ original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')
148
188
 
149
- def self.working_queue_name(queue)
150
- "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
189
+ Sidekiq.redis do |conn|
190
+ while job = conn.rpop(working_queue)
191
+ preprocess_interrupted_job(job, original_queue)
192
+ end
193
+ end
151
194
  end
152
195
 
153
- def self.interruption_exhausted?(msg)
196
+ def interruption_exhausted?(msg)
154
197
  return false if max_retries_after_interruption(msg['class']) < 0
155
198
 
156
199
  msg['interrupted_count'].to_i >= max_retries_after_interruption(msg['class'])
157
200
  end
158
201
 
159
- def self.max_retries_after_interruption(worker_class)
202
+ def max_retries_after_interruption(worker_class)
160
203
  max_retries_after_interruption = nil
161
204
 
162
205
  max_retries_after_interruption ||= begin
@@ -169,7 +212,7 @@ module Sidekiq
169
212
  max_retries_after_interruption
170
213
  end
171
214
 
172
- def self.send_to_quarantine(msg, multi_connection = nil)
215
+ def send_to_quarantine(msg, multi_connection = nil)
173
216
  Sidekiq.logger.warn(
174
217
  class: msg['class'],
175
218
  jid: msg['jid'],
@@ -180,52 +223,13 @@ module Sidekiq
180
223
  Sidekiq::InterruptedSet.new.put(job, connection: multi_connection)
181
224
  end
182
225
 
183
- # If you want this method to be run is a scope of multi connection
184
- # you need to pass it
185
- def self.requeue_job(queue, msg, conn)
186
- with_connection(conn) do |conn|
187
- conn.lpush(queue, Sidekiq.dump_json(msg))
188
- end
189
-
190
- Sidekiq.logger.info(
191
- message: "Pushed job #{msg['jid']} back to queue #{queue}",
192
- jid: msg['jid'],
193
- queue: queue
194
- )
195
- end
196
-
197
226
  # Yield block with an existing connection or creates another one
198
- def self.with_connection(conn, &block)
227
+ def with_connection(conn)
199
228
  return yield(conn) if conn
200
229
 
201
- Sidekiq.redis { |conn| yield(conn) }
202
- end
203
-
204
- attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
205
- :queues, :use_semi_reliable_fetch,
206
- :strictly_ordered_queues
207
-
208
- def initialize(options)
209
- @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
210
- @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
211
- @last_try_to_take_lease_at = 0
212
- @strictly_ordered_queues = !!options[:strict]
213
- @queues = options[:queues].map { |q| "queue:#{q}" }
214
- end
215
-
216
- def retrieve_work
217
- self.class.clean_working_queues! if take_lease
218
-
219
- retrieve_unit_of_work
230
+ Sidekiq.redis { |redis_conn| yield(redis_conn) }
220
231
  end
221
232
 
222
- def retrieve_unit_of_work
223
- raise NotImplementedError,
224
- "#{self.class} does not implement #{__method__}"
225
- end
226
-
227
- private
228
-
229
233
  def take_lease
230
234
  return unless allowed_to_take_a_lease?
231
235
 
@@ -6,23 +6,21 @@ module Sidekiq
6
6
  # we inject a regular sleep into the loop.
7
7
  RELIABLE_FETCH_IDLE_TIMEOUT = 5 # seconds
8
8
 
9
- attr_reader :queues_iterator, :queues_size
9
+ attr_reader :queues_size
10
10
 
11
11
  def initialize(options)
12
12
  super
13
13
 
14
+ @queues = queues.uniq if strictly_ordered_queues
14
15
  @queues_size = queues.size
15
- @queues_iterator = queues.cycle
16
16
  end
17
17
 
18
18
  private
19
19
 
20
20
  def retrieve_unit_of_work
21
- @queues_iterator.rewind if strictly_ordered_queues
22
-
23
- queues_size.times do
24
- queue = queues_iterator.next
21
+ queues_list = strictly_ordered_queues ? queues : queues.shuffle
25
22
 
23
+ queues_list.each do |queue|
26
24
  work = Sidekiq.redis do |conn|
27
25
  conn.rpoplpush(queue, self.class.working_queue_name(queue))
28
26
  end
@@ -39,14 +39,15 @@ describe Sidekiq::BaseReliableFetch do
39
39
  end
40
40
  end
41
41
 
42
- describe '.bulk_requeue' do
42
+ describe '#bulk_requeue' do
43
+ let(:options) { { queues: %w[foo bar] } }
43
44
  let!(:queue1) { Sidekiq::Queue.new('foo') }
44
45
  let!(:queue2) { Sidekiq::Queue.new('bar') }
45
46
 
46
47
  it 'requeues the bulk' do
47
48
  uow = described_class::UnitOfWork
48
49
  jobs = [ uow.new('queue:foo', job), uow.new('queue:foo', job), uow.new('queue:bar', job) ]
49
- described_class.bulk_requeue(jobs, queues: [])
50
+ described_class.new(options).bulk_requeue(jobs, nil)
50
51
 
51
52
  expect(queue1.size).to eq 2
52
53
  expect(queue2.size).to eq 1
@@ -56,7 +57,7 @@ describe Sidekiq::BaseReliableFetch do
56
57
  uow = described_class::UnitOfWork
57
58
  interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3)
58
59
  jobs = [ uow.new('queue:foo', interrupted_job), uow.new('queue:foo', job), uow.new('queue:bar', job) ]
59
- described_class.bulk_requeue(jobs, queues: [])
60
+ described_class.new(options).bulk_requeue(jobs, nil)
60
61
 
61
62
  expect(queue1.size).to eq 1
62
63
  expect(queue2.size).to eq 1
@@ -69,7 +70,7 @@ describe Sidekiq::BaseReliableFetch do
69
70
  uow = described_class::UnitOfWork
70
71
  interrupted_job = Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo'], interrupted_count: 3)
71
72
  jobs = [ uow.new('queue:foo', interrupted_job), uow.new('queue:foo', job), uow.new('queue:bar', job) ]
72
- described_class.bulk_requeue(jobs, queues: [])
73
+ described_class.new(options).bulk_requeue(jobs, nil)
73
74
 
74
75
  expect(queue1.size).to eq 2
75
76
  expect(queue2.size).to eq 1
@@ -80,7 +81,7 @@ describe Sidekiq::BaseReliableFetch do
80
81
  end
81
82
 
82
83
  it 'sets heartbeat' do
83
- config = double(:sidekiq_config, options: { queues: [] })
84
+ config = double(:sidekiq_config, options: { queues: %w[foo bar] })
84
85
 
85
86
  heartbeat_thread = described_class.setup_reliable_fetch!(config)
86
87
 
@@ -5,7 +5,7 @@ shared_examples 'a Sidekiq fetcher' do
5
5
 
6
6
  describe '#retrieve_work' do
7
7
  let(:job) { Sidekiq.dump_json(class: 'Bob', args: [1, 2, 'foo']) }
8
- let(:fetcher) { described_class.new(queues: ['assigned']) }
8
+ let(:fetcher) { described_class.new(queues: queues) }
9
9
 
10
10
  it 'retrieves the job and puts it to working queue' do
11
11
  Sidekiq.redis { |conn| conn.rpush('queue:assigned', job) }
@@ -61,11 +61,11 @@ shared_examples 'a Sidekiq fetcher' do
61
61
  it 'does not clean up orphaned jobs more than once per cleanup interval' do
62
62
  Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)
63
63
 
64
- expect(described_class).to receive(:clean_working_queues!).once
64
+ expect(fetcher).to receive(:clean_working_queues!).once
65
65
 
66
66
  threads = 10.times.map do
67
67
  Thread.new do
68
- described_class.new(queues: ['assigned']).retrieve_work
68
+ fetcher.retrieve_work
69
69
  end
70
70
  end
71
71
 
@@ -57,7 +57,7 @@ end
57
57
  def spawn_workers
58
58
  pids = []
59
59
  NUMBER_OF_WORKERS.times do
60
- pids << spawn('sidekiq -r ./config.rb')
60
+ pids << spawn('sidekiq -q default -q low -q high -r ./config.rb')
61
61
  end
62
62
 
63
63
  pids
@@ -11,7 +11,7 @@ def spawn_workers(number)
11
11
  pids = []
12
12
 
13
13
  number.times do
14
- pids << spawn('sidekiq -r ./config.rb')
14
+ pids << spawn('sidekiq -q default -q high -q low -r ./config.rb')
15
15
  end
16
16
 
17
17
  pids
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gitlab-sidekiq-fetcher
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TEA
@@ -9,26 +9,20 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2020-08-03 00:00:00.000000000 Z
12
+ date: 2020-07-30 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: sidekiq
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - ">="
19
- - !ruby/object:Gem::Version
20
- version: '5'
21
- - - "<"
18
+ - - "~>"
22
19
  - !ruby/object:Gem::Version
23
20
  version: '6.1'
24
21
  type: :runtime
25
22
  prerelease: false
26
23
  version_requirements: !ruby/object:Gem::Requirement
27
24
  requirements:
28
- - - ">="
29
- - !ruby/object:Gem::Version
30
- version: '5'
31
- - - "<"
25
+ - - "~>"
32
26
  - !ruby/object:Gem::Version
33
27
  version: '6.1'
34
28
  description: Redis reliable queue pattern implemented in Sidekiq