gitlab-sidekiq-fetcher 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.rspec DELETED
@@ -1 +0,0 @@
1
- --require spec_helper
data/Gemfile DELETED
@@ -1,12 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- source "https://rubygems.org"
4
-
5
- git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
-
7
- group :test do
8
- gem "rspec", '~> 3'
9
- gem "pry"
10
- gem "sidekiq", '~> 5.0'
11
- gem 'simplecov', require: false
12
- end
data/Gemfile.lock DELETED
@@ -1,50 +0,0 @@
1
- GEM
2
- remote: https://rubygems.org/
3
- specs:
4
- coderay (1.1.2)
5
- connection_pool (2.2.2)
6
- diff-lcs (1.3)
7
- docile (1.3.1)
8
- json (2.1.0)
9
- method_source (0.9.0)
10
- pry (0.11.3)
11
- coderay (~> 1.1.0)
12
- method_source (~> 0.9.0)
13
- rack (2.0.5)
14
- rack-protection (2.0.4)
15
- rack
16
- redis (4.0.2)
17
- rspec (3.8.0)
18
- rspec-core (~> 3.8.0)
19
- rspec-expectations (~> 3.8.0)
20
- rspec-mocks (~> 3.8.0)
21
- rspec-core (3.8.0)
22
- rspec-support (~> 3.8.0)
23
- rspec-expectations (3.8.1)
24
- diff-lcs (>= 1.2.0, < 2.0)
25
- rspec-support (~> 3.8.0)
26
- rspec-mocks (3.8.0)
27
- diff-lcs (>= 1.2.0, < 2.0)
28
- rspec-support (~> 3.8.0)
29
- rspec-support (3.8.0)
30
- sidekiq (5.2.2)
31
- connection_pool (~> 2.2, >= 2.2.2)
32
- rack-protection (>= 1.5.0)
33
- redis (>= 3.3.5, < 5)
34
- simplecov (0.16.1)
35
- docile (~> 1.1)
36
- json (>= 1.8, < 3)
37
- simplecov-html (~> 0.10.0)
38
- simplecov-html (0.10.2)
39
-
40
- PLATFORMS
41
- ruby
42
-
43
- DEPENDENCIES
44
- pry
45
- rspec (~> 3)
46
- sidekiq (~> 5.0)
47
- simplecov
48
-
49
- BUNDLED WITH
50
- 1.17.1
@@ -1,185 +0,0 @@
1
# frozen_string_literal: true

module Sidekiq
  # Common behaviour shared by ReliableFetch and SemiReliableFetch.
  #
  # Fetched jobs are parked in per-process "working queues" (see
  # .working_queue_name). Each process refreshes a heartbeat key in Redis;
  # when a process dies its heartbeat expires and a surviving process,
  # after winning the cleanup lease, pushes the dead process' in-flight
  # jobs back onto their original queues (see #clean_working_queues!).
  class BaseReliableFetch
    DEFAULT_CLEANUP_INTERVAL = 60 * 60 # 1 hour
    HEARTBEAT_INTERVAL = 20 # seconds
    HEARTBEAT_LIFESPAN = 60 # seconds
    HEARTBEAT_RETRY_DELAY = 1 # seconds
    WORKING_QUEUE_PREFIX = 'working'

    # Defines how often we try to take a lease to not flood our
    # Redis server with SET requests
    DEFAULT_LEASE_INTERVAL = 2 * 60 # seconds
    LEASE_KEY = 'reliable-fetcher-cleanup-lock'

    # Defines the COUNT parameter that will be passed to Redis SCAN command
    SCAN_COUNT = 1000

    # One fetched job: +queue+ is the Redis list it was taken from
    # ("queue:<name>") and +job+ is the raw payload string.
    UnitOfWork = Struct.new(:queue, :job) do
      # Removes the finished job from this process' working queue.
      def acknowledge
        Sidekiq.redis { |conn| conn.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job) }
      end

      # Strips the "queue:" prefix, e.g. "queue:mailers" => "mailers".
      def queue_name
        queue.sub(/.*queue:/, '')
      end

      # Atomically (MULTI) pushes the job back onto its original queue and
      # removes it from the working queue.
      def requeue
        Sidekiq.redis do |conn|
          conn.multi do |multi|
            multi.lpush(queue, job)
            multi.lrem(Sidekiq::BaseReliableFetch.working_queue_name(queue), 1, job)
          end
        end
      end
    end

    # Installs the reliable fetcher into Sidekiq's config and starts the
    # heartbeat thread. Returns the heartbeat Thread (callers may kill it).
    def self.setup_reliable_fetch!(config)
      config.options[:fetch] = if config.options[:semi_reliable_fetch]
                                 Sidekiq::SemiReliableFetch
                               else
                                 Sidekiq::ReliableFetch
                               end

      Sidekiq.logger.info('GitLab reliable fetch activated!')

      start_heartbeat_thread
    end

    # Background thread that refreshes this process' heartbeat key every
    # HEARTBEAT_INTERVAL seconds. On error it logs and retries after
    # HEARTBEAT_RETRY_DELAY instead of letting the thread die.
    def self.start_heartbeat_thread
      Thread.new do
        loop do
          begin
            heartbeat

            sleep HEARTBEAT_INTERVAL
          rescue => e
            Sidekiq.logger.error("Heartbeat thread error: #{e.message}")

            sleep HEARTBEAT_RETRY_DELAY
          end
        end
      end
    end

    # Memoized process id; stable for the life of the process.
    def self.pid
      @pid ||= ::Process.pid
    end

    # Memoized hostname; stable for the life of the process.
    def self.hostname
      @hostname ||= Socket.gethostname
    end

    # Writes the heartbeat key with a TTL of HEARTBEAT_LIFESPAN; once it
    # expires other processes treat this worker as dead (see #worker_dead?).
    def self.heartbeat
      Sidekiq.redis do |conn|
        conn.set(heartbeat_key(hostname, pid), 1, ex: HEARTBEAT_LIFESPAN)
      end

      Sidekiq.logger.debug("Heartbeat for hostname: #{hostname} and pid: #{pid}")
    end

    # Called by Sidekiq on shutdown: pushes all in-progress jobs back onto
    # their queues so they are not lost. Best-effort — failures are logged,
    # never raised.
    def self.bulk_requeue(inprogress, _options)
      return if inprogress.empty?

      Sidekiq.logger.debug('Re-queueing terminated jobs')

      Sidekiq.redis do |conn|
        inprogress.each do |unit_of_work|
          conn.multi do |multi|
            multi.lpush(unit_of_work.queue, unit_of_work.job)
            multi.lrem(working_queue_name(unit_of_work.queue), 1, unit_of_work.job)
          end
        end
      end

      Sidekiq.logger.info("Pushed #{inprogress.size} jobs back to Redis")
    rescue => e
      Sidekiq.logger.warn("Failed to requeue #{inprogress.size} jobs: #{e.message}")
    end

    def self.heartbeat_key(hostname, pid)
      "reliable-fetcher-heartbeat-#{hostname}-#{pid}"
    end

    # Per-process working queue, e.g. "working:queue:mailers:<host>:<pid>".
    def self.working_queue_name(queue)
      "#{WORKING_QUEUE_PREFIX}:#{queue}:#{hostname}:#{pid}"
    end

    attr_reader :cleanup_interval, :last_try_to_take_lease_at, :lease_interval,
                :queues, :use_semi_reliable_fetch,
                :strictly_ordered_queues

    def initialize(options)
      @cleanup_interval = options.fetch(:cleanup_interval, DEFAULT_CLEANUP_INTERVAL)
      @lease_interval = options.fetch(:lease_interval, DEFAULT_LEASE_INTERVAL)
      @last_try_to_take_lease_at = 0
      @strictly_ordered_queues = !!options[:strict]
      @queues = options[:queues].map { |q| "queue:#{q}" }
    end

    # Entry point used by Sidekiq's processor: opportunistically cleans up
    # dead working queues (at most once per lease), then fetches one job.
    def retrieve_work
      clean_working_queues! if take_lease

      retrieve_unit_of_work
    end

    # Subclasses must implement the actual fetch strategy.
    def retrieve_unit_of_work
      raise NotImplementedError,
            "#{self.class} does not implement #{__method__}"
    end

    private

    # Moves every job from a dead process' working queue back onto the
    # original queue it was taken from.
    def clean_working_queue!(working_queue)
      original_queue = working_queue.gsub(/#{WORKING_QUEUE_PREFIX}:|:[^:]*:[0-9]*\z/, '')

      Sidekiq.redis do |conn|
        count = 0

        while conn.rpoplpush(working_queue, original_queue) do
          count += 1
        end

        Sidekiq.logger.info("Requeued #{count} dead jobs to #{original_queue}")
      end
    end

    # Detect "old" jobs and requeue them because the worker they were assigned
    # to probably failed miserably.
    def clean_working_queues!
      Sidekiq.logger.info("Cleaning working queues")

      Sidekiq.redis do |conn|
        conn.scan_each(match: "#{WORKING_QUEUE_PREFIX}:queue:*", count: SCAN_COUNT) do |key|
          # Example: "working:name_of_the_job:queue:{hostname}:{PID}"
          hostname, pid = key.scan(/:([^:]*):([0-9]*)\z/).flatten

          # Fix: the original used `continue`, which is not a Ruby keyword
          # and raises NoMethodError at runtime; `next` skips malformed keys.
          next if hostname.nil? || pid.nil?

          clean_working_queue!(key) if worker_dead?(hostname, pid)
        end
      end
    end

    # A worker is considered dead once its heartbeat key has expired.
    def worker_dead?(hostname, pid)
      Sidekiq.redis do |conn|
        !conn.get(self.class.heartbeat_key(hostname, pid))
      end
    end

    # Tries to take the cluster-wide cleanup lease (SET NX with expiry) so
    # that only one process runs the cleanup per cleanup_interval. Returns
    # truthy only when the lease was acquired; attempts themselves are
    # rate-limited locally by #allowed_to_take_a_lease?.
    def take_lease
      return unless allowed_to_take_a_lease?

      @last_try_to_take_lease_at = Time.now.to_f

      Sidekiq.redis do |conn|
        conn.set(LEASE_KEY, 1, nx: true, ex: cleanup_interval)
      end
    end

    # Limits lease attempts to at most one per lease_interval seconds.
    def allowed_to_take_a_lease?
      Time.now.to_f - last_try_to_take_lease_at > lease_interval
    end
  end
end
@@ -1,40 +0,0 @@
1
# frozen_string_literal: true

module Sidekiq
  # Fully reliable fetch strategy: polls each configured queue with
  # RPOPLPUSH so the job atomically lands in this process' working queue.
  class ReliableFetch < BaseReliableFetch
    # For reliable fetch we don't use Redis' blocking operations so
    # we inject a regular sleep into the loop.
    RELIABLE_FETCH_IDLE_TIMEOUT = 5 # seconds

    attr_reader :queues_iterator, :queues_size

    def initialize(options)
      super

      @queues_size = queues.size
      @queues_iterator = queues.cycle
    end

    private

    # Tries each queue once (round-robin via the cycling iterator; rewound
    # first when strict ordering is on) and returns the first job found.
    # Sleeps briefly and returns nil when every queue was empty.
    def retrieve_unit_of_work
      queues_iterator.rewind if strictly_ordered_queues

      queues_size.times do
        current_queue = queues_iterator.next

        payload = Sidekiq.redis do |conn|
          conn.rpoplpush(current_queue, self.class.working_queue_name(current_queue))
        end

        return UnitOfWork.new(current_queue, payload) if payload
      end

      # We didn't find a job in any of the configured queues. Let's sleep a bit
      # to avoid uselessly burning too much CPU
      sleep(RELIABLE_FETCH_IDLE_TIMEOUT)

      nil
    end
  end
end
@@ -1,44 +0,0 @@
1
# frozen_string_literal: true

module Sidekiq
  # Semi-reliable fetch strategy: blocks on BRPOP across the configured
  # queues, then records the fetched job in this process' working queue.
  class SemiReliableFetch < BaseReliableFetch
    # We want the fetch operation to timeout every few seconds so the thread
    # can check if the process is shutting down. This constant is only used
    # for semi-reliable fetch.
    SEMI_RELIABLE_FETCH_TIMEOUT = 2 # seconds

    def initialize(options)
      super

      return unless strictly_ordered_queues

      # With strict ordering the BRPOP argument list is fixed, so we can
      # dedupe once up front and append the timeout permanently.
      @queues = @queues.uniq
      @queues << SEMI_RELIABLE_FETCH_TIMEOUT
    end

    private

    # Blocks on BRPOP, then copies the job into the working queue so it can
    # be requeued if this process dies before finishing it.
    def retrieve_unit_of_work
      popped = Sidekiq.redis { |conn| conn.brpop(*queues_cmd) }
      return if popped.nil?

      fetched = UnitOfWork.new(*popped)

      Sidekiq.redis do |conn|
        conn.lpush(self.class.working_queue_name(fetched.queue), fetched.job)
      end

      fetched
    end

    # Argument list for BRPOP: the fixed (pre-deduped) list when strict,
    # otherwise a freshly shuffled, deduped list with the timeout appended.
    def queues_cmd
      return @queues if strictly_ordered_queues

      shuffled = @queues.shuffle.uniq
      shuffled << SEMI_RELIABLE_FETCH_TIMEOUT
    end
  end
end
@@ -1,73 +0,0 @@
1
require 'spec_helper'
require 'fetch_shared_examples'
require 'sidekiq/base_reliable_fetch'
require 'sidekiq/reliable_fetch'
require 'sidekiq/semi_reliable_fetch'

describe Sidekiq::BaseReliableFetch do
  # Every example starts from an empty Redis database.
  before { Sidekiq.redis(&:flushdb) }

  describe 'UnitOfWork' do
    let(:fetcher) { Sidekiq::ReliableFetch.new(queues: ['foo']) }

    describe '#requeue' do
      it 'requeues job' do
        Sidekiq.redis { |conn| conn.rpush('queue:foo', 'msg') }

        uow = fetcher.retrieve_work

        uow.requeue

        # After requeue the job is back on the original queue and gone from
        # the working queue. (working_queue_size is the helper defined in
        # fetch_shared_examples.)
        expect(Sidekiq::Queue.new('foo').size).to eq 1
        expect(working_queue_size('foo')).to eq 0
      end
    end

    describe '#acknowledge' do
      it 'acknowledges job' do
        Sidekiq.redis { |conn| conn.rpush('queue:foo', 'msg') }

        uow = fetcher.retrieve_work

        # Acknowledging removes the job from the working queue without
        # putting it back on the original queue.
        expect { uow.acknowledge }
          .to change { working_queue_size('foo') }.by(-1)

        expect(Sidekiq::Queue.new('foo').size).to eq 0
      end
    end
  end

  describe '.bulk_requeue' do
    it 'requeues the bulk' do
      queue1 = Sidekiq::Queue.new('foo')
      queue2 = Sidekiq::Queue.new('bar')

      expect(queue1.size).to eq 0
      expect(queue2.size).to eq 0

      uow = described_class::UnitOfWork
      jobs = [ uow.new('queue:foo', 'bob'), uow.new('queue:foo', 'bar'), uow.new('queue:bar', 'widget') ]
      described_class.bulk_requeue(jobs, queues: [])

      # Two jobs were pushed back to queue:foo, one to queue:bar.
      expect(queue1.size).to eq 2
      expect(queue2.size).to eq 1
    end
  end

  it 'sets heartbeat' do
    config = double(:sidekiq_config, options: {})

    # setup_reliable_fetch! returns the heartbeat thread it spawns.
    heartbeat_thread = described_class.setup_reliable_fetch!(config)

    Sidekiq.redis do |conn|
      sleep 0.2 # Give the time to heartbeat thread to make a loop

      heartbeat_key = described_class.heartbeat_key(Socket.gethostname, ::Process.pid)
      heartbeat = conn.get(heartbeat_key)

      expect(heartbeat).not_to be_nil
    end

    # Stop the background thread so it does not leak into other examples.
    heartbeat_thread.kill
  end
end
@@ -1,118 +0,0 @@
1
# Shared examples exercised against both ReliableFetch and
# SemiReliableFetch (described_class is supplied by the including spec).
shared_examples 'a Sidekiq fetcher' do
  let(:queues) { ['assigned'] }

  # Every example starts from an empty Redis database.
  before { Sidekiq.redis(&:flushdb) }

  describe '#retrieve_work' do
    let(:fetcher) { described_class.new(queues: ['assigned']) }

    it 'retrieves the job and puts it to working queue' do
      Sidekiq.redis { |conn| conn.rpush('queue:assigned', 'msg') }

      uow = fetcher.retrieve_work

      # Fetching moves the job from the public queue into this process'
      # working queue.
      expect(working_queue_size('assigned')).to eq 1
      expect(uow.queue_name).to eq 'assigned'
      expect(uow.job).to eq 'msg'
      expect(Sidekiq::Queue.new('assigned').size).to eq 0
    end

    it 'does not retrieve a job from foreign queue' do
      Sidekiq.redis { |conn| conn.rpush('queue:not_assigned', 'msg') }

      expect(fetcher.retrieve_work).to be_nil
    end

    it 'requeues jobs from dead working queue' do
      # A working queue belonging to a process with no heartbeat is
      # considered dead (see other_process_working_queue_name below).
      Sidekiq.redis do |conn|
        conn.rpush(other_process_working_queue_name('assigned'), 'msg')
      end

      uow = fetcher.retrieve_work

      expect(uow.job).to eq 'msg'

      Sidekiq.redis do |conn|
        expect(conn.llen(other_process_working_queue_name('assigned'))).to eq 0
      end
    end

    it 'does not requeue jobs from live working queue' do
      # This working queue has a matching heartbeat key, so the cleanup
      # must leave its jobs alone.
      working_queue = live_other_process_working_queue_name('assigned')

      Sidekiq.redis do |conn|
        conn.rpush(working_queue, 'msg')
      end

      uow = fetcher.retrieve_work

      expect(uow).to be_nil

      Sidekiq.redis do |conn|
        expect(conn.llen(working_queue)).to eq 1
      end
    end

    it 'does not clean up orphaned jobs more than once per cleanup interval' do
      # Larger connection pool so ten threads can hit Redis concurrently.
      Sidekiq.redis = Sidekiq::RedisConnection.create(url: REDIS_URL, size: 10)

      # The cleanup lease must ensure only one of the ten concurrent
      # fetchers runs clean_working_queues!.
      expect_any_instance_of(described_class)
        .to receive(:clean_working_queues!).once

      threads = 10.times.map do
        Thread.new do
          described_class.new(queues: ['assigned']).retrieve_work
        end
      end

      threads.map(&:join)
    end

    it 'retrieves by order when strictly order is enabled' do
      fetcher = described_class.new(strict: true, queues: ['first', 'second'])

      Sidekiq.redis do |conn|
        conn.rpush('queue:first', ['msg3', 'msg2', 'msg1'])
        conn.rpush('queue:second', 'msg4')
      end

      jobs = (1..4).map { fetcher.retrieve_work.job }

      # Strict ordering drains 'first' completely before touching 'second'.
      expect(jobs).to eq ['msg1', 'msg2', 'msg3', 'msg4']
    end

    it 'does not starve any queue when queues are not strictly ordered' do
      fetcher = described_class.new(queues: ['first', 'second'])

      Sidekiq.redis do |conn|
        conn.rpush('queue:first', (1..200).map { |i| "msg#{i}" })
        conn.rpush('queue:second', 'this_job_should_not_stuck')
      end

      # Within 100 fetches from 201 queued jobs, the lone job on 'second'
      # must have been picked up at least once.
      jobs = (1..100).map { fetcher.retrieve_work.job }

      expect(jobs).to include 'this_job_should_not_stuck'
    end
  end
end

# Length of this process' working queue for the given (unprefixed) queue.
def working_queue_size(queue_name)
  Sidekiq.redis do |c|
    c.llen(Sidekiq::BaseReliableFetch.working_queue_name("queue:#{queue_name}"))
  end
end

# Working queue name for a fake foreign process (pid + 1) that has no
# heartbeat, i.e. one that looks dead to the cleanup.
def other_process_working_queue_name(queue)
  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{Socket.gethostname}:#{::Process.pid + 1}"
end

# Working queue name for a fake foreign process (pid + 1) whose heartbeat
# key is set first, i.e. one that looks alive to the cleanup.
def live_other_process_working_queue_name(queue)
  pid = ::Process.pid + 1
  hostname = Socket.gethostname

  Sidekiq.redis do |conn|
    conn.set(Sidekiq::BaseReliableFetch.heartbeat_key(hostname, pid), 1)
  end

  "#{Sidekiq::BaseReliableFetch::WORKING_QUEUE_PREFIX}:queue:#{queue}:#{hostname}:#{pid}"
end