rails_failover 0.5.8 → 0.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/Gemfile.lock +3 -2
- data/lib/rails_failover/active_record.rb +8 -4
- data/lib/rails_failover/active_record/handler.rb +34 -53
- data/lib/rails_failover/redis.rb +8 -4
- data/lib/rails_failover/redis/connector.rb +5 -1
- data/lib/rails_failover/redis/handler.rb +69 -101
- data/lib/rails_failover/version.rb +1 -1
- data/rails_failover.gemspec +1 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1586a787b6a0c7573a295515804a0342496cc1f8f9effe1d2e90d34004427dea
|
4
|
+
data.tar.gz: e1358cf8426ea5f6df3f8b039809d5062b18413f6950ecdea20d0379ae68bf5c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d54c7fddc889d91a5f69505b04af146630a13038d3d4d1af05f3e19d083c9ff05fed3ff7e42c0ebe7f576c8e91d6880bdff8466b88990fb27488579a63de7df9
|
7
|
+
data.tar.gz: 88a4cc4c0c7848a0d1eef1ac1bb50d380df2c7bcc4a209fe7ea7733080c63fc929f4d5480e48c945713c2b5083efa967825b909da1ecf52bf414c5b9160d822d
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,34 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.6.3] - 2020-12-07
|
10
|
+
|
11
|
+
- FIX: Handle clients which are connecting during fallback
|
12
|
+
|
13
|
+
## [0.6.2] - 2020-11-19
|
14
|
+
|
15
|
+
- FIX: Use concurrent-ruby maps to simplify concurrency logic. Resolves a number of possible concurrency issues
|
16
|
+
|
17
|
+
## [0.6.1] - 2020-11-19
|
18
|
+
|
19
|
+
- FIX: Recover correctly if both the primary and replica go offline
|
20
|
+
|
21
|
+
Previously, a replica failing would cause it to be added to the 'primaries_down' list. The fallback handler would then continuously try to fall back the replica to itself, looping forever, which meant that fallback to the primary would never happen.
|
22
|
+
|
23
|
+
## [0.6.0] - 2020-11-09
|
24
|
+
- FEATURE: Run failover/fallback callbacks once for each backend
|
25
|
+
|
26
|
+
Previously, the failover callback would only fire when the first backend failed, and the fallback callback would only fire when the last backend recovered. Now both failover and fallback callbacks are triggered for each backend. The key for each backend is also passed to the callbacks so that consuming applications can use it.
|
27
|
+
|
28
|
+
- FEATURE: Add primaries_down_count function to failover handlers
|
29
|
+
|
30
|
+
This is intended for consumption by monitoring systems (e.g. the Discourse Prometheus exporter).
|
31
|
+
|
32
|
+
## [0.5.9] - 2020-11-06
|
33
|
+
- FIX: Ignore errors from the redis socket shutdown call
|
34
|
+
|
35
|
+
This can fail with various I/O errors, but in all cases we want the thread to continue closing the connection that raised the error, as well as all the other connections.
|
36
|
+
|
9
37
|
## [0.5.8] - 2020-11-05
|
10
38
|
|
11
39
|
- FIX: Handle concurrency issues during redis disconnection (#10)
|
data/Gemfile.lock
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
rails_failover (0.5.8)
|
4
|
+
rails_failover (0.6.3)
|
5
5
|
activerecord (~> 6.0)
|
6
|
+
concurrent-ruby
|
6
7
|
railties (~> 6.0)
|
7
8
|
|
8
9
|
GEM
|
@@ -38,7 +39,7 @@ GEM
|
|
38
39
|
concurrent-ruby (1.1.6)
|
39
40
|
crass (1.0.6)
|
40
41
|
diff-lcs (1.3)
|
41
|
-
erubi (1.
|
42
|
+
erubi (1.10.0)
|
42
43
|
i18n (1.8.2)
|
43
44
|
concurrent-ruby (~> 1.0)
|
44
45
|
loofah (2.7.0)
|
@@ -47,16 +47,20 @@ module RailsFailover
|
|
47
47
|
@on_failover_callback = block
|
48
48
|
end
|
49
49
|
|
50
|
-
def self.on_failover_callback
|
51
|
-
@on_failover_callback
|
50
|
+
def self.on_failover_callback!(key)
|
51
|
+
@on_failover_callback&.call(key)
|
52
|
+
rescue => e
|
53
|
+
logger.warn("RailsFailover::ActiveRecord.on_failover failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
52
54
|
end
|
53
55
|
|
54
56
|
def self.on_fallback(&block)
|
55
57
|
@on_fallback_callback = block
|
56
58
|
end
|
57
59
|
|
58
|
-
def self.on_fallback_callback
|
59
|
-
@on_fallback_callback
|
60
|
+
def self.on_fallback_callback!(key)
|
61
|
+
@on_fallback_callback&.call(key)
|
62
|
+
rescue => e
|
63
|
+
logger.warn("RailsFailover::ActiveRecord.on_fallback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
60
64
|
end
|
61
65
|
end
|
62
66
|
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
require 'singleton'
|
3
3
|
require 'monitor'
|
4
|
+
require 'concurrent'
|
4
5
|
|
5
6
|
module RailsFailover
|
6
7
|
module ActiveRecord
|
@@ -11,41 +12,38 @@ module RailsFailover
|
|
11
12
|
VERIFY_FREQUENCY_BUFFER_PRECENT = 20
|
12
13
|
|
13
14
|
def initialize
|
14
|
-
@primaries_down =
|
15
|
-
@ancestor_pid = Process.pid
|
15
|
+
@primaries_down = Concurrent::Map.new
|
16
16
|
|
17
17
|
super() # Monitor#initialize
|
18
18
|
end
|
19
19
|
|
20
20
|
def verify_primary(handler_key)
|
21
|
+
primary_down(handler_key)
|
22
|
+
|
21
23
|
mon_synchronize do
|
22
|
-
primary_down(handler_key)
|
23
24
|
return if @thread&.alive?
|
24
|
-
|
25
25
|
logger.warn "Failover for ActiveRecord has been initiated"
|
26
|
+
@thread = Thread.new { loop_until_all_up }
|
27
|
+
end
|
28
|
+
end
|
26
29
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
logger.warn("RailsFailover::ActiveRecord.on_failover_callback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
31
|
-
end
|
30
|
+
def primary_down?(handler_key)
|
31
|
+
primaries_down[handler_key]
|
32
|
+
end
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
34
|
+
def primaries_down_count
|
35
|
+
primaries_down.size
|
36
|
+
end
|
36
37
|
|
37
|
-
|
38
|
-
logger.warn "Fallback to primary for ActiveRecord has been completed."
|
38
|
+
private
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
logger.warn("RailsFailover::ActiveRecord.on_fallback_callback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
44
|
-
end
|
40
|
+
def loop_until_all_up
|
41
|
+
loop do
|
42
|
+
initiate_fallback_to_primary
|
45
43
|
|
46
|
-
|
47
|
-
|
48
|
-
|
44
|
+
if all_primaries_up
|
45
|
+
logger.warn "Fallback to primary for ActiveRecord has been completed."
|
46
|
+
break
|
49
47
|
end
|
50
48
|
end
|
51
49
|
end
|
@@ -83,28 +81,18 @@ module RailsFailover
|
|
83
81
|
end
|
84
82
|
end
|
85
83
|
|
86
|
-
def primary_down?(handler_key)
|
87
|
-
primaries_down[handler_key]
|
88
|
-
end
|
89
|
-
|
90
|
-
private
|
91
|
-
|
92
84
|
def all_primaries_up
|
93
|
-
|
94
|
-
primaries_down.empty?
|
95
|
-
end
|
85
|
+
primaries_down.empty?
|
96
86
|
end
|
97
87
|
|
98
88
|
def primary_down(handler_key)
|
99
|
-
|
100
|
-
|
101
|
-
end
|
89
|
+
already_down = primaries_down.put_if_absent(handler_key, true)
|
90
|
+
RailsFailover::ActiveRecord.on_failover_callback!(handler_key) if !already_down
|
102
91
|
end
|
103
92
|
|
104
93
|
def primary_up(handler_key)
|
105
|
-
|
106
|
-
|
107
|
-
end
|
94
|
+
already_up = !primaries_down.delete(handler_key)
|
95
|
+
RailsFailover::ActiveRecord.on_fallback_callback!(handler_key) if !already_up
|
108
96
|
end
|
109
97
|
|
110
98
|
def spec_name
|
@@ -112,24 +100,17 @@ module RailsFailover
|
|
112
100
|
end
|
113
101
|
|
114
102
|
def primaries_down
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
@primaries_down[process_pid] = @primaries_down[@ancestor_pid] || {}
|
121
|
-
|
122
|
-
if process_pid != @ancestor_pid
|
123
|
-
@primaries_down.delete(@ancestor_pid)
|
124
|
-
|
125
|
-
@primaries_down[process_pid].each_key do |handler_key|
|
126
|
-
verify_primary(handler_key)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
end
|
103
|
+
ancestor_pids = nil
|
104
|
+
value = @primaries_down.compute_if_absent(Process.pid) do
|
105
|
+
ancestor_pids = @primaries_down.keys
|
106
|
+
@primaries_down.values.first || Concurrent::Map.new
|
107
|
+
end
|
130
108
|
|
131
|
-
|
109
|
+
ancestor_pids&.each do |pid|
|
110
|
+
@primaries_down.delete(pid)&.each_key { |key| verify_primary(key) }
|
132
111
|
end
|
112
|
+
|
113
|
+
value
|
133
114
|
end
|
134
115
|
|
135
116
|
def logger
|
data/lib/rails_failover/redis.rb
CHANGED
@@ -40,16 +40,20 @@ module RailsFailover
|
|
40
40
|
@on_failover_callback = block
|
41
41
|
end
|
42
42
|
|
43
|
-
def self.on_failover_callback
|
44
|
-
@on_failover_callback
|
43
|
+
def self.on_failover_callback!(key)
|
44
|
+
@on_failover_callback&.call(key)
|
45
|
+
rescue => e
|
46
|
+
logger.warn("RailsFailover::Redis.on_failover failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
45
47
|
end
|
46
48
|
|
47
49
|
def self.on_fallback(&block)
|
48
50
|
@on_fallback_callback = block
|
49
51
|
end
|
50
52
|
|
51
|
-
def self.on_fallback_callback
|
52
|
-
@on_fallback_callback
|
53
|
+
def self.on_fallback_callback!(key)
|
54
|
+
@on_fallback_callback&.call(key)
|
55
|
+
rescue => e
|
56
|
+
logger.warn("RailsFailover::Redis.on_fallback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
53
57
|
end
|
54
58
|
|
55
59
|
# For testing
|
@@ -26,7 +26,7 @@ module RailsFailover
|
|
26
26
|
Errno::ETIMEDOUT,
|
27
27
|
Errno::EINVAL => e
|
28
28
|
|
29
|
-
Handler.instance.verify_primary(options)
|
29
|
+
Handler.instance.verify_primary(options) if !is_failover_replica
|
30
30
|
raise e
|
31
31
|
end
|
32
32
|
|
@@ -55,6 +55,10 @@ module RailsFailover
|
|
55
55
|
|
56
56
|
def check(client)
|
57
57
|
Handler.instance.register_client(client)
|
58
|
+
expected_role = Handler.instance.primary_down?(@options) ? REPLICA : PRIMARY
|
59
|
+
if client.connection.rails_failover_role != expected_role
|
60
|
+
raise ::Redis::CannotConnectError, "Opened with unexpected failover role"
|
61
|
+
end
|
58
62
|
end
|
59
63
|
|
60
64
|
def on_disconnect(client)
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'monitor'
|
4
4
|
require 'singleton'
|
5
|
+
require 'concurrent'
|
5
6
|
|
6
7
|
module RailsFailover
|
7
8
|
class Redis
|
@@ -16,53 +17,51 @@ module RailsFailover
|
|
16
17
|
SOFT_DISCONNECT_POLL_SECONDS = 0.05
|
17
18
|
|
18
19
|
def initialize
|
19
|
-
@primaries_down =
|
20
|
-
@clients =
|
21
|
-
@ancestor_pid = Process.pid
|
20
|
+
@primaries_down = Concurrent::Map.new
|
21
|
+
@clients = Concurrent::Map.new
|
22
22
|
|
23
23
|
super() # Monitor#initialize
|
24
24
|
end
|
25
25
|
|
26
26
|
def verify_primary(options)
|
27
|
+
primary_down(options)
|
28
|
+
|
27
29
|
mon_synchronize do
|
28
|
-
|
29
|
-
|
30
|
+
return if @thread&.alive?
|
31
|
+
logger&.warn "Failover for Redis has been initiated"
|
32
|
+
@thread = Thread.new { loop_until_all_up }
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
33
|
-
def
|
34
|
-
|
36
|
+
def register_client(client)
|
37
|
+
id = client.options[:id]
|
38
|
+
clients_for_id(id).put_if_absent(client, true)
|
39
|
+
end
|
35
40
|
|
36
|
-
|
41
|
+
def deregister_client(client)
|
42
|
+
id = client.options[:id]
|
43
|
+
clients_for_id(id).delete(client)
|
44
|
+
end
|
37
45
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
logger&.warn("RailsFailover::Redis.on_failover_callback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
42
|
-
end
|
46
|
+
def primary_down?(options)
|
47
|
+
primaries_down[options[:id]]
|
48
|
+
end
|
43
49
|
|
44
|
-
|
45
|
-
|
46
|
-
ensure_primary_clients_disconnected
|
47
|
-
try_fallback_to_primary
|
48
|
-
|
49
|
-
if all_primaries_up
|
50
|
-
logger&.warn "Fallback to primary for Redis has been completed."
|
51
|
-
|
52
|
-
begin
|
53
|
-
RailsFailover::Redis.on_fallback_callback&.call
|
54
|
-
rescue => e
|
55
|
-
logger&.warn("RailsFailover::Redis.on_fallback_callback failed: #{e.class} #{e.message}\n#{e.backtrace.join("\n")}")
|
56
|
-
end
|
57
|
-
break
|
58
|
-
end
|
59
|
-
end
|
60
|
-
end
|
50
|
+
def primaries_down_count
|
51
|
+
primaries_down.size
|
61
52
|
end
|
62
53
|
|
63
|
-
|
64
|
-
|
65
|
-
|
54
|
+
private
|
55
|
+
|
56
|
+
def loop_until_all_up
|
57
|
+
loop do
|
58
|
+
ensure_primary_clients_disconnected
|
59
|
+
try_fallback_to_primary
|
60
|
+
|
61
|
+
if all_primaries_up
|
62
|
+
logger&.warn "Fallback to primary for Redis has been completed."
|
63
|
+
break
|
64
|
+
end
|
66
65
|
end
|
67
66
|
end
|
68
67
|
|
@@ -72,7 +71,7 @@ module RailsFailover
|
|
72
71
|
|
73
72
|
active_primaries_keys = {}
|
74
73
|
|
75
|
-
|
74
|
+
primaries_down.each do |key, options|
|
76
75
|
info = nil
|
77
76
|
options = options.dup
|
78
77
|
|
@@ -99,93 +98,58 @@ module RailsFailover
|
|
99
98
|
end
|
100
99
|
end
|
101
100
|
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
mon_synchronize do
|
106
|
-
clients[key] ||= []
|
107
|
-
clients[key] << client
|
108
|
-
end
|
101
|
+
def all_primaries_up
|
102
|
+
primaries_down.empty?
|
109
103
|
end
|
110
104
|
|
111
|
-
def
|
112
|
-
|
113
|
-
|
114
|
-
mon_synchronize do
|
115
|
-
if clients[key]
|
116
|
-
clients[key].delete(client)
|
117
|
-
|
118
|
-
if clients[key].empty?
|
119
|
-
clients.delete(key)
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
105
|
+
def primary_up(options)
|
106
|
+
already_up = !primaries_down.delete(options[:id])
|
107
|
+
RailsFailover::Redis.on_fallback_callback!(options[:id]) if !already_up
|
123
108
|
end
|
124
109
|
|
125
|
-
def primary_down
|
126
|
-
|
127
|
-
|
128
|
-
end
|
110
|
+
def primary_down(options)
|
111
|
+
already_down = primaries_down.put_if_absent(options[:id], options.dup)
|
112
|
+
RailsFailover::Redis.on_failover_callback!(options[:id]) if !already_down
|
129
113
|
end
|
130
114
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
115
|
+
def primaries_down
|
116
|
+
ancestor_pids = nil
|
117
|
+
value = @primaries_down.compute_if_absent(Process.pid) do
|
118
|
+
ancestor_pids = @primaries_down.keys
|
119
|
+
@primaries_down.values.first || Concurrent::Map.new
|
120
|
+
end
|
136
121
|
|
137
|
-
|
138
|
-
|
139
|
-
primaries_down.delete(options[:id])
|
122
|
+
ancestor_pids&.each do |pid|
|
123
|
+
@primaries_down.delete(pid)&.each { |id, options| verify_primary(options) }
|
140
124
|
end
|
125
|
+
|
126
|
+
value
|
141
127
|
end
|
142
128
|
|
143
|
-
def
|
144
|
-
|
145
|
-
primaries_down[options[:id]] = options.dup
|
146
|
-
end
|
129
|
+
def clients_for_id(id)
|
130
|
+
clients.compute_if_absent(id) { Concurrent::Map.new }
|
147
131
|
end
|
148
132
|
|
149
133
|
def clients
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
if !@clients[process_pid]
|
155
|
-
@clients[process_pid] = {}
|
156
|
-
|
157
|
-
if process_pid != @ancestor_pid
|
158
|
-
@clients.delete(@ancestor_pid)
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
@clients[process_pid]
|
134
|
+
ancestor_pids = nil
|
135
|
+
clients_for_pid = @clients.compute_if_absent(Process.pid) do
|
136
|
+
ancestor_pids = @clients.keys
|
137
|
+
Concurrent::Map.new
|
163
138
|
end
|
139
|
+
ancestor_pids&.each { |k| @clients.delete(k) }
|
140
|
+
clients_for_pid
|
164
141
|
end
|
165
142
|
|
166
|
-
def
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
mon_synchronize do
|
171
|
-
if !@primaries_down[process_pid]
|
172
|
-
@primaries_down[process_pid] = @primaries_down[@ancestor_pid] || {}
|
173
|
-
|
174
|
-
if process_pid != @ancestor_pid
|
175
|
-
@primaries_down.delete(@ancestor_pid)&.each do |id, options|
|
176
|
-
verify_primary(options)
|
177
|
-
end
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
@primaries_down[process_pid]
|
143
|
+
def ensure_primary_clients_disconnected
|
144
|
+
primaries_down.each do |key, options|
|
145
|
+
disconnect_clients(options, RailsFailover::Redis::PRIMARY)
|
182
146
|
end
|
183
147
|
end
|
184
148
|
|
185
149
|
def disconnect_clients(options, role)
|
186
|
-
|
150
|
+
id = options[:id]
|
187
151
|
|
188
|
-
matched_clients =
|
152
|
+
matched_clients = clients_for_id(id)&.keys
|
189
153
|
&.filter { |c| c.connection.rails_failover_role == role }
|
190
154
|
&.to_set
|
191
155
|
|
@@ -208,7 +172,11 @@ module RailsFailover
|
|
208
172
|
has_lock = redis.mon_try_enter
|
209
173
|
|
210
174
|
if !has_lock
|
211
|
-
|
175
|
+
begin
|
176
|
+
client.connection.shutdown_socket
|
177
|
+
rescue => e
|
178
|
+
logger&.warn "Redis shutdown_socket for (#{role}) failed with #{e.class} '#{e.message}'"
|
179
|
+
end
|
212
180
|
|
213
181
|
waiting_since = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
214
182
|
loop do # Keep trying
|
data/rails_failover.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rails_failover
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.8
|
4
|
+
version: 0.6.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alan Tan
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '6.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: concurrent-ruby
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
description:
|
42
56
|
email:
|
43
57
|
- tgx@discourse.org
|