network_resiliency 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/network_resiliency/adapter/redis.rb +26 -12
- data/lib/network_resiliency/refinements.rb +8 -0
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +130 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
|
4
|
+
data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
|
7
|
+
data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/network_resiliency/adapter/redis.rb
CHANGED
@@ -36,35 +36,49 @@ module NetworkResiliency
|
|
36
36
|
end
|
37
37
|
|
38
38
|
module Instrumentation
|
39
|
-
# def initialize(...)
|
40
|
-
# super
|
41
|
-
|
42
|
-
# @network_resiliency_attempts = options[:reconnect_attempts]
|
43
|
-
# options[:reconnect_attempts] = 0
|
44
|
-
# end
|
45
|
-
|
46
39
|
def establish_connection
|
47
40
|
return super unless NetworkResiliency.enabled?(:redis)
|
48
41
|
|
42
|
+
original_timeout = @options[:connect_timeout]
|
43
|
+
|
44
|
+
timeouts = NetworkResiliency.timeouts_for(
|
45
|
+
adapter: "redis",
|
46
|
+
action: "connect",
|
47
|
+
destination: host,
|
48
|
+
max: original_timeout,
|
49
|
+
)
|
50
|
+
|
51
|
+
attempts = 0
|
52
|
+
ts = -NetworkResiliency.timestamp
|
53
|
+
|
49
54
|
begin
|
50
|
-
|
55
|
+
attempts += 1
|
56
|
+
error = nil
|
57
|
+
|
58
|
+
@options[:connect_timeout] = timeouts.shift
|
51
59
|
|
52
60
|
super
|
53
61
|
rescue ::Redis::CannotConnectError => e
|
54
62
|
# capture error
|
63
|
+
|
64
|
+
# grab underlying exception within Redis wrapper
|
65
|
+
error = e.cause.class
|
66
|
+
|
67
|
+
retry if timeouts.size > 0
|
68
|
+
|
55
69
|
raise
|
56
70
|
ensure
|
57
71
|
ts += NetworkResiliency.timestamp
|
58
|
-
|
59
|
-
# grab underlying exception within Redis wrapper
|
60
|
-
error = e ? e.cause.class : nil
|
72
|
+
@options[:connect_timeout] = original_timeout
|
61
73
|
|
62
74
|
NetworkResiliency.record(
|
63
75
|
adapter: "redis",
|
64
76
|
action: "connect",
|
65
77
|
destination: host,
|
66
|
-
error: error,
|
67
78
|
duration: ts,
|
79
|
+
error: error,
|
80
|
+
timeout: @options[:connect_timeout],
|
81
|
+
attempts: attempts,
|
68
82
|
)
|
69
83
|
end
|
70
84
|
end
|
data/lib/network_resiliency/refinements.rb
CHANGED
@@ -8,6 +8,14 @@ module NetworkResiliency
|
|
8
8
|
log10 = Math.log10(self.round)
|
9
9
|
10 ** (ceil ? log10.ceil : log10.floor)
|
10
10
|
end
|
11
|
+
|
12
|
+
def power_ceil
|
13
|
+
return 0 if self <= 0
|
14
|
+
return 1 if self <= 1
|
15
|
+
|
16
|
+
digits = Math.log10(self).floor
|
17
|
+
10 ** digits * (self.to_f / 10 ** digits).ceil
|
18
|
+
end
|
11
19
|
end
|
12
20
|
end
|
13
21
|
end
|
data/lib/network_resiliency.rb
CHANGED
@@ -14,6 +14,9 @@ module NetworkResiliency
|
|
14
14
|
autoload :Postgres, "network_resiliency/adapter/postgres"
|
15
15
|
end
|
16
16
|
|
17
|
+
MODE = [ :observe, :resilient ].freeze
|
18
|
+
RESILIENCY_SIZE_THRESHOLD = 1_000
|
19
|
+
|
17
20
|
extend self
|
18
21
|
|
19
22
|
attr_accessor :statsd, :redis
|
@@ -87,9 +90,21 @@ module NetworkResiliency
|
|
87
90
|
Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
|
88
91
|
end
|
89
92
|
|
93
|
+
def mode
|
94
|
+
@mode || :observe
|
95
|
+
end
|
96
|
+
|
97
|
+
def mode=(mode)
|
98
|
+
unless MODE.include?(mode)
|
99
|
+
raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
|
100
|
+
end
|
101
|
+
|
102
|
+
@mode = mode
|
103
|
+
end
|
104
|
+
|
90
105
|
# private
|
91
106
|
|
92
|
-
def record(adapter:, action:, destination:, duration:, error: nil)
|
107
|
+
def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
|
93
108
|
return if ignore_destination?(adapter, action, destination)
|
94
109
|
|
95
110
|
NetworkResiliency.statsd&.distribution(
|
@@ -99,6 +114,7 @@ module NetworkResiliency
|
|
99
114
|
adapter: adapter,
|
100
115
|
destination: destination,
|
101
116
|
error: error,
|
117
|
+
attempts: (attempts if attempts > 1),
|
102
118
|
}.compact,
|
103
119
|
)
|
104
120
|
|
@@ -112,8 +128,37 @@ module NetworkResiliency
|
|
112
128
|
}.compact,
|
113
129
|
)
|
114
130
|
|
115
|
-
|
116
|
-
|
131
|
+
NetworkResiliency.statsd&.gauge(
|
132
|
+
"network_resiliency.#{action}.timeout",
|
133
|
+
timeout,
|
134
|
+
tags: {
|
135
|
+
adapter: adapter,
|
136
|
+
destination: destination,
|
137
|
+
},
|
138
|
+
)
|
139
|
+
|
140
|
+
if error
|
141
|
+
NetworkResiliency.statsd&.distribution(
|
142
|
+
"network_resiliency.#{action}.time_saved",
|
143
|
+
timeout - duration,
|
144
|
+
tags: {
|
145
|
+
adapter: adapter,
|
146
|
+
destination: destination,
|
147
|
+
},
|
148
|
+
) if timeout
|
149
|
+
else
|
150
|
+
# track successful retries
|
151
|
+
NetworkResiliency.statsd&.increment(
|
152
|
+
"network_resiliency.#{action}.resilient",
|
153
|
+
tags: {
|
154
|
+
adapter: adapter,
|
155
|
+
destination: destination,
|
156
|
+
},
|
157
|
+
) if attempts > 1
|
158
|
+
|
159
|
+
# record stats
|
160
|
+
key = [ adapter, action, destination ].join(":")
|
161
|
+
stats = StatsEngine.add(key, duration)
|
117
162
|
tags = {
|
118
163
|
adapter: adapter,
|
119
164
|
destination: destination,
|
@@ -138,6 +183,8 @@ module NetworkResiliency
|
|
138
183
|
tags: tags,
|
139
184
|
)
|
140
185
|
end
|
186
|
+
|
187
|
+
nil
|
141
188
|
rescue => e
|
142
189
|
NetworkResiliency.statsd&.increment(
|
143
190
|
"network_resiliency.error",
|
@@ -157,11 +204,89 @@ module NetworkResiliency
|
|
157
204
|
IP_ADDRESS_REGEX.match?(destination)
|
158
205
|
end
|
159
206
|
|
207
|
+
def timeouts_for(adapter:, action:, destination:, max: nil)
|
208
|
+
default = [ max ]
|
209
|
+
|
210
|
+
return default if NetworkResiliency.mode == :observe
|
211
|
+
|
212
|
+
key = [ adapter, action, destination ].join(":")
|
213
|
+
stats = StatsEngine.get(key)
|
214
|
+
|
215
|
+
return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
|
216
|
+
|
217
|
+
tags = {
|
218
|
+
adapter: adapter,
|
219
|
+
action: action,
|
220
|
+
destination: destination,
|
221
|
+
}
|
222
|
+
|
223
|
+
p99 = (stats.avg + stats.stdev * 3).power_ceil
|
224
|
+
timeouts = []
|
225
|
+
|
226
|
+
if max
|
227
|
+
if p99 < max
|
228
|
+
timeouts << p99
|
229
|
+
|
230
|
+
# fallback attempt
|
231
|
+
if max - p99 > p99
|
232
|
+
# use remaining time for second attempt
|
233
|
+
timeouts << max - p99
|
234
|
+
else
|
235
|
+
timeouts << max
|
236
|
+
|
237
|
+
NetworkResiliency.statsd&.increment(
|
238
|
+
"network_resiliency.timeout.raised",
|
239
|
+
tags: tags,
|
240
|
+
)
|
241
|
+
end
|
242
|
+
else
|
243
|
+
# the specified timeout is less than our expected p99...awkward
|
244
|
+
timeouts << max
|
245
|
+
|
246
|
+
NetworkResiliency.statsd&.increment(
|
247
|
+
"network_resiliency.timeout.too_low",
|
248
|
+
tags: tags,
|
249
|
+
)
|
250
|
+
end
|
251
|
+
else
|
252
|
+
timeouts << p99
|
253
|
+
|
254
|
+
# timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
|
255
|
+
|
256
|
+
# unbounded second attempt
|
257
|
+
timeouts << nil
|
258
|
+
|
259
|
+
NetworkResiliency.statsd&.increment(
|
260
|
+
"network_resiliency.timeout.missing",
|
261
|
+
tags: tags,
|
262
|
+
)
|
263
|
+
end
|
264
|
+
|
265
|
+
timeouts
|
266
|
+
rescue => e
|
267
|
+
NetworkResiliency.statsd&.increment(
|
268
|
+
"network_resiliency.error",
|
269
|
+
tags: {
|
270
|
+
method: __method__,
|
271
|
+
type: e.class,
|
272
|
+
},
|
273
|
+
)
|
274
|
+
|
275
|
+
warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
|
276
|
+
|
277
|
+
default
|
278
|
+
end
|
279
|
+
|
160
280
|
def reset
|
161
281
|
@enabled = nil
|
282
|
+
@mode = nil
|
162
283
|
Thread.current["network_resiliency"] = nil
|
163
284
|
StatsEngine.reset
|
164
|
-
|
285
|
+
|
286
|
+
if @sync_worker
|
287
|
+
@sync_worker.kill
|
288
|
+
@sync_worker = nil
|
289
|
+
end
|
165
290
|
end
|
166
291
|
|
167
292
|
private
|
@@ -176,13 +301,11 @@ module NetworkResiliency
|
|
176
301
|
raise "Redis not configured" unless redis
|
177
302
|
|
178
303
|
@sync_worker = Thread.new do
|
179
|
-
|
304
|
+
loop do
|
180
305
|
StatsEngine.sync(redis)
|
181
306
|
|
182
307
|
sleep(3)
|
183
308
|
end
|
184
|
-
rescue Interrupt
|
185
|
-
# goodbye
|
186
309
|
end
|
187
310
|
end
|
188
311
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|