network_resiliency 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/network_resiliency/adapter/redis.rb +26 -12
- data/lib/network_resiliency/refinements.rb +8 -0
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +130 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
|
4
|
+
data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
|
7
|
+
data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/network_resiliency/adapter/redis.rb
CHANGED
@@ -36,35 +36,49 @@ module NetworkResiliency
|
|
36
36
|
end
|
37
37
|
|
38
38
|
module Instrumentation
|
39
|
-
# def initialize(...)
|
40
|
-
# super
|
41
|
-
|
42
|
-
# @network_resiliency_attempts = options[:reconnect_attempts]
|
43
|
-
# options[:reconnect_attempts] = 0
|
44
|
-
# end
|
45
|
-
|
46
39
|
def establish_connection
|
47
40
|
return super unless NetworkResiliency.enabled?(:redis)
|
48
41
|
|
42
|
+
original_timeout = @options[:connect_timeout]
|
43
|
+
|
44
|
+
timeouts = NetworkResiliency.timeouts_for(
|
45
|
+
adapter: "redis",
|
46
|
+
action: "connect",
|
47
|
+
destination: host,
|
48
|
+
max: original_timeout,
|
49
|
+
)
|
50
|
+
|
51
|
+
attempts = 0
|
52
|
+
ts = -NetworkResiliency.timestamp
|
53
|
+
|
49
54
|
begin
|
50
|
-
|
55
|
+
attempts += 1
|
56
|
+
error = nil
|
57
|
+
|
58
|
+
@options[:connect_timeout] = timeouts.shift
|
51
59
|
|
52
60
|
super
|
53
61
|
rescue ::Redis::CannotConnectError => e
|
54
62
|
# capture error
|
63
|
+
|
64
|
+
# grab underlying exception within Redis wrapper
|
65
|
+
error = e.cause.class
|
66
|
+
|
67
|
+
retry if timeouts.size > 0
|
68
|
+
|
55
69
|
raise
|
56
70
|
ensure
|
57
71
|
ts += NetworkResiliency.timestamp
|
58
|
-
|
59
|
-
# grab underlying exception within Redis wrapper
|
60
|
-
error = e ? e.cause.class : nil
|
72
|
+
@options[:connect_timeout] = original_timeout
|
61
73
|
|
62
74
|
NetworkResiliency.record(
|
63
75
|
adapter: "redis",
|
64
76
|
action: "connect",
|
65
77
|
destination: host,
|
66
|
-
error: error,
|
67
78
|
duration: ts,
|
79
|
+
error: error,
|
80
|
+
timeout: @options[:connect_timeout],
|
81
|
+
attempts: attempts,
|
68
82
|
)
|
69
83
|
end
|
70
84
|
end
|
data/lib/network_resiliency/refinements.rb
CHANGED
@@ -8,6 +8,14 @@ module NetworkResiliency
|
|
8
8
|
log10 = Math.log10(self.round)
|
9
9
|
10 ** (ceil ? log10.ceil : log10.floor)
|
10
10
|
end
|
11
|
+
|
12
|
+
def power_ceil
|
13
|
+
return 0 if self <= 0
|
14
|
+
return 1 if self <= 1
|
15
|
+
|
16
|
+
digits = Math.log10(self).floor
|
17
|
+
10 ** digits * (self.to_f / 10 ** digits).ceil
|
18
|
+
end
|
11
19
|
end
|
12
20
|
end
|
13
21
|
end
|
data/lib/network_resiliency.rb
CHANGED
@@ -14,6 +14,9 @@ module NetworkResiliency
|
|
14
14
|
autoload :Postgres, "network_resiliency/adapter/postgres"
|
15
15
|
end
|
16
16
|
|
17
|
+
MODE = [ :observe, :resilient ].freeze
|
18
|
+
RESILIENCY_SIZE_THRESHOLD = 1_000
|
19
|
+
|
17
20
|
extend self
|
18
21
|
|
19
22
|
attr_accessor :statsd, :redis
|
@@ -87,9 +90,21 @@ module NetworkResiliency
|
|
87
90
|
Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
|
88
91
|
end
|
89
92
|
|
93
|
+
def mode
|
94
|
+
@mode || :observe
|
95
|
+
end
|
96
|
+
|
97
|
+
def mode=(mode)
|
98
|
+
unless MODE.include?(mode)
|
99
|
+
raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
|
100
|
+
end
|
101
|
+
|
102
|
+
@mode = mode
|
103
|
+
end
|
104
|
+
|
90
105
|
# private
|
91
106
|
|
92
|
-
def record(adapter:, action:, destination:, duration:, error: nil)
|
107
|
+
def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
|
93
108
|
return if ignore_destination?(adapter, action, destination)
|
94
109
|
|
95
110
|
NetworkResiliency.statsd&.distribution(
|
@@ -99,6 +114,7 @@ module NetworkResiliency
|
|
99
114
|
adapter: adapter,
|
100
115
|
destination: destination,
|
101
116
|
error: error,
|
117
|
+
attempts: (attempts if attempts > 1),
|
102
118
|
}.compact,
|
103
119
|
)
|
104
120
|
|
@@ -112,8 +128,37 @@ module NetworkResiliency
|
|
112
128
|
}.compact,
|
113
129
|
)
|
114
130
|
|
115
|
-
|
116
|
-
|
131
|
+
NetworkResiliency.statsd&.gauge(
|
132
|
+
"network_resiliency.#{action}.timeout",
|
133
|
+
timeout,
|
134
|
+
tags: {
|
135
|
+
adapter: adapter,
|
136
|
+
destination: destination,
|
137
|
+
},
|
138
|
+
)
|
139
|
+
|
140
|
+
if error
|
141
|
+
NetworkResiliency.statsd&.distribution(
|
142
|
+
"network_resiliency.#{action}.time_saved",
|
143
|
+
timeout - duration,
|
144
|
+
tags: {
|
145
|
+
adapter: adapter,
|
146
|
+
destination: destination,
|
147
|
+
},
|
148
|
+
) if timeout
|
149
|
+
else
|
150
|
+
# track successful retries
|
151
|
+
NetworkResiliency.statsd&.increment(
|
152
|
+
"network_resiliency.#{action}.resilient",
|
153
|
+
tags: {
|
154
|
+
adapter: adapter,
|
155
|
+
destination: destination,
|
156
|
+
},
|
157
|
+
) if attempts > 1
|
158
|
+
|
159
|
+
# record stats
|
160
|
+
key = [ adapter, action, destination ].join(":")
|
161
|
+
stats = StatsEngine.add(key, duration)
|
117
162
|
tags = {
|
118
163
|
adapter: adapter,
|
119
164
|
destination: destination,
|
@@ -138,6 +183,8 @@ module NetworkResiliency
|
|
138
183
|
tags: tags,
|
139
184
|
)
|
140
185
|
end
|
186
|
+
|
187
|
+
nil
|
141
188
|
rescue => e
|
142
189
|
NetworkResiliency.statsd&.increment(
|
143
190
|
"network_resiliency.error",
|
@@ -157,11 +204,89 @@ module NetworkResiliency
|
|
157
204
|
IP_ADDRESS_REGEX.match?(destination)
|
158
205
|
end
|
159
206
|
|
207
|
+
def timeouts_for(adapter:, action:, destination:, max: nil)
|
208
|
+
default = [ max ]
|
209
|
+
|
210
|
+
return default if NetworkResiliency.mode == :observe
|
211
|
+
|
212
|
+
key = [ adapter, action, destination ].join(":")
|
213
|
+
stats = StatsEngine.get(key)
|
214
|
+
|
215
|
+
return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
|
216
|
+
|
217
|
+
tags = {
|
218
|
+
adapter: adapter,
|
219
|
+
action: action,
|
220
|
+
destination: destination,
|
221
|
+
}
|
222
|
+
|
223
|
+
p99 = (stats.avg + stats.stdev * 3).power_ceil
|
224
|
+
timeouts = []
|
225
|
+
|
226
|
+
if max
|
227
|
+
if p99 < max
|
228
|
+
timeouts << p99
|
229
|
+
|
230
|
+
# fallback attempt
|
231
|
+
if max - p99 > p99
|
232
|
+
# use remaining time for second attempt
|
233
|
+
timeouts << max - p99
|
234
|
+
else
|
235
|
+
timeouts << max
|
236
|
+
|
237
|
+
NetworkResiliency.statsd&.increment(
|
238
|
+
"network_resiliency.timeout.raised",
|
239
|
+
tags: tags,
|
240
|
+
)
|
241
|
+
end
|
242
|
+
else
|
243
|
+
# the specified timeout is less than our expected p99...awkward
|
244
|
+
timeouts << max
|
245
|
+
|
246
|
+
NetworkResiliency.statsd&.increment(
|
247
|
+
"network_resiliency.timeout.too_low",
|
248
|
+
tags: tags,
|
249
|
+
)
|
250
|
+
end
|
251
|
+
else
|
252
|
+
timeouts << p99
|
253
|
+
|
254
|
+
# timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
|
255
|
+
|
256
|
+
# unbounded second attempt
|
257
|
+
timeouts << nil
|
258
|
+
|
259
|
+
NetworkResiliency.statsd&.increment(
|
260
|
+
"network_resiliency.timeout.missing",
|
261
|
+
tags: tags,
|
262
|
+
)
|
263
|
+
end
|
264
|
+
|
265
|
+
timeouts
|
266
|
+
rescue => e
|
267
|
+
NetworkResiliency.statsd&.increment(
|
268
|
+
"network_resiliency.error",
|
269
|
+
tags: {
|
270
|
+
method: __method__,
|
271
|
+
type: e.class,
|
272
|
+
},
|
273
|
+
)
|
274
|
+
|
275
|
+
warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
|
276
|
+
|
277
|
+
default
|
278
|
+
end
|
279
|
+
|
160
280
|
def reset
|
161
281
|
@enabled = nil
|
282
|
+
@mode = nil
|
162
283
|
Thread.current["network_resiliency"] = nil
|
163
284
|
StatsEngine.reset
|
164
|
-
|
285
|
+
|
286
|
+
if @sync_worker
|
287
|
+
@sync_worker.kill
|
288
|
+
@sync_worker = nil
|
289
|
+
end
|
165
290
|
end
|
166
291
|
|
167
292
|
private
|
@@ -176,13 +301,11 @@ module NetworkResiliency
|
|
176
301
|
raise "Redis not configured" unless redis
|
177
302
|
|
178
303
|
@sync_worker = Thread.new do
|
179
|
-
|
304
|
+
loop do
|
180
305
|
StatsEngine.sync(redis)
|
181
306
|
|
182
307
|
sleep(3)
|
183
308
|
end
|
184
|
-
rescue Interrupt
|
185
|
-
# goodbye
|
186
309
|
end
|
187
310
|
end
|
188
311
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-11-
|
11
|
+
date: 2023-11-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|