network_resiliency 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19102b63c5f8090d6805ac5ce1d71a25e30ab7483ef11bf5d9cee771bb4ce508
4
- data.tar.gz: a9f37e7fafbfc1d45e0f8b24b655cf72b33300b5b6335e396d9dbffc22479799
3
+ metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
4
+ data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
5
5
  SHA512:
6
- metadata.gz: 27390e9651bef755a92a02218c286291971e7c77baabbddd062e3279d5a52f3a819fbbd9b0a9f91e289bd3124afdc028e42caf6b504c54bb198d959ba54dd1f6
7
- data.tar.gz: 56763326a26679f03124a1a4f8b5dab2de10edeeebaff1a4dab8c821ee9b8a035b3b8cf285089bac944c722cc647bcb47264b7fe60295dfbf51d35c4977d2d1f
6
+ metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
7
+ data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ### v0.4.0 (2023-11-13)
2
+ - redis resiliency
3
+ - timeouts_for
4
+ - mode
5
+
1
6
  ### v0.3.2 (2023-11-03)
2
7
  - stats observability
3
8
  - surface errors in specs
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.3.2)
4
+ network_resiliency (0.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,35 +36,49 @@ module NetworkResiliency
36
36
  end
37
37
 
38
38
  module Instrumentation
39
- # def initialize(...)
40
- # super
41
-
42
- # @network_resiliency_attempts = options[:reconnect_attempts]
43
- # options[:reconnect_attempts] = 0
44
- # end
45
-
46
39
  def establish_connection
47
40
  return super unless NetworkResiliency.enabled?(:redis)
48
41
 
42
+ original_timeout = @options[:connect_timeout]
43
+
44
+ timeouts = NetworkResiliency.timeouts_for(
45
+ adapter: "redis",
46
+ action: "connect",
47
+ destination: host,
48
+ max: original_timeout,
49
+ )
50
+
51
+ attempts = 0
52
+ ts = -NetworkResiliency.timestamp
53
+
49
54
  begin
50
- ts = -NetworkResiliency.timestamp
55
+ attempts += 1
56
+ error = nil
57
+
58
+ @options[:connect_timeout] = timeouts.shift
51
59
 
52
60
  super
53
61
  rescue ::Redis::CannotConnectError => e
54
62
  # capture error
63
+
64
+ # grab underlying exception within Redis wrapper
65
+ error = e.cause.class
66
+
67
+ retry if timeouts.size > 0
68
+
55
69
  raise
56
70
  ensure
57
71
  ts += NetworkResiliency.timestamp
58
-
59
- # grab underlying exception within Redis wrapper
60
- error = e ? e.cause.class : nil
72
+ @options[:connect_timeout] = original_timeout
61
73
 
62
74
  NetworkResiliency.record(
63
75
  adapter: "redis",
64
76
  action: "connect",
65
77
  destination: host,
66
- error: error,
67
78
  duration: ts,
79
+ error: error,
80
+ timeout: @options[:connect_timeout],
81
+ attempts: attempts,
68
82
  )
69
83
  end
70
84
  end
@@ -8,6 +8,14 @@ module NetworkResiliency
8
8
  log10 = Math.log10(self.round)
9
9
  10 ** (ceil ? log10.ceil : log10.floor)
10
10
  end
11
+
12
+ def power_ceil
13
+ return 0 if self <= 0
14
+ return 1 if self <= 1
15
+
16
+ digits = Math.log10(self).floor
17
+ 10 ** digits * (self.to_f / 10 ** digits).ceil
18
+ end
11
19
  end
12
20
  end
13
21
  end
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -14,6 +14,9 @@ module NetworkResiliency
14
14
  autoload :Postgres, "network_resiliency/adapter/postgres"
15
15
  end
16
16
 
17
+ MODE = [ :observe, :resilient ].freeze
18
+ RESILIENCY_SIZE_THRESHOLD = 1_000
19
+
17
20
  extend self
18
21
 
19
22
  attr_accessor :statsd, :redis
@@ -87,9 +90,21 @@ module NetworkResiliency
87
90
  Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
88
91
  end
89
92
 
93
+ def mode
94
+ @mode || :observe
95
+ end
96
+
97
+ def mode=(mode)
98
+ unless MODE.include?(mode)
99
+ raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
100
+ end
101
+
102
+ @mode = mode
103
+ end
104
+
90
105
  # private
91
106
 
92
- def record(adapter:, action:, destination:, duration:, error: nil)
107
+ def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
93
108
  return if ignore_destination?(adapter, action, destination)
94
109
 
95
110
  NetworkResiliency.statsd&.distribution(
@@ -99,6 +114,7 @@ module NetworkResiliency
99
114
  adapter: adapter,
100
115
  destination: destination,
101
116
  error: error,
117
+ attempts: (attempts if attempts > 1),
102
118
  }.compact,
103
119
  )
104
120
 
@@ -112,8 +128,37 @@ module NetworkResiliency
112
128
  }.compact,
113
129
  )
114
130
 
115
- key = [ adapter, action, destination ].join(":")
116
- StatsEngine.add(key, duration).tap do |stats|
131
+ NetworkResiliency.statsd&.gauge(
132
+ "network_resiliency.#{action}.timeout",
133
+ timeout,
134
+ tags: {
135
+ adapter: adapter,
136
+ destination: destination,
137
+ },
138
+ )
139
+
140
+ if error
141
+ NetworkResiliency.statsd&.distribution(
142
+ "network_resiliency.#{action}.time_saved",
143
+ timeout - duration,
144
+ tags: {
145
+ adapter: adapter,
146
+ destination: destination,
147
+ },
148
+ ) if timeout
149
+ else
150
+ # track successful retries
151
+ NetworkResiliency.statsd&.increment(
152
+ "network_resiliency.#{action}.resilient",
153
+ tags: {
154
+ adapter: adapter,
155
+ destination: destination,
156
+ },
157
+ ) if attempts > 1
158
+
159
+ # record stats
160
+ key = [ adapter, action, destination ].join(":")
161
+ stats = StatsEngine.add(key, duration)
117
162
  tags = {
118
163
  adapter: adapter,
119
164
  destination: destination,
@@ -138,6 +183,8 @@ module NetworkResiliency
138
183
  tags: tags,
139
184
  )
140
185
  end
186
+
187
+ nil
141
188
  rescue => e
142
189
  NetworkResiliency.statsd&.increment(
143
190
  "network_resiliency.error",
@@ -157,11 +204,89 @@ module NetworkResiliency
157
204
  IP_ADDRESS_REGEX.match?(destination)
158
205
  end
159
206
 
207
+ def timeouts_for(adapter:, action:, destination:, max: nil)
208
+ default = [ max ]
209
+
210
+ return default if NetworkResiliency.mode == :observe
211
+
212
+ key = [ adapter, action, destination ].join(":")
213
+ stats = StatsEngine.get(key)
214
+
215
+ return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
216
+
217
+ tags = {
218
+ adapter: adapter,
219
+ action: action,
220
+ destination: destination,
221
+ }
222
+
223
+ p99 = (stats.avg + stats.stdev * 3).power_ceil
224
+ timeouts = []
225
+
226
+ if max
227
+ if p99 < max
228
+ timeouts << p99
229
+
230
+ # fallback attempt
231
+ if max - p99 > p99
232
+ # use remaining time for second attempt
233
+ timeouts << max - p99
234
+ else
235
+ timeouts << max
236
+
237
+ NetworkResiliency.statsd&.increment(
238
+ "network_resiliency.timeout.raised",
239
+ tags: tags,
240
+ )
241
+ end
242
+ else
243
+ # the specified timeout is less than our expected p99...awkward
244
+ timeouts << max
245
+
246
+ NetworkResiliency.statsd&.increment(
247
+ "network_resiliency.timeout.too_low",
248
+ tags: tags,
249
+ )
250
+ end
251
+ else
252
+ timeouts << p99
253
+
254
+ # timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
255
+
256
+ # unbounded second attempt
257
+ timeouts << nil
258
+
259
+ NetworkResiliency.statsd&.increment(
260
+ "network_resiliency.timeout.missing",
261
+ tags: tags,
262
+ )
263
+ end
264
+
265
+ timeouts
266
+ rescue => e
267
+ NetworkResiliency.statsd&.increment(
268
+ "network_resiliency.error",
269
+ tags: {
270
+ method: __method__,
271
+ type: e.class,
272
+ },
273
+ )
274
+
275
+ warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
276
+
277
+ default
278
+ end
279
+
160
280
  def reset
161
281
  @enabled = nil
282
+ @mode = nil
162
283
  Thread.current["network_resiliency"] = nil
163
284
  StatsEngine.reset
164
- @sync_worker.kill if @sync_worker
285
+
286
+ if @sync_worker
287
+ @sync_worker.kill
288
+ @sync_worker = nil
289
+ end
165
290
  end
166
291
 
167
292
  private
@@ -176,13 +301,11 @@ module NetworkResiliency
176
301
  raise "Redis not configured" unless redis
177
302
 
178
303
  @sync_worker = Thread.new do
179
- while true do
304
+ loop do
180
305
  StatsEngine.sync(redis)
181
306
 
182
307
  sleep(3)
183
308
  end
184
- rescue Interrupt
185
- # goodbye
186
309
  end
187
310
  end
188
311
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-03 00:00:00.000000000 Z
11
+ date: 2023-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug