network_resiliency 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19102b63c5f8090d6805ac5ce1d71a25e30ab7483ef11bf5d9cee771bb4ce508
4
- data.tar.gz: a9f37e7fafbfc1d45e0f8b24b655cf72b33300b5b6335e396d9dbffc22479799
3
+ metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
4
+ data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
5
5
  SHA512:
6
- metadata.gz: 27390e9651bef755a92a02218c286291971e7c77baabbddd062e3279d5a52f3a819fbbd9b0a9f91e289bd3124afdc028e42caf6b504c54bb198d959ba54dd1f6
7
- data.tar.gz: 56763326a26679f03124a1a4f8b5dab2de10edeeebaff1a4dab8c821ee9b8a035b3b8cf285089bac944c722cc647bcb47264b7fe60295dfbf51d35c4977d2d1f
6
+ metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
7
+ data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ### v0.4.0 (2023-11-13)
2
+ - redis resiliency
3
+ - timeouts_for
4
+ - mode
5
+
1
6
  ### v0.3.2 (2023-11-03)
2
7
  - stats observability
3
8
  - surface errors in specs
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.3.2)
4
+ network_resiliency (0.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,35 +36,49 @@ module NetworkResiliency
36
36
  end
37
37
 
38
38
  module Instrumentation
39
- # def initialize(...)
40
- # super
41
-
42
- # @network_resiliency_attempts = options[:reconnect_attempts]
43
- # options[:reconnect_attempts] = 0
44
- # end
45
-
46
39
  def establish_connection
47
40
  return super unless NetworkResiliency.enabled?(:redis)
48
41
 
42
+ original_timeout = @options[:connect_timeout]
43
+
44
+ timeouts = NetworkResiliency.timeouts_for(
45
+ adapter: "redis",
46
+ action: "connect",
47
+ destination: host,
48
+ max: original_timeout,
49
+ )
50
+
51
+ attempts = 0
52
+ ts = -NetworkResiliency.timestamp
53
+
49
54
  begin
50
- ts = -NetworkResiliency.timestamp
55
+ attempts += 1
56
+ error = nil
57
+
58
+ @options[:connect_timeout] = timeouts.shift
51
59
 
52
60
  super
53
61
  rescue ::Redis::CannotConnectError => e
54
62
  # capture error
63
+
64
+ # grab underlying exception within Redis wrapper
65
+ error = e.cause.class
66
+
67
+ retry if timeouts.size > 0
68
+
55
69
  raise
56
70
  ensure
57
71
  ts += NetworkResiliency.timestamp
58
-
59
- # grab underlying exception within Redis wrapper
60
- error = e ? e.cause.class : nil
72
+ @options[:connect_timeout] = original_timeout
61
73
 
62
74
  NetworkResiliency.record(
63
75
  adapter: "redis",
64
76
  action: "connect",
65
77
  destination: host,
66
- error: error,
67
78
  duration: ts,
79
+ error: error,
80
+ timeout: @options[:connect_timeout],
81
+ attempts: attempts,
68
82
  )
69
83
  end
70
84
  end
@@ -8,6 +8,14 @@ module NetworkResiliency
8
8
  log10 = Math.log10(self.round)
9
9
  10 ** (ceil ? log10.ceil : log10.floor)
10
10
  end
11
+
12
+ def power_ceil
13
+ return 0 if self <= 0
14
+ return 1 if self <= 1
15
+
16
+ digits = Math.log10(self).floor
17
+ 10 ** digits * (self.to_f / 10 ** digits).ceil
18
+ end
11
19
  end
12
20
  end
13
21
  end
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -14,6 +14,9 @@ module NetworkResiliency
14
14
  autoload :Postgres, "network_resiliency/adapter/postgres"
15
15
  end
16
16
 
17
+ MODE = [ :observe, :resilient ].freeze
18
+ RESILIENCY_SIZE_THRESHOLD = 1_000
19
+
17
20
  extend self
18
21
 
19
22
  attr_accessor :statsd, :redis
@@ -87,9 +90,21 @@ module NetworkResiliency
87
90
  Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
88
91
  end
89
92
 
93
+ def mode
94
+ @mode || :observe
95
+ end
96
+
97
+ def mode=(mode)
98
+ unless MODE.include?(mode)
99
+ raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
100
+ end
101
+
102
+ @mode = mode
103
+ end
104
+
90
105
  # private
91
106
 
92
- def record(adapter:, action:, destination:, duration:, error: nil)
107
+ def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
93
108
  return if ignore_destination?(adapter, action, destination)
94
109
 
95
110
  NetworkResiliency.statsd&.distribution(
@@ -99,6 +114,7 @@ module NetworkResiliency
99
114
  adapter: adapter,
100
115
  destination: destination,
101
116
  error: error,
117
+ attempts: (attempts if attempts > 1),
102
118
  }.compact,
103
119
  )
104
120
 
@@ -112,8 +128,37 @@ module NetworkResiliency
112
128
  }.compact,
113
129
  )
114
130
 
115
- key = [ adapter, action, destination ].join(":")
116
- StatsEngine.add(key, duration).tap do |stats|
131
+ NetworkResiliency.statsd&.gauge(
132
+ "network_resiliency.#{action}.timeout",
133
+ timeout,
134
+ tags: {
135
+ adapter: adapter,
136
+ destination: destination,
137
+ },
138
+ )
139
+
140
+ if error
141
+ NetworkResiliency.statsd&.distribution(
142
+ "network_resiliency.#{action}.time_saved",
143
+ timeout - duration,
144
+ tags: {
145
+ adapter: adapter,
146
+ destination: destination,
147
+ },
148
+ ) if timeout
149
+ else
150
+ # track successful retries
151
+ NetworkResiliency.statsd&.increment(
152
+ "network_resiliency.#{action}.resilient",
153
+ tags: {
154
+ adapter: adapter,
155
+ destination: destination,
156
+ },
157
+ ) if attempts > 1
158
+
159
+ # record stats
160
+ key = [ adapter, action, destination ].join(":")
161
+ stats = StatsEngine.add(key, duration)
117
162
  tags = {
118
163
  adapter: adapter,
119
164
  destination: destination,
@@ -138,6 +183,8 @@ module NetworkResiliency
138
183
  tags: tags,
139
184
  )
140
185
  end
186
+
187
+ nil
141
188
  rescue => e
142
189
  NetworkResiliency.statsd&.increment(
143
190
  "network_resiliency.error",
@@ -157,11 +204,89 @@ module NetworkResiliency
157
204
  IP_ADDRESS_REGEX.match?(destination)
158
205
  end
159
206
 
207
+ def timeouts_for(adapter:, action:, destination:, max: nil)
208
+ default = [ max ]
209
+
210
+ return default if NetworkResiliency.mode == :observe
211
+
212
+ key = [ adapter, action, destination ].join(":")
213
+ stats = StatsEngine.get(key)
214
+
215
+ return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
216
+
217
+ tags = {
218
+ adapter: adapter,
219
+ action: action,
220
+ destination: destination,
221
+ }
222
+
223
+ p99 = (stats.avg + stats.stdev * 3).power_ceil
224
+ timeouts = []
225
+
226
+ if max
227
+ if p99 < max
228
+ timeouts << p99
229
+
230
+ # fallback attempt
231
+ if max - p99 > p99
232
+ # use remaining time for second attempt
233
+ timeouts << max - p99
234
+ else
235
+ timeouts << max
236
+
237
+ NetworkResiliency.statsd&.increment(
238
+ "network_resiliency.timeout.raised",
239
+ tags: tags,
240
+ )
241
+ end
242
+ else
243
+ # the specified timeout is less than our expected p99...awkward
244
+ timeouts << max
245
+
246
+ NetworkResiliency.statsd&.increment(
247
+ "network_resiliency.timeout.too_low",
248
+ tags: tags,
249
+ )
250
+ end
251
+ else
252
+ timeouts << p99
253
+
254
+ # timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
255
+
256
+ # unbounded second attempt
257
+ timeouts << nil
258
+
259
+ NetworkResiliency.statsd&.increment(
260
+ "network_resiliency.timeout.missing",
261
+ tags: tags,
262
+ )
263
+ end
264
+
265
+ timeouts
266
+ rescue => e
267
+ NetworkResiliency.statsd&.increment(
268
+ "network_resiliency.error",
269
+ tags: {
270
+ method: __method__,
271
+ type: e.class,
272
+ },
273
+ )
274
+
275
+ warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
276
+
277
+ default
278
+ end
279
+
160
280
  def reset
161
281
  @enabled = nil
282
+ @mode = nil
162
283
  Thread.current["network_resiliency"] = nil
163
284
  StatsEngine.reset
164
- @sync_worker.kill if @sync_worker
285
+
286
+ if @sync_worker
287
+ @sync_worker.kill
288
+ @sync_worker = nil
289
+ end
165
290
  end
166
291
 
167
292
  private
@@ -176,13 +301,11 @@ module NetworkResiliency
176
301
  raise "Redis not configured" unless redis
177
302
 
178
303
  @sync_worker = Thread.new do
179
- while true do
304
+ loop do
180
305
  StatsEngine.sync(redis)
181
306
 
182
307
  sleep(3)
183
308
  end
184
- rescue Interrupt
185
- # goodbye
186
309
  end
187
310
  end
188
311
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-03 00:00:00.000000000 Z
11
+ date: 2023-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug