network_resiliency 0.3.2 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 19102b63c5f8090d6805ac5ce1d71a25e30ab7483ef11bf5d9cee771bb4ce508
4
- data.tar.gz: a9f37e7fafbfc1d45e0f8b24b655cf72b33300b5b6335e396d9dbffc22479799
3
+ metadata.gz: 8949456e392f5198caa080238dff62a567114a9386d3a6f575fdb855fd5b5e98
4
+ data.tar.gz: 95d58f753ccb7f4aa5ffe1093d98d0f5e33016bb7e06caa111a181d29c8ac5fb
5
5
  SHA512:
6
- metadata.gz: 27390e9651bef755a92a02218c286291971e7c77baabbddd062e3279d5a52f3a819fbbd9b0a9f91e289bd3124afdc028e42caf6b504c54bb198d959ba54dd1f6
7
- data.tar.gz: 56763326a26679f03124a1a4f8b5dab2de10edeeebaff1a4dab8c821ee9b8a035b3b8cf285089bac944c722cc647bcb47264b7fe60295dfbf51d35c4977d2d1f
6
+ metadata.gz: d1544a231ebcdd48c6838a8b398f3d4463c9cae2e18182c8e28fda3c4cfeba51b75b6e355342f98a16a51979da035d65040dffdd83e1bcd0273c636f14332236
7
+ data.tar.gz: a90be5d5152dee3ce23ef7d682ce62dd495293e429d06494e5dcc91eb9cf3bf4a0bcf26b1cece0665815ab181394feff48016c9781e81b69408de3dca1572ec1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ### v0.4.1 (2023-11-14)
2
+ - timeout units
3
+
4
+ ### v0.4.0 (2023-11-13)
5
+ - redis resiliency
6
+ - timeouts_for
7
+ - mode
8
+
1
9
  ### v0.3.2 (2023-11-03)
2
10
  - stats observability
3
11
  - surface errors in specs
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.3.2)
4
+ network_resiliency (0.4.1)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,35 +36,50 @@ module NetworkResiliency
36
36
  end
37
37
 
38
38
  module Instrumentation
39
- # def initialize(...)
40
- # super
41
-
42
- # @network_resiliency_attempts = options[:reconnect_attempts]
43
- # options[:reconnect_attempts] = 0
44
- # end
45
-
46
39
  def establish_connection
47
40
  return super unless NetworkResiliency.enabled?(:redis)
48
41
 
42
+ original_timeout = @options[:connect_timeout]
43
+
44
+ timeouts = NetworkResiliency.timeouts_for(
45
+ adapter: "redis",
46
+ action: "connect",
47
+ destination: host,
48
+ max: original_timeout,
49
+ units: :seconds,
50
+ )
51
+
52
+ attempts = 0
53
+ ts = -NetworkResiliency.timestamp
54
+
49
55
  begin
50
- ts = -NetworkResiliency.timestamp
56
+ attempts += 1
57
+ error = nil
58
+
59
+ @options[:connect_timeout] = timeouts.shift
51
60
 
52
61
  super
53
62
  rescue ::Redis::CannotConnectError => e
54
63
  # capture error
64
+
65
+ # grab underlying exception within Redis wrapper
66
+ error = e.cause.class
67
+
68
+ retry if timeouts.size > 0
69
+
55
70
  raise
56
71
  ensure
57
72
  ts += NetworkResiliency.timestamp
58
-
59
- # grab underlying exception within Redis wrapper
60
- error = e ? e.cause.class : nil
73
+ @options[:connect_timeout] = original_timeout
61
74
 
62
75
  NetworkResiliency.record(
63
76
  adapter: "redis",
64
77
  action: "connect",
65
78
  destination: host,
66
- error: error,
67
79
  duration: ts,
80
+ error: error,
81
+ timeout: @options[:connect_timeout] * 1_000,
82
+ attempts: attempts,
68
83
  )
69
84
  end
70
85
  end
@@ -8,6 +8,14 @@ module NetworkResiliency
8
8
  log10 = Math.log10(self.round)
9
9
  10 ** (ceil ? log10.ceil : log10.floor)
10
10
  end
11
+
12
+ def power_ceil
13
+ return 0 if self <= 0
14
+ return 1 if self <= 1
15
+
16
+ digits = Math.log10(self).floor
17
+ 10 ** digits * (self.to_f / 10 ** digits).ceil
18
+ end
11
19
  end
12
20
  end
13
21
  end
@@ -31,7 +31,7 @@ module NetworkResiliency
31
31
 
32
32
  # select data to be synced
33
33
  data = synchronize do
34
- # ensure sync is not run concurrently
34
+ # ensure sync does not run concurrently
35
35
  return [] if @syncing
36
36
  @syncing = Thread.current
37
37
 
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.3.2"
2
+ VERSION = "0.4.1"
3
3
  end
@@ -14,6 +14,9 @@ module NetworkResiliency
14
14
  autoload :Postgres, "network_resiliency/adapter/postgres"
15
15
  end
16
16
 
17
+ MODE = [ :observe, :resilient ].freeze
18
+ RESILIENCY_SIZE_THRESHOLD = 1_000
19
+
17
20
  extend self
18
21
 
19
22
  attr_accessor :statsd, :redis
@@ -87,9 +90,21 @@ module NetworkResiliency
87
90
  Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
88
91
  end
89
92
 
93
+ def mode
94
+ @mode || :observe
95
+ end
96
+
97
+ def mode=(mode)
98
+ unless MODE.include?(mode)
99
+ raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
100
+ end
101
+
102
+ @mode = mode
103
+ end
104
+
90
105
  # private
91
106
 
92
- def record(adapter:, action:, destination:, duration:, error: nil)
107
+ def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
93
108
  return if ignore_destination?(adapter, action, destination)
94
109
 
95
110
  NetworkResiliency.statsd&.distribution(
@@ -99,6 +114,7 @@ module NetworkResiliency
99
114
  adapter: adapter,
100
115
  destination: destination,
101
116
  error: error,
117
+ attempts: (attempts if attempts > 1),
102
118
  }.compact,
103
119
  )
104
120
 
@@ -112,8 +128,37 @@ module NetworkResiliency
112
128
  }.compact,
113
129
  )
114
130
 
115
- key = [ adapter, action, destination ].join(":")
116
- StatsEngine.add(key, duration).tap do |stats|
131
+ NetworkResiliency.statsd&.gauge(
132
+ "network_resiliency.#{action}.timeout",
133
+ timeout,
134
+ tags: {
135
+ adapter: adapter,
136
+ destination: destination,
137
+ },
138
+ )
139
+
140
+ if error
141
+ NetworkResiliency.statsd&.distribution(
142
+ "network_resiliency.#{action}.time_saved",
143
+ timeout - duration,
144
+ tags: {
145
+ adapter: adapter,
146
+ destination: destination,
147
+ },
148
+ ) if timeout
149
+ else
150
+ # track successful retries
151
+ NetworkResiliency.statsd&.increment(
152
+ "network_resiliency.#{action}.resilient",
153
+ tags: {
154
+ adapter: adapter,
155
+ destination: destination,
156
+ },
157
+ ) if attempts > 1
158
+
159
+ # record stats
160
+ key = [ adapter, action, destination ].join(":")
161
+ stats = StatsEngine.add(key, duration)
117
162
  tags = {
118
163
  adapter: adapter,
119
164
  destination: destination,
@@ -138,6 +183,8 @@ module NetworkResiliency
138
183
  tags: tags,
139
184
  )
140
185
  end
186
+
187
+ nil
141
188
  rescue => e
142
189
  NetworkResiliency.statsd&.increment(
143
190
  "network_resiliency.error",
@@ -157,11 +204,98 @@ module NetworkResiliency
157
204
  IP_ADDRESS_REGEX.match?(destination)
158
205
  end
159
206
 
207
+ def timeouts_for(adapter:, action:, destination:, max: nil, units: :ms)
208
+ default = [ max ]
209
+
210
+ return default if NetworkResiliency.mode == :observe
211
+
212
+ key = [ adapter, action, destination ].join(":")
213
+ stats = StatsEngine.get(key)
214
+
215
+ return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
216
+
217
+ tags = {
218
+ adapter: adapter,
219
+ action: action,
220
+ destination: destination,
221
+ }
222
+
223
+ p99 = (stats.avg + stats.stdev * 3).power_ceil
224
+ timeouts = []
225
+
226
+ if max
227
+ max *= 1_000 if units == :s || units == :seconds
228
+
229
+ if p99 < max
230
+ timeouts << p99
231
+
232
+ # fallback attempt
233
+ if max - p99 > p99
234
+ # use remaining time for second attempt
235
+ timeouts << max - p99
236
+ else
237
+ timeouts << max
238
+
239
+ NetworkResiliency.statsd&.increment(
240
+ "network_resiliency.timeout.raised",
241
+ tags: tags,
242
+ )
243
+ end
244
+ else
245
+ # the specified timeout is less than our expected p99...awkward
246
+ timeouts << max
247
+
248
+ NetworkResiliency.statsd&.increment(
249
+ "network_resiliency.timeout.too_low",
250
+ tags: tags,
251
+ )
252
+ end
253
+ else
254
+ timeouts << p99
255
+
256
+ # timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
257
+
258
+ # unbounded second attempt
259
+ timeouts << nil
260
+
261
+ NetworkResiliency.statsd&.increment(
262
+ "network_resiliency.timeout.missing",
263
+ tags: tags,
264
+ )
265
+ end
266
+
267
+ case units
268
+ when nil, :ms, :milliseconds
269
+ timeouts
270
+ when :s, :seconds
271
+ timeouts.map { |t| t.to_f / 1_000 if t }
272
+ else
273
+ raise ArgumentError, "invalid units: #{units}"
274
+ end
275
+ rescue => e
276
+ NetworkResiliency.statsd&.increment(
277
+ "network_resiliency.error",
278
+ tags: {
279
+ method: __method__,
280
+ type: e.class,
281
+ },
282
+ )
283
+
284
+ warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
285
+
286
+ default
287
+ end
288
+
160
289
  def reset
161
290
  @enabled = nil
291
+ @mode = nil
162
292
  Thread.current["network_resiliency"] = nil
163
293
  StatsEngine.reset
164
- @sync_worker.kill if @sync_worker
294
+
295
+ if @sync_worker
296
+ @sync_worker.kill
297
+ @sync_worker = nil
298
+ end
165
299
  end
166
300
 
167
301
  private
@@ -176,13 +310,11 @@ module NetworkResiliency
176
310
  raise "Redis not configured" unless redis
177
311
 
178
312
  @sync_worker = Thread.new do
179
- while true do
313
+ loop do
180
314
  StatsEngine.sync(redis)
181
315
 
182
316
  sleep(3)
183
317
  end
184
- rescue Interrupt
185
- # goodbye
186
318
  end
187
319
  end
188
320
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-03 00:00:00.000000000 Z
11
+ date: 2023-11-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug