network_resiliency 0.3.1 → 0.4.0

Sign up to get free protection for your applications and access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9a1103993c635cbda0e6a3ba3ef523c8179680353fa67b93842f0bdfa44b197
4
- data.tar.gz: aeb11a089950492bc6099933c137ccf79c27447a18ce6335e2a46c9982201591
3
+ metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
4
+ data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
5
5
  SHA512:
6
- metadata.gz: 7ea2cedcf4e6044299111e69c211a92a3d7df5305e66b4d6be83d32b949425daee581af33142f73f4d618ec178bd10e19ea0599175420838f57b0b61f157a8f5
7
- data.tar.gz: 772d5749b23a32e6be7542c10d18794f1905fd64dde7da5b702bba4b4fb6ceb62fa3b971ce120c550db2fbc89ccec11670e3029ee0649ec7c21266597b0cb637
6
+ metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
7
+ data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ### v0.4.0 (2023-11-13)
2
+ - redis resiliency
3
+ - timeouts_for
4
+ - mode
5
+
6
+ ### v0.3.2 (2023-11-03)
7
+ - stats observability
8
+ - surface errors in specs
9
+ - destination filter
10
+ - error observability
11
+ - improve order of magnitude
12
+
1
13
  ### v0.3.1 (2023-11-02)
2
14
  - sync thread safety
3
15
  - order of magnitude stats
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.3.1)
4
+ network_resiliency (0.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,35 +36,49 @@ module NetworkResiliency
36
36
  end
37
37
 
38
38
  module Instrumentation
39
- # def initialize(...)
40
- # super
41
-
42
- # @network_resiliency_attempts = options[:reconnect_attempts]
43
- # options[:reconnect_attempts] = 0
44
- # end
45
-
46
39
  def establish_connection
47
40
  return super unless NetworkResiliency.enabled?(:redis)
48
41
 
42
+ original_timeout = @options[:connect_timeout]
43
+
44
+ timeouts = NetworkResiliency.timeouts_for(
45
+ adapter: "redis",
46
+ action: "connect",
47
+ destination: host,
48
+ max: original_timeout,
49
+ )
50
+
51
+ attempts = 0
52
+ ts = -NetworkResiliency.timestamp
53
+
49
54
  begin
50
- ts = -NetworkResiliency.timestamp
55
+ attempts += 1
56
+ error = nil
57
+
58
+ @options[:connect_timeout] = timeouts.shift
51
59
 
52
60
  super
53
61
  rescue ::Redis::CannotConnectError => e
54
62
  # capture error
63
+
64
+ # grab underlying exception within Redis wrapper
65
+ error = e.cause.class
66
+
67
+ retry if timeouts.size > 0
68
+
55
69
  raise
56
70
  ensure
57
71
  ts += NetworkResiliency.timestamp
58
-
59
- # grab underlying exception within Redis wrapper
60
- error = e ? e.cause.class : nil
72
+ @options[:connect_timeout] = original_timeout
61
73
 
62
74
  NetworkResiliency.record(
63
75
  adapter: "redis",
64
76
  action: "connect",
65
77
  destination: host,
66
- error: error,
67
78
  duration: ts,
79
+ error: error,
80
+ timeout: @options[:connect_timeout],
81
+ attempts: attempts,
68
82
  )
69
83
  end
70
84
  end
@@ -1,8 +1,20 @@
1
1
  module NetworkResiliency
2
2
  module Refinements
3
3
  refine Numeric do
4
- def order_of_magnitude
5
- self == 0 ? 0 : 10 ** Math.log10(self).ceil
4
+ def order_of_magnitude(ceil: false)
5
+ return 0 if self <= 0
6
+ return 1 if self <= 1
7
+
8
+ log10 = Math.log10(self.round)
9
+ 10 ** (ceil ? log10.ceil : log10.floor)
10
+ end
11
+
12
+ def power_ceil
13
+ return 0 if self <= 0
14
+ return 1 if self <= 1
15
+
16
+ digits = Math.log10(self).floor
17
+ 10 ** digits * (self.to_f / 10 ** digits).ceil
6
18
  end
7
19
  end
8
20
  end
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -14,6 +14,9 @@ module NetworkResiliency
14
14
  autoload :Postgres, "network_resiliency/adapter/postgres"
15
15
  end
16
16
 
17
+ MODE = [ :observe, :resilient ].freeze
18
+ RESILIENCY_SIZE_THRESHOLD = 1_000
19
+
17
20
  extend self
18
21
 
19
22
  attr_accessor :statsd, :redis
@@ -87,13 +90,22 @@ module NetworkResiliency
87
90
  Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
88
91
  end
89
92
 
90
- # private
93
+ def mode
94
+ @mode || :observe
95
+ end
91
96
 
92
- IP_ADDRESS_REGEX = Regexp.new(/\d{1,3}(\.\d{1,3}){3}/)
97
+ def mode=(mode)
98
+ unless MODE.include?(mode)
99
+ raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
100
+ end
93
101
 
94
- def record(adapter:, action:, destination:, duration:, error: nil)
95
- # filter raw IP addresses
96
- return if IP_ADDRESS_REGEX.match?(destination)
102
+ @mode = mode
103
+ end
104
+
105
+ # private
106
+
107
+ def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
108
+ return if ignore_destination?(adapter, action, destination)
97
109
 
98
110
  NetworkResiliency.statsd&.distribution(
99
111
  "network_resiliency.#{action}",
@@ -102,12 +114,13 @@ module NetworkResiliency
102
114
  adapter: adapter,
103
115
  destination: destination,
104
116
  error: error,
117
+ attempts: (attempts if attempts > 1),
105
118
  }.compact,
106
119
  )
107
120
 
108
121
  NetworkResiliency.statsd&.distribution(
109
122
  "network_resiliency.#{action}.magnitude",
110
- duration.order_of_magnitude,
123
+ duration.order_of_magnitude(ceil: true),
111
124
  tags: {
112
125
  adapter: adapter,
113
126
  destination: destination,
@@ -115,24 +128,165 @@ module NetworkResiliency
115
128
  }.compact,
116
129
  )
117
130
 
131
+ NetworkResiliency.statsd&.gauge(
132
+ "network_resiliency.#{action}.timeout",
133
+ timeout,
134
+ tags: {
135
+ adapter: adapter,
136
+ destination: destination,
137
+ },
138
+ )
139
+
140
+ if error
141
+ NetworkResiliency.statsd&.distribution(
142
+ "network_resiliency.#{action}.time_saved",
143
+ timeout - duration,
144
+ tags: {
145
+ adapter: adapter,
146
+ destination: destination,
147
+ },
148
+ ) if timeout
149
+ else
150
+ # track successful retries
151
+ NetworkResiliency.statsd&.increment(
152
+ "network_resiliency.#{action}.resilient",
153
+ tags: {
154
+ adapter: adapter,
155
+ destination: destination,
156
+ },
157
+ ) if attempts > 1
158
+
159
+ # record stats
160
+ key = [ adapter, action, destination ].join(":")
161
+ stats = StatsEngine.add(key, duration)
162
+ tags = {
163
+ adapter: adapter,
164
+ destination: destination,
165
+ n: stats.n.order_of_magnitude,
166
+ }
167
+
168
+ NetworkResiliency.statsd&.distribution(
169
+ "network_resiliency.#{action}.stats.n",
170
+ stats.n,
171
+ tags: tags,
172
+ )
173
+
174
+ NetworkResiliency.statsd&.distribution(
175
+ "network_resiliency.#{action}.stats.avg",
176
+ stats.avg,
177
+ tags: tags,
178
+ )
179
+
180
+ NetworkResiliency.statsd&.distribution(
181
+ "network_resiliency.#{action}.stats.stdev",
182
+ stats.stdev,
183
+ tags: tags,
184
+ )
185
+ end
186
+
187
+ nil
188
+ rescue => e
189
+ NetworkResiliency.statsd&.increment(
190
+ "network_resiliency.error",
191
+ tags: {
192
+ method: __method__,
193
+ type: e.class,
194
+ },
195
+ )
196
+
197
+ warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
198
+ end
199
+
200
+ IP_ADDRESS_REGEX = Regexp.new(/\d{1,3}(\.\d{1,3}){3}/)
201
+
202
+ def ignore_destination?(adapter, action, destination)
203
+ # filter raw IP addresses
204
+ IP_ADDRESS_REGEX.match?(destination)
205
+ end
206
+
207
+ def timeouts_for(adapter:, action:, destination:, max: nil)
208
+ default = [ max ]
209
+
210
+ return default if NetworkResiliency.mode == :observe
211
+
118
212
  key = [ adapter, action, destination ].join(":")
119
- StatsEngine.add(key, duration)
213
+ stats = StatsEngine.get(key)
214
+
215
+ return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
216
+
217
+ tags = {
218
+ adapter: adapter,
219
+ action: action,
220
+ destination: destination,
221
+ }
222
+
223
+ p99 = (stats.avg + stats.stdev * 3).power_ceil
224
+ timeouts = []
225
+
226
+ if max
227
+ if p99 < max
228
+ timeouts << p99
229
+
230
+ # fallback attempt
231
+ if max - p99 > p99
232
+ # use remaining time for second attempt
233
+ timeouts << max - p99
234
+ else
235
+ timeouts << max
236
+
237
+ NetworkResiliency.statsd&.increment(
238
+ "network_resiliency.timeout.raised",
239
+ tags: tags,
240
+ )
241
+ end
242
+ else
243
+ # the specified timeout is less than our expected p99...awkward
244
+ timeouts << max
245
+
246
+ NetworkResiliency.statsd&.increment(
247
+ "network_resiliency.timeout.too_low",
248
+ tags: tags,
249
+ )
250
+ end
251
+ else
252
+ timeouts << p99
253
+
254
+ # timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
255
+
256
+ # unbounded second attempt
257
+ timeouts << nil
258
+
259
+ NetworkResiliency.statsd&.increment(
260
+ "network_resiliency.timeout.missing",
261
+ tags: tags,
262
+ )
263
+ end
264
+
265
+ timeouts
120
266
  rescue => e
121
267
  NetworkResiliency.statsd&.increment(
122
268
  "network_resiliency.error",
123
269
  tags: {
270
+ method: __method__,
124
271
  type: e.class,
125
272
  },
126
273
  )
127
274
 
128
275
  warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
276
+
277
+ default
129
278
  end
130
279
 
131
280
  def reset
132
281
  @enabled = nil
282
+ @mode = nil
133
283
  Thread.current["network_resiliency"] = nil
134
284
  StatsEngine.reset
135
- @sync_worker.kill if @sync_worker
285
+
286
+ if @sync_worker
287
+ @sync_worker.kill
288
+ @sync_worker = nil
289
+ end
136
290
  end
137
291
 
138
292
  private
@@ -147,13 +301,11 @@ module NetworkResiliency
147
301
  raise "Redis not configured" unless redis
148
302
 
149
303
  @sync_worker = Thread.new do
150
- while true do
304
+ loop do
151
305
  StatsEngine.sync(redis)
152
306
 
153
307
  sleep(3)
154
308
  end
155
- rescue Interrupt
156
- # goodbye
157
309
  end
158
310
  end
159
311
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-02 00:00:00.000000000 Z
11
+ date: 2023-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug