network_resiliency 0.3.1 → 0.4.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9a1103993c635cbda0e6a3ba3ef523c8179680353fa67b93842f0bdfa44b197
4
- data.tar.gz: aeb11a089950492bc6099933c137ccf79c27447a18ce6335e2a46c9982201591
3
+ metadata.gz: c372873056610bf21197e55a265bdfd03e200b064c727a4713ea8725ae3d696d
4
+ data.tar.gz: 5da4933543bf9a57d46ed4a3b4d94a692524e641f6c1293d8bcb99aa74c0c9d9
5
5
  SHA512:
6
- metadata.gz: 7ea2cedcf4e6044299111e69c211a92a3d7df5305e66b4d6be83d32b949425daee581af33142f73f4d618ec178bd10e19ea0599175420838f57b0b61f157a8f5
7
- data.tar.gz: 772d5749b23a32e6be7542c10d18794f1905fd64dde7da5b702bba4b4fb6ceb62fa3b971ce120c550db2fbc89ccec11670e3029ee0649ec7c21266597b0cb637
6
+ metadata.gz: b8e9b66cb83ff5bfda080c43935e8834228cfbd16afd0c1229be2b7edbea8469702d917ae7b6bb7383b774ebf68802e9a5572ee89357723d479d1dd8b3a91f2c
7
+ data.tar.gz: 68f241f0bd9b7ef5b0b291e2e1be11aa3d2c377f6b4b70552ba32638122bff300bc1252b279a13c41e96af3ffa8843bc571b74bd9aac7f1300f27472e18001ff
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ### v0.4.0 (2023-11-13)
2
+ - redis resiliency
3
+ - timeouts_for
4
+ - mode
5
+
6
+ ### v0.3.2 (2023-11-03)
7
+ - stats observability
8
+ - surface errors in specs
9
+ - destination filter
10
+ - error observability
11
+ - improve order of magnitude
12
+
1
13
  ### v0.3.1 (2023-11-02)
2
14
  - sync thread safety
3
15
  - order of magnitude stats
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.3.1)
4
+ network_resiliency (0.4.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -36,35 +36,49 @@ module NetworkResiliency
36
36
  end
37
37
 
38
38
  module Instrumentation
39
- # def initialize(...)
40
- # super
41
-
42
- # @network_resiliency_attempts = options[:reconnect_attempts]
43
- # options[:reconnect_attempts] = 0
44
- # end
45
-
46
39
  def establish_connection
47
40
  return super unless NetworkResiliency.enabled?(:redis)
48
41
 
42
+ original_timeout = @options[:connect_timeout]
43
+
44
+ timeouts = NetworkResiliency.timeouts_for(
45
+ adapter: "redis",
46
+ action: "connect",
47
+ destination: host,
48
+ max: original_timeout,
49
+ )
50
+
51
+ attempts = 0
52
+ ts = -NetworkResiliency.timestamp
53
+
49
54
  begin
50
- ts = -NetworkResiliency.timestamp
55
+ attempts += 1
56
+ error = nil
57
+
58
+ @options[:connect_timeout] = timeouts.shift
51
59
 
52
60
  super
53
61
  rescue ::Redis::CannotConnectError => e
54
62
  # capture error
63
+
64
+ # grab underlying exception within Redis wrapper
65
+ error = e.cause.class
66
+
67
+ retry if timeouts.size > 0
68
+
55
69
  raise
56
70
  ensure
57
71
  ts += NetworkResiliency.timestamp
58
-
59
- # grab underlying exception within Redis wrapper
60
- error = e ? e.cause.class : nil
72
+ @options[:connect_timeout] = original_timeout
61
73
 
62
74
  NetworkResiliency.record(
63
75
  adapter: "redis",
64
76
  action: "connect",
65
77
  destination: host,
66
- error: error,
67
78
  duration: ts,
79
+ error: error,
80
+ timeout: @options[:connect_timeout],
81
+ attempts: attempts,
68
82
  )
69
83
  end
70
84
  end
@@ -1,8 +1,20 @@
1
1
  module NetworkResiliency
2
2
  module Refinements
3
3
  refine Numeric do
4
- def order_of_magnitude
5
- self == 0 ? 0 : 10 ** Math.log10(self).ceil
4
+ def order_of_magnitude(ceil: false)
5
+ return 0 if self <= 0
6
+ return 1 if self <= 1
7
+
8
+ log10 = Math.log10(self.round)
9
+ 10 ** (ceil ? log10.ceil : log10.floor)
10
+ end
11
+
12
+ def power_ceil
13
+ return 0 if self <= 0
14
+ return 1 if self <= 1
15
+
16
+ digits = Math.log10(self).floor
17
+ 10 ** digits * (self.to_f / 10 ** digits).ceil
6
18
  end
7
19
  end
8
20
  end
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.3.1"
2
+ VERSION = "0.4.0"
3
3
  end
@@ -14,6 +14,9 @@ module NetworkResiliency
14
14
  autoload :Postgres, "network_resiliency/adapter/postgres"
15
15
  end
16
16
 
17
+ MODE = [ :observe, :resilient ].freeze
18
+ RESILIENCY_SIZE_THRESHOLD = 1_000
19
+
17
20
  extend self
18
21
 
19
22
  attr_accessor :statsd, :redis
@@ -87,13 +90,22 @@ module NetworkResiliency
87
90
  Process.clock_gettime(Process::CLOCK_MONOTONIC) * 1_000
88
91
  end
89
92
 
90
- # private
93
+ def mode
94
+ @mode || :observe
95
+ end
91
96
 
92
- IP_ADDRESS_REGEX = Regexp.new(/\d{1,3}(\.\d{1,3}){3}/)
97
+ def mode=(mode)
98
+ unless MODE.include?(mode)
99
+ raise ArgumentError, "invalid NetworkResiliency mode: #{mode}"
100
+ end
93
101
 
94
- def record(adapter:, action:, destination:, duration:, error: nil)
95
- # filter raw IP addresses
96
- return if IP_ADDRESS_REGEX.match?(destination)
102
+ @mode = mode
103
+ end
104
+
105
+ # private
106
+
107
+ def record(adapter:, action:, destination:, duration:, error:, timeout: nil, attempts: 1)
108
+ return if ignore_destination?(adapter, action, destination)
97
109
 
98
110
  NetworkResiliency.statsd&.distribution(
99
111
  "network_resiliency.#{action}",
@@ -102,12 +114,13 @@ module NetworkResiliency
102
114
  adapter: adapter,
103
115
  destination: destination,
104
116
  error: error,
117
+ attempts: (attempts if attempts > 1),
105
118
  }.compact,
106
119
  )
107
120
 
108
121
  NetworkResiliency.statsd&.distribution(
109
122
  "network_resiliency.#{action}.magnitude",
110
- duration.order_of_magnitude,
123
+ duration.order_of_magnitude(ceil: true),
111
124
  tags: {
112
125
  adapter: adapter,
113
126
  destination: destination,
@@ -115,24 +128,165 @@ module NetworkResiliency
115
128
  }.compact,
116
129
  )
117
130
 
131
+ NetworkResiliency.statsd&.gauge(
132
+ "network_resiliency.#{action}.timeout",
133
+ timeout,
134
+ tags: {
135
+ adapter: adapter,
136
+ destination: destination,
137
+ },
138
+ )
139
+
140
+ if error
141
+ NetworkResiliency.statsd&.distribution(
142
+ "network_resiliency.#{action}.time_saved",
143
+ timeout - duration,
144
+ tags: {
145
+ adapter: adapter,
146
+ destination: destination,
147
+ },
148
+ ) if timeout
149
+ else
150
+ # track successful retries
151
+ NetworkResiliency.statsd&.increment(
152
+ "network_resiliency.#{action}.resilient",
153
+ tags: {
154
+ adapter: adapter,
155
+ destination: destination,
156
+ },
157
+ ) if attempts > 1
158
+
159
+ # record stats
160
+ key = [ adapter, action, destination ].join(":")
161
+ stats = StatsEngine.add(key, duration)
162
+ tags = {
163
+ adapter: adapter,
164
+ destination: destination,
165
+ n: stats.n.order_of_magnitude,
166
+ }
167
+
168
+ NetworkResiliency.statsd&.distribution(
169
+ "network_resiliency.#{action}.stats.n",
170
+ stats.n,
171
+ tags: tags,
172
+ )
173
+
174
+ NetworkResiliency.statsd&.distribution(
175
+ "network_resiliency.#{action}.stats.avg",
176
+ stats.avg,
177
+ tags: tags,
178
+ )
179
+
180
+ NetworkResiliency.statsd&.distribution(
181
+ "network_resiliency.#{action}.stats.stdev",
182
+ stats.stdev,
183
+ tags: tags,
184
+ )
185
+ end
186
+
187
+ nil
188
+ rescue => e
189
+ NetworkResiliency.statsd&.increment(
190
+ "network_resiliency.error",
191
+ tags: {
192
+ method: __method__,
193
+ type: e.class,
194
+ },
195
+ )
196
+
197
+ warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
198
+ end
199
+
200
+ IP_ADDRESS_REGEX = Regexp.new(/\d{1,3}(\.\d{1,3}){3}/)
201
+
202
+ def ignore_destination?(adapter, action, destination)
203
+ # filter raw IP addresses
204
+ IP_ADDRESS_REGEX.match?(destination)
205
+ end
206
+
207
+ def timeouts_for(adapter:, action:, destination:, max: nil)
208
+ default = [ max ]
209
+
210
+ return default if NetworkResiliency.mode == :observe
211
+
118
212
  key = [ adapter, action, destination ].join(":")
119
- StatsEngine.add(key, duration)
213
+ stats = StatsEngine.get(key)
214
+
215
+ return default unless stats.n >= RESILIENCY_SIZE_THRESHOLD
216
+
217
+ tags = {
218
+ adapter: adapter,
219
+ action: action,
220
+ destination: destination,
221
+ }
222
+
223
+ p99 = (stats.avg + stats.stdev * 3).power_ceil
224
+ timeouts = []
225
+
226
+ if max
227
+ if p99 < max
228
+ timeouts << p99
229
+
230
+ # fallback attempt
231
+ if max - p99 > p99
232
+ # use remaining time for second attempt
233
+ timeouts << max - p99
234
+ else
235
+ timeouts << max
236
+
237
+ NetworkResiliency.statsd&.increment(
238
+ "network_resiliency.timeout.raised",
239
+ tags: tags,
240
+ )
241
+ end
242
+ else
243
+ # the specified timeout is less than our expected p99...awkward
244
+ timeouts << max
245
+
246
+ NetworkResiliency.statsd&.increment(
247
+ "network_resiliency.timeout.too_low",
248
+ tags: tags,
249
+ )
250
+ end
251
+ else
252
+ timeouts << p99
253
+
254
+ # timeouts << p99 * 10 if NetworkResiliency.mode == :resolute
255
+
256
+ # unbounded second attempt
257
+ timeouts << nil
258
+
259
+ NetworkResiliency.statsd&.increment(
260
+ "network_resiliency.timeout.missing",
261
+ tags: tags,
262
+ )
263
+ end
264
+
265
+ timeouts
120
266
  rescue => e
121
267
  NetworkResiliency.statsd&.increment(
122
268
  "network_resiliency.error",
123
269
  tags: {
270
+ method: __method__,
124
271
  type: e.class,
125
272
  },
126
273
  )
127
274
 
128
275
  warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
276
+
277
+ default
129
278
  end
130
279
 
131
280
  def reset
132
281
  @enabled = nil
282
+ @mode = nil
133
283
  Thread.current["network_resiliency"] = nil
134
284
  StatsEngine.reset
135
- @sync_worker.kill if @sync_worker
285
+
286
+ if @sync_worker
287
+ @sync_worker.kill
288
+ @sync_worker = nil
289
+ end
136
290
  end
137
291
 
138
292
  private
@@ -147,13 +301,11 @@ module NetworkResiliency
147
301
  raise "Redis not configured" unless redis
148
302
 
149
303
  @sync_worker = Thread.new do
150
- while true do
304
+ loop do
151
305
  StatsEngine.sync(redis)
152
306
 
153
307
  sleep(3)
154
308
  end
155
- rescue Interrupt
156
- # goodbye
157
309
  end
158
310
  end
159
311
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-11-02 00:00:00.000000000 Z
11
+ date: 2023-11-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug