network_resiliency 0.7.9 → 0.7.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/Gemfile.lock +1 -1
- data/lib/network_resiliency/stats.rb +4 -6
- data/lib/network_resiliency/stats_engine.rb +2 -0
- data/lib/network_resiliency/syncer.rb +7 -1
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +29 -28
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eec5786e7daaea4b6f3c285dffc1e4bf1c70d7096ce69d44f4cc4ea80111ea5
|
4
|
+
data.tar.gz: bc3ca5e592f1a809bfb28453e43d49a64405a424861aef2c6c6745ef2f27ebb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fa1af69b1f175fb708247ddd0e9501ae2b945909197b26c343347e113bb78e3e645c5217c5f74d63fda780a027b730746b8641b9f77cb5c31626bc2f985c60ff
|
7
|
+
data.tar.gz: e9733ddd7978b321f8c49218de503f42b300aa269757559cac442dcefb43e0b136e1507360629e9aad1976cedd31c7c0ccd4e9a256c41962a990e50f046d3979
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -97,7 +97,7 @@ module NetworkResiliency
|
|
97
97
|
MIN_SAMPLE_SIZE = 1000
|
98
98
|
MAX_WINDOW_LENGTH = 1000
|
99
99
|
STATS_TTL = 24 * 60 * 60 # 1 day
|
100
|
-
CACHE_TTL =
|
100
|
+
CACHE_TTL = 120 # seconds
|
101
101
|
|
102
102
|
LUA_SCRIPT = <<~LUA
|
103
103
|
local results = {}
|
@@ -189,11 +189,9 @@ module NetworkResiliency
|
|
189
189
|
end
|
190
190
|
|
191
191
|
res = redis.eval(LUA_SCRIPT, keys, args)
|
192
|
-
data.keys.zip(res.each_slice(3)).
|
193
|
-
n, avg, sq_dist
|
194
|
-
|
195
|
-
[ key, Stats.from(n: n, avg: avg, sq_dist: sq_dist) ]
|
196
|
-
end.to_h
|
192
|
+
data.keys.zip(res.each_slice(3)).to_h.transform_values! do |n, avg, sq_dist|
|
193
|
+
Stats.from(n: n, avg: avg, sq_dist: sq_dist)
|
194
|
+
end
|
197
195
|
end
|
198
196
|
|
199
197
|
def self.fetch(redis, keys)
|
@@ -65,11 +65,13 @@ module NetworkResiliency
|
|
65
65
|
empty: data.empty?,
|
66
66
|
truncated: data.size < dirty_keys.size,
|
67
67
|
}.select { |_, v| v },
|
68
|
+
sample_rate: SAMPLE_RATE[:sync],
|
68
69
|
)
|
69
70
|
|
70
71
|
NetworkResiliency.statsd&.distribution(
|
71
72
|
"network_resiliency.sync.keys.dirty",
|
72
73
|
dirty_keys.select { |_, n| n > 0 }.count,
|
74
|
+
sample_rate: SAMPLE_RATE[:sync],
|
73
75
|
)
|
74
76
|
|
75
77
|
return [] if data.empty?
|
@@ -1,6 +1,7 @@
|
|
1
1
|
module NetworkResiliency
|
2
2
|
class Syncer < Thread
|
3
3
|
LOCK = Mutex.new
|
4
|
+
SLEEP_DURATION = 10
|
4
5
|
|
5
6
|
class << self
|
6
7
|
def start
|
@@ -45,12 +46,17 @@ module NetworkResiliency
|
|
45
46
|
private
|
46
47
|
|
47
48
|
def sync
|
49
|
+
# force redis to reconnect post fork
|
50
|
+
NetworkResiliency.redis.disconnect! if NetworkResiliency.redis.connected?
|
51
|
+
|
48
52
|
until @shutdown
|
49
53
|
StatsEngine.sync(NetworkResiliency.redis)
|
50
54
|
|
51
|
-
sleep(
|
55
|
+
sleep(SLEEP_DURATION)
|
52
56
|
end
|
53
57
|
rescue Interrupt
|
58
|
+
rescue => e
|
59
|
+
NetworkResiliency.warn(__method__, e)
|
54
60
|
end
|
55
61
|
end
|
56
62
|
end
|
data/lib/network_resiliency.rb
CHANGED
@@ -20,6 +20,11 @@ module NetworkResiliency
|
|
20
20
|
ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
|
21
21
|
MODE = [ :observe, :resilient ].freeze
|
22
22
|
RESILIENCY_SIZE_THRESHOLD = 1_000
|
23
|
+
SAMPLE_RATE = {
|
24
|
+
timeout: 0.1,
|
25
|
+
stats: 0.1,
|
26
|
+
sync: 0.1,
|
27
|
+
}
|
23
28
|
|
24
29
|
extend self
|
25
30
|
|
@@ -121,15 +126,7 @@ module NetworkResiliency
|
|
121
126
|
|
122
127
|
mode
|
123
128
|
rescue => e
|
124
|
-
|
125
|
-
"network_resiliency.error",
|
126
|
-
tags: {
|
127
|
-
method: __method__,
|
128
|
-
type: e.class,
|
129
|
-
},
|
130
|
-
)
|
131
|
-
|
132
|
-
warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
|
129
|
+
warn(__method__, e)
|
133
130
|
|
134
131
|
:observe
|
135
132
|
end
|
@@ -243,6 +240,7 @@ module NetworkResiliency
|
|
243
240
|
adapter: adapter,
|
244
241
|
destination: destination,
|
245
242
|
},
|
243
|
+
sample_rate: SAMPLE_RATE[:timeout],
|
246
244
|
) if timeout && timeout > 0
|
247
245
|
|
248
246
|
if error
|
@@ -272,32 +270,27 @@ module NetworkResiliency
|
|
272
270
|
"network_resiliency.#{action}.stats.n",
|
273
271
|
stats.n,
|
274
272
|
tags: tags,
|
273
|
+
sample_rate: SAMPLE_RATE[:stats],
|
275
274
|
)
|
276
275
|
|
277
276
|
NetworkResiliency.statsd&.distribution(
|
278
277
|
"network_resiliency.#{action}.stats.avg",
|
279
278
|
stats.avg,
|
280
279
|
tags: tags,
|
280
|
+
sample_rate: SAMPLE_RATE[:stats],
|
281
281
|
)
|
282
282
|
|
283
283
|
NetworkResiliency.statsd&.distribution(
|
284
284
|
"network_resiliency.#{action}.stats.stdev",
|
285
285
|
stats.stdev,
|
286
286
|
tags: tags,
|
287
|
+
sample_rate: SAMPLE_RATE[:stats],
|
287
288
|
)
|
288
289
|
end
|
289
290
|
|
290
291
|
nil
|
291
292
|
rescue => e
|
292
|
-
|
293
|
-
"network_resiliency.error",
|
294
|
-
tags: {
|
295
|
-
method: __method__,
|
296
|
-
type: e.class,
|
297
|
-
},
|
298
|
-
)
|
299
|
-
|
300
|
-
warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
|
293
|
+
warn(__method__, e)
|
301
294
|
end
|
302
295
|
|
303
296
|
IP_ADDRESS_REGEX = /\d{1,3}(\.\d{1,3}){3}/
|
@@ -323,7 +316,7 @@ module NetworkResiliency
|
|
323
316
|
destination: destination,
|
324
317
|
}
|
325
318
|
|
326
|
-
p99 = (stats.avg + stats.stdev *
|
319
|
+
p99 = (stats.avg + stats.stdev * 2).order_of_magnitude(ceil: true)
|
327
320
|
|
328
321
|
timeouts = []
|
329
322
|
|
@@ -343,6 +336,7 @@ module NetworkResiliency
|
|
343
336
|
NetworkResiliency.statsd&.increment(
|
344
337
|
"network_resiliency.timeout.raised",
|
345
338
|
tags: tags,
|
339
|
+
sample_rate: SAMPLE_RATE[:timeout],
|
346
340
|
)
|
347
341
|
end
|
348
342
|
else
|
@@ -352,6 +346,7 @@ module NetworkResiliency
|
|
352
346
|
NetworkResiliency.statsd&.increment(
|
353
347
|
"network_resiliency.timeout.too_low",
|
354
348
|
tags: tags,
|
349
|
+
sample_rate: SAMPLE_RATE[:timeout],
|
355
350
|
)
|
356
351
|
end
|
357
352
|
else
|
@@ -365,6 +360,7 @@ module NetworkResiliency
|
|
365
360
|
NetworkResiliency.statsd&.increment(
|
366
361
|
"network_resiliency.timeout.missing",
|
367
362
|
tags: tags,
|
363
|
+
sample_rate: SAMPLE_RATE[:timeout],
|
368
364
|
)
|
369
365
|
end
|
370
366
|
|
@@ -375,6 +371,7 @@ module NetworkResiliency
|
|
375
371
|
adapter: adapter,
|
376
372
|
destination: destination,
|
377
373
|
},
|
374
|
+
sample_rate: SAMPLE_RATE[:timeout],
|
378
375
|
)
|
379
376
|
|
380
377
|
case units
|
@@ -386,15 +383,7 @@ module NetworkResiliency
|
|
386
383
|
raise ArgumentError, "invalid units: #{units}"
|
387
384
|
end
|
388
385
|
rescue => e
|
389
|
-
|
390
|
-
"network_resiliency.error",
|
391
|
-
tags: {
|
392
|
-
method: __method__,
|
393
|
-
type: e.class,
|
394
|
-
},
|
395
|
-
)
|
396
|
-
|
397
|
-
warn "[ERROR] NetworkResiliency: #{e.class}: #{e.message}"
|
386
|
+
warn(__method__, e)
|
398
387
|
|
399
388
|
default
|
400
389
|
end
|
@@ -414,4 +403,16 @@ module NetworkResiliency
|
|
414
403
|
def thread_state
|
415
404
|
Thread.current["network_resiliency"] ||= {}
|
416
405
|
end
|
406
|
+
|
407
|
+
def warn(method, e)
|
408
|
+
NetworkResiliency.statsd&.increment(
|
409
|
+
"network_resiliency.error",
|
410
|
+
tags: {
|
411
|
+
method: method,
|
412
|
+
type: e.class,
|
413
|
+
},
|
414
|
+
)
|
415
|
+
|
416
|
+
Kernel.warn "[ERROR] NetworkResiliency #{method}: #{e.class}: #{e.message}"
|
417
|
+
end
|
417
418
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|