network_resiliency 0.7.11 → 0.7.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/README.md +5 -0
- data/lib/network_resiliency/power_stats.rb +132 -0
- data/lib/network_resiliency/stats.rb +11 -1
- data/lib/network_resiliency/syncer.rb +3 -1
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +32 -23
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c006caf6d6fefeca3185a0a6a1d75be6bcf1cbc16ed9e1b00f2b147114b8442f
|
4
|
+
data.tar.gz: cad33936cc88b7d586b9b1ca92ab36308e37af11768509e67fb6f20e2af354ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4eaf6bf8b4a4b176a8bd2ae3b79a80fc1a9b01c0d0a6dc486e870d656eb3793c1626dd923c2afd1c2b06774012067d77815a1710e5de44f06d79f098dd8db2b
|
7
|
+
data.tar.gz: 36d9406a01c76ca09d0859194401bbd49a177f644c1ca38d5df3c1fb8ad27907304d8d17ad2c165ced7dcfc67b467ee4e2061ce63e80a884fc8a5a372e994f2b
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,3 +39,8 @@ Yes please :)
|
|
39
39
|
https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
|
40
40
|
|
41
41
|
https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
|
42
|
+
|
43
|
+
|
44
|
+
https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
|
45
|
+
|
46
|
+
https://grpc.io/blog/deadlines
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require "network_resiliency/refinements"
|
2
|
+
|
3
|
+
using NetworkResiliency::Refinements
|
4
|
+
|
5
|
+
module NetworkResiliency
  # Thread-safe, order-of-magnitude histogram.
  #
  # Every sample is counted in a bucket chosen by the ceiling of its base-10
  # logarithm, so bucket +i+ holds the number of samples +v+ with
  # 10**(i-1) < v <= 10**i (samples below MIN_VALUE are clamped into bucket 0).
  # Percentiles are therefore reported as powers of ten: the upper bound of the
  # bucket the requested percentile falls in.
  class PowerStats
    # Samples smaller than this are clamped up to it before bucketing, which
    # keeps the logarithm (and therefore the bucket index) non-negative.
    MIN_VALUE = 1

    # Guards the class-level STATS registry.
    LOCK = Thread::Mutex.new

    # Registry of named instances, populated lazily by PowerStats.[].
    STATS = {}

    # Total number of samples currently represented by the buckets.
    attr_reader :n

    class << self
      # Fetch the instance registered under +key+, creating it on first use.
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Drop every instance from the registry.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Macro used as `synchronize def foo ...`: redefines the instance method
      # +fn_name+ so that its body runs while holding the instance's @lock.
      # Only positional arguments are forwarded to the wrapped method.
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial samples to record
    def initialize(values = [])
      # The lock must exist before +reset+ / +add+ run: both are synchronized.
      @lock = Thread::Mutex.new
      reset

      values.each { |x| add(x) }
    end

    # Invoked by +dup+ / +clone+. A shallow copy would share the bucket array
    # and the mutex with the source, so mutating the copy (as +merge+ does via
    # +merge!+) would silently change the source's buckets without updating
    # its @n. Give every copy its own array and its own lock instead.
    def initialize_copy(source)
      super

      @lock = Thread::Mutex.new
      @buckets = @buckets.dup
    end

    # Record one sample, an Array of samples, or merge in another PowerStats.
    #
    # @return [self] so calls can be chained
    # @raise [ArgumentError] if a sample is not Numeric
    def <<(value)
      case value
      when Array
        value.each { |x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Record a single sample.
    #
    # @param value [Numeric]
    # @return [Integer] the updated sample count
    # @raise [ArgumentError] if +value+ is not Numeric
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      # clamp so that log10 is never negative
      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Upper bound (a power of ten) of the bucket containing percentile +p+.
    #
    # @param p [Numeric] percentile, between 0 and 100 inclusive
    # @return [Integer] 0 when no samples have been recorded, 10**index otherwise
    # @raise [ArgumentError] if +p+ is outside 0..100
    synchronize def percentile(p)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      # number of samples allowed to sit above the answer
      threshold = ((100 - p) / 100.0 * @n).floor
      index = @buckets.size - 1

      # Walk from the largest bucket downwards, spending the threshold on each
      # populated bucket until one is big enough to absorb what is left. The
      # bucket counts add up to @n and threshold <= @n, so the walk always
      # stops on a populated bucket.
      while index >= 0
        if @buckets[index]
          break if @buckets[index] >= threshold

          threshold -= @buckets[index]
        end

        index -= 1
      end

      10 ** index
    end
    alias_method :p, :percentile

    # Shorthand for percentile(99).
    def p99
      percentile(99)
    end

    # Non-destructive merge: returns a new PowerStats holding the samples of
    # both operands and leaves the receiver untouched (see #initialize_copy).
    #
    # @param other [PowerStats]
    # @return [PowerStats]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Fold the samples of +other+ into the receiver.
    #
    # @param other [PowerStats]
    # @return [self]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        # nothing recorded yet: adopt a private copy of the other side's state
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Downsample in place: every bucket count is multiplied by percentage/100
    # and rounded, then the sample count is recomputed from the buckets.
    #
    # @param percentage [Numeric] between 0 and 100 inclusive
    # @return [Integer] the new sample count
    # @raise [ArgumentError] if +percentage+ is not Numeric or is out of range
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! { |x| (x * factor).round if x }
      @n = @buckets.compact.sum
    end

    # Forget every recorded sample.
    synchronize def reset
      @n = 0
      @buckets = []
    end
  end
end
|
@@ -80,6 +80,16 @@ module NetworkResiliency
|
|
80
80
|
self
|
81
81
|
end
|
82
82
|
|
83
|
+
# Downsample the running statistics in place: the squared-distance
# accumulator and the sample count are both multiplied by percentage/100
# (the count is rounded to an Integer).
#
# Returns the new sample count. Raises ArgumentError when +percentage+ is
# not Numeric or lies outside 0..100.
synchronize def scale!(percentage)
  unless percentage.is_a?(Numeric)
    raise ArgumentError, "Numeric expected, found #{percentage.class}"
  end

  unless percentage.between?(0, 100)
    raise ArgumentError, "argument must be between 0 and 100"
  end

  ratio = percentage / 100.0

  @sq_dist = @sq_dist * ratio
  @n = (@n * ratio).round
end
|
92
|
+
|
83
93
|
def ==(other)
|
84
94
|
return false unless other.is_a?(self.class)
|
85
95
|
|
@@ -94,7 +104,7 @@ module NetworkResiliency
|
|
94
104
|
@sq_dist = 0.0 # sum of squared distance from mean
|
95
105
|
end
|
96
106
|
|
97
|
-
MIN_SAMPLE_SIZE =
|
107
|
+
MIN_SAMPLE_SIZE = 300
|
98
108
|
MAX_WINDOW_LENGTH = 1000
|
99
109
|
STATS_TTL = 24 * 60 * 60 # 1 day
|
100
110
|
CACHE_TTL = 120 # seconds
|
@@ -50,7 +50,9 @@ module NetworkResiliency
|
|
50
50
|
NetworkResiliency.redis.disconnect! if NetworkResiliency.redis.connected?
|
51
51
|
|
52
52
|
until @shutdown
|
53
|
-
|
53
|
+
NetworkResiliency.redis.with_reconnect do
|
54
|
+
StatsEngine.sync(NetworkResiliency.redis)
|
55
|
+
end
|
54
56
|
|
55
57
|
sleep(SLEEP_DURATION)
|
56
58
|
end
|
data/lib/network_resiliency.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require "network_resiliency/power_stats"
|
1
2
|
require "network_resiliency/refinements"
|
2
3
|
require "network_resiliency/stats"
|
3
4
|
require "network_resiliency/stats_engine"
|
@@ -19,7 +20,7 @@ module NetworkResiliency
|
|
19
20
|
ACTIONS = [ :connect, :request ].freeze
|
20
21
|
ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
|
21
22
|
MODE = [ :observe, :resilient ].freeze
|
22
|
-
RESILIENCY_SIZE_THRESHOLD =
|
23
|
+
RESILIENCY_SIZE_THRESHOLD = 300
|
23
24
|
SAMPLE_RATE = {
|
24
25
|
timeout: 0.1,
|
25
26
|
stats: 0.1,
|
@@ -256,6 +257,12 @@ module NetworkResiliency
|
|
256
257
|
# record stats
|
257
258
|
key = [ adapter, action, destination ].join(":")
|
258
259
|
stats = StatsEngine.add(key, duration)
|
260
|
+
|
261
|
+
if stats.n > RESILIENCY_SIZE_THRESHOLD * 4
|
262
|
+
# downsample to age out old stats
|
263
|
+
stats.scale!(50)
|
264
|
+
end
|
265
|
+
|
259
266
|
tags = {
|
260
267
|
adapter: adapter,
|
261
268
|
destination: destination,
|
@@ -266,26 +273,28 @@ module NetworkResiliency
|
|
266
273
|
# ensure Syncer is running
|
267
274
|
Syncer.start
|
268
275
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
if rand < SAMPLE_RATE[:stats]
|
277
|
+
NetworkResiliency.statsd&.distribution(
|
278
|
+
"network_resiliency.#{action}.stats.n",
|
279
|
+
stats.n,
|
280
|
+
tags: tags,
|
281
|
+
sample_rate: SAMPLE_RATE[:stats],
|
282
|
+
)
|
275
283
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
284
|
+
NetworkResiliency.statsd&.distribution(
|
285
|
+
"network_resiliency.#{action}.stats.avg",
|
286
|
+
stats.avg,
|
287
|
+
tags: tags,
|
288
|
+
sample_rate: SAMPLE_RATE[:stats],
|
289
|
+
)
|
282
290
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
291
|
+
NetworkResiliency.statsd&.distribution(
|
292
|
+
"network_resiliency.#{action}.stats.stdev",
|
293
|
+
stats.stdev,
|
294
|
+
tags: tags,
|
295
|
+
sample_rate: SAMPLE_RATE[:stats],
|
296
|
+
)
|
297
|
+
end
|
289
298
|
end
|
290
299
|
|
291
300
|
nil
|
@@ -337,7 +346,7 @@ module NetworkResiliency
|
|
337
346
|
"network_resiliency.timeout.raised",
|
338
347
|
tags: tags,
|
339
348
|
sample_rate: SAMPLE_RATE[:timeout],
|
340
|
-
)
|
349
|
+
) if rand < SAMPLE_RATE[:timeout]
|
341
350
|
end
|
342
351
|
else
|
343
352
|
# the specified timeout is less than our expected p99...awkward
|
@@ -347,7 +356,7 @@ module NetworkResiliency
|
|
347
356
|
"network_resiliency.timeout.too_low",
|
348
357
|
tags: tags,
|
349
358
|
sample_rate: SAMPLE_RATE[:timeout],
|
350
|
-
)
|
359
|
+
) if rand < SAMPLE_RATE[:timeout]
|
351
360
|
end
|
352
361
|
else
|
353
362
|
timeouts << p99
|
@@ -361,7 +370,7 @@ module NetworkResiliency
|
|
361
370
|
"network_resiliency.timeout.missing",
|
362
371
|
tags: tags,
|
363
372
|
sample_rate: SAMPLE_RATE[:timeout],
|
364
|
-
)
|
373
|
+
) if rand < SAMPLE_RATE[:timeout]
|
365
374
|
end
|
366
375
|
|
367
376
|
NetworkResiliency.statsd&.distribution(
|
@@ -372,7 +381,7 @@ module NetworkResiliency
|
|
372
381
|
destination: destination,
|
373
382
|
},
|
374
383
|
sample_rate: SAMPLE_RATE[:timeout],
|
375
|
-
)
|
384
|
+
) if rand < SAMPLE_RATE[:timeout]
|
376
385
|
|
377
386
|
case units
|
378
387
|
when nil, :ms, :milliseconds
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.11
|
4
|
+
version: 0.7.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -238,6 +238,7 @@ files:
|
|
238
238
|
- lib/network_resiliency/adapter/postgres.rb
|
239
239
|
- lib/network_resiliency/adapter/rails.rb
|
240
240
|
- lib/network_resiliency/adapter/redis.rb
|
241
|
+
- lib/network_resiliency/power_stats.rb
|
241
242
|
- lib/network_resiliency/refinements.rb
|
242
243
|
- lib/network_resiliency/stats.rb
|
243
244
|
- lib/network_resiliency/stats_engine.rb
|