network_resiliency 0.7.11 → 0.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/README.md +5 -0
- data/lib/network_resiliency/power_stats.rb +132 -0
- data/lib/network_resiliency/stats.rb +11 -1
- data/lib/network_resiliency/syncer.rb +3 -1
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +32 -23
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c006caf6d6fefeca3185a0a6a1d75be6bcf1cbc16ed9e1b00f2b147114b8442f
|
4
|
+
data.tar.gz: cad33936cc88b7d586b9b1ca92ab36308e37af11768509e67fb6f20e2af354ba
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e4eaf6bf8b4a4b176a8bd2ae3b79a80fc1a9b01c0d0a6dc486e870d656eb3793c1626dd923c2afd1c2b06774012067d77815a1710e5de44f06d79f098dd8db2b
|
7
|
+
data.tar.gz: 36d9406a01c76ca09d0859194401bbd49a177f644c1ca38d5df3c1fb8ad27907304d8d17ad2c165ced7dcfc67b467ee4e2061ce63e80a884fc8a5a372e994f2b
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,3 +39,8 @@ Yes please :)
|
|
39
39
|
https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
|
40
40
|
|
41
41
|
https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
|
42
|
+
|
43
|
+
|
44
|
+
https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
|
45
|
+
|
46
|
+
https://grpc.io/blog/deadlines
|
data/lib/network_resiliency/power_stats.rb
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
require "network_resiliency/refinements"
|
2
|
+
|
3
|
+
using NetworkResiliency::Refinements
|
4
|
+
|
5
|
+
module NetworkResiliency
  # Thread-safe histogram of numeric samples, bucketed by power of ten.
  #
  # A sample is counted in bucket +i+, where +i+ is +Math.log10(sample).ceil+
  # (samples below MIN_VALUE are first raised to MIN_VALUE). Percentile
  # queries therefore answer with an order-of-magnitude upper bound
  # (1, 10, 100, ...) rather than an exact sample value.
  #
  # Instance methods wrapped with the +synchronize+ macro run while holding
  # the instance's mutex. Thread::Mutex is not reentrant, so a synchronized
  # method must never call another synchronized method on the same instance.
  class PowerStats
    # Lower clamp applied to every sample before bucketing.
    MIN_VALUE = 1
    # Guards access to STATS.
    LOCK = Thread::Mutex.new
    # Registry of named instances, populated lazily by PowerStats.[].
    STATS = {}

    # @return [Integer] number of samples currently represented
    attr_reader :n

    class << self
      # Returns the instance registered under +key+, creating it on first use.
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Empties the registry of named instances.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Macro: redefines the instance method +fn_name+ so that its body runs
      # inside the instance's mutex. Intended to be used as
      # +synchronize def foo ... end+, since +def+ evaluates to the method name.
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial samples to record
    def initialize(values = [])
      @lock = Thread::Mutex.new
      reset

      values.each { |x| add(x) }
    end

    # Invoked by #dup and #clone. A shallow copy would share both the bucket
    # array and the mutex with the source, so mutating the copy (as #merge
    # does through #merge!) would silently corrupt the source's histogram.
    # Give every copy its own storage and its own lock.
    def initialize_copy(source)
      super

      @lock = Thread::Mutex.new
      @buckets = @buckets.dup
    end

    # Records +value+ and returns self so calls can be chained.
    #
    # @param value [Array, PowerStats, Numeric] an Array has each element
    #   added, another PowerStats is merged in, anything else is added as a
    #   single sample
    # @return [self]
    def <<(value)
      case value
      when Array
        value.each { |x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Records a single sample.
    #
    # @param value [Numeric]
    # @raise [ArgumentError] if +value+ is not Numeric
    # @return [Integer] the updated sample count
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      # clamp so that log10 never sees zero or a negative number
      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Order-of-magnitude estimate of the +p+-th percentile.
    #
    # Buckets are scanned from the largest magnitude downward, consuming
    # the share of samples allowed above the percentile; the scan stops at the
    # first bucket whose count covers what is left of that share.
    #
    # @param p [Numeric] percentile, between 0 and 100 inclusive
    # @raise [ArgumentError] if +p+ is outside 0..100
    # @return [Integer] 10 raised to the selected bucket index, or 0 when no
    #   samples have been recorded
    synchronize def percentile(p)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      # number of samples permitted to lie above the answer
      threshold = ((100 - p) / 100.0 * @n).floor
      index = @buckets.size - 1

      while index >= 0
        # nil marks a magnitude that has never been observed
        if @buckets[index]
          break if @buckets[index] >= threshold

          threshold -= @buckets[index]
        end

        index -= 1
      end

      10 ** index
    end
    alias_method :p, :percentile

    # @return [Integer] shorthand for percentile(99)
    def p99
      percentile(99)
    end

    # Non-destructive merge: returns a new instance holding the samples of
    # both operands and leaves the receiver untouched.
    #
    # @param other [PowerStats]
    # @return [PowerStats]
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Adds the bucket counts of +other+ into the receiver.
    #
    # NOTE: +other+'s buckets are read without taking +other+'s lock.
    #
    # @param other [PowerStats]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    # @return [self]
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        # nothing of our own to keep: adopt a copy of the other histogram
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Scales every bucket count to +percentage+ percent of its current value
    # (rounded) and recomputes the sample count from the scaled buckets.
    #
    # @param percentage [Numeric] between 0 and 100 inclusive
    # @raise [ArgumentError] if +percentage+ is not Numeric or is out of range
    # @return [Integer] the new sample count
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! { |x| (x * factor).round if x }
      @n = @buckets.compact.sum
    end

    # Discards all recorded samples.
    synchronize def reset
      @n = 0
      @buckets = []
    end
  end
end
|
data/lib/network_resiliency/stats.rb
CHANGED
@@ -80,6 +80,16 @@ module NetworkResiliency
|
|
80
80
|
self
|
81
81
|
end
|
82
82
|
|
83
|
+
# Shrinks the tracked sample count and the accumulated squared distance to
# +percentage+ percent of their current values. No other state is modified
# here.
#
# @param percentage [Numeric] between 0 and 100 inclusive
# @raise [ArgumentError] if +percentage+ is not Numeric or is out of range
# @return [Integer] the new (rounded) sample count
synchronize def scale!(percentage)
  unless percentage.is_a?(Numeric)
    raise ArgumentError, "Numeric expected, found #{percentage.class}"
  end

  unless percentage.between?(0, 100)
    raise ArgumentError, "argument must be between 0 and 100"
  end

  ratio = percentage / 100.0

  @sq_dist *= ratio
  @n = (@n * ratio).round
end
|
92
|
+
|
83
93
|
def ==(other)
|
84
94
|
return false unless other.is_a?(self.class)
|
85
95
|
|
@@ -94,7 +104,7 @@ module NetworkResiliency
|
|
94
104
|
@sq_dist = 0.0 # sum of squared distance from mean
|
95
105
|
end
|
96
106
|
|
97
|
-
MIN_SAMPLE_SIZE =
|
107
|
+
MIN_SAMPLE_SIZE = 300
|
98
108
|
MAX_WINDOW_LENGTH = 1000
|
99
109
|
STATS_TTL = 24 * 60 * 60 # 1 day
|
100
110
|
CACHE_TTL = 120 # seconds
|
data/lib/network_resiliency/syncer.rb
CHANGED
@@ -50,7 +50,9 @@ module NetworkResiliency
|
|
50
50
|
NetworkResiliency.redis.disconnect! if NetworkResiliency.redis.connected?
|
51
51
|
|
52
52
|
until @shutdown
|
53
|
-
|
53
|
+
NetworkResiliency.redis.with_reconnect do
|
54
|
+
StatsEngine.sync(NetworkResiliency.redis)
|
55
|
+
end
|
54
56
|
|
55
57
|
sleep(SLEEP_DURATION)
|
56
58
|
end
|
data/lib/network_resiliency.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require "network_resiliency/power_stats"
|
1
2
|
require "network_resiliency/refinements"
|
2
3
|
require "network_resiliency/stats"
|
3
4
|
require "network_resiliency/stats_engine"
|
@@ -19,7 +20,7 @@ module NetworkResiliency
|
|
19
20
|
ACTIONS = [ :connect, :request ].freeze
|
20
21
|
ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
|
21
22
|
MODE = [ :observe, :resilient ].freeze
|
22
|
-
RESILIENCY_SIZE_THRESHOLD =
|
23
|
+
RESILIENCY_SIZE_THRESHOLD = 300
|
23
24
|
SAMPLE_RATE = {
|
24
25
|
timeout: 0.1,
|
25
26
|
stats: 0.1,
|
@@ -256,6 +257,12 @@ module NetworkResiliency
|
|
256
257
|
# record stats
|
257
258
|
key = [ adapter, action, destination ].join(":")
|
258
259
|
stats = StatsEngine.add(key, duration)
|
260
|
+
|
261
|
+
if stats.n > RESILIENCY_SIZE_THRESHOLD * 4
|
262
|
+
# downsample to age out old stats
|
263
|
+
stats.scale!(50)
|
264
|
+
end
|
265
|
+
|
259
266
|
tags = {
|
260
267
|
adapter: adapter,
|
261
268
|
destination: destination,
|
@@ -266,26 +273,28 @@ module NetworkResiliency
|
|
266
273
|
# ensure Syncer is running
|
267
274
|
Syncer.start
|
268
275
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
276
|
+
if rand < SAMPLE_RATE[:stats]
|
277
|
+
NetworkResiliency.statsd&.distribution(
|
278
|
+
"network_resiliency.#{action}.stats.n",
|
279
|
+
stats.n,
|
280
|
+
tags: tags,
|
281
|
+
sample_rate: SAMPLE_RATE[:stats],
|
282
|
+
)
|
275
283
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
284
|
+
NetworkResiliency.statsd&.distribution(
|
285
|
+
"network_resiliency.#{action}.stats.avg",
|
286
|
+
stats.avg,
|
287
|
+
tags: tags,
|
288
|
+
sample_rate: SAMPLE_RATE[:stats],
|
289
|
+
)
|
282
290
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
291
|
+
NetworkResiliency.statsd&.distribution(
|
292
|
+
"network_resiliency.#{action}.stats.stdev",
|
293
|
+
stats.stdev,
|
294
|
+
tags: tags,
|
295
|
+
sample_rate: SAMPLE_RATE[:stats],
|
296
|
+
)
|
297
|
+
end
|
289
298
|
end
|
290
299
|
|
291
300
|
nil
|
@@ -337,7 +346,7 @@ module NetworkResiliency
|
|
337
346
|
"network_resiliency.timeout.raised",
|
338
347
|
tags: tags,
|
339
348
|
sample_rate: SAMPLE_RATE[:timeout],
|
340
|
-
)
|
349
|
+
) if rand < SAMPLE_RATE[:timeout]
|
341
350
|
end
|
342
351
|
else
|
343
352
|
# the specified timeout is less than our expected p99...awkward
|
@@ -347,7 +356,7 @@ module NetworkResiliency
|
|
347
356
|
"network_resiliency.timeout.too_low",
|
348
357
|
tags: tags,
|
349
358
|
sample_rate: SAMPLE_RATE[:timeout],
|
350
|
-
)
|
359
|
+
) if rand < SAMPLE_RATE[:timeout]
|
351
360
|
end
|
352
361
|
else
|
353
362
|
timeouts << p99
|
@@ -361,7 +370,7 @@ module NetworkResiliency
|
|
361
370
|
"network_resiliency.timeout.missing",
|
362
371
|
tags: tags,
|
363
372
|
sample_rate: SAMPLE_RATE[:timeout],
|
364
|
-
)
|
373
|
+
) if rand < SAMPLE_RATE[:timeout]
|
365
374
|
end
|
366
375
|
|
367
376
|
NetworkResiliency.statsd&.distribution(
|
@@ -372,7 +381,7 @@ module NetworkResiliency
|
|
372
381
|
destination: destination,
|
373
382
|
},
|
374
383
|
sample_rate: SAMPLE_RATE[:timeout],
|
375
|
-
)
|
384
|
+
) if rand < SAMPLE_RATE[:timeout]
|
376
385
|
|
377
386
|
case units
|
378
387
|
when nil, :ms, :milliseconds
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.11
|
4
|
+
version: 0.7.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-03-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -238,6 +238,7 @@ files:
|
|
238
238
|
- lib/network_resiliency/adapter/postgres.rb
|
239
239
|
- lib/network_resiliency/adapter/rails.rb
|
240
240
|
- lib/network_resiliency/adapter/redis.rb
|
241
|
+
- lib/network_resiliency/power_stats.rb
|
241
242
|
- lib/network_resiliency/refinements.rb
|
242
243
|
- lib/network_resiliency/stats.rb
|
243
244
|
- lib/network_resiliency/stats_engine.rb
|