network_resiliency 0.7.10 → 0.7.12
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/Gemfile.lock +1 -1
- data/README.md +5 -0
- data/lib/network_resiliency/power_stats.rb +132 -0
- data/lib/network_resiliency/stats.rb +15 -7
- data/lib/network_resiliency/version.rb +1 -1
- data/lib/network_resiliency.rb +27 -24
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 93c15344f1a3f02c3ef567baeb1650745b27286a92db1975368a2066e29df423
|
4
|
+
data.tar.gz: '092ee5a69a9f6b3e2e299946404c83353bb142e7d6d596d7fe4d556640500c5f'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9a84c796661456dd272fa052647ec99991956f5d69435a8dd046a93634de0bbfdc5100cd7cb9b8dfb54d3641d18c71cb5b0728c70448ed6de40fa2bd181eb02e
|
7
|
+
data.tar.gz: d9487add703cd5c2ef71abbed8d913619338e240faceaaeac8621c58dbfe2af6203029328f37875f75315d94f91498a6283e252f3c3703fefd12b546e46745c1
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
### v0.7.12 (2024-02-23)
|
2
|
+
- improve metric sampling
|
3
|
+
- lower resiliency threshold
|
4
|
+
- power buckets
|
5
|
+
|
6
|
+
### v0.7.11 (2024-02-20)
|
7
|
+
- lower dynamic timeout
|
8
|
+
- fix metric specs
|
9
|
+
- extend cache ttl
|
10
|
+
|
1
11
|
### v0.7.10 (2024-02-15)
|
2
12
|
- ddog sampling
|
3
13
|
- reconnect redis
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,3 +39,8 @@ Yes please :)
|
|
39
39
|
https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
|
40
40
|
|
41
41
|
https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
|
42
|
+
|
43
|
+
|
44
|
+
https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
|
45
|
+
|
46
|
+
https://grpc.io/blog/deadlines
|
@@ -0,0 +1,132 @@
|
|
1
|
+
require "network_resiliency/refinements"
|
2
|
+
|
3
|
+
using NetworkResiliency::Refinements
|
4
|
+
|
5
|
+
module NetworkResiliency
  # Thread-safe histogram that counts observations in power-of-ten buckets.
  #
  # Bucket +i+ holds the number of observed values +v+ for which
  # +Math.log10(v).ceil == i+, i.e. values in the range (10^(i-1), 10^i].
  # Values below MIN_VALUE are clamped to MIN_VALUE and land in bucket 0.
  # Percentiles are therefore reported as the upper bound (a power of ten)
  # of the bucket that contains them.
  class PowerStats
    MIN_VALUE = 1
    LOCK = Thread::Mutex.new
    STATS = {}

    # @return [Integer] total number of observations currently recorded
    attr_reader :n

    class << self
      # Fetches the shared instance registered under +key+, creating it on
      # first access. Guarded by the class-wide LOCK.
      #
      # @param key [Object] registry key
      # @return [PowerStats]
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Drops every shared instance from the registry.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Method decorator: redefines the instance method +fn_name+ so that its
      # body runs while holding the instance's @lock. Used below in the form
      # +synchronize def foo ... end+.
      #
      # @param fn_name [Symbol] name of an already-defined instance method
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial observations
    def initialize(values = [])
      @lock = Thread::Mutex.new
      reset

      values.each { |x| add(x) }
    end

    # Called by #dup / #clone. A shallow copy would share both the bucket
    # array and the mutex with the source, so mutating the copy (as #merge
    # does via #merge!) would silently corrupt the source. Give the copy its
    # own lock and its own bucket array, snapshotted under the source's lock
    # so that @n and @buckets stay consistent with each other.
    def initialize_copy(source)
      super

      @lock = Thread::Mutex.new
      source.instance_variable_get(:@lock).synchronize do
        @n = source.n
        @buckets = source.instance_variable_get(:@buckets).dup
      end
    end

    # Appends data to the histogram.
    #
    # @param value [Numeric, Array<Numeric>, PowerStats] a single observation,
    #   a list of observations, or another histogram to merge in
    # @return [self]
    def <<(value)
      case value
      when Array
        value.each { |x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Records one observation.
    #
    # @param value [Numeric]
    # @raise [ArgumentError] if +value+ is not Numeric
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      # clamp so that log10 is never taken of a value below 1
      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Estimates the +p+-th percentile as the upper bound of the bucket that
    # contains it.
    #
    # @param p [Numeric] percentile, between 0 and 100
    # @return [Integer] a power of ten, or 0 when no data has been recorded
    # @raise [ArgumentError] if +p+ is outside 0..100
    synchronize def percentile(p)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      # number of observations allowed to sit above the answer
      threshold = ((100 - p) / 100.0 * @n).floor
      index = @buckets.size - 1

      # walk down from the largest bucket, consuming the allowance
      while index >= 0
        if @buckets[index]
          break if @buckets[index] >= threshold

          threshold -= @buckets[index]
        end

        index -= 1
      end

      10 ** index
    end
    alias_method :p, :percentile

    # @return [Integer] shorthand for percentile(99)
    def p99
      percentile(99)
    end

    # Non-destructive merge: returns a new histogram containing the
    # observations of both operands and leaves the receiver untouched.
    #
    # @param other [PowerStats]
    # @return [PowerStats]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Adds the observations of +other+ into the receiver.
    #
    # @param other [PowerStats]
    # @return [self]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Scales every bucket count down to +percentage+ percent of its current
    # value (rounded) and recomputes the total from the scaled buckets.
    #
    # @param percentage [Numeric] between 0 and 100
    # @return [Integer] the new total number of observations
    # @raise [ArgumentError] if +percentage+ is not Numeric or out of range
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! { |x| (x * factor).round if x }
      @n = @buckets.compact.sum
    end

    # Discards all recorded observations.
    synchronize def reset
      @n = 0
      @buckets = []
    end
  end
end
|
@@ -80,6 +80,16 @@ module NetworkResiliency
|
|
80
80
|
self
|
81
81
|
end
|
82
82
|
|
83
|
+
# Scales the recorded sample down to +percentage+ percent of its current
# size: the sum of squared distances is multiplied by the same ratio and
# the sample count is rounded to the nearest integer.
#
# @param percentage [Numeric] between 0 and 100
# @return [Integer] the new sample count
# @raise [ArgumentError] if +percentage+ is not Numeric or is out of range
synchronize def scale!(percentage)
  unless percentage.is_a?(Numeric)
    raise ArgumentError, "Numeric expected, found #{percentage.class}"
  end

  unless percentage.between?(0, 100)
    raise ArgumentError, "argument must be between 0 and 100"
  end

  ratio = percentage / 100.0

  @sq_dist = @sq_dist * ratio
  @n = (@n * ratio).round
end
|
92
|
+
|
83
93
|
def ==(other)
|
84
94
|
return false unless other.is_a?(self.class)
|
85
95
|
|
@@ -94,10 +104,10 @@ module NetworkResiliency
|
|
94
104
|
@sq_dist = 0.0 # sum of squared distance from mean
|
95
105
|
end
|
96
106
|
|
97
|
-
MIN_SAMPLE_SIZE =
|
107
|
+
MIN_SAMPLE_SIZE = 300
|
98
108
|
MAX_WINDOW_LENGTH = 1000
|
99
109
|
STATS_TTL = 24 * 60 * 60 # 1 day
|
100
|
-
CACHE_TTL =
|
110
|
+
CACHE_TTL = 120 # seconds
|
101
111
|
|
102
112
|
LUA_SCRIPT = <<~LUA
|
103
113
|
local results = {}
|
@@ -189,11 +199,9 @@ module NetworkResiliency
|
|
189
199
|
end
|
190
200
|
|
191
201
|
res = redis.eval(LUA_SCRIPT, keys, args)
|
192
|
-
data.keys.zip(res.each_slice(3)).
|
193
|
-
n, avg, sq_dist
|
194
|
-
|
195
|
-
[ key, Stats.from(n: n, avg: avg, sq_dist: sq_dist) ]
|
196
|
-
end.to_h
|
202
|
+
data.keys.zip(res.each_slice(3)).to_h.transform_values! do |n, avg, sq_dist|
|
203
|
+
Stats.from(n: n, avg: avg, sq_dist: sq_dist)
|
204
|
+
end
|
197
205
|
end
|
198
206
|
|
199
207
|
def self.fetch(redis, keys)
|
data/lib/network_resiliency.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
require "network_resiliency/power_stats"
|
1
2
|
require "network_resiliency/refinements"
|
2
3
|
require "network_resiliency/stats"
|
3
4
|
require "network_resiliency/stats_engine"
|
@@ -19,7 +20,7 @@ module NetworkResiliency
|
|
19
20
|
ACTIONS = [ :connect, :request ].freeze
|
20
21
|
ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
|
21
22
|
MODE = [ :observe, :resilient ].freeze
|
22
|
-
RESILIENCY_SIZE_THRESHOLD =
|
23
|
+
RESILIENCY_SIZE_THRESHOLD = 300
|
23
24
|
SAMPLE_RATE = {
|
24
25
|
timeout: 0.1,
|
25
26
|
stats: 0.1,
|
@@ -266,26 +267,28 @@ module NetworkResiliency
|
|
266
267
|
# ensure Syncer is running
|
267
268
|
Syncer.start
|
268
269
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
270
|
+
if rand < SAMPLE_RATE[:stats]
|
271
|
+
NetworkResiliency.statsd&.distribution(
|
272
|
+
"network_resiliency.#{action}.stats.n",
|
273
|
+
stats.n,
|
274
|
+
tags: tags,
|
275
|
+
sample_rate: SAMPLE_RATE[:stats],
|
276
|
+
)
|
275
277
|
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
278
|
+
NetworkResiliency.statsd&.distribution(
|
279
|
+
"network_resiliency.#{action}.stats.avg",
|
280
|
+
stats.avg,
|
281
|
+
tags: tags,
|
282
|
+
sample_rate: SAMPLE_RATE[:stats],
|
283
|
+
)
|
282
284
|
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
285
|
+
NetworkResiliency.statsd&.distribution(
|
286
|
+
"network_resiliency.#{action}.stats.stdev",
|
287
|
+
stats.stdev,
|
288
|
+
tags: tags,
|
289
|
+
sample_rate: SAMPLE_RATE[:stats],
|
290
|
+
)
|
291
|
+
end
|
289
292
|
end
|
290
293
|
|
291
294
|
nil
|
@@ -316,7 +319,7 @@ module NetworkResiliency
|
|
316
319
|
destination: destination,
|
317
320
|
}
|
318
321
|
|
319
|
-
p99 = (stats.avg + stats.stdev *
|
322
|
+
p99 = (stats.avg + stats.stdev * 2).order_of_magnitude(ceil: true)
|
320
323
|
|
321
324
|
timeouts = []
|
322
325
|
|
@@ -337,7 +340,7 @@ module NetworkResiliency
|
|
337
340
|
"network_resiliency.timeout.raised",
|
338
341
|
tags: tags,
|
339
342
|
sample_rate: SAMPLE_RATE[:timeout],
|
340
|
-
)
|
343
|
+
) if rand < SAMPLE_RATE[:timeout]
|
341
344
|
end
|
342
345
|
else
|
343
346
|
# the specified timeout is less than our expected p99...awkward
|
@@ -347,7 +350,7 @@ module NetworkResiliency
|
|
347
350
|
"network_resiliency.timeout.too_low",
|
348
351
|
tags: tags,
|
349
352
|
sample_rate: SAMPLE_RATE[:timeout],
|
350
|
-
)
|
353
|
+
) if rand < SAMPLE_RATE[:timeout]
|
351
354
|
end
|
352
355
|
else
|
353
356
|
timeouts << p99
|
@@ -361,7 +364,7 @@ module NetworkResiliency
|
|
361
364
|
"network_resiliency.timeout.missing",
|
362
365
|
tags: tags,
|
363
366
|
sample_rate: SAMPLE_RATE[:timeout],
|
364
|
-
)
|
367
|
+
) if rand < SAMPLE_RATE[:timeout]
|
365
368
|
end
|
366
369
|
|
367
370
|
NetworkResiliency.statsd&.distribution(
|
@@ -372,7 +375,7 @@ module NetworkResiliency
|
|
372
375
|
destination: destination,
|
373
376
|
},
|
374
377
|
sample_rate: SAMPLE_RATE[:timeout],
|
375
|
-
)
|
378
|
+
) if rand < SAMPLE_RATE[:timeout]
|
376
379
|
|
377
380
|
case units
|
378
381
|
when nil, :ms, :milliseconds
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: network_resiliency
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.
|
4
|
+
version: 0.7.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Pepper
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-02-
|
11
|
+
date: 2024-02-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: byebug
|
@@ -238,6 +238,7 @@ files:
|
|
238
238
|
- lib/network_resiliency/adapter/postgres.rb
|
239
239
|
- lib/network_resiliency/adapter/rails.rb
|
240
240
|
- lib/network_resiliency/adapter/redis.rb
|
241
|
+
- lib/network_resiliency/power_stats.rb
|
241
242
|
- lib/network_resiliency/refinements.rb
|
242
243
|
- lib/network_resiliency/stats.rb
|
243
244
|
- lib/network_resiliency/stats_engine.rb
|