network_resiliency 0.7.11 → 0.7.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1eec5786e7daaea4b6f3c285dffc1e4bf1c70d7096ce69d44f4cc4ea80111ea5
4
- data.tar.gz: bc3ca5e592f1a809bfb28453e43d49a64405a424861aef2c6c6745ef2f27ebb4
3
+ metadata.gz: c006caf6d6fefeca3185a0a6a1d75be6bcf1cbc16ed9e1b00f2b147114b8442f
4
+ data.tar.gz: cad33936cc88b7d586b9b1ca92ab36308e37af11768509e67fb6f20e2af354ba
5
5
  SHA512:
6
- metadata.gz: fa1af69b1f175fb708247ddd0e9501ae2b945909197b26c343347e113bb78e3e645c5217c5f74d63fda780a027b730746b8641b9f77cb5c31626bc2f985c60ff
7
- data.tar.gz: e9733ddd7978b321f8c49218de503f42b300aa269757559cac442dcefb43e0b136e1507360629e9aad1976cedd31c7c0ccd4e9a256c41962a990e50f046d3979
6
+ metadata.gz: e4eaf6bf8b4a4b176a8bd2ae3b79a80fc1a9b01c0d0a6dc486e870d656eb3793c1626dd923c2afd1c2b06774012067d77815a1710e5de44f06d79f098dd8db2b
7
+ data.tar.gz: 36d9406a01c76ca09d0859194401bbd49a177f644c1ca38d5df3c1fb8ad27907304d8d17ad2c165ced7dcfc67b467ee4e2061ce63e80a884fc8a5a372e994f2b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ### v0.7.13 (2024-03-04)
2
+ - downsampling
3
+ - redis resiliency
4
+
5
+ ### v0.7.12 (2024-02-23)
6
+ - improve metric sampling
7
+ - lower resiliency threshold
8
+ - power buckets
9
+
1
10
  ### v0.7.11 (2024-02-20)
2
11
  - lower dynamic timeout
3
12
  - fix metric specs
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.7.11)
4
+ network_resiliency (0.7.13)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -39,3 +39,8 @@ Yes please :)
39
39
  https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
40
40
 
41
41
  https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
42
+
43
+
44
+ https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
45
+
46
+ https://grpc.io/blog/deadlines
@@ -0,0 +1,132 @@
1
+ require "network_resiliency/refinements"
2
+
3
+ using NetworkResiliency::Refinements
4
+
5
module NetworkResiliency
  # Approximate distribution tracker that counts samples in power-of-ten
  # buckets.
  #
  # Every sample is clamped up to MIN_VALUE and counted in bucket
  # +ceil(log10(value))+, so bucket +i+ holds values in the range
  # (10^(i-1), 10^i] and bucket 0 holds everything <= 1.  Percentiles are
  # therefore reported as the upper bound (a power of ten) of the bucket they
  # fall into.
  #
  # Instance methods wrapped with the +synchronize+ class macro run while
  # holding the instance's own mutex.
  class PowerStats
    # Smallest value recorded; anything lower is clamped up to it so that
    # log10 never yields a negative bucket index.
    MIN_VALUE = 1

    # Guards the class-level STATS registry.
    LOCK = Thread::Mutex.new

    # Registry of shared instances, keyed by whatever is passed to PowerStats[].
    STATS = {}

    # @return [Integer] number of samples currently represented
    attr_reader :n

    class << self
      # Fetches the shared instance registered under +key+, creating it on
      # first access.
      #
      # @param key [Object] registry key
      # @return [PowerStats]
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Drops every instance held in the registry.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Class macro: redefines the instance method +fn_name+ so that its body
      # runs while holding the instance's @lock.
      #
      # @param fn_name [Symbol] name of an already defined instance method
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial samples to record
    def initialize(values = [])
      @lock = Thread::Mutex.new
      reset

      values.each { |x| add(x) }
    end

    # Gives copies made by #dup / #clone their own bucket array and mutex.
    #
    # The default shallow copy shares @buckets with the source, so a
    # non-destructive #merge would silently add counts to the receiver's
    # buckets as well.
    def initialize_copy(source)
      super

      @lock = Thread::Mutex.new
      @buckets = @buckets.dup
    end

    # Records one sample, an array of samples, or merges another PowerStats.
    #
    # @param value [Numeric, Array<Numeric>, PowerStats]
    # @return [self]
    def <<(value)
      case value
      when Array
        value.each { |x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Records a single sample.
    #
    # @param value [Numeric]
    # @raise [ArgumentError] if +value+ is not Numeric
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Approximates the requested percentile as a power of ten.
    #
    # Buckets are scanned from the largest downwards; up to (100 - p)% of the
    # samples may lie above the bucket that is finally reported.
    #
    # @param p [Numeric] percentile between 0 and 100
    # @return [Integer] upper bound of the selected bucket, or 0 when empty
    # @raise [ArgumentError] if +p+ is outside 0..100
    synchronize def percentile(p)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      threshold = ((100 - p) / 100.0 * @n).floor
      index = @buckets.size - 1

      while index >= 0
        count = @buckets[index]

        # Skip nil gaps and buckets that scale! rounded down to zero: an
        # empty bucket must never be reported as holding the percentile.
        if count && count > 0
          break if count >= threshold

          threshold -= count
        end

        index -= 1
      end

      10 ** index
    end
    alias_method :p, :percentile

    # @return [Integer] shorthand for percentile(99)
    def p99
      percentile(99)
    end

    # Non-destructive merge: returns a new PowerStats holding the samples of
    # both the receiver and +other+; neither operand is modified.
    #
    # @param other [PowerStats]
    # @return [PowerStats]
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Adds the bucket counts of +other+ into the receiver.
    #
    # @param other [PowerStats]
    # @return [self]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Scales every bucket count down to +percentage+ percent (rounded) and
    # recomputes the sample count from the scaled buckets.
    #
    # @param percentage [Numeric] value between 0 and 100
    # @return [Integer] the new sample count
    # @raise [ArgumentError] if +percentage+ is not Numeric or outside 0..100
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! { |x| (x * factor).round if x }
      @n = @buckets.compact.sum
    end

    # Discards all recorded samples.
    synchronize def reset
      @n = 0
      @buckets = []
    end
  end
end
@@ -80,6 +80,16 @@ module NetworkResiliency
80
80
  self
81
81
  end
82
82
 
83
+ synchronize def scale!(percentage)
84
+ raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
85
+ raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)
86
+
87
+ factor = percentage / 100.0
88
+
89
+ @sq_dist *= factor
90
+ @n = (@n * factor).round
91
+ end
92
+
83
93
  def ==(other)
84
94
  return false unless other.is_a?(self.class)
85
95
 
@@ -94,7 +104,7 @@ module NetworkResiliency
94
104
  @sq_dist = 0.0 # sum of squared distance from mean
95
105
  end
96
106
 
97
- MIN_SAMPLE_SIZE = 1000
107
+ MIN_SAMPLE_SIZE = 300
98
108
  MAX_WINDOW_LENGTH = 1000
99
109
  STATS_TTL = 24 * 60 * 60 # 1 day
100
110
  CACHE_TTL = 120 # seconds
@@ -50,7 +50,9 @@ module NetworkResiliency
50
50
  NetworkResiliency.redis.disconnect! if NetworkResiliency.redis.connected?
51
51
 
52
52
  until @shutdown
53
- StatsEngine.sync(NetworkResiliency.redis)
53
+ NetworkResiliency.redis.with_reconnect do
54
+ StatsEngine.sync(NetworkResiliency.redis)
55
+ end
54
56
 
55
57
  sleep(SLEEP_DURATION)
56
58
  end
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.7.11"
2
+ VERSION = "0.7.13"
3
3
  end
@@ -1,3 +1,4 @@
1
+ require "network_resiliency/power_stats"
1
2
  require "network_resiliency/refinements"
2
3
  require "network_resiliency/stats"
3
4
  require "network_resiliency/stats_engine"
@@ -19,7 +20,7 @@ module NetworkResiliency
19
20
  ACTIONS = [ :connect, :request ].freeze
20
21
  ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
21
22
  MODE = [ :observe, :resilient ].freeze
22
- RESILIENCY_SIZE_THRESHOLD = 1_000
23
+ RESILIENCY_SIZE_THRESHOLD = 300
23
24
  SAMPLE_RATE = {
24
25
  timeout: 0.1,
25
26
  stats: 0.1,
@@ -256,6 +257,12 @@ module NetworkResiliency
256
257
  # record stats
257
258
  key = [ adapter, action, destination ].join(":")
258
259
  stats = StatsEngine.add(key, duration)
260
+
261
+ if stats.n > RESILIENCY_SIZE_THRESHOLD * 4
262
+ # downsample to age out old stats
263
+ stats.scale!(50)
264
+ end
265
+
259
266
  tags = {
260
267
  adapter: adapter,
261
268
  destination: destination,
@@ -266,26 +273,28 @@ module NetworkResiliency
266
273
  # ensure Syncer is running
267
274
  Syncer.start
268
275
 
269
- NetworkResiliency.statsd&.distribution(
270
- "network_resiliency.#{action}.stats.n",
271
- stats.n,
272
- tags: tags,
273
- sample_rate: SAMPLE_RATE[:stats],
274
- )
276
+ if rand < SAMPLE_RATE[:stats]
277
+ NetworkResiliency.statsd&.distribution(
278
+ "network_resiliency.#{action}.stats.n",
279
+ stats.n,
280
+ tags: tags,
281
+ sample_rate: SAMPLE_RATE[:stats],
282
+ )
275
283
 
276
- NetworkResiliency.statsd&.distribution(
277
- "network_resiliency.#{action}.stats.avg",
278
- stats.avg,
279
- tags: tags,
280
- sample_rate: SAMPLE_RATE[:stats],
281
- )
284
+ NetworkResiliency.statsd&.distribution(
285
+ "network_resiliency.#{action}.stats.avg",
286
+ stats.avg,
287
+ tags: tags,
288
+ sample_rate: SAMPLE_RATE[:stats],
289
+ )
282
290
 
283
- NetworkResiliency.statsd&.distribution(
284
- "network_resiliency.#{action}.stats.stdev",
285
- stats.stdev,
286
- tags: tags,
287
- sample_rate: SAMPLE_RATE[:stats],
288
- )
291
+ NetworkResiliency.statsd&.distribution(
292
+ "network_resiliency.#{action}.stats.stdev",
293
+ stats.stdev,
294
+ tags: tags,
295
+ sample_rate: SAMPLE_RATE[:stats],
296
+ )
297
+ end
289
298
  end
290
299
 
291
300
  nil
@@ -337,7 +346,7 @@ module NetworkResiliency
337
346
  "network_resiliency.timeout.raised",
338
347
  tags: tags,
339
348
  sample_rate: SAMPLE_RATE[:timeout],
340
- )
349
+ ) if rand < SAMPLE_RATE[:timeout]
341
350
  end
342
351
  else
343
352
  # the specified timeout is less than our expected p99...awkward
@@ -347,7 +356,7 @@ module NetworkResiliency
347
356
  "network_resiliency.timeout.too_low",
348
357
  tags: tags,
349
358
  sample_rate: SAMPLE_RATE[:timeout],
350
- )
359
+ ) if rand < SAMPLE_RATE[:timeout]
351
360
  end
352
361
  else
353
362
  timeouts << p99
@@ -361,7 +370,7 @@ module NetworkResiliency
361
370
  "network_resiliency.timeout.missing",
362
371
  tags: tags,
363
372
  sample_rate: SAMPLE_RATE[:timeout],
364
- )
373
+ ) if rand < SAMPLE_RATE[:timeout]
365
374
  end
366
375
 
367
376
  NetworkResiliency.statsd&.distribution(
@@ -372,7 +381,7 @@ module NetworkResiliency
372
381
  destination: destination,
373
382
  },
374
383
  sample_rate: SAMPLE_RATE[:timeout],
375
- )
384
+ ) if rand < SAMPLE_RATE[:timeout]
376
385
 
377
386
  case units
378
387
  when nil, :ms, :milliseconds
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.11
4
+ version: 0.7.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-20 00:00:00.000000000 Z
11
+ date: 2024-03-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -238,6 +238,7 @@ files:
238
238
  - lib/network_resiliency/adapter/postgres.rb
239
239
  - lib/network_resiliency/adapter/rails.rb
240
240
  - lib/network_resiliency/adapter/redis.rb
241
+ - lib/network_resiliency/power_stats.rb
241
242
  - lib/network_resiliency/refinements.rb
242
243
  - lib/network_resiliency/stats.rb
243
244
  - lib/network_resiliency/stats_engine.rb