network_resiliency 0.7.10 → 0.7.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a9ddcd8230d4e9ac39deb2aaea8dd4deca88043515a4e0a5e9e992c8ef4d45a2
4
- data.tar.gz: 3784f0deb54c42adcbe6507b3883ee76976a1580a561832d47c61809263c46f5
3
+ metadata.gz: 93c15344f1a3f02c3ef567baeb1650745b27286a92db1975368a2066e29df423
4
+ data.tar.gz: '092ee5a69a9f6b3e2e299946404c83353bb142e7d6d596d7fe4d556640500c5f'
5
5
  SHA512:
6
- metadata.gz: '0693d64108424d5d78087728f65a2c7aea2d3e891e4c0f2297ab6ada7278f3837413fbe7f343852dc88bbc0f883601d2f5b9c617ee2bc152380edbacfbfa0c2c'
7
- data.tar.gz: c358bf2728f3892c310e90823ef6fb64dadbca5ab2ad1840e960dcc5cb1c17fc279e8f4cadc8b140727a13a713861b91552d60c30f48749c3909d8c4f8c03377
6
+ metadata.gz: 9a84c796661456dd272fa052647ec99991956f5d69435a8dd046a93634de0bbfdc5100cd7cb9b8dfb54d3641d18c71cb5b0728c70448ed6de40fa2bd181eb02e
7
+ data.tar.gz: d9487add703cd5c2ef71abbed8d913619338e240faceaaeac8621c58dbfe2af6203029328f37875f75315d94f91498a6283e252f3c3703fefd12b546e46745c1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,13 @@
1
+ ### v0.7.12 (2024-02-23)
2
+ - improve metric sampling
3
+ - lower resiliency threshold
4
+ - power buckets
5
+
6
+ ### v0.7.11 (2024-02-20)
7
+ - lower dynamic timeout
8
+ - fix metric specs
9
+ - extend cache ttl
10
+
1
11
  ### v0.7.10 (2024-02-15)
2
12
  - ddog sampling
3
13
  - reconnect redis
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.7.10)
4
+ network_resiliency (0.7.12)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -39,3 +39,8 @@ Yes please :)
39
39
  https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
40
40
 
41
41
  https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
42
+
43
+
44
+ https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
45
+
46
+ https://grpc.io/blog/deadlines
@@ -0,0 +1,132 @@
1
+ require "network_resiliency/refinements"
2
+
3
+ using NetworkResiliency::Refinements
4
+
5
module NetworkResiliency
  # Thread-safe histogram that counts samples in power-of-ten buckets.
  #
  # Every sample is clamped to at least MIN_VALUE and counted in bucket
  # +i = ceil(log10(value))+, so bucket +i+ holds values in the range
  # (10^(i-1), 10^i] and bucket 0 holds everything <= 1.  Percentiles are
  # reported as the upper bound (10^i) of the bucket they fall into.
  class PowerStats
    # smallest value recorded; anything lower is clamped up to this
    MIN_VALUE = 1
    # guards the STATS registry
    LOCK = Thread::Mutex.new
    # process-wide registry of named instances, see PowerStats.[]
    STATS = {}

    # @return [Integer] number of samples currently recorded
    attr_reader :n

    class << self
      # Fetch the instance registered under +key+, creating it on first use.
      #
      # @param key [Object] registry key
      # @return [PowerStats]
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Drop every registered instance.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Method decorator: redefine the instance method +fn_name+ so that its
      # body runs while holding the instance's @lock.
      #
      # @param fn_name [Symbol] name of an already defined instance method
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial samples
    def initialize(values = [])
      @lock = Thread::Mutex.new
      reset

      values.each {|x| add(x) }
    end

    # Append a single sample, an Array of samples, or merge in another
    # PowerStats.
    #
    # @param value [Numeric, Array<Numeric>, PowerStats]
    # @return [self]
    def <<(value)
      case value
      when Array
        value.each {|x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Record one sample.
    #
    # @param value [Numeric]
    # @raise [ArgumentError] if +value+ is not Numeric
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Upper bound (a power of ten) of the bucket containing the +p+-th
    # percentile, or 0 when no samples have been recorded.
    #
    # @param p [Numeric] percentile, between 0 and 100
    # @return [Integer]
    # @raise [ArgumentError] if +p+ is outside 0..100
    synchronize def percentile(p)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      # number of samples that are allowed to sit above the answer
      threshold = ((100 - p) / 100.0 * @n).floor
      index = @buckets.size - 1

      # walk from the largest bucket downwards
      while index >= 0
        count = @buckets[index]

        # Skip buckets that hold no samples: nil gaps, and counts that
        # scale! rounded down to zero.  Without the zero check a threshold
        # of 0 would stop at an empty bucket and report a magnitude that no
        # sample actually has.
        if count && count > 0
          break if count >= threshold

          threshold -= count
        end

        index -= 1
      end

      10 ** index
    end
    alias_method :p, :percentile

    # @return [Integer] shorthand for percentile(99)
    def p99
      percentile(99)
    end

    # Non-destructive merge; the receiver and +other+ are left untouched.
    #
    # @param other [PowerStats]
    # @return [PowerStats] a new instance holding the samples of both
    # @raise [ArgumentError] if +other+ is not a PowerStats
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Add the bucket counts of +other+ to the receiver.
    #
    # @param other [PowerStats]
    # @return [self]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Shrink every bucket count to +percentage+ percent of its current value
    # (rounded) and recompute the sample count from the result.
    #
    # @param percentage [Numeric] between 0 and 100
    # @return [Integer] the new sample count
    # @raise [ArgumentError] if +percentage+ is not Numeric or out of range
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! {|x| (x * factor).round if x }
      @n = @buckets.compact.sum
    end

    # Forget all recorded samples.
    synchronize def reset
      @n = 0
      @buckets = []
    end

    private

    # Called by dup / clone.  The default shallow copy would share the
    # bucket array (and the mutex) with the source, so mutating the copy,
    # as #merge does through merge!, would silently corrupt the original.
    def initialize_copy(source)
      super

      @lock = Thread::Mutex.new
      @buckets = @buckets.dup
    end
  end
end
@@ -80,6 +80,16 @@ module NetworkResiliency
80
80
  self
81
81
  end
82
82
 
83
# Scale the sample down to +percentage+ percent of its current weight:
# the accumulated squared distance is multiplied by the ratio and the
# sample count is multiplied and rounded.  No other state is modified here.
#
# @param percentage [Numeric] between 0 and 100
# @return [Integer] the new sample count
# @raise [ArgumentError] if +percentage+ is not Numeric or out of range
synchronize def scale!(percentage)
  unless percentage.is_a?(Numeric)
    raise ArgumentError, "Numeric expected, found #{percentage.class}"
  end

  unless percentage.between?(0, 100)
    raise ArgumentError, "argument must be between 0 and 100"
  end

  ratio = percentage / 100.0

  @sq_dist = @sq_dist * ratio
  @n = (@n * ratio).round
end
92
+
83
93
  def ==(other)
84
94
  return false unless other.is_a?(self.class)
85
95
 
@@ -94,10 +104,10 @@ module NetworkResiliency
94
104
  @sq_dist = 0.0 # sum of squared distance from mean
95
105
  end
96
106
 
97
- MIN_SAMPLE_SIZE = 1000
107
+ MIN_SAMPLE_SIZE = 300
98
108
  MAX_WINDOW_LENGTH = 1000
99
109
  STATS_TTL = 24 * 60 * 60 # 1 day
100
- CACHE_TTL = 60 # seconds
110
+ CACHE_TTL = 120 # seconds
101
111
 
102
112
  LUA_SCRIPT = <<~LUA
103
113
  local results = {}
@@ -189,11 +199,9 @@ module NetworkResiliency
189
199
  end
190
200
 
191
201
  res = redis.eval(LUA_SCRIPT, keys, args)
192
- data.keys.zip(res.each_slice(3)).map do |key, stats|
193
- n, avg, sq_dist = *stats
194
-
195
- [ key, Stats.from(n: n, avg: avg, sq_dist: sq_dist) ]
196
- end.to_h
202
+ data.keys.zip(res.each_slice(3)).to_h.transform_values! do |n, avg, sq_dist|
203
+ Stats.from(n: n, avg: avg, sq_dist: sq_dist)
204
+ end
197
205
  end
198
206
 
199
207
  def self.fetch(redis, keys)
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.7.10"
2
+ VERSION = "0.7.12"
3
3
  end
@@ -1,3 +1,4 @@
1
+ require "network_resiliency/power_stats"
1
2
  require "network_resiliency/refinements"
2
3
  require "network_resiliency/stats"
3
4
  require "network_resiliency/stats_engine"
@@ -19,7 +20,7 @@ module NetworkResiliency
19
20
  ACTIONS = [ :connect, :request ].freeze
20
21
  ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
21
22
  MODE = [ :observe, :resilient ].freeze
22
- RESILIENCY_SIZE_THRESHOLD = 1_000
23
+ RESILIENCY_SIZE_THRESHOLD = 300
23
24
  SAMPLE_RATE = {
24
25
  timeout: 0.1,
25
26
  stats: 0.1,
@@ -266,26 +267,28 @@ module NetworkResiliency
266
267
  # ensure Syncer is running
267
268
  Syncer.start
268
269
 
269
- NetworkResiliency.statsd&.distribution(
270
- "network_resiliency.#{action}.stats.n",
271
- stats.n,
272
- tags: tags,
273
- sample_rate: SAMPLE_RATE[:stats],
274
- )
270
+ if rand < SAMPLE_RATE[:stats]
271
+ NetworkResiliency.statsd&.distribution(
272
+ "network_resiliency.#{action}.stats.n",
273
+ stats.n,
274
+ tags: tags,
275
+ sample_rate: SAMPLE_RATE[:stats],
276
+ )
275
277
 
276
- NetworkResiliency.statsd&.distribution(
277
- "network_resiliency.#{action}.stats.avg",
278
- stats.avg,
279
- tags: tags,
280
- sample_rate: SAMPLE_RATE[:stats],
281
- )
278
+ NetworkResiliency.statsd&.distribution(
279
+ "network_resiliency.#{action}.stats.avg",
280
+ stats.avg,
281
+ tags: tags,
282
+ sample_rate: SAMPLE_RATE[:stats],
283
+ )
282
284
 
283
- NetworkResiliency.statsd&.distribution(
284
- "network_resiliency.#{action}.stats.stdev",
285
- stats.stdev,
286
- tags: tags,
287
- sample_rate: SAMPLE_RATE[:stats],
288
- )
285
+ NetworkResiliency.statsd&.distribution(
286
+ "network_resiliency.#{action}.stats.stdev",
287
+ stats.stdev,
288
+ tags: tags,
289
+ sample_rate: SAMPLE_RATE[:stats],
290
+ )
291
+ end
289
292
  end
290
293
 
291
294
  nil
@@ -316,7 +319,7 @@ module NetworkResiliency
316
319
  destination: destination,
317
320
  }
318
321
 
319
- p99 = (stats.avg + stats.stdev * 3).order_of_magnitude(ceil: true)
322
+ p99 = (stats.avg + stats.stdev * 2).order_of_magnitude(ceil: true)
320
323
 
321
324
  timeouts = []
322
325
 
@@ -337,7 +340,7 @@ module NetworkResiliency
337
340
  "network_resiliency.timeout.raised",
338
341
  tags: tags,
339
342
  sample_rate: SAMPLE_RATE[:timeout],
340
- )
343
+ ) if rand < SAMPLE_RATE[:timeout]
341
344
  end
342
345
  else
343
346
  # the specified timeout is less than our expected p99...awkward
@@ -347,7 +350,7 @@ module NetworkResiliency
347
350
  "network_resiliency.timeout.too_low",
348
351
  tags: tags,
349
352
  sample_rate: SAMPLE_RATE[:timeout],
350
- )
353
+ ) if rand < SAMPLE_RATE[:timeout]
351
354
  end
352
355
  else
353
356
  timeouts << p99
@@ -361,7 +364,7 @@ module NetworkResiliency
361
364
  "network_resiliency.timeout.missing",
362
365
  tags: tags,
363
366
  sample_rate: SAMPLE_RATE[:timeout],
364
- )
367
+ ) if rand < SAMPLE_RATE[:timeout]
365
368
  end
366
369
 
367
370
  NetworkResiliency.statsd&.distribution(
@@ -372,7 +375,7 @@ module NetworkResiliency
372
375
  destination: destination,
373
376
  },
374
377
  sample_rate: SAMPLE_RATE[:timeout],
375
- )
378
+ ) if rand < SAMPLE_RATE[:timeout]
376
379
 
377
380
  case units
378
381
  when nil, :ms, :milliseconds
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.10
4
+ version: 0.7.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-15 00:00:00.000000000 Z
11
+ date: 2024-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -238,6 +238,7 @@ files:
238
238
  - lib/network_resiliency/adapter/postgres.rb
239
239
  - lib/network_resiliency/adapter/rails.rb
240
240
  - lib/network_resiliency/adapter/redis.rb
241
+ - lib/network_resiliency/power_stats.rb
241
242
  - lib/network_resiliency/refinements.rb
242
243
  - lib/network_resiliency/stats.rb
243
244
  - lib/network_resiliency/stats_engine.rb