network_resiliency 0.7.11 → 0.7.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1eec5786e7daaea4b6f3c285dffc1e4bf1c70d7096ce69d44f4cc4ea80111ea5
4
- data.tar.gz: bc3ca5e592f1a809bfb28453e43d49a64405a424861aef2c6c6745ef2f27ebb4
3
+ metadata.gz: 93c15344f1a3f02c3ef567baeb1650745b27286a92db1975368a2066e29df423
4
+ data.tar.gz: '092ee5a69a9f6b3e2e299946404c83353bb142e7d6d596d7fe4d556640500c5f'
5
5
  SHA512:
6
- metadata.gz: fa1af69b1f175fb708247ddd0e9501ae2b945909197b26c343347e113bb78e3e645c5217c5f74d63fda780a027b730746b8641b9f77cb5c31626bc2f985c60ff
7
- data.tar.gz: e9733ddd7978b321f8c49218de503f42b300aa269757559cac442dcefb43e0b136e1507360629e9aad1976cedd31c7c0ccd4e9a256c41962a990e50f046d3979
6
+ metadata.gz: 9a84c796661456dd272fa052647ec99991956f5d69435a8dd046a93634de0bbfdc5100cd7cb9b8dfb54d3641d18c71cb5b0728c70448ed6de40fa2bd181eb02e
7
+ data.tar.gz: d9487add703cd5c2ef71abbed8d913619338e240faceaaeac8621c58dbfe2af6203029328f37875f75315d94f91498a6283e252f3c3703fefd12b546e46745c1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ### v0.7.12 (2024-02-23)
2
+ - improve metric sampling
3
+ - lower resiliency threshold
4
+ - power buckets
5
+
1
6
  ### v0.7.11 (2024-02-20)
2
7
  - lower dynamic timeout
3
8
  - fix metric specs
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- network_resiliency (0.7.11)
4
+ network_resiliency (0.7.12)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
data/README.md CHANGED
@@ -39,3 +39,8 @@ Yes please :)
39
39
  https://github.com/lostisland/faraday-retry/blob/main/lib/faraday/retry/middleware.rb
40
40
 
41
41
  https://github.com/ankane/the-ultimate-guide-to-ruby-timeouts
42
+
43
+
44
+ https://reprep.io/writings/20220326_timeouts_deadline_propagation.html
45
+
46
+ https://grpc.io/blog/deadlines
@@ -0,0 +1,132 @@
1
+ require "network_resiliency/refinements"
2
+
3
+ using NetworkResiliency::Refinements
4
+
5
module NetworkResiliency
  # Approximate distribution tracker that counts samples in power-of-ten
  # buckets.
  #
  # A sample +v+ (clamped up to MIN_VALUE) is counted in bucket
  # +ceil(log10(v))+, so bucket +i+ covers values in the range
  # (10**(i-1), 10**i] and bucket 0 holds everything <= 1.  Percentiles are
  # reported as the upper bound (10**i) of the selected bucket.
  #
  # Instance methods wrapped with the +synchronize+ macro run while holding
  # the instance's own mutex.
  class PowerStats
    MIN_VALUE = 1
    LOCK = Thread::Mutex.new # guards STATS
    STATS = {}

    # @return [Integer] number of samples currently represented
    attr_reader :n

    class << self
      # Fetches the instance registered under +key+, creating it on first use.
      #
      # @param key [Object] registry key
      # @return [PowerStats]
      def [](key)
        LOCK.synchronize { STATS[key] ||= new }
      end

      # Empties the registry of keyed instances.
      def reset
        LOCK.synchronize { STATS.clear }
      end

      private

      # Macro: redefines the instance method +fn_name+ so that its body runs
      # inside the instance's @lock.  Intended use: `synchronize def foo`.
      #
      # @param fn_name [Symbol] name of an already defined instance method
      def synchronize(fn_name)
        fn = instance_method(fn_name)

        define_method(fn_name) do |*args|
          @lock.synchronize { fn.bind(self).call(*args) }
        end
      end
    end

    # @param values [Array<Numeric>] initial samples
    def initialize(values = [])
      @lock = Thread::Mutex.new
      reset

      values.each { |x| add(x) }
    end

    # Invoked by #dup / #clone.  A shallow copy would share the bucket array
    # and the mutex with the source, so in-place updates on the copy (as done
    # by #merge via #merge!) would silently corrupt the source.
    def initialize_copy(source)
      super
      @lock = Thread::Mutex.new
      @buckets = @buckets.dup
    end

    # Appends a sample, an array of samples, or another PowerStats.
    #
    # @param value [Numeric, Array<Numeric>, PowerStats]
    # @return [self]
    def <<(value)
      case value
      when Array
        value.each { |x| add(x) }
      when self.class
        merge!(value)
      else
        add(value)
      end

      self
    end

    # Records a single sample.
    #
    # @param value [Numeric] sample; anything below MIN_VALUE is counted as MIN_VALUE
    # @raise [ArgumentError] if +value+ is not Numeric
    synchronize def add(value)
      raise ArgumentError, "Numeric expected, found #{value.class}" unless value.is_a?(Numeric)

      value = [ value, MIN_VALUE ].max
      i = Math.log10(value).ceil

      @buckets[i] ||= 0
      @buckets[i] += 1
      @n += 1
    end

    # Estimates the +p+-th percentile as the upper bound of a bucket.
    #
    # Walks the buckets from the largest downwards, skipping buckets while the
    # number of samples skipped stays below floor((100 - p)% of n).
    #
    # @param p [Numeric] percentile, 0..100
    # @return [Integer] 10**i for the selected bucket, or 0 when there are no samples
    # @raise [ArgumentError] if +p+ is not Numeric or is outside 0..100
    synchronize def percentile(p)
      raise ArgumentError, "Numeric expected, found #{p.class}" unless p.is_a?(Numeric)
      raise ArgumentError, "Percentile must be between 0 and 100" unless p.between?(0, 100)

      return 0 if @n == 0

      # multiply before dividing: (100 - p) / 100.0 * @n loses precision,
      # e.g. 0.29 * 100 == 28.999999999999996, which would floor to 28
      threshold = ((100 - p) * @n / 100.0).floor

      (@buckets.size - 1).downto(0) do |index|
        count = @buckets[index]

        # nil and zero both mean "no samples here"; zeros can appear after
        # scaling and must not be reported as a populated bucket
        next unless count && count > 0

        return 10 ** index if count >= threshold

        threshold -= count
      end

      # not reachable while @n equals the sum of the bucket counts
      MIN_VALUE
    end
    alias_method :p, :percentile

    # @return [Integer] shorthand for percentile(99)
    def p99
      percentile(99)
    end

    # Non-destructive merge.
    #
    # @param other [PowerStats]
    # @return [PowerStats] a new instance combining both; the receiver is untouched
    def merge(other)
      dup.merge!(other)
    end
    alias_method :+, :merge

    # Adds the bucket counts of +other+ into the receiver.
    #
    # @param other [PowerStats]
    # @return [self]
    # @raise [ArgumentError] if +other+ is not a PowerStats
    synchronize def merge!(other)
      raise ArgumentError unless other.is_a?(self.class)

      other_buckets = other.instance_variable_get(:@buckets)

      if @n == 0
        @n = other.n
        @buckets = other_buckets.dup
      elsif other.n > 0
        @n += other.n

        other_buckets.each_with_index do |count, i|
          next unless count

          @buckets[i] ||= 0
          @buckets[i] += count
        end
      end

      self
    end

    # Shrinks every bucket to +percentage+ percent of its count (rounded) and
    # recomputes the sample count from the result.
    #
    # @param percentage [Numeric] 0..100
    # @return [Integer] the new sample count
    # @raise [ArgumentError] if +percentage+ is not Numeric or is outside 0..100
    synchronize def scale!(percentage)
      raise ArgumentError, "Numeric expected, found #{percentage.class}" unless percentage.is_a?(Numeric)
      raise ArgumentError, "argument must be between 0 and 100" unless percentage.between?(0, 100)

      factor = percentage / 100.0

      @buckets.map! do |count|
        next unless count

        scaled = (count * factor).round
        # a bucket that rounds down to nothing becomes empty (nil) again
        scaled if scaled > 0
      end
      @n = @buckets.compact.sum
    end

    # Discards all samples.
    synchronize def reset
      @n = 0
      @buckets = []
    end
  end
end
@@ -80,6 +80,16 @@ module NetworkResiliency
80
80
  self
81
81
  end
82
82
 
83
# Scales the accumulated sample down to +percentage+ percent: both the sum of
# squared distances and the sample count are multiplied by percentage / 100,
# the count being rounded to an integer.
#
# @param percentage [Numeric] 0..100
# @return [Integer] the new sample count
# @raise [ArgumentError] if +percentage+ is not Numeric or is outside 0..100
synchronize def scale!(percentage)
  unless percentage.is_a?(Numeric)
    raise ArgumentError, "Numeric expected, found #{percentage.class}"
  end

  unless percentage.between?(0, 100)
    raise ArgumentError, "argument must be between 0 and 100"
  end

  ratio = percentage / 100.0

  @sq_dist *= ratio
  @n = (@n * ratio).round
end
92
+
83
93
  def ==(other)
84
94
  return false unless other.is_a?(self.class)
85
95
 
@@ -94,7 +104,7 @@ module NetworkResiliency
94
104
  @sq_dist = 0.0 # sum of squared distance from mean
95
105
  end
96
106
 
97
- MIN_SAMPLE_SIZE = 1000
107
+ MIN_SAMPLE_SIZE = 300
98
108
  MAX_WINDOW_LENGTH = 1000
99
109
  STATS_TTL = 24 * 60 * 60 # 1 day
100
110
  CACHE_TTL = 120 # seconds
@@ -1,3 +1,3 @@
1
1
  module NetworkResiliency
2
- VERSION = "0.7.11"
2
+ VERSION = "0.7.12"
3
3
  end
@@ -1,3 +1,4 @@
1
+ require "network_resiliency/power_stats"
1
2
  require "network_resiliency/refinements"
2
3
  require "network_resiliency/stats"
3
4
  require "network_resiliency/stats_engine"
@@ -19,7 +20,7 @@ module NetworkResiliency
19
20
  ACTIONS = [ :connect, :request ].freeze
20
21
  ADAPTERS = [ :http, :faraday, :redis, :mysql, :postgres, :rails ].freeze
21
22
  MODE = [ :observe, :resilient ].freeze
22
- RESILIENCY_SIZE_THRESHOLD = 1_000
23
+ RESILIENCY_SIZE_THRESHOLD = 300
23
24
  SAMPLE_RATE = {
24
25
  timeout: 0.1,
25
26
  stats: 0.1,
@@ -266,26 +267,28 @@ module NetworkResiliency
266
267
  # ensure Syncer is running
267
268
  Syncer.start
268
269
 
269
- NetworkResiliency.statsd&.distribution(
270
- "network_resiliency.#{action}.stats.n",
271
- stats.n,
272
- tags: tags,
273
- sample_rate: SAMPLE_RATE[:stats],
274
- )
270
+ if rand < SAMPLE_RATE[:stats]
271
+ NetworkResiliency.statsd&.distribution(
272
+ "network_resiliency.#{action}.stats.n",
273
+ stats.n,
274
+ tags: tags,
275
+ sample_rate: SAMPLE_RATE[:stats],
276
+ )
275
277
 
276
- NetworkResiliency.statsd&.distribution(
277
- "network_resiliency.#{action}.stats.avg",
278
- stats.avg,
279
- tags: tags,
280
- sample_rate: SAMPLE_RATE[:stats],
281
- )
278
+ NetworkResiliency.statsd&.distribution(
279
+ "network_resiliency.#{action}.stats.avg",
280
+ stats.avg,
281
+ tags: tags,
282
+ sample_rate: SAMPLE_RATE[:stats],
283
+ )
282
284
 
283
- NetworkResiliency.statsd&.distribution(
284
- "network_resiliency.#{action}.stats.stdev",
285
- stats.stdev,
286
- tags: tags,
287
- sample_rate: SAMPLE_RATE[:stats],
288
- )
285
+ NetworkResiliency.statsd&.distribution(
286
+ "network_resiliency.#{action}.stats.stdev",
287
+ stats.stdev,
288
+ tags: tags,
289
+ sample_rate: SAMPLE_RATE[:stats],
290
+ )
291
+ end
289
292
  end
290
293
 
291
294
  nil
@@ -337,7 +340,7 @@ module NetworkResiliency
337
340
  "network_resiliency.timeout.raised",
338
341
  tags: tags,
339
342
  sample_rate: SAMPLE_RATE[:timeout],
340
- )
343
+ ) if rand < SAMPLE_RATE[:timeout]
341
344
  end
342
345
  else
343
346
  # the specified timeout is less than our expected p99...awkward
@@ -347,7 +350,7 @@ module NetworkResiliency
347
350
  "network_resiliency.timeout.too_low",
348
351
  tags: tags,
349
352
  sample_rate: SAMPLE_RATE[:timeout],
350
- )
353
+ ) if rand < SAMPLE_RATE[:timeout]
351
354
  end
352
355
  else
353
356
  timeouts << p99
@@ -361,7 +364,7 @@ module NetworkResiliency
361
364
  "network_resiliency.timeout.missing",
362
365
  tags: tags,
363
366
  sample_rate: SAMPLE_RATE[:timeout],
364
- )
367
+ ) if rand < SAMPLE_RATE[:timeout]
365
368
  end
366
369
 
367
370
  NetworkResiliency.statsd&.distribution(
@@ -372,7 +375,7 @@ module NetworkResiliency
372
375
  destination: destination,
373
376
  },
374
377
  sample_rate: SAMPLE_RATE[:timeout],
375
- )
378
+ ) if rand < SAMPLE_RATE[:timeout]
376
379
 
377
380
  case units
378
381
  when nil, :ms, :milliseconds
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: network_resiliency
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.11
4
+ version: 0.7.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Pepper
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-20 00:00:00.000000000 Z
11
+ date: 2024-02-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -238,6 +238,7 @@ files:
238
238
  - lib/network_resiliency/adapter/postgres.rb
239
239
  - lib/network_resiliency/adapter/rails.rb
240
240
  - lib/network_resiliency/adapter/redis.rb
241
+ - lib/network_resiliency/power_stats.rb
241
242
  - lib/network_resiliency/refinements.rb
242
243
  - lib/network_resiliency/stats.rb
243
244
  - lib/network_resiliency/stats_engine.rb