ring-sqa 0.0.18 → 0.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daa3666fef098532d9a3d8758e1f106ce9d0e63e
4
- data.tar.gz: 60959c28ce98980b6045b9708a2e9e348c4504f8
3
+ metadata.gz: 3118fdb4ee6c8ef1701ba4290bdee0c82bcb07db
4
+ data.tar.gz: 5dc539f039179912322562d3354ceec8b55b99b5
5
5
  SHA512:
6
- metadata.gz: 8d81bd070959777b765c7654e8912f624cc2293f9ebde5edee4305a6b03545e49d59e3366dd7d2153dcefac13886415bd29fd5a0e9486fca04c1436e3816bb2d
7
- data.tar.gz: 9d9b16dbfe7f85ed3420e0f9af3a095590fc1191bd178ccb6d785e35ee6ef03fdcd46c969689e99c32599f02b56198e9319566d14ec1d78e369d5c5df7217b59
6
+ metadata.gz: bcdfe20771cb15f22135512d9861a7c56b3eafbd185e42f3cdfefa3ed7950f104e3bcda33d3026713a90d5250efba22e3449d02fb1f47cd6023f6e177bcb070e
7
+ data.tar.gz: eee37b25fb65db3036e90ebc5327c093bbce7f2c67a453e16a8ebc42f5e59e0d1d661b108091812e5e80bb35c3f2d93248747235d0f4bd4f22ebd4f737dedaec
@@ -30,8 +30,7 @@ class SQA
30
30
 
31
31
  private
32
32
 
33
- def initialize database
34
- @db = database
33
+ def initialize
35
34
  @methods = []
36
35
  @methods << Email.new if CFG.email.to?
37
36
  @methods << UDP2IRC.new if CFG.irc.password?
@@ -42,19 +41,18 @@ class SQA
42
41
  def compose_message alarm_buffer
43
42
  exceeding_nodes = alarm_buffer.exceeding_nodes
44
43
  msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
45
- nodes = NodesJSON.new
44
+ nodes_json = NodesJSON.new
45
+ exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
46
46
 
47
47
  nodes_list = ''
48
- exceeding_nodes.sort!.each do |node|
49
- json = nodes.get node
50
- nodes_list << "- %-30s %14s AS%5s %2s\n" % [json['hostname'], node, json['asn'], json['countrycode']]
48
+ exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
49
+ nodes_list << "- %-35s %15s AS%-6s %2s\n" % [node[:name], node[:ip], node[:as], node[:cc]]
51
50
  end
52
51
 
53
52
  mtr_list = ''
54
53
  exceeding_nodes.sample(3).each do |node|
55
- json = nodes.get node
56
- mtr_list << "%-30s AS%5s (%2s)\n" % [json['hostname'], json['asn'], json['countrycode']]
57
- mtr_list << MTR.run(node)
54
+ mtr_list << "%-35s AS%-6s (%2s)\n" % [node[:name], node[:as], node[:cc]]
55
+ mtr_list << MTR.run(node[:ip])
58
56
  mtr_list << "\n"
59
57
  end
60
58
 
@@ -62,35 +60,36 @@ class SQA
62
60
  time = alarm_buffer.array.size-1
63
61
  alarm_buffer.array.each do |ary|
64
62
  buffer_list << "%2s min ago %3s measurements failed" % [time, ary.size/2]
65
- type = time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n"
66
- buffer_list << type
63
+ buffer_list << (time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n")
67
64
  time -= 1
68
65
  end
69
66
 
70
67
  msg[:long] = <<EOF
71
- This is an automated alert from the distributed partial outage
68
+ Regarding: #{hostname}
69
+
70
+ This is an automated alert from the distributed partial outage
72
71
  monitoring system "RING SQA".
73
72
 
74
- At #{Time.now.utc} the following measurements were analysed
75
- as indicating that there is a high probability your NLNOG RING node
76
- cannot reach the entire internet. Possible causes could be an outage
73
+ At #{Time.now.utc} the following measurements were analysed
74
+ as indicating that there is a high probability your NLNOG RING node
75
+ cannot reach the entire internet. Possible causes could be an outage
77
76
  in your upstream's or peer's network.
78
77
 
79
- The following nodes previously were reachable, but became unreachable
78
+ The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
80
79
  over the course of the last 3 minutes:
81
80
 
82
81
  #{nodes_list}
83
82
 
84
- As a debug starting point 3 traceroutes were launched right after
83
+ As a debug starting point 3 traceroutes were launched right after
85
84
  detecting the event, they might assist in pinpointing what broke:
86
85
 
87
86
  #{mtr_list}
88
87
 
89
- An alarm is raised under the following conditions: every 30 seconds
90
- your node pings all other nodes. The amount of nodes that cannot be
91
- reached is stored in a circular buffer, with each element representing
92
- a minute of measurements. In the event that the last three minutes are
93
- #{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
88
+ An alarm is raised under the following conditions: every 30 seconds
89
+ your node pings all other nodes. The amount of nodes that cannot be
90
+ reached is stored in a circular buffer, with each element representing
91
+ a minute of measurements. In the event that the last three minutes are
92
+ #{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
94
93
  outage is assumed. The ring buffer's output is as following:
95
94
 
96
95
  #{buffer_list}
@@ -30,39 +30,38 @@ class SQA
30
30
  def initialize database, nodes
31
31
  @db = database
32
32
  @nodes = nodes
33
- @alarm = Alarm.new @db
34
- @buffer = AnalyzeBuffer.new
33
+ @alarm = Alarm.new
34
+ @buffer = AnalyzeBuffer.new @nodes.list.size
35
35
  @db_id_seen = 0
36
36
  end
37
37
  end
38
38
 
39
39
  class AnalyzeBuffer
40
40
  attr_reader :array
41
- def initialize max_size=30
42
- @max_size = max_size
43
- init_nodes = Array.new 99, ''
44
- @array = Array.new max_size, init_nodes
41
+ def initialize nodes_count, max_size=30, median_of=27
42
+ @max_size = max_size
43
+ @median_of = median_of
44
+ init_nodes = Array.new nodes_count * 2, ''
45
+ @array = Array.new max_size, init_nodes
45
46
  end
46
47
  def push e
47
48
  @array.shift
48
49
  @array.push e
49
50
  end
50
- def median of_first=27
51
- of_first = of_first-1
52
- middle = of_first/2
53
- node_count[0..of_first].sort[middle]
51
+ def median
52
+ last = @median_of-1
53
+ node_count[0..last].sort[last/2]
54
54
  end
55
- def exceed_median? last=3, tolerance=CFG.analyzer.tolerance
56
- first = @max_size-last
55
+ def exceed_median? tolerance=CFG.analyzer.tolerance
57
56
  violate = (median+1)*tolerance
58
- node_count[first..-1].all? { |e| e > violate }
57
+ node_count[@median_of..-1].all? { |e| e > violate }
59
58
  end
60
59
  def node_count
61
60
  @array.map { |nodes| nodes.size }
62
61
  end
63
62
  def exceeding_nodes
64
- exceed = @array[27] & @array[28] & @array[29]
65
- exceed - @array[0..26].flatten.uniq
63
+ exceed = @array[@median_of..-1].inject :&
64
+ exceed - @array[0..@median_of-1].flatten.uniq
66
65
  end
67
66
  end
68
67
 
@@ -24,6 +24,7 @@ class SQA
24
24
  end
25
25
 
26
26
  def get_list
27
+ Log.info "loading #{FILE}"
27
28
  list = []
28
29
  File.read(FILE).lines.each do |line|
29
30
  entry = line.split(/\s+/)
@@ -5,7 +5,13 @@ class SQA
5
5
 
6
6
  class NodesJSON
7
7
  def get node
8
- (@nodes[node] or {})
8
+ json = (@nodes[node] or {})
9
+ {
10
+ name: json['hostname'],
11
+ ip: node,
12
+ as: json['asn'],
13
+ cc: json['countrycode'],
14
+ }
9
15
  rescue
10
16
  {}
11
17
  end
data/ring-sqa.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'ring-sqa'
3
- s.version = '0.0.18'
3
+ s.version = '0.0.19'
4
4
  s.licenses = %w( Apache-2.0 )
5
5
  s.platform = Gem::Platform::RUBY
6
6
  s.authors = [ 'Saku Ytti', 'Job Snijders' ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ring-sqa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Saku Ytti
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-24 00:00:00.000000000 Z
12
+ date: 2014-07-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: slop