ring-sqa 0.0.18 → 0.0.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: daa3666fef098532d9a3d8758e1f106ce9d0e63e
4
- data.tar.gz: 60959c28ce98980b6045b9708a2e9e348c4504f8
3
+ metadata.gz: 3118fdb4ee6c8ef1701ba4290bdee0c82bcb07db
4
+ data.tar.gz: 5dc539f039179912322562d3354ceec8b55b99b5
5
5
  SHA512:
6
- metadata.gz: 8d81bd070959777b765c7654e8912f624cc2293f9ebde5edee4305a6b03545e49d59e3366dd7d2153dcefac13886415bd29fd5a0e9486fca04c1436e3816bb2d
7
- data.tar.gz: 9d9b16dbfe7f85ed3420e0f9af3a095590fc1191bd178ccb6d785e35ee6ef03fdcd46c969689e99c32599f02b56198e9319566d14ec1d78e369d5c5df7217b59
6
+ metadata.gz: bcdfe20771cb15f22135512d9861a7c56b3eafbd185e42f3cdfefa3ed7950f104e3bcda33d3026713a90d5250efba22e3449d02fb1f47cd6023f6e177bcb070e
7
+ data.tar.gz: eee37b25fb65db3036e90ebc5327c093bbce7f2c67a453e16a8ebc42f5e59e0d1d661b108091812e5e80bb35c3f2d93248747235d0f4bd4f22ebd4f737dedaec
@@ -30,8 +30,7 @@ class SQA
30
30
 
31
31
  private
32
32
 
33
- def initialize database
34
- @db = database
33
+ def initialize
35
34
  @methods = []
36
35
  @methods << Email.new if CFG.email.to?
37
36
  @methods << UDP2IRC.new if CFG.irc.password?
@@ -42,19 +41,18 @@ class SQA
42
41
  def compose_message alarm_buffer
43
42
  exceeding_nodes = alarm_buffer.exceeding_nodes
44
43
  msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
45
- nodes = NodesJSON.new
44
+ nodes_json = NodesJSON.new
45
+ exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
46
46
 
47
47
  nodes_list = ''
48
- exceeding_nodes.sort!.each do |node|
49
- json = nodes.get node
50
- nodes_list << "- %-30s %14s AS%5s %2s\n" % [json['hostname'], node, json['asn'], json['countrycode']]
48
+ exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
49
+ nodes_list << "- %-35s %15s AS%-6s %2s\n" % [node[:name], node[:ip], node[:as], node[:cc]]
51
50
  end
52
51
 
53
52
  mtr_list = ''
54
53
  exceeding_nodes.sample(3).each do |node|
55
- json = nodes.get node
56
- mtr_list << "%-30s AS%5s (%2s)\n" % [json['hostname'], json['asn'], json['countrycode']]
57
- mtr_list << MTR.run(node)
54
+ mtr_list << "%-35s AS%-6s (%2s)\n" % [node[:name], node[:as], node[:cc]]
55
+ mtr_list << MTR.run(node[:ip])
58
56
  mtr_list << "\n"
59
57
  end
60
58
 
@@ -62,35 +60,36 @@ class SQA
62
60
  time = alarm_buffer.array.size-1
63
61
  alarm_buffer.array.each do |ary|
64
62
  buffer_list << "%2s min ago %3s measurements failed" % [time, ary.size/2]
65
- type = time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n"
66
- buffer_list << type
63
+ buffer_list << (time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n")
67
64
  time -= 1
68
65
  end
69
66
 
70
67
  msg[:long] = <<EOF
71
- This is an automated alert from the distributed partial outage
68
+ Regarding: #{hostname}
69
+
70
+ This is an automated alert from the distributed partial outage
72
71
  monitoring system "RING SQA".
73
72
 
74
- At #{Time.now.utc} the following measurements were analysed
75
- as indicating that there is a high probability your NLNOG RING node
76
- cannot reach the entire internet. Possible causes could be an outage
73
+ At #{Time.now.utc} the following measurements were analysed
74
+ as indicating that there is a high probability your NLNOG RING node
75
+ cannot reach the entire internet. Possible causes could be an outage
77
76
  in your upstream's or peer's network.
78
77
 
79
- The following nodes previously were reachable, but became unreachable
78
+ The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
80
79
  over the course of the last 3 minutes:
81
80
 
82
81
  #{nodes_list}
83
82
 
84
- As a debug starting point 3 traceroutes were launched right after
83
+ As a debug starting point 3 traceroutes were launched right after
85
84
  detecting the event, they might assist in pinpointing what broke:
86
85
 
87
86
  #{mtr_list}
88
87
 
89
- An alarm is raised under the following conditions: every 30 seconds
90
- your node pings all other nodes. The amount of nodes that cannot be
91
- reached is stored in a circular buffer, with each element representing
92
- a minute of measurements. In the event that the last three minutes are
93
- #{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
88
+ An alarm is raised under the following conditions: every 30 seconds
89
+ your node pings all other nodes. The amount of nodes that cannot be
90
+ reached is stored in a circular buffer, with each element representing
91
+ a minute of measurements. In the event that the last three minutes are
92
+ #{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
94
93
  outage is assumed. The ring buffer's output is as following:
95
94
 
96
95
  #{buffer_list}
@@ -30,39 +30,38 @@ class SQA
30
30
  def initialize database, nodes
31
31
  @db = database
32
32
  @nodes = nodes
33
- @alarm = Alarm.new @db
34
- @buffer = AnalyzeBuffer.new
33
+ @alarm = Alarm.new
34
+ @buffer = AnalyzeBuffer.new @nodes.list.size
35
35
  @db_id_seen = 0
36
36
  end
37
37
  end
38
38
 
39
39
  class AnalyzeBuffer
40
40
  attr_reader :array
41
- def initialize max_size=30
42
- @max_size = max_size
43
- init_nodes = Array.new 99, ''
44
- @array = Array.new max_size, init_nodes
41
+ def initialize nodes_count, max_size=30, median_of=27
42
+ @max_size = max_size
43
+ @median_of = median_of
44
+ init_nodes = Array.new nodes_count * 2, ''
45
+ @array = Array.new max_size, init_nodes
45
46
  end
46
47
  def push e
47
48
  @array.shift
48
49
  @array.push e
49
50
  end
50
- def median of_first=27
51
- of_first = of_first-1
52
- middle = of_first/2
53
- node_count[0..of_first].sort[middle]
51
+ def median
52
+ last = @median_of-1
53
+ node_count[0..last].sort[last/2]
54
54
  end
55
- def exceed_median? last=3, tolerance=CFG.analyzer.tolerance
56
- first = @max_size-last
55
+ def exceed_median? tolerance=CFG.analyzer.tolerance
57
56
  violate = (median+1)*tolerance
58
- node_count[first..-1].all? { |e| e > violate }
57
+ node_count[@median_of..-1].all? { |e| e > violate }
59
58
  end
60
59
  def node_count
61
60
  @array.map { |nodes| nodes.size }
62
61
  end
63
62
  def exceeding_nodes
64
- exceed = @array[27] & @array[28] & @array[29]
65
- exceed - @array[0..26].flatten.uniq
63
+ exceed = @array[@median_of..-1].inject :&
64
+ exceed - @array[0..@median_of-1].flatten.uniq
66
65
  end
67
66
  end
68
67
 
@@ -24,6 +24,7 @@ class SQA
24
24
  end
25
25
 
26
26
  def get_list
27
+ Log.info "loading #{FILE}"
27
28
  list = []
28
29
  File.read(FILE).lines.each do |line|
29
30
  entry = line.split(/\s+/)
@@ -5,7 +5,13 @@ class SQA
5
5
 
6
6
  class NodesJSON
7
7
  def get node
8
- (@nodes[node] or {})
8
+ json = (@nodes[node] or {})
9
+ {
10
+ name: json['hostname'],
11
+ ip: node,
12
+ as: json['asn'],
13
+ cc: json['countrycode'],
14
+ }
9
15
  rescue
10
16
  {}
11
17
  end
data/ring-sqa.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'ring-sqa'
3
- s.version = '0.0.18'
3
+ s.version = '0.0.19'
4
4
  s.licenses = %w( Apache-2.0 )
5
5
  s.platform = Gem::Platform::RUBY
6
6
  s.authors = [ 'Saku Ytti', 'Job Snijders' ]
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ring-sqa
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.0.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Saku Ytti
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-24 00:00:00.000000000 Z
12
+ date: 2014-07-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: slop