ring-sqa 0.0.18 → 0.0.19
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ring/sqa/alarm.rb +21 -22
- data/lib/ring/sqa/analyzer.rb +14 -15
- data/lib/ring/sqa/nodes.rb +1 -0
- data/lib/ring/sqa/nodes_json.rb +7 -1
- data/ring-sqa.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3118fdb4ee6c8ef1701ba4290bdee0c82bcb07db
|
4
|
+
data.tar.gz: 5dc539f039179912322562d3354ceec8b55b99b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bcdfe20771cb15f22135512d9861a7c56b3eafbd185e42f3cdfefa3ed7950f104e3bcda33d3026713a90d5250efba22e3449d02fb1f47cd6023f6e177bcb070e
|
7
|
+
data.tar.gz: eee37b25fb65db3036e90ebc5327c093bbce7f2c67a453e16a8ebc42f5e59e0d1d661b108091812e5e80bb35c3f2d93248747235d0f4bd4f22ebd4f737dedaec
|
data/lib/ring/sqa/alarm.rb
CHANGED
@@ -30,8 +30,7 @@ class SQA
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def initialize
|
34
|
-
@db = database
|
33
|
+
def initialize
|
35
34
|
@methods = []
|
36
35
|
@methods << Email.new if CFG.email.to?
|
37
36
|
@methods << UDP2IRC.new if CFG.irc.password?
|
@@ -42,19 +41,18 @@ class SQA
|
|
42
41
|
def compose_message alarm_buffer
|
43
42
|
exceeding_nodes = alarm_buffer.exceeding_nodes
|
44
43
|
msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
|
45
|
-
|
44
|
+
nodes_json = NodesJSON.new
|
45
|
+
exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
|
46
46
|
|
47
47
|
nodes_list = ''
|
48
|
-
exceeding_nodes.
|
49
|
-
|
50
|
-
nodes_list << "- %-30s %14s AS%5s %2s\n" % [json['hostname'], node, json['asn'], json['countrycode']]
|
48
|
+
exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
|
49
|
+
nodes_list << "- %-35s %15s AS%-6s %2s\n" % [node[:name], node[:ip], node[:as], node[:cc]]
|
51
50
|
end
|
52
51
|
|
53
52
|
mtr_list = ''
|
54
53
|
exceeding_nodes.sample(3).each do |node|
|
55
|
-
|
56
|
-
mtr_list <<
|
57
|
-
mtr_list << MTR.run(node)
|
54
|
+
mtr_list << "%-35s AS%-6s (%2s)\n" % [node[:name], node[:as], node[:cc]]
|
55
|
+
mtr_list << MTR.run(node[:ip])
|
58
56
|
mtr_list << "\n"
|
59
57
|
end
|
60
58
|
|
@@ -62,35 +60,36 @@ class SQA
|
|
62
60
|
time = alarm_buffer.array.size-1
|
63
61
|
alarm_buffer.array.each do |ary|
|
64
62
|
buffer_list << "%2s min ago %3s measurements failed" % [time, ary.size/2]
|
65
|
-
|
66
|
-
buffer_list << type
|
63
|
+
buffer_list << (time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n")
|
67
64
|
time -= 1
|
68
65
|
end
|
69
66
|
|
70
67
|
msg[:long] = <<EOF
|
71
|
-
|
68
|
+
Regarding: #{hostname}
|
69
|
+
|
70
|
+
This is an automated alert from the distributed partial outage
|
72
71
|
monitoring system "RING SQA".
|
73
72
|
|
74
|
-
At #{Time.now.utc} the following measurements were analysed
|
75
|
-
as indicating that there is a high probability your NLNOG RING node
|
76
|
-
cannot reach the entire internet. Possible causes could be an outage
|
73
|
+
At #{Time.now.utc} the following measurements were analysed
|
74
|
+
as indicating that there is a high probability your NLNOG RING node
|
75
|
+
cannot reach the entire internet. Possible causes could be an outage
|
77
76
|
in your upstream's or peer's network.
|
78
77
|
|
79
|
-
The following nodes previously were reachable, but became unreachable
|
78
|
+
The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
|
80
79
|
over the course of the last 3 minutes:
|
81
80
|
|
82
81
|
#{nodes_list}
|
83
82
|
|
84
|
-
As a debug starting point 3 traceroutes were launched right after
|
83
|
+
As a debug starting point 3 traceroutes were launched right after
|
85
84
|
detecting the event, they might assist in pinpointing what broke:
|
86
85
|
|
87
86
|
#{mtr_list}
|
88
87
|
|
89
|
-
An alarm is raised under the following conditions: every 30 seconds
|
90
|
-
your node pings all other nodes. The amount of nodes that cannot be
|
91
|
-
reached is stored in a circular buffer, with each element representing
|
92
|
-
a minute of measurements. In the event that the last three minutes are
|
93
|
-
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
88
|
+
An alarm is raised under the following conditions: every 30 seconds
|
89
|
+
your node pings all other nodes. The amount of nodes that cannot be
|
90
|
+
reached is stored in a circular buffer, with each element representing
|
91
|
+
a minute of measurements. In the event that the last three minutes are
|
92
|
+
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
94
93
|
outage is assumed. The ring buffer's output is as following:
|
95
94
|
|
96
95
|
#{buffer_list}
|
data/lib/ring/sqa/analyzer.rb
CHANGED
@@ -30,39 +30,38 @@ class SQA
|
|
30
30
|
def initialize database, nodes
|
31
31
|
@db = database
|
32
32
|
@nodes = nodes
|
33
|
-
@alarm = Alarm.new
|
34
|
-
@buffer = AnalyzeBuffer.new
|
33
|
+
@alarm = Alarm.new
|
34
|
+
@buffer = AnalyzeBuffer.new @nodes.list.size
|
35
35
|
@db_id_seen = 0
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
class AnalyzeBuffer
|
40
40
|
attr_reader :array
|
41
|
-
def initialize max_size=30
|
42
|
-
@max_size
|
43
|
-
|
44
|
-
|
41
|
+
def initialize nodes_count, max_size=30, median_of=27
|
42
|
+
@max_size = max_size
|
43
|
+
@median_of = median_of
|
44
|
+
init_nodes = Array.new nodes_count * 2, ''
|
45
|
+
@array = Array.new max_size, init_nodes
|
45
46
|
end
|
46
47
|
def push e
|
47
48
|
@array.shift
|
48
49
|
@array.push e
|
49
50
|
end
|
50
|
-
def median
|
51
|
-
|
52
|
-
|
53
|
-
node_count[0..of_first].sort[middle]
|
51
|
+
def median
|
52
|
+
last = @median_of-1
|
53
|
+
node_count[0..last].sort[last/2]
|
54
54
|
end
|
55
|
-
def exceed_median?
|
56
|
-
first = @max_size-last
|
55
|
+
def exceed_median? tolerance=CFG.analyzer.tolerance
|
57
56
|
violate = (median+1)*tolerance
|
58
|
-
node_count[
|
57
|
+
node_count[@median_of..-1].all? { |e| e > violate }
|
59
58
|
end
|
60
59
|
def node_count
|
61
60
|
@array.map { |nodes| nodes.size }
|
62
61
|
end
|
63
62
|
def exceeding_nodes
|
64
|
-
exceed = @array[
|
65
|
-
exceed - @array[0
|
63
|
+
exceed = @array[@median_of..-1].inject :&
|
64
|
+
exceed - @array[0..@median_of-1].flatten.uniq
|
66
65
|
end
|
67
66
|
end
|
68
67
|
|
data/lib/ring/sqa/nodes.rb
CHANGED
data/lib/ring/sqa/nodes_json.rb
CHANGED
data/ring-sqa.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ring-sqa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saku Ytti
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: slop
|