ring-sqa 0.0.18 → 0.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ring/sqa/alarm.rb +21 -22
- data/lib/ring/sqa/analyzer.rb +14 -15
- data/lib/ring/sqa/nodes.rb +1 -0
- data/lib/ring/sqa/nodes_json.rb +7 -1
- data/ring-sqa.gemspec +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3118fdb4ee6c8ef1701ba4290bdee0c82bcb07db
|
4
|
+
data.tar.gz: 5dc539f039179912322562d3354ceec8b55b99b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bcdfe20771cb15f22135512d9861a7c56b3eafbd185e42f3cdfefa3ed7950f104e3bcda33d3026713a90d5250efba22e3449d02fb1f47cd6023f6e177bcb070e
|
7
|
+
data.tar.gz: eee37b25fb65db3036e90ebc5327c093bbce7f2c67a453e16a8ebc42f5e59e0d1d661b108091812e5e80bb35c3f2d93248747235d0f4bd4f22ebd4f737dedaec
|
data/lib/ring/sqa/alarm.rb
CHANGED
@@ -30,8 +30,7 @@ class SQA
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def initialize
|
34
|
-
@db = database
|
33
|
+
def initialize
|
35
34
|
@methods = []
|
36
35
|
@methods << Email.new if CFG.email.to?
|
37
36
|
@methods << UDP2IRC.new if CFG.irc.password?
|
@@ -42,19 +41,18 @@ class SQA
|
|
42
41
|
def compose_message alarm_buffer
|
43
42
|
exceeding_nodes = alarm_buffer.exceeding_nodes
|
44
43
|
msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
|
45
|
-
|
44
|
+
nodes_json = NodesJSON.new
|
45
|
+
exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
|
46
46
|
|
47
47
|
nodes_list = ''
|
48
|
-
exceeding_nodes.
|
49
|
-
|
50
|
-
nodes_list << "- %-30s %14s AS%5s %2s\n" % [json['hostname'], node, json['asn'], json['countrycode']]
|
48
|
+
exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
|
49
|
+
nodes_list << "- %-35s %15s AS%-6s %2s\n" % [node[:name], node[:ip], node[:as], node[:cc]]
|
51
50
|
end
|
52
51
|
|
53
52
|
mtr_list = ''
|
54
53
|
exceeding_nodes.sample(3).each do |node|
|
55
|
-
|
56
|
-
mtr_list <<
|
57
|
-
mtr_list << MTR.run(node)
|
54
|
+
mtr_list << "%-35s AS%-6s (%2s)\n" % [node[:name], node[:as], node[:cc]]
|
55
|
+
mtr_list << MTR.run(node[:ip])
|
58
56
|
mtr_list << "\n"
|
59
57
|
end
|
60
58
|
|
@@ -62,35 +60,36 @@ class SQA
|
|
62
60
|
time = alarm_buffer.array.size-1
|
63
61
|
alarm_buffer.array.each do |ary|
|
64
62
|
buffer_list << "%2s min ago %3s measurements failed" % [time, ary.size/2]
|
65
|
-
|
66
|
-
buffer_list << type
|
63
|
+
buffer_list << (time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n")
|
67
64
|
time -= 1
|
68
65
|
end
|
69
66
|
|
70
67
|
msg[:long] = <<EOF
|
71
|
-
|
68
|
+
Regarding: #{hostname}
|
69
|
+
|
70
|
+
This is an automated alert from the distributed partial outage
|
72
71
|
monitoring system "RING SQA".
|
73
72
|
|
74
|
-
At #{Time.now.utc} the following measurements were analysed
|
75
|
-
as indicating that there is a high probability your NLNOG RING node
|
76
|
-
cannot reach the entire internet. Possible causes could be an outage
|
73
|
+
At #{Time.now.utc} the following measurements were analysed
|
74
|
+
as indicating that there is a high probability your NLNOG RING node
|
75
|
+
cannot reach the entire internet. Possible causes could be an outage
|
77
76
|
in your upstream's or peer's network.
|
78
77
|
|
79
|
-
The following nodes previously were reachable, but became unreachable
|
78
|
+
The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
|
80
79
|
over the course of the last 3 minutes:
|
81
80
|
|
82
81
|
#{nodes_list}
|
83
82
|
|
84
|
-
As a debug starting point 3 traceroutes were launched right after
|
83
|
+
As a debug starting point 3 traceroutes were launched right after
|
85
84
|
detecting the event, they might assist in pinpointing what broke:
|
86
85
|
|
87
86
|
#{mtr_list}
|
88
87
|
|
89
|
-
An alarm is raised under the following conditions: every 30 seconds
|
90
|
-
your node pings all other nodes. The amount of nodes that cannot be
|
91
|
-
reached is stored in a circular buffer, with each element representing
|
92
|
-
a minute of measurements. In the event that the last three minutes are
|
93
|
-
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
88
|
+
An alarm is raised under the following conditions: every 30 seconds
|
89
|
+
your node pings all other nodes. The amount of nodes that cannot be
|
90
|
+
reached is stored in a circular buffer, with each element representing
|
91
|
+
a minute of measurements. In the event that the last three minutes are
|
92
|
+
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
94
93
|
outage is assumed. The ring buffer's output is as following:
|
95
94
|
|
96
95
|
#{buffer_list}
|
data/lib/ring/sqa/analyzer.rb
CHANGED
@@ -30,39 +30,38 @@ class SQA
|
|
30
30
|
def initialize database, nodes
|
31
31
|
@db = database
|
32
32
|
@nodes = nodes
|
33
|
-
@alarm = Alarm.new
|
34
|
-
@buffer = AnalyzeBuffer.new
|
33
|
+
@alarm = Alarm.new
|
34
|
+
@buffer = AnalyzeBuffer.new @nodes.list.size
|
35
35
|
@db_id_seen = 0
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
39
39
|
class AnalyzeBuffer
|
40
40
|
attr_reader :array
|
41
|
-
def initialize max_size=30
|
42
|
-
@max_size
|
43
|
-
|
44
|
-
|
41
|
+
def initialize nodes_count, max_size=30, median_of=27
|
42
|
+
@max_size = max_size
|
43
|
+
@median_of = median_of
|
44
|
+
init_nodes = Array.new nodes_count * 2, ''
|
45
|
+
@array = Array.new max_size, init_nodes
|
45
46
|
end
|
46
47
|
def push e
|
47
48
|
@array.shift
|
48
49
|
@array.push e
|
49
50
|
end
|
50
|
-
def median
|
51
|
-
|
52
|
-
|
53
|
-
node_count[0..of_first].sort[middle]
|
51
|
+
def median
|
52
|
+
last = @median_of-1
|
53
|
+
node_count[0..last].sort[last/2]
|
54
54
|
end
|
55
|
-
def exceed_median?
|
56
|
-
first = @max_size-last
|
55
|
+
def exceed_median? tolerance=CFG.analyzer.tolerance
|
57
56
|
violate = (median+1)*tolerance
|
58
|
-
node_count[
|
57
|
+
node_count[@median_of..-1].all? { |e| e > violate }
|
59
58
|
end
|
60
59
|
def node_count
|
61
60
|
@array.map { |nodes| nodes.size }
|
62
61
|
end
|
63
62
|
def exceeding_nodes
|
64
|
-
exceed = @array[
|
65
|
-
exceed - @array[0
|
63
|
+
exceed = @array[@median_of..-1].inject :&
|
64
|
+
exceed - @array[0..@median_of-1].flatten.uniq
|
66
65
|
end
|
67
66
|
end
|
68
67
|
|
data/lib/ring/sqa/nodes.rb
CHANGED
data/lib/ring/sqa/nodes_json.rb
CHANGED
data/ring-sqa.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ring-sqa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.18
|
4
|
+
version: 0.0.19
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saku Ytti
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: slop
|