ring-sqa 0.0.19 → 0.0.20
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Rakefile +1 -0
- data/lib/ring/sqa/alarm.rb +6 -39
- data/lib/ring/sqa/alarm/message.rb +44 -0
- data/lib/ring/sqa/analyzer.rb +2 -2
- data/lib/ring/sqa/cfg.rb +3 -2
- data/lib/ring/sqa/nodes.rb +44 -10
- data/lib/ring/sqa/paste.rb +2 -2
- data/lib/ring/sqa/poller.rb +2 -1
- data/lib/ring/sqa/poller/sender.rb +1 -1
- data/ring-sqa.gemspec +1 -1
- metadata +2 -2
- data/lib/ring/sqa/nodes_json.rb +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: caa9122c18e3a803a62729778308c5af962fdfab
|
4
|
+
data.tar.gz: 877437aef04803f4baf0f9603e938f46b886445d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0fde0350ff73add7549347cb05b493a01bdb98f8cac9669616c10c4c53045ac32ed8e4e52c63654d1470810bf50d1260556503b0bc95376a1648b67fc2f290a
|
7
|
+
data.tar.gz: bded7fb091e59c8706412efabe163ccb7e72516fe6feaf0ef6b9eeb1a469561d7afe05f1da09a32fed4b34e5d82752fabf777e6706f4ccfd1b75a9fa04811756
|
data/Rakefile
CHANGED
data/lib/ring/sqa/alarm.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require_relative 'alarm/email'
|
2
2
|
require_relative 'alarm/udp2irc'
|
3
3
|
require_relative 'alarm/cfg'
|
4
|
+
require_relative 'alarm/message'
|
4
5
|
require_relative 'mtr'
|
5
6
|
require_relative 'paste'
|
6
|
-
require_relative 'nodes_json'
|
7
7
|
|
8
8
|
module Ring
|
9
9
|
class SQA
|
@@ -30,19 +30,19 @@ class SQA
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def initialize
|
33
|
+
def initialize nodes
|
34
|
+
@nodes = nodes
|
34
35
|
@methods = []
|
35
36
|
@methods << Email.new if CFG.email.to?
|
36
37
|
@methods << UDP2IRC.new if CFG.irc.password?
|
38
|
+
@hostname = Ring::SQA::CFG.host.name
|
37
39
|
@alarm = false
|
38
|
-
@hostname = (Socket.gethostname rescue 'anonymous')
|
39
40
|
end
|
40
41
|
|
41
42
|
def compose_message alarm_buffer
|
42
43
|
exceeding_nodes = alarm_buffer.exceeding_nodes
|
43
44
|
msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
|
44
|
-
|
45
|
-
exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
|
45
|
+
exceeding_nodes = exceeding_nodes.map { |node| @nodes.get node }
|
46
46
|
|
47
47
|
nodes_list = ''
|
48
48
|
exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
|
@@ -64,40 +64,7 @@ class SQA
|
|
64
64
|
time -= 1
|
65
65
|
end
|
66
66
|
|
67
|
-
msg[:long] =
|
68
|
-
Regarding: #{hostname}
|
69
|
-
|
70
|
-
This is an automated alert from the distributed partial outage
|
71
|
-
monitoring system "RING SQA".
|
72
|
-
|
73
|
-
At #{Time.now.utc} the following measurements were analysed
|
74
|
-
as indicating that there is a high probability your NLNOG RING node
|
75
|
-
cannot reach the entire internet. Possible causes could be an outage
|
76
|
-
in your upstream's or peer's network.
|
77
|
-
|
78
|
-
The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
|
79
|
-
over the course of the last 3 minutes:
|
80
|
-
|
81
|
-
#{nodes_list}
|
82
|
-
|
83
|
-
As a debug starting point 3 traceroutes were launched right after
|
84
|
-
detecting the event, they might assist in pinpointing what broke:
|
85
|
-
|
86
|
-
#{mtr_list}
|
87
|
-
|
88
|
-
An alarm is raised under the following conditions: every 30 seconds
|
89
|
-
your node pings all other nodes. The amount of nodes that cannot be
|
90
|
-
reached is stored in a circular buffer, with each element representing
|
91
|
-
a minute of measurements. In the event that the last three minutes are
|
92
|
-
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
93
|
-
outage is assumed. The ring buffer's output is as following:
|
94
|
-
|
95
|
-
#{buffer_list}
|
96
|
-
|
97
|
-
Kind regards,
|
98
|
-
|
99
|
-
NLNOG RING
|
100
|
-
EOF
|
67
|
+
msg[:long] = message nodes_list, mtr_list, buffer_list
|
101
68
|
msg
|
102
69
|
end
|
103
70
|
|
data/lib/ring/sqa/alarm/message.rb
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
class Alarm
|
5
|
+
def message nodes_list, mtr_list, buffer_list
|
6
|
+
"
|
7
|
+
Regarding: #{Ring::SQA::CFG.host.name}
|
8
|
+
|
9
|
+
This is an automated alert from the distributed partial outage
|
10
|
+
monitoring system 'RING SQA'.
|
11
|
+
|
12
|
+
At #{Time.now.utc} the following measurements were analysed
|
13
|
+
as indicating that there is a high probability your NLNOG RING node
|
14
|
+
cannot reach the entire internet. Possible causes could be an outage
|
15
|
+
in your upstream's or peer's network.
|
16
|
+
|
17
|
+
The following #{nodes_list.size} nodes previously were reachable, but became unreachable
|
18
|
+
over the course of the last 3 minutes:
|
19
|
+
|
20
|
+
#{nodes_list}
|
21
|
+
|
22
|
+
As a debug starting point 3 traceroutes were launched right after
|
23
|
+
detecting the event, they might assist in pinpointing what broke:
|
24
|
+
|
25
|
+
#{mtr_list}
|
26
|
+
|
27
|
+
An alarm is raised under the following conditions: every 30 seconds
|
28
|
+
your node pings all other nodes. The amount of nodes that cannot be
|
29
|
+
reached is stored in a circular buffer, with each element representing
|
30
|
+
a minute of measurements. In the event that the last three minutes are
|
31
|
+
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
32
|
+
outage is assumed. The ring buffer's output is as following:
|
33
|
+
|
34
|
+
#{buffer_list}
|
35
|
+
|
36
|
+
Kind regards,
|
37
|
+
|
38
|
+
NLNOG RING
|
39
|
+
"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
data/lib/ring/sqa/analyzer.rb
CHANGED
@@ -30,8 +30,8 @@ class SQA
|
|
30
30
|
def initialize database, nodes
|
31
31
|
@db = database
|
32
32
|
@nodes = nodes
|
33
|
-
@alarm = Alarm.new
|
34
|
-
@buffer = AnalyzeBuffer.new @nodes.
|
33
|
+
@alarm = Alarm.new @nodes
|
34
|
+
@buffer = AnalyzeBuffer.new @nodes.all.size
|
35
35
|
@db_id_seen = 0
|
36
36
|
end
|
37
37
|
end
|
data/lib/ring/sqa/cfg.rb
CHANGED
@@ -27,8 +27,9 @@ module Ring
|
|
27
27
|
|
28
28
|
CFG = Config.cfg
|
29
29
|
|
30
|
-
CFG.
|
31
|
-
CFG.
|
30
|
+
CFG.host.name = Socket.gethostname
|
31
|
+
CFG.host.ipv4 = Socket::getaddrinfo(CFG.host.name,"echo",Socket::AF_INET)[0][3]
|
32
|
+
CFG.host.ipv6 = Socket::getaddrinfo(CFG.host.name,"echo",Socket::AF_INET6)[0][3]
|
32
33
|
|
33
34
|
raise NoConfig, 'edit /etc/ring-sqa/main.conf' if Config.create
|
34
35
|
end
|
data/lib/ring/sqa/nodes.rb
CHANGED
@@ -1,29 +1,34 @@
|
|
1
1
|
require 'rb-inotify'
|
2
2
|
require 'ipaddr'
|
3
|
+
require 'json'
|
3
4
|
|
4
5
|
module Ring
|
5
6
|
class SQA
|
6
7
|
|
7
8
|
class Nodes
|
8
|
-
FILE
|
9
|
-
attr_reader :
|
9
|
+
FILE = '/etc/hosts'
|
10
|
+
attr_reader :all
|
10
11
|
|
11
12
|
def run
|
12
13
|
Thread.new { @inotify.run }
|
13
14
|
end
|
14
15
|
|
16
|
+
def get node
|
17
|
+
(@all[node] or {})
|
18
|
+
end
|
19
|
+
|
15
20
|
private
|
16
21
|
|
17
22
|
def initialize
|
18
|
-
@
|
23
|
+
@all = read_nodes
|
19
24
|
@inotify = INotify::Notifier.new
|
20
25
|
@inotify.watch(File.dirname(FILE), :modify, :create) do |event|
|
21
|
-
@
|
26
|
+
@all = read_nodes if event.name == FILE.split('/').last
|
22
27
|
end
|
23
28
|
run
|
24
29
|
end
|
25
30
|
|
26
|
-
def
|
31
|
+
def read_nodes
|
27
32
|
Log.info "loading #{FILE}"
|
28
33
|
list = []
|
29
34
|
File.read(FILE).lines.each do |line|
|
@@ -31,26 +36,55 @@ class SQA
|
|
31
36
|
next if entry_skip? entry
|
32
37
|
list << entry.first
|
33
38
|
end
|
34
|
-
list
|
39
|
+
nodes_hash list
|
40
|
+
rescue => error
|
41
|
+
Log.warn "#{error.class} raised with message '#{error.message}' while generating nodes list"
|
42
|
+
@all
|
43
|
+
end
|
44
|
+
|
45
|
+
def nodes_hash ips, file=CFG.nodes_json
|
46
|
+
nodes = {}
|
47
|
+
json = JSON.load File.read(file)
|
48
|
+
json['results']['nodes'].each do |node|
|
49
|
+
addr = CFG.ipv6? ? node['ipv6'] : node['ipv4']
|
50
|
+
next unless ips.include? addr
|
51
|
+
nodes[addr] = node
|
52
|
+
end
|
53
|
+
json_to_nodes_hash nodes
|
35
54
|
end
|
36
55
|
|
56
|
+
def json_to_nodes_hash from_json
|
57
|
+
nodes= {}
|
58
|
+
from_json.each do |ip, json|
|
59
|
+
node = {
|
60
|
+
name: json['hostname'],
|
61
|
+
ip: ip,
|
62
|
+
as: json['asn'],
|
63
|
+
cc: json['countrycode'],
|
64
|
+
}
|
65
|
+
next if CFG.host.name == node[:name]
|
66
|
+
nodes[ip] = node
|
67
|
+
end
|
68
|
+
nodes
|
69
|
+
end
|
70
|
+
|
71
|
+
|
37
72
|
def entry_skip? entry
|
38
73
|
return true unless entry.size > 2
|
39
|
-
return true if entry.first.match
|
74
|
+
return true if entry.first.match(/^\s*#/)
|
40
75
|
return true if CFG.hosts.ignore.any? { |re| entry[2].match Regexp.new(re) }
|
41
76
|
return true unless CFG.hosts.load.any? { |re| entry[2].match Regexp.new(re) }
|
42
77
|
|
43
78
|
address = IPAddr.new(entry.first) rescue (return true)
|
44
79
|
if CFG.ipv6?
|
45
80
|
return true if address.ipv4?
|
46
|
-
return true if address == IPAddr.new(CFG.
|
81
|
+
return true if address == IPAddr.new(CFG.host.ipv6)
|
47
82
|
else
|
48
83
|
return true if address.ipv6?
|
49
|
-
return true if address == IPAddr.new(CFG.
|
84
|
+
return true if address == IPAddr.new(CFG.host.ipv4)
|
50
85
|
end
|
51
86
|
false
|
52
87
|
end
|
53
|
-
|
54
88
|
end
|
55
89
|
|
56
90
|
end
|
data/lib/ring/sqa/paste.rb
CHANGED
data/lib/ring/sqa/poller.rb
CHANGED
data/ring-sqa.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ring-sqa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.19
|
4
|
+
version: 0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saku Ytti
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- lib/ring/sqa/alarm.rb
|
104
104
|
- lib/ring/sqa/alarm/cfg.rb
|
105
105
|
- lib/ring/sqa/alarm/email.rb
|
106
|
+
- lib/ring/sqa/alarm/message.rb
|
106
107
|
- lib/ring/sqa/alarm/udp2irc.rb
|
107
108
|
- lib/ring/sqa/analyzer.rb
|
108
109
|
- lib/ring/sqa/cfg.rb
|
@@ -113,7 +114,6 @@ files:
|
|
113
114
|
- lib/ring/sqa/log.rb
|
114
115
|
- lib/ring/sqa/mtr.rb
|
115
116
|
- lib/ring/sqa/nodes.rb
|
116
|
-
- lib/ring/sqa/nodes_json.rb
|
117
117
|
- lib/ring/sqa/paste.rb
|
118
118
|
- lib/ring/sqa/poller.rb
|
119
119
|
- lib/ring/sqa/poller/receiver.rb
|
data/lib/ring/sqa/nodes_json.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
|
3
|
-
module Ring
|
4
|
-
class SQA
|
5
|
-
|
6
|
-
class NodesJSON
|
7
|
-
def get node
|
8
|
-
json = (@nodes[node] or {})
|
9
|
-
{
|
10
|
-
name: json['hostname'],
|
11
|
-
ip: node,
|
12
|
-
as: json['asn'],
|
13
|
-
cc: json['countrycode'],
|
14
|
-
}
|
15
|
-
rescue
|
16
|
-
{}
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def initialize
|
22
|
-
@file = CFG.nodes_json
|
23
|
-
@nodes = (load_json rescue {})
|
24
|
-
end
|
25
|
-
|
26
|
-
def load_json
|
27
|
-
nodes = {}
|
28
|
-
json = JSON.load File.read(@file)
|
29
|
-
json['results']['nodes'].each do |node|
|
30
|
-
addr = CFG.ipv6? ? node['ipv6'] : node['ipv4']
|
31
|
-
nodes[addr] = node
|
32
|
-
end
|
33
|
-
nodes
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end
|
38
|
-
end
|