ring-sqa 0.0.19 → 0.0.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +1 -0
- data/lib/ring/sqa/alarm.rb +6 -39
- data/lib/ring/sqa/alarm/message.rb +44 -0
- data/lib/ring/sqa/analyzer.rb +2 -2
- data/lib/ring/sqa/cfg.rb +3 -2
- data/lib/ring/sqa/nodes.rb +44 -10
- data/lib/ring/sqa/paste.rb +2 -2
- data/lib/ring/sqa/poller.rb +2 -1
- data/lib/ring/sqa/poller/sender.rb +1 -1
- data/ring-sqa.gemspec +1 -1
- metadata +2 -2
- data/lib/ring/sqa/nodes_json.rb +0 -38
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: caa9122c18e3a803a62729778308c5af962fdfab
|
4
|
+
data.tar.gz: 877437aef04803f4baf0f9603e938f46b886445d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0fde0350ff73add7549347cb05b493a01bdb98f8cac9669616c10c4c53045ac32ed8e4e52c63654d1470810bf50d1260556503b0bc95376a1648b67fc2f290a
|
7
|
+
data.tar.gz: bded7fb091e59c8706412efabe163ccb7e72516fe6feaf0ef6b9eeb1a469561d7afe05f1da09a32fed4b34e5d82752fabf777e6706f4ccfd1b75a9fa04811756
|
data/Rakefile
CHANGED
data/lib/ring/sqa/alarm.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
require_relative 'alarm/email'
|
2
2
|
require_relative 'alarm/udp2irc'
|
3
3
|
require_relative 'alarm/cfg'
|
4
|
+
require_relative 'alarm/message'
|
4
5
|
require_relative 'mtr'
|
5
6
|
require_relative 'paste'
|
6
|
-
require_relative 'nodes_json'
|
7
7
|
|
8
8
|
module Ring
|
9
9
|
class SQA
|
@@ -30,19 +30,19 @@ class SQA
|
|
30
30
|
|
31
31
|
private
|
32
32
|
|
33
|
-
def initialize
|
33
|
+
def initialize nodes
|
34
|
+
@nodes = nodes
|
34
35
|
@methods = []
|
35
36
|
@methods << Email.new if CFG.email.to?
|
36
37
|
@methods << UDP2IRC.new if CFG.irc.password?
|
38
|
+
@hostname = Ring::SQA::CFG.host.name
|
37
39
|
@alarm = false
|
38
|
-
@hostname = (Socket.gethostname rescue 'anonymous')
|
39
40
|
end
|
40
41
|
|
41
42
|
def compose_message alarm_buffer
|
42
43
|
exceeding_nodes = alarm_buffer.exceeding_nodes
|
43
44
|
msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
|
44
|
-
|
45
|
-
exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
|
45
|
+
exceeding_nodes = exceeding_nodes.map { |node| @nodes.get node }
|
46
46
|
|
47
47
|
nodes_list = ''
|
48
48
|
exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
|
@@ -64,40 +64,7 @@ class SQA
|
|
64
64
|
time -= 1
|
65
65
|
end
|
66
66
|
|
67
|
-
msg[:long] =
|
68
|
-
Regarding: #{hostname}
|
69
|
-
|
70
|
-
This is an automated alert from the distributed partial outage
|
71
|
-
monitoring system "RING SQA".
|
72
|
-
|
73
|
-
At #{Time.now.utc} the following measurements were analysed
|
74
|
-
as indicating that there is a high probability your NLNOG RING node
|
75
|
-
cannot reach the entire internet. Possible causes could be an outage
|
76
|
-
in your upstream's or peer's network.
|
77
|
-
|
78
|
-
The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
|
79
|
-
over the course of the last 3 minutes:
|
80
|
-
|
81
|
-
#{nodes_list}
|
82
|
-
|
83
|
-
As a debug starting point 3 traceroutes were launched right after
|
84
|
-
detecting the event, they might assist in pinpointing what broke:
|
85
|
-
|
86
|
-
#{mtr_list}
|
87
|
-
|
88
|
-
An alarm is raised under the following conditions: every 30 seconds
|
89
|
-
your node pings all other nodes. The amount of nodes that cannot be
|
90
|
-
reached is stored in a circular buffer, with each element representing
|
91
|
-
a minute of measurements. In the event that the last three minutes are
|
92
|
-
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
93
|
-
outage is assumed. The ring buffer's output is as following:
|
94
|
-
|
95
|
-
#{buffer_list}
|
96
|
-
|
97
|
-
Kind regards,
|
98
|
-
|
99
|
-
NLNOG RING
|
100
|
-
EOF
|
67
|
+
msg[:long] = message nodes_list, mtr_list, buffer_list
|
101
68
|
msg
|
102
69
|
end
|
103
70
|
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
class Alarm
|
5
|
+
def message nodes_list, mtr_list, buffer_list
|
6
|
+
"
|
7
|
+
Regarding: #{Ring::SQA::CFG.host.name}
|
8
|
+
|
9
|
+
This is an automated alert from the distributed partial outage
|
10
|
+
monitoring system 'RING SQA'.
|
11
|
+
|
12
|
+
At #{Time.now.utc} the following measurements were analysed
|
13
|
+
as indicating that there is a high probability your NLNOG RING node
|
14
|
+
cannot reach the entire internet. Possible causes could be an outage
|
15
|
+
in your upstream's or peer's network.
|
16
|
+
|
17
|
+
The following #{nodes_list.size} nodes previously were reachable, but became unreachable
|
18
|
+
over the course of the last 3 minutes:
|
19
|
+
|
20
|
+
#{nodes_list}
|
21
|
+
|
22
|
+
As a debug starting point 3 traceroutes were launched right after
|
23
|
+
detecting the event, they might assist in pinpointing what broke:
|
24
|
+
|
25
|
+
#{mtr_list}
|
26
|
+
|
27
|
+
An alarm is raised under the following conditions: every 30 seconds
|
28
|
+
your node pings all other nodes. The amount of nodes that cannot be
|
29
|
+
reached is stored in a circular buffer, with each element representing
|
30
|
+
a minute of measurements. In the event that the last three minutes are
|
31
|
+
#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
|
32
|
+
outage is assumed. The ring buffer's output is as following:
|
33
|
+
|
34
|
+
#{buffer_list}
|
35
|
+
|
36
|
+
Kind regards,
|
37
|
+
|
38
|
+
NLNOG RING
|
39
|
+
"
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
data/lib/ring/sqa/analyzer.rb
CHANGED
@@ -30,8 +30,8 @@ class SQA
|
|
30
30
|
def initialize database, nodes
|
31
31
|
@db = database
|
32
32
|
@nodes = nodes
|
33
|
-
@alarm = Alarm.new
|
34
|
-
@buffer = AnalyzeBuffer.new @nodes.
|
33
|
+
@alarm = Alarm.new @nodes
|
34
|
+
@buffer = AnalyzeBuffer.new @nodes.all.size
|
35
35
|
@db_id_seen = 0
|
36
36
|
end
|
37
37
|
end
|
data/lib/ring/sqa/cfg.rb
CHANGED
@@ -27,8 +27,9 @@ module Ring
|
|
27
27
|
|
28
28
|
CFG = Config.cfg
|
29
29
|
|
30
|
-
CFG.
|
31
|
-
CFG.
|
30
|
+
CFG.host.name = Socket.gethostname
|
31
|
+
CFG.host.ipv4 = Socket::getaddrinfo(CFG.host.name,"echo",Socket::AF_INET)[0][3]
|
32
|
+
CFG.host.ipv6 = Socket::getaddrinfo(CFG.host.name,"echo",Socket::AF_INET6)[0][3]
|
32
33
|
|
33
34
|
raise NoConfig, 'edit /etc/ring-sqa/main.conf' if Config.create
|
34
35
|
end
|
data/lib/ring/sqa/nodes.rb
CHANGED
@@ -1,29 +1,34 @@
|
|
1
1
|
require 'rb-inotify'
|
2
2
|
require 'ipaddr'
|
3
|
+
require 'json'
|
3
4
|
|
4
5
|
module Ring
|
5
6
|
class SQA
|
6
7
|
|
7
8
|
class Nodes
|
8
|
-
FILE
|
9
|
-
attr_reader :
|
9
|
+
FILE = '/etc/hosts'
|
10
|
+
attr_reader :all
|
10
11
|
|
11
12
|
def run
|
12
13
|
Thread.new { @inotify.run }
|
13
14
|
end
|
14
15
|
|
16
|
+
def get node
|
17
|
+
(@all[node] or {})
|
18
|
+
end
|
19
|
+
|
15
20
|
private
|
16
21
|
|
17
22
|
def initialize
|
18
|
-
@
|
23
|
+
@all = read_nodes
|
19
24
|
@inotify = INotify::Notifier.new
|
20
25
|
@inotify.watch(File.dirname(FILE), :modify, :create) do |event|
|
21
|
-
@
|
26
|
+
@all = read_nodes if event.name == FILE.split('/').last
|
22
27
|
end
|
23
28
|
run
|
24
29
|
end
|
25
30
|
|
26
|
-
def
|
31
|
+
def read_nodes
|
27
32
|
Log.info "loading #{FILE}"
|
28
33
|
list = []
|
29
34
|
File.read(FILE).lines.each do |line|
|
@@ -31,26 +36,55 @@ class SQA
|
|
31
36
|
next if entry_skip? entry
|
32
37
|
list << entry.first
|
33
38
|
end
|
34
|
-
list
|
39
|
+
nodes_hash list
|
40
|
+
rescue => error
|
41
|
+
Log.warn "#{error.class} raised with message '#{error.message}' while generating nodes list"
|
42
|
+
@all
|
43
|
+
end
|
44
|
+
|
45
|
+
def nodes_hash ips, file=CFG.nodes_json
|
46
|
+
nodes = {}
|
47
|
+
json = JSON.load File.read(file)
|
48
|
+
json['results']['nodes'].each do |node|
|
49
|
+
addr = CFG.ipv6? ? node['ipv6'] : node['ipv4']
|
50
|
+
next unless ips.include? addr
|
51
|
+
nodes[addr] = node
|
52
|
+
end
|
53
|
+
json_to_nodes_hash nodes
|
35
54
|
end
|
36
55
|
|
56
|
+
def json_to_nodes_hash from_json
|
57
|
+
nodes= {}
|
58
|
+
from_json.each do |ip, json|
|
59
|
+
node = {
|
60
|
+
name: json['hostname'],
|
61
|
+
ip: ip,
|
62
|
+
as: json['asn'],
|
63
|
+
cc: json['countrycode'],
|
64
|
+
}
|
65
|
+
next if CFG.host.name == node[:name]
|
66
|
+
nodes[ip] = node
|
67
|
+
end
|
68
|
+
nodes
|
69
|
+
end
|
70
|
+
|
71
|
+
|
37
72
|
def entry_skip? entry
|
38
73
|
return true unless entry.size > 2
|
39
|
-
return true if entry.first.match
|
74
|
+
return true if entry.first.match(/^\s*#/)
|
40
75
|
return true if CFG.hosts.ignore.any? { |re| entry[2].match Regexp.new(re) }
|
41
76
|
return true unless CFG.hosts.load.any? { |re| entry[2].match Regexp.new(re) }
|
42
77
|
|
43
78
|
address = IPAddr.new(entry.first) rescue (return true)
|
44
79
|
if CFG.ipv6?
|
45
80
|
return true if address.ipv4?
|
46
|
-
return true if address == IPAddr.new(CFG.
|
81
|
+
return true if address == IPAddr.new(CFG.host.ipv6)
|
47
82
|
else
|
48
83
|
return true if address.ipv6?
|
49
|
-
return true if address == IPAddr.new(CFG.
|
84
|
+
return true if address == IPAddr.new(CFG.host.ipv4)
|
50
85
|
end
|
51
86
|
false
|
52
87
|
end
|
53
|
-
|
54
88
|
end
|
55
89
|
|
56
90
|
end
|
data/lib/ring/sqa/paste.rb
CHANGED
data/lib/ring/sqa/poller.rb
CHANGED
data/ring-sqa.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ring-sqa
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.20
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Saku Ytti
|
@@ -103,6 +103,7 @@ files:
|
|
103
103
|
- lib/ring/sqa/alarm.rb
|
104
104
|
- lib/ring/sqa/alarm/cfg.rb
|
105
105
|
- lib/ring/sqa/alarm/email.rb
|
106
|
+
- lib/ring/sqa/alarm/message.rb
|
106
107
|
- lib/ring/sqa/alarm/udp2irc.rb
|
107
108
|
- lib/ring/sqa/analyzer.rb
|
108
109
|
- lib/ring/sqa/cfg.rb
|
@@ -113,7 +114,6 @@ files:
|
|
113
114
|
- lib/ring/sqa/log.rb
|
114
115
|
- lib/ring/sqa/mtr.rb
|
115
116
|
- lib/ring/sqa/nodes.rb
|
116
|
-
- lib/ring/sqa/nodes_json.rb
|
117
117
|
- lib/ring/sqa/paste.rb
|
118
118
|
- lib/ring/sqa/poller.rb
|
119
119
|
- lib/ring/sqa/poller/receiver.rb
|
data/lib/ring/sqa/nodes_json.rb
DELETED
@@ -1,38 +0,0 @@
|
|
1
|
-
require 'json'
|
2
|
-
|
3
|
-
module Ring
|
4
|
-
class SQA
|
5
|
-
|
6
|
-
class NodesJSON
|
7
|
-
def get node
|
8
|
-
json = (@nodes[node] or {})
|
9
|
-
{
|
10
|
-
name: json['hostname'],
|
11
|
-
ip: node,
|
12
|
-
as: json['asn'],
|
13
|
-
cc: json['countrycode'],
|
14
|
-
}
|
15
|
-
rescue
|
16
|
-
{}
|
17
|
-
end
|
18
|
-
|
19
|
-
private
|
20
|
-
|
21
|
-
def initialize
|
22
|
-
@file = CFG.nodes_json
|
23
|
-
@nodes = (load_json rescue {})
|
24
|
-
end
|
25
|
-
|
26
|
-
def load_json
|
27
|
-
nodes = {}
|
28
|
-
json = JSON.load File.read(@file)
|
29
|
-
json['results']['nodes'].each do |node|
|
30
|
-
addr = CFG.ipv6? ? node['ipv6'] : node['ipv4']
|
31
|
-
nodes[addr] = node
|
32
|
-
end
|
33
|
-
nodes
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
end
|
38
|
-
end
|