ring-sqa 0.0.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/README.md +18 -0
- data/Rakefile +46 -0
- data/bin/ring-sqad +9 -0
- data/lib/ring/sqa/alarm/cfg.rb +24 -0
- data/lib/ring/sqa/alarm/email.rb +45 -0
- data/lib/ring/sqa/alarm/udp2irc.rb +23 -0
- data/lib/ring/sqa/alarm.rb +94 -0
- data/lib/ring/sqa/analyzer.rb +70 -0
- data/lib/ring/sqa/cfg.rb +34 -0
- data/lib/ring/sqa/cli.rb +55 -0
- data/lib/ring/sqa/core.rb +29 -0
- data/lib/ring/sqa/database/model.rb +18 -0
- data/lib/ring/sqa/database.rb +56 -0
- data/lib/ring/sqa/log.rb +18 -0
- data/lib/ring/sqa/mtr.rb +39 -0
- data/lib/ring/sqa/nodes.rb +56 -0
- data/lib/ring/sqa/nodes_json.rb +32 -0
- data/lib/ring/sqa/poller/receiver.rb +28 -0
- data/lib/ring/sqa/poller/responder.rb +27 -0
- data/lib/ring/sqa/poller/sender.rb +49 -0
- data/lib/ring/sqa/poller.rb +27 -0
- data/lib/ring/sqa.rb +7 -0
- data/ring-sqa.gemspec +22 -0
- metadata +145 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: caf8e25b1a61b065566c3e4d76ccc9376fc84f2b
|
4
|
+
data.tar.gz: a468d0d59a4a8cd7a590942257d7d58c94be1b4d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 29d354abb4af2ff8e85ac70b37651769641dd6cbb14ab467d00a0a87f473971a95a733f6ddd3171603ac9067afd2db5f92f97ed2cb515f1232601f74f2062d3e
|
7
|
+
data.tar.gz: d3daaf6398f7e6958b28134ddec2550baa11ab7ef6a3c48c1988b9dfc2600ce60eb0db1f26d12f819da717cf6714a3e47762f5f8bc82daabfdde801b6306e398
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Ring SQA
|
2
|
+
Discovers NLNOG Ring nodes by monitoring /etc/hosts with inotify. UDP pings
|
3
|
+
each node periodically, recording latency in microseconds in an SQL database
|
4
|
+
|
5
|
+
Currently 4 threads
|
6
|
+
|
7
|
+
1. main thread, launches everything and finally gives control to the Analyzer class
|
8
|
+
2. querier thread, sends queries and waits for responses, populates database
|
9
|
+
3. responder thread, waits for queries and echoes them back
|
10
|
+
4. inotify monitor thread
|
11
|
+
|
12
|
+
## Use
|
13
|
+
ring-sqad --help
|
14
|
+
ring-sqad --daemonize
|
15
|
+
|
16
|
+
## Todo
|
17
|
+
1. Querier loop should sleep dynamically between nodes to spread CPU/network demand
|
18
|
+
2. Analyzer class should actually do something (use the average of the numbers before the median as the norm; if the last Y measurements are Z times above the norm, or more than X standard deviations above it, raise an alarm?)
|
data/Rakefile
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
begin
  require 'rake/testtask'
  require 'bundler'
  # Bundler.setup
rescue LoadError
  warn 'bundler missing'
end

# Load the gemspec through RubyGems rather than eval'ing the file by hand.
gemspec = Gem::Specification.load(Dir['*.gemspec'].first)
# File name "gem build" produces, e.g. "<name>-<version>.gem".
file    = [gemspec.name, gemspec.version].join('-') + '.gem'

desc 'Validate gemspec'
task :gemspec do
  gemspec.validate
end

# Define the test task directly; it was previously created from inside
# another task of the same name. Skipped when rake/testtask failed to load.
if defined? Rake::TestTask
  Rake::TestTask.new(:test) do |t|
    t.libs.push 'lib'
    t.test_files = FileList['spec/*_spec.rb']
    t.verbose = true
  end
end

desc 'Build gem'
task :build do
  # sh aborts the task when the command fails, unlike a bare system call.
  sh "gem build #{gemspec.name}.gemspec"
  FileUtils.mkdir_p 'gems'
  FileUtils.mv file, 'gems'
end

desc 'Install gem'
task :install => :build do
  sh "sudo -Es sh -c 'umask 022; gem install gems/#{file}'"
end

desc 'Remove gems'
task :clean do
  FileUtils.rm_rf 'gems'
end

desc 'Push to rubygems'
task :push do
  sh "gem push gems/#{file}"
end
|
data/bin/ring-sqad
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
# Alarm configuration: defaults plus whatever alarm.conf under Directory
# provides. Exposes the loaded settings as Alarm::CFG.
class Alarm
  Config = Asetus.new name: 'sqa', load: false, usrdir: Directory, cfgfile: 'alarm.conf'

  # Email defaults; "to" is false until the operator configures a recipient.
  email_defaults        = Config.default.email
  email_defaults.to     = false
  email_defaults.from   = 'foo@example.com'
  email_defaults.prefix = false

  # IRC (udp2irc) defaults.
  irc_defaults          = Config.default.irc
  irc_defaults.host     = '213.136.8.179'
  irc_defaults.port     = 5502
  irc_defaults.password = 'shough2oChoo'
  irc_defaults.channel  = '#ring'

  begin
    Config.load
  rescue StandardError => load_error
    raise InvalidConfig, "Error loading alarm.conf configuration: #{load_error.message}"
  end

  CFG = Config.cfg
  Config.create
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'net/smtp'
|
2
|
+
|
3
|
+
module Ring
|
4
|
+
class SQA
|
5
|
+
class Alarm
|
6
|
+
|
7
|
+
# Alarm delivery method that mails the alarm through an SMTP server.
class Email
  # SMTP server the message is handed to.
  SERVER = 'localhost'

  # Mails an alarm message.
  #
  # msg - Hash with :short (appended to the configured subject prefix) and
  #       :long (used as the mail body).
  #
  # NOTE: this intentionally keeps the name +send+, which is what Alarm
  # calls on every alarm method; use +__send__+ for dynamic dispatch.
  def send msg
    @from    = CFG.email.from
    @to      = [CFG.email.to].flatten
    prefix   = CFG.email.prefix? ? CFG.email.prefix : ''
    @subject = prefix + msg[:short]
    @body    = msg[:long]
    send_email compose_email
  end

  private

  # Builds the raw message text: headers, an empty line, then the body.
  def compose_email
    mail = []
    mail << 'From: '     + @from
    mail << 'To: '       + @to.join(', ')
    mail << 'Subject: '  + @subject
    mail << 'List-Id: '  + 'ring-sqa <sqa.ring.nlnog.net>'
    mail << 'X-Mailer: ' + 'ring-sqa'
    # The header section must be terminated by an empty line; previously the
    # body was glued directly under the last header.
    mail << ''
    mail << @body
    mail.join("\n")
  end

  # Hands the composed message to the SMTP server.
  def send_email email
    Net::SMTP.start(SERVER) do |smtp|
      smtp.send_message email, @from, @to
    end
  end
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
class Alarm
|
4
|
+
|
5
|
+
# Alarm delivery method that sends the short alarm text as a single UDP
# datagram to a udp2irc gateway.
class UDP2IRC
  # Sends "<password> <channel> <short message>" to the configured host/port.
  #
  # message - Hash; only message[:short] is transmitted.
  # channel - channel name placed in the datagram (defaults to CFG.irc.channel).
  #
  # Returns whatever UDPSocket#send returns.
  def send message, channel=CFG.irc.channel
    msg = [@password, channel, message[:short]].join ' '
    # Pad with NUL until the length is a multiple of 16.
    # NOTE(review): counts characters, not bytes - confirm what the gateway expects.
    msg += "\0" while msg.size % 16 > 0
    socket = UDPSocket.new
    begin
      # Use the host/port given to the constructor; the previous code
      # referenced HOST and PORT constants that are not defined here.
      socket.send msg, 0, @host, @port
    ensure
      socket.close
    end
  end

  private

  # host, port and password default to the irc section of the alarm config.
  def initialize host=CFG.irc.host, port=CFG.irc.port, password=CFG.irc.password
    @host     = host
    @port     = port
    @password = password
  end
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require_relative 'alarm/email'
|
2
|
+
require_relative 'alarm/udp2irc'
|
3
|
+
require_relative 'alarm/cfg'
|
4
|
+
require_relative 'mtr'
|
5
|
+
require_relative 'nodes_json'
|
6
|
+
|
7
|
+
module Ring
|
8
|
+
class SQA
|
9
|
+
|
10
|
+
# Tracks the alarm state and notifies every configured alarm method
# (Email, UDP2IRC) when the alarm is raised or cleared.
class Alarm
  # Raises the alarm, unless it is already raised, and sends the composed
  # message through all alarm methods.
  #
  # alarm_buffer - the analyzer buffer; must respond to #exceeding_nodes
  #                and #array.
  def set alarm_buffer
    return if @alarm
    @alarm = true
    msg = compose_message alarm_buffer
    Log.info msg[:short]
    @methods.each { |alarm_method| alarm_method.send msg }
  end

  # Clears a previously raised alarm and notifies all alarm methods.
  def clear
    return unless @alarm
    @alarm = false
    msg = { short: "#{@hostname}: clearing alarm" }
    msg[:long] = msg[:short]
    Log.info msg[:short]
    @methods.each { |alarm_method| alarm_method.send msg }
  end

  private

  def initialize database
    @db       = database
    @methods  = []
    @methods << Email.new   if CFG.email.to?
    @methods << UDP2IRC.new if CFG.irc.password?
    @alarm    = false
    @hostname = (Socket.gethostname rescue 'anonymous')
  end

  # Builds the alarm message.
  #
  # Returns a Hash with :short (one-line summary) and :long (full report
  # with node list, up to three traceroutes and the buffer contents).
  def compose_message alarm_buffer
    exceeding_nodes = alarm_buffer.exceeding_nodes
    msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
    nodes = NodesJSON.new

    nodes_list = ''
    exceeding_nodes.each do |node|
      json = nodes.get node
      nodes_list << "- %-30s %14s  AS%5s  %2s\n" % [json['hostname'], node, json['asn'], json['countrycode']]
    end

    mtr_list = ''
    exceeding_nodes.sample(3).each do |node|
      json = nodes.get node
      mtr_list << "%-30s AS%5s (%2s)\n" % [json['hostname'], json['asn'], json['countrycode']]
      mtr_list << MTR.run(node)
      mtr_list << "\n"
    end

    buffer_list = ''
    # The buffer object itself has no #size; count its slots via #array.
    time = alarm_buffer.array.size-1
    alarm_buffer.array.each do |ary|
      # NOTE(review): the division by two presumably reflects two ping
      # rounds per analyzer interval - confirm.
      buffer_list << "%2s min ago %3s measurements failed\n" % [time, ary.size/2]
      time -= 1
    end

    msg[:long] = <<EOF
This is an automated alert from the distributed partial outage monitoring system "RING SQA".

At #{Time.now.utc} the following measurements were analysed as indicating that there is a high probability your NLNOG RING node cannot reach the entire internet. Possible causes could be an outage in your upstream's or peer's network.

The following nodes previously were reachable, but became unreachable over the course of the last 3 minutes:

#{nodes_list}

As a debug starting point 3 traceroutes were launched right after detecting the event, they might assist in pinpointing what broke:

#{mtr_list}

An alarm is raised under the following conditions: every 30 seconds your node pings all other nodes. The amount of nodes that cannot be reached is stored in a circular buffer, with each element representing a minute of measurements. In the event that the last three minutes are #{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial outage is assumed. The ring buffer's output is as following:

#{buffer_list}

Kind regards,

NLNOG RING
EOF
    msg
  end
end
|
92
|
+
|
93
|
+
end
|
94
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require_relative 'alarm'
|
2
|
+
|
3
|
+
module Ring
|
4
|
+
class SQA
|
5
|
+
|
6
|
+
# Periodically pulls the failed ping records from the database, feeds them
# into the analyze buffer and raises or clears the alarm accordingly.
class Analyzer
  INTERVAL      = 60 # how often to run analyze loop
  INFLIGHT_WAIT = 1  # how long to wait for inflight records

  # Runs forever: one analysis pass per INTERVAL seconds, after an initial
  # INTERVAL-long wait.
  def run
    sleep INTERVAL
    loop do
      started_at = Time.now
      analyze_once
      delay = INTERVAL - (Time.now - started_at)
      if delay <= 0
        Log.error "Analyzer loop took longer than #{INTERVAL}, wanted to sleep for #{delay}s"
      else
        sleep delay
      end
    end
  end

  private

  def initialize database, nodes
    @db         = database
    @nodes      = nodes
    @alarm      = Alarm.new @db
    @buffer     = AnalyzeBuffer.new
    @db_id_seen = 0
  end

  # One pass: purge old rows, collect peers with failed pings since the last
  # seen row id, then update the alarm state.
  def analyze_once
    @db.purge
    @db_id_seen, failed = @db.nodes_down(@db_id_seen + 1)
    # The dataset is only materialised after the wait, giving in-flight
    # answers a moment to be recorded.
    sleep INFLIGHT_WAIT
    peers = failed.all.map(&:peer)
    @buffer.push peers
    if @buffer.exceed_median?
      @alarm.set @buffer
    else
      @alarm.clear
    end
  end
end
|
38
|
+
|
39
|
+
# Fixed-size FIFO of measurement slots. Each slot is an Array of the peers
# whose pings failed; the newest slot is last.
class AnalyzeBuffer
  attr_reader :array

  # max_size - number of slots kept. Every slot starts out holding 99 empty
  #            strings, so a fresh buffer has a baseline count of 99.
  def initialize max_size=30
    @max_size  = max_size
    init_nodes = Array.new 99, ''
    @array     = Array.new max_size, init_nodes
  end

  # Drops the oldest slot and appends +e+ as the newest one.
  def push e
    @array.shift
    @array.push e
  end

  # Number of slots in the buffer.
  def size
    @array.size
  end

  # Median of the per-slot failure counts over the oldest +of_first+ slots.
  # Defaults to every slot except the newest three (27 for a 30-slot buffer).
  def median of_first=@max_size-3
    of_first = of_first-1
    middle   = of_first/2
    node_count[0..of_first].sort[middle]
  end

  # True when each of the newest +last+ slots has more failures than
  # (median of the older slots + 1) * tolerance.
  def exceed_median? last=3, tolerance=CFG.analyzer.tolerance
    first   = @max_size-last
    violate = (median(first)+1)*tolerance
    node_count[first..-1].all? { |e| e > violate }
  end

  # Failure count of every slot, oldest first.
  def node_count
    @array.map { |nodes| nodes.size }
  end

  # Peers present in all of the newest +last+ slots and in none of the older
  # ones. The split is derived from the buffer rather than fixed at 27/28/29.
  def exceeding_nodes last=3
    recent   = @array.last(last)
    baseline = @array[0...(@array.size-last)]
    recent.reduce(:&) - baseline.flatten.uniq
  end
end
|
68
|
+
|
69
|
+
end
|
70
|
+
end
|
data/lib/ring/sqa/cfg.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'asetus'
|
2
|
+
|
3
|
+
module Ring
  class SQA
    # Directory holding the configuration files.
    Directory = '/etc/ring-sqa'
    class InvalidConfig < StandardError; end
    class NoConfig < StandardError; end

    # Main configuration, read from main.conf under Directory.
    Config = Asetus.new name: 'sqa', load: false, usrdir: Directory, cfgfile: 'main.conf'

    defaults                    = Config.default
    defaults.directory          = Directory
    defaults.debug              = false
    defaults.hosts.load         = ['ring.nlnog.net']
    defaults.hosts.ignore       = ['infra.ring.nlnog.net']
    defaults.port               = 'ring'.to_i(36)/100 # "ring" read as base 36, divided by 100
    defaults.analyzer.tolerance = 1.2
    defaults.nodes_json         = '/etc/ring/nodes.json'
    defaults.mtr.args           = '-i0.5 -c5 -r -w -n'
    defaults.mtr.timeout        = 15
    defaults.ram_database       = false

    begin
      Config.load
    rescue StandardError => load_error
      raise InvalidConfig, "Error loading configuration: #{load_error.message}"
    end

    CFG = Config.cfg

    # Bind addresses are taken from the first getaddrinfo result for this
    # host's own name, once per address family.
    own_address = lambda do |family|
      Socket::getaddrinfo(Socket.gethostname, "echo", family)[0][3]
    end
    CFG.bind.ipv4 = own_address.call(Socket::AF_INET)
    CFG.bind.ipv6 = own_address.call(Socket::AF_INET6)

    raise NoConfig, 'edit /etc/ring-sqa/main.conf' if Config.create
  end
end
|
data/lib/ring/sqa/cli.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'slop'
|
2
|
+
require 'ring/sqa'
|
3
|
+
|
4
|
+
module Ring
|
5
|
+
class SQA
|
6
|
+
|
7
|
+
# Command line entry point: parses options, applies them to CFG and starts
# the daemon.
class CLI
  attr_reader :opts

  # Optionally daemonizes, then starts SQA. Any StandardError is written to
  # crash.txt and re-raised.
  def run
    # NOTE(review): the pid is printed before Process.daemon is called, so
    # with --daemonize it may not be the pid of the running daemon - confirm.
    pid = $$
    puts "Running as pid: #{pid}"
    Process.daemon if @opts.daemonize?
    SQA.new
  rescue => error
    crash error
    raise
  end

  private

  def initialize
    _args, @opts = opts_parse
    CFG.debug = @opts.debug?
    CFG.ipv6  = @opts.ipv6?
    # Loaded only now because log.rb picks the logger based on CFG.debug?.
    require_relative 'log'
    Log.level = Logger::DEBUG if @opts.debug?
    run
  end

  # Returns [remaining arguments, parsed Slop options].
  def opts_parse
    slop = Slop.new(:help=>true) do
      banner 'Usage: ring-sqad [options]'
      on 'd', '--debug', 'turn on debugging'
      on '6', '--ipv6',  'use ipv6 instead of ipv4'
      on '--daemonize',  'run in background'
    end
    [slop.parse!, slop]
  end

  # Writes the error class, message and backtrace to crash.txt in
  # CFG.directory, replacing any previous crash file.
  def crash error
    path = File.join CFG.directory, 'crash.txt'
    # File.open instead of Kernel#open, and a block parameter that no longer
    # shadows the path variable.
    File.open path, 'w' do |io|
      io.puts error.class.to_s + ' => ' + error.message
      io.puts '-' * 70
      io.puts error.backtrace
      io.puts '-' * 70
    end
  end
end
|
52
|
+
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'pp'
|
2
|
+
require 'socket'
|
3
|
+
require_relative 'cfg'
|
4
|
+
require_relative 'database'
|
5
|
+
require_relative 'poller'
|
6
|
+
require_relative 'analyzer'
|
7
|
+
require_relative 'nodes'
|
8
|
+
|
9
|
+
module Ring
|
10
|
+
# Top-level daemon object: creates the database and node list, starts the
# poller threads and then runs the analyzer in the calling thread.
class SQA
  def run
    # An exception in any worker thread takes the whole process down.
    Thread.abort_on_exception = true
    workers = [
      lambda { Responder.new },
      lambda { Sender.new @database, @nodes },
      lambda { Receiver.new @database },
    ]
    workers.each { |worker| Thread.new(&worker) }
    analyzer = Analyzer.new(@database, @nodes)
    analyzer.run
  end

  private

  def initialize
    require_relative 'log'
    @database = Database.new
    @nodes    = Nodes.new
    run
  end
end
|
29
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
class Database
|
4
|
+
|
5
|
+
# One row per ping sent to a peer.
class Ping < Sequel::Model
  set_schema do
    primary_key :id
    # Integer instead of Fixnum: same generic integer column, without
    # depending on the Fixnum name that newer Ruby versions no longer have.
    Integer     :time
    String      :peer
    Integer     :latency
    String      :result
  end
  create_table unless table_exists?
end
|
15
|
+
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sequel'
|
2
|
+
require 'sqlite3'
|
3
|
+
|
4
|
+
module Ring
|
5
|
+
class SQA
|
6
|
+
|
7
|
+
# Thin wrapper around the SQLite database that stores ping records.
class Database
  # Stores a new ping record, initially marked 'no response'.
  #
  # record - Hash of column values (callers pass at least :peer); :time,
  #          :latency and :result are overwritten here.
  #
  # Returns the saved Ping.
  def add record
    record[:time]    = Time.now.utc.to_i
    record[:latency] = nil
    record[:result]  = 'no response'
    Log.debug "adding '#{record}' to database" if CFG.debug?
    Ping.new(record).save
  end

  # Sets result and latency on an existing record; logs an error when the
  # record does not exist.
  def update record_id, result, latency=nil
    if record = Ping[record_id]
      Log.debug "updating record_id '#{record_id}' with result '#{result}' and latency '#{latency}'" if CFG.debug?
      record.update(:result=>result, :latency=>latency)
    else
      Log.error "wanted to update record_id #{record_id}, but it does not exist"
    end
  end

  # Returns [highest row id, dataset of rows from first_id up to that id
  # whose result is not 'ok'].
  def nodes_down first_id
    max_id = (Ping.max(:id) or first_id)
    [max_id, Ping.distinct.where(:id=>first_id..max_id).exclude(:result => 'ok')]
  end

  # True when +peer+ has at least one row with an id greater than +id+.
  def up_since? id, peer
    # Name the column through the block argument: a bare `id` inside the
    # block is the method parameter, so `id > id` never matched any row.
    Ping.where { |row| row.id > id }.where(:peer=>peer).count > 0
  end

  # Deletes rows older than +older_than+ seconds.
  def purge older_than=3600
    Ping.where{time < (Time.now.utc-older_than).to_i}.delete
  end

  private

  def initialize
    Sequel::Model.plugin :schema
    # :pool_timeout was misspelled, so the 60s timeout was never applied.
    sequel_opts = { max_connections: 3, pool_timeout: 60 }
    if CFG.ram_database?
      @db = Sequel.sqlite sequel_opts
    else
      file = CFG.ipv6? ? 'ipv6.db' : 'ipv4.db'
      file = File.join CFG.directory, file
      begin
        File.unlink file # delete old database
      rescue SystemCallError
        # best effort: no previous database, or it could not be removed
      end
      @db = Sequel.sqlite file, sequel_opts
    end
    # The model is loaded only after the connection exists.
    require_relative 'database/model.rb'
  end
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
data/lib/ring/sqa/log.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
module Ring
  class SQA

    # Log goes to STDERR in debug mode, otherwise to syslog as 'ring-sqad';
    # STDERR is also the fallback when syslog/logger cannot be loaded.
    use_syslog = !CFG.debug?
    if use_syslog
      begin
        require 'syslog/logger'
      rescue LoadError
        use_syslog = false
      end
    end

    if use_syslog
      Log = Syslog::Logger.new 'ring-sqad'
    else
      require 'logger'
      Log = Logger.new STDERR
    end

  end
end
|
data/lib/ring/sqa/mtr.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'open3'
|
2
|
+
require 'timeout'
|
3
|
+
|
4
|
+
module Ring
|
5
|
+
class SQA
|
6
|
+
|
7
|
+
# Runs the mtr binary against a host and returns its report as text.
class MTR
  BIN = 'mtr'

  # Convenience wrapper: runs mtr with the configured arguments and timeout.
  def self.run host
    MTR.new.run host
  end

  # Runs mtr against +host+.
  #
  # args - Array of command line arguments; defaults to CFG.mtr.args split
  #        on spaces.
  #
  # Returns the report without its first line, or a one-line explanation
  # when the run timed out or the binary could not be started.
  def run host, args=nil
    Timeout::timeout(@timeout) do
      args ||= CFG.mtr.args.split(' ')
      mtr host, args
    end
  rescue Timeout::Error
    "MTR runtime exceeded #{@timeout}s"
  end

  private

  def initialize timeout=CFG.mtr.timeout
    @timeout = timeout
  end

  def mtr host, *args
    out  = ''
    args = [*args, host].flatten
    Open3.popen3(BIN, *args) do |stdin, stdout, stderr, wait_thr|
      finished = false
      begin
        out << stdout.read
        finished = true
      ensure
        # popen3 waits for the child when the block exits. If we are being
        # aborted (e.g. by the timeout), terminate the child so that wait
        # returns promptly and no process is left behind.
        unless finished
          begin
            Process.kill 'TERM', wait_thr.pid
          rescue Errno::ESRCH
            # child already gone
          end
        end
      end
    end
    report = out.each_line.to_a[1..-1] # nil when there was no output at all
    report ? report.join : ''
  rescue SystemCallError => error
    # e.g. the binary is not installed; report it instead of raising into
    # the alarm path.
    "MTR failed: #{error.message}"
  end
end
|
37
|
+
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rb-inotify'
|
2
|
+
require 'ipaddr'
|
3
|
+
|
4
|
+
module Ring
|
5
|
+
class SQA
|
6
|
+
|
7
|
+
# Keeps the list of node addresses found in FILE, re-reading the file
# whenever inotify reports that it was modified or created.
class Nodes
  FILE = '/etc/hosts'
  attr_reader :list

  # Starts the inotify event loop in its own thread.
  def run
    Thread.new { @inotify.run }
  end

  private

  def initialize
    @list    = get_list
    @inotify = INotify::Notifier.new
    watched  = FILE.split('/').last
    @inotify.watch(File.dirname(FILE), :modify, :create) do |event|
      @list = get_list if event.name == watched
    end
    run
  end

  # Returns the sorted first fields of all FILE lines that are not skipped.
  def get_list
    entries = File.read(FILE).lines.map { |line| line.split(/\s+/) }
    entries.reject { |entry| entry_skip? entry }.map(&:first).sort
  end

  # True when the whitespace-split line +entry+ should not become a node:
  # fewer than three fields, a comment, a third field matching an ignore
  # pattern or no load pattern, an unparsable address, an address of the
  # other family, or this host's own bind address.
  def entry_skip? entry
    return true if entry.size <= 2
    return true if entry.first.match(/^\s*#/)

    name = entry[2]
    return true if CFG.hosts.ignore.any? { |re| name.match Regexp.new(re) }
    return true if CFG.hosts.load.none?  { |re| name.match Regexp.new(re) }

    begin
      address = IPAddr.new(entry.first)
    rescue
      return true
    end

    if CFG.ipv6?
      return true if address.ipv4?
      own = CFG.bind.ipv6
    else
      return true if address.ipv6?
      own = CFG.bind.ipv4
    end
    address == IPAddr.new(own)
  end
end
|
54
|
+
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module Ring
|
4
|
+
class SQA
|
5
|
+
|
6
|
+
# Lookup table of node metadata read from the JSON file named by
# CFG.nodes_json, keyed by the node's address.
class NodesJSON
  # Returns the metadata Hash for address +node+, or {} when it is unknown
  # or the lookup raises.
  def get node
    begin
      @nodes[node] || {}
    rescue
      {}
    end
  end

  private

  def initialize
    @file = CFG.nodes_json
    begin
      @nodes = load_json
    rescue
      # missing or malformed file: behave as if no metadata exists
      @nodes = {}
    end
  end

  # Indexes json['results']['nodes'] by IPv6 or IPv4 address, depending on
  # CFG.ipv6?.
  def load_json
    parsed      = JSON.load File.read(@file)
    address_key = CFG.ipv6? ? 'ipv6' : 'ipv4'
    parsed['results']['nodes'].each_with_object({}) do |node, by_address|
      by_address[node[address_key]] = node
    end
  end
end
|
30
|
+
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
# Listens on port+1 for echoed queries and records the measured latency.
class Receiver < Poller

  # Binds the socket and processes datagrams forever.
  def run
    socket = udp_socket
    socket.bind address, port + 1
    loop do
      receive socket
    end
  end

  private

  def initialize database
    @db = database
    run
  end

  # Reads one datagram of the form "<sent timestamp> <row id>" and marks
  # that row 'ok' with the elapsed time in microseconds.
  def receive udp
    payload, _sender = udp.recvfrom MAX_READ
    sent_at, row_id  = payload.split(/\s+/)
    elapsed          = Time.now.utc.to_f - sent_at.to_f
    @db.update row_id.to_i, 'ok', (elapsed * 1_000_000).to_i
  end
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
# Listens on the configured port and echoes every datagram back to the
# sender's address on port+1.
class Responder < Poller
  # Binds the socket and answers queries forever.
  def run
    socket = udp_socket
    if CFG.debug?
      Log.debug "Responder binding to #{address.inspect} in port #{port}"
    end
    socket.bind address, port
    loop do
      respond socket
    end
  end

  private

  def initialize
    run
  end

  # Receives one datagram and sends the same payload back.
  def respond udp
    data, far_end = udp.recvfrom MAX_READ
    peer_address  = far_end[3]
    udp.send data, 0, peer_address, port + 1
    Log.debug "Sent response '#{data}' to '#{peer_address}'" if CFG.debug?
  end

end
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
# Sends a UDP query to every known node, once per INTERVAL seconds.
class Sender < Poller
  INTERVAL       = 30   # duration pinging all nodes should take
  INTER_NODE_GAP = 0.01 # delay to sleep between each node

  # Queries all nodes in a loop, forever. The socket is closed when the loop
  # is left through an exception.
  def run
    udp = udp_socket
    loop do
      loop_start = Time.now
      @nodes.list.each do |node|
        query node, udp
        sleep INTER_NODE_GAP
      end
      duration = Time.now-loop_start
      if duration < INTERVAL
        sleep INTERVAL-duration
      else
        Log.warn "Send loop took longer than #{INTERVAL}s"
      end
    end
  ensure
    # The loop above never ends normally, so a close placed after it was
    # unreachable.
    udp.close if udp
  end

  private

  def initialize database, nodes
    @db    = database
    @nodes = nodes
    run
  end

  # Records a new ping row for +node+ and sends "<timestamp> <row id>" to it.
  # Send errors that mean the node cannot be reached are stored as the row's
  # result instead of being raised.
  def query node, udp
    Log.debug "Sending query to #{node}" if CFG.debug?
    record = @db.add peer: node
    msg    = [Time.now.utc.to_f.to_s, record.id].join ' '
    udp.send msg, 0, node, port
  rescue Errno::ECONNREFUSED
    Log.warn "connection refused to '#{node}'"
    @db.update record.id, 'connection refused'
  rescue Errno::ENETUNREACH
    Log.warn "network unreachable to '#{node}'"
    @db.update record.id, 'network unreachable'
  rescue Errno::EHOSTUNREACH
    # Previously unhandled, so one unreachable host aborted the sender thread.
    Log.warn "host unreachable to '#{node}'"
    @db.update record.id, 'host unreachable'
  end
end
|
47
|
+
|
48
|
+
end
|
49
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Ring
|
2
|
+
class SQA
|
3
|
+
|
4
|
+
# Shared helpers for the UDP sender, receiver and responder.
class Poller
  # Largest datagram read in one recvfrom call.
  MAX_READ = 500

  # Local bind address for the active address family.
  def address
    return CFG.bind.ipv6 if CFG.ipv6?
    CFG.bind.ipv4
  end

  # Configured port as an Integer.
  def port
    CFG.port.to_i
  end

  # New UDP socket for the active address family.
  def udp_socket
    if CFG.ipv6?
      UDPSocket.new(Socket::AF_INET6)
    else
      UDPSocket.new
    end
  end
end
|
19
|
+
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
require_relative 'poller/sender'
|
25
|
+
require_relative 'poller/receiver'
|
26
|
+
require_relative 'poller/responder'
|
27
|
+
|
data/lib/ring/sqa.rb
ADDED
data/ring-sqa.gemspec
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Gem specification for ring-sqa.
Gem::Specification.new do |spec|
  spec.name              = 'ring-sqa'
  spec.version           = '0.0.15'
  spec.licenses          = ['Apache-2.0']
  spec.platform          = Gem::Platform::RUBY
  spec.authors           = ['Saku Ytti']
  spec.email             = ['saku@ytti.fi']
  spec.homepage          = 'http://github.com/ytti/ring-sqa'
  spec.summary           = 'NLNOG Ring SQA'
  spec.description       = 'gets list of nodes and pings from each to each storing results'
  spec.rubyforge_project = spec.name
  # Every file tracked by git is packaged.
  spec.files             = `git ls-files`.split("\n")
  spec.executables       = ['ring-sqad']
  spec.require_path      = 'lib'

  spec.required_ruby_version = '>= 1.9.3'

  # Runtime dependencies and their version requirements.
  {
    'slop'       => ['~> 3.5'],
    'rb-inotify' => ['~> 0.9'],
    'sequel'     => ['~> 4.12'],
    'sqlite3'    => ['~> 1.3'],
    'asetus'     => ['~> 0.1', '>= 0.1.2'],
  }.each do |gem_name, requirements|
    spec.add_runtime_dependency gem_name, *requirements
  end
end
|
metadata
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ring-sqa
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.15
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Saku Ytti
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-07-24 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: slop
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '3.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '3.5'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rb-inotify
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.9'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.9'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: sequel
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '4.12'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '4.12'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: sqlite3
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.3'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.3'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: asetus
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.1'
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: 0.1.2
|
79
|
+
type: :runtime
|
80
|
+
prerelease: false
|
81
|
+
version_requirements: !ruby/object:Gem::Requirement
|
82
|
+
requirements:
|
83
|
+
- - "~>"
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0.1'
|
86
|
+
- - ">="
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: 0.1.2
|
89
|
+
description: gets list of nodes and pings from each to each storing results
|
90
|
+
email:
|
91
|
+
- saku@ytti.fi
|
92
|
+
executables:
|
93
|
+
- ring-sqad
|
94
|
+
extensions: []
|
95
|
+
extra_rdoc_files: []
|
96
|
+
files:
|
97
|
+
- Gemfile
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- bin/ring-sqad
|
101
|
+
- lib/ring/sqa.rb
|
102
|
+
- lib/ring/sqa/alarm.rb
|
103
|
+
- lib/ring/sqa/alarm/cfg.rb
|
104
|
+
- lib/ring/sqa/alarm/email.rb
|
105
|
+
- lib/ring/sqa/alarm/udp2irc.rb
|
106
|
+
- lib/ring/sqa/analyzer.rb
|
107
|
+
- lib/ring/sqa/cfg.rb
|
108
|
+
- lib/ring/sqa/cli.rb
|
109
|
+
- lib/ring/sqa/core.rb
|
110
|
+
- lib/ring/sqa/database.rb
|
111
|
+
- lib/ring/sqa/database/model.rb
|
112
|
+
- lib/ring/sqa/log.rb
|
113
|
+
- lib/ring/sqa/mtr.rb
|
114
|
+
- lib/ring/sqa/nodes.rb
|
115
|
+
- lib/ring/sqa/nodes_json.rb
|
116
|
+
- lib/ring/sqa/poller.rb
|
117
|
+
- lib/ring/sqa/poller/receiver.rb
|
118
|
+
- lib/ring/sqa/poller/responder.rb
|
119
|
+
- lib/ring/sqa/poller/sender.rb
|
120
|
+
- ring-sqa.gemspec
|
121
|
+
homepage: http://github.com/ytti/ring-sqa
|
122
|
+
licenses:
|
123
|
+
- Apache-2.0
|
124
|
+
metadata: {}
|
125
|
+
post_install_message:
|
126
|
+
rdoc_options: []
|
127
|
+
require_paths:
|
128
|
+
- lib
|
129
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
130
|
+
requirements:
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.9.3
|
134
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
requirements: []
|
140
|
+
rubyforge_project: ring-sqa
|
141
|
+
rubygems_version: 2.2.2
|
142
|
+
signing_key:
|
143
|
+
specification_version: 4
|
144
|
+
summary: NLNOG Ring SQA
|
145
|
+
test_files: []
|