solanum 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +51 -45
- data/bin/solanum +11 -53
- data/lib/solanum.rb +109 -54
- data/lib/solanum/config.rb +67 -74
- data/lib/solanum/output/print.rb +18 -0
- data/lib/solanum/output/riemann.rb +20 -0
- data/lib/solanum/schedule.rb +55 -0
- data/lib/solanum/source.rb +13 -106
- data/lib/solanum/source/certificate.rb +88 -0
- data/lib/solanum/source/cpu.rb +119 -0
- data/lib/solanum/source/diskstats.rb +118 -0
- data/lib/solanum/source/load.rb +44 -0
- data/lib/solanum/source/memory.rb +70 -0
- data/lib/solanum/source/network.rb +65 -0
- data/lib/solanum/source/uptime.rb +28 -0
- data/lib/solanum/util.rb +40 -0
- metadata +31 -16
- checksums.yaml +0 -7
- data/lib/solanum/matcher.rb +0 -70
data/README.md
CHANGED
@@ -1,51 +1,57 @@
|
|
1
1
|
Solanum
|
2
2
|
=======
|
3
3
|
|
4
|
-
This gem provides a
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
##
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
##
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
4
|
+
This gem provides a monitoring daemon which can be configured to collect data
|
5
|
+
from a variety of pluggable sources. The results can be printed to the console
|
6
|
+
or sent to a [Riemann](http://riemann.io/) server. This requires the
|
7
|
+
`riemann-client` gem to work.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
**TODO**
|
13
|
+
|
14
|
+
|
15
|
+
## Metric Events
|
16
|
+
|
17
|
+
Solanum represents each measurement datapoint as an _event_. Each event must
|
18
|
+
have at minimum a `service` and `metric` with the measurement name and value,
|
19
|
+
respectively. Events may also contain other attributes such as a `state`, `ttl`,
|
20
|
+
`tags`, and so on - see the [Riemann concepts](http://riemann.io/concepts.html)
|
21
|
+
page for more details.
|
22
|
+
|
23
|
+
|
24
|
+
## Configuration
|
25
|
+
|
26
|
+
Solanum is configured using one or more YAML files. These specify common event
|
27
|
+
attributes, sources, and outputs.
|
28
|
+
|
29
|
+
See the [example config](config.yml) in this repo for possible config options.
|
30
|
+
|
31
|
+
### Defaults
|
32
|
+
|
33
|
+
The `defaults` section of the config provides common attributes to apply to
|
34
|
+
every event. This can be used to provide a common TTL, tags, and more.
|
35
|
+
|
36
|
+
### Sources
|
37
|
+
|
38
|
+
A _source_ is a class which extends `Solanum::Source` and implements the
|
39
|
+
`collect!` method to return metric events. Solanum comes with several metric
|
40
|
+
sources built in, including basic host-level monitoring of CPU usage, load,
|
41
|
+
memory, diskstats, network, and more.
|
42
|
+
|
43
|
+
Additional custom sources can be provided, as long as they are in Ruby's lib
|
44
|
+
path for the daemon.
|
45
|
+
|
46
|
+
### Outputs
|
47
|
+
|
48
|
+
An _output_ is a destination to report the collected events to. The simplest
|
49
|
+
one is the `print` output, which writes each event to STDOUT. This is useful for
|
50
|
+
debugging, but you probably won't leave it on for deployed daemons.
|
51
|
+
|
52
|
+
The other included choice is the `riemann` output, which sends each event to a
|
53
|
+
Riemann monitoring server.
|
54
|
+
|
49
55
|
|
50
56
|
## License
|
51
57
|
|
data/bin/solanum
CHANGED
@@ -6,16 +6,13 @@ require 'optparse'
|
|
6
6
|
require 'solanum'
|
7
7
|
|
8
8
|
$options = {
|
9
|
-
|
10
|
-
riemann_port: 5555,
|
11
|
-
interval: 5,
|
12
|
-
verbose: false,
|
9
|
+
period: 10,
|
13
10
|
}
|
14
11
|
|
15
12
|
$defaults = {
|
16
|
-
host: %x{hostname}.chomp,
|
13
|
+
host: %x{hostname --fqdn}.chomp,
|
17
14
|
tags: [],
|
18
|
-
ttl:
|
15
|
+
ttl: 60,
|
19
16
|
}
|
20
17
|
|
21
18
|
def fail(msg, code=1)
|
@@ -23,25 +20,18 @@ def fail(msg, code=1)
|
|
23
20
|
exit code
|
24
21
|
end
|
25
22
|
|
26
|
-
def log(msg)
|
27
|
-
puts "%s %s" % [Time.now.strftime("%H:%M:%S"), msg] if $options[:verbose]
|
28
|
-
end
|
29
|
-
|
30
23
|
# Parse command-line options.
|
31
24
|
options = OptionParser.new do |opts|
|
32
|
-
opts.banner = "Usage: #{File.basename($0)} [options] <
|
25
|
+
opts.banner = "Usage: #{File.basename($0)} [options] <config.yml> [config2.yml ...]"
|
33
26
|
opts.separator ""
|
34
27
|
opts.separator "Event Attributes:"
|
35
28
|
opts.on( '--host HOST', "Event hostname (default: #{$defaults[:host]})") {|v| $defaults[:host] = v }
|
36
|
-
opts.on('-a', '--attribute KEY=VAL', "Attribute to add to
|
29
|
+
opts.on('-a', '--attribute KEY=VAL', "Attribute to add to every event (may be given multiple times)") {|attr| k,v = attr.split(/=/); if k and v then $defaults[k.intern] = v end }
|
37
30
|
opts.on('-t', '--tag TAG', "Tag to add to events (may be given multiple times)") {|v| $defaults[:tags] << v }
|
38
|
-
opts.on( '--ttl SECONDS', "Default TTL for events (default: #{$
|
31
|
+
opts.on( '--ttl SECONDS', "Default TTL for events (default: #{$defaults[:ttl]})") {|v| $defaults[:ttl] = v.to_i }
|
39
32
|
opts.separator ""
|
40
33
|
opts.separator "General Options:"
|
41
|
-
opts.on(
|
42
|
-
opts.on( '--riemann-port PORT', "Riemann port (default: #{$options[:riemann_port]})") {|v| $options[:riemann_port] = v.to_i }
|
43
|
-
opts.on('-i', '--interval SECONDS', "Seconds between updates (default: #{$options[:interval]})") {|v| $options[:interval] = v.to_i }
|
44
|
-
opts.on('-v', '--verbose', "Print additional information to stdout") { $options[:verbose] = true }
|
34
|
+
opts.on('-p', '--period SECONDS', "Seconds between updates (default: #{$options[:period]})") {|v| $options[:period] = v.to_i }
|
45
35
|
opts.on('-h', '--help', "Displays usage information") { print opts; exit }
|
46
36
|
end
|
47
37
|
options.parse!
|
@@ -49,46 +39,14 @@ options.parse!
|
|
49
39
|
# Check usage.
|
50
40
|
fail options if ARGV.empty?
|
51
41
|
|
52
|
-
|
53
|
-
|
54
|
-
##### MONITORING CONFIGS #####
|
55
|
-
|
42
|
+
# Construct monitoring system.
|
56
43
|
$solanum = Solanum.new(ARGV)
|
57
44
|
fail "No sources loaded!" if $solanum.sources.empty?
|
58
45
|
|
59
|
-
|
60
|
-
begin
|
61
|
-
require 'riemann/client'
|
62
|
-
rescue LoadError
|
63
|
-
fail "ERROR: could not load Riemann client library! `gem install riemann-client` to enable reporting"
|
64
|
-
end
|
65
|
-
|
66
|
-
$riemann = Riemann::Client.new(host: $options[:riemann_host], port: $options[:riemann_port])
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
##### REPORT LOOP #####
|
72
|
-
|
46
|
+
# Handle ^C interrupts gracefully.
|
73
47
|
trap "SIGINT" do
|
74
48
|
exit
|
75
49
|
end
|
76
50
|
|
77
|
-
loop
|
78
|
-
|
79
|
-
events = $solanum.build_events($defaults)
|
80
|
-
|
81
|
-
events.each do |event|
|
82
|
-
if $options[:verbose] || $riemann.nil?
|
83
|
-
puts "%-40s %5s (%s) %s" % [
|
84
|
-
event[:service], event[:metric],
|
85
|
-
event[:state].nil? ? "--" : event[:state],
|
86
|
-
event.inspect
|
87
|
-
]
|
88
|
-
end
|
89
|
-
|
90
|
-
$riemann << event if $riemann
|
91
|
-
end
|
92
|
-
|
93
|
-
sleep $options[:interval]
|
94
|
-
end
|
51
|
+
# Scheduling loop.
|
52
|
+
$solanum.run!
|
data/lib/solanum.rb
CHANGED
@@ -1,81 +1,136 @@
|
|
1
|
+
require 'solanum/config'
|
2
|
+
require 'solanum/schedule'
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
|
1
6
|
# Class which wraps up an active Solanum monitoring system into an object.
#
# An instance holds the merged default event attributes, the configured
# sources and outputs, and a schedule of when each source should next run.
# Calling #run! turns the current thread into the scheduler; each collection
# runs in its own thread and reschedules its source when it finishes.
class Solanum
  attr_reader :defaults, :sources, :outputs

  # Merge two event attribute maps together, concatenating tags.
  #
  # Keys of both maps are converted to strings. Values in `b` win over values
  # in `a`, except for 'tags', which are concatenated and de-duplicated.
  # Either argument may be nil, in which case it is treated as "no
  # attributes"; when both are nil an empty hash is returned. The inputs are
  # not modified.
  #
  # Returns a new Hash with string keys.
  def self.merge_attrs(a, b)
    stringify = lambda do |attrs|
      attrs.each_with_object({}) { |(key, value), out| out[key.to_s] = value }
    end

    # With only one side present there is nothing to merge; `b || {}` covers
    # the case where both sides are nil.
    return stringify[b || {}] if a.nil?
    return stringify[a] if b.nil?

    a = stringify[a]
    b = stringify[b]
    tags = ((a['tags'] || []) + (b['tags'] || [])).uniq
    merged = a.merge(b)
    # Only overwrite 'tags' when there is something to record, so an event
    # without tags keeps whatever the plain merge produced.
    merged['tags'] = tags unless tags.empty?
    merged
  end


  # Loads the given configuration file(s) and initializes the system.
  #
  # config_paths - enumerable of paths to configuration files. Defaults are
  #                merged in order; sources and outputs are accumulated.
  #
  # Falls back to a single print output when no file configures an output.
  def initialize(config_paths)
    @defaults = {tags: []}
    @sources = []
    @outputs = []

    # Load and merge files.
    config_paths.each do |path|
      conf = Config.load_file(path)

      # merge defaults, update tags
      @defaults = Solanum.merge_attrs(@defaults, conf[:defaults])

      # sources and outputs are additive
      @sources.concat(conf[:sources])
      @outputs.concat(conf[:outputs])
    end

    # Add default print output.
    if @outputs.empty?
      require 'solanum/output/print'
      @outputs << Solanum::Output::Print.new
    end

    @defaults.freeze
    @outputs.freeze
    @sources.freeze

    # Sources are scheduled by their index in @sources, which is safe because
    # the list is frozen above.
    @schedule = Solanum::Schedule.new
    @sources.each_with_index do |source, i|
      @schedule.insert!(source.next_run, i)
    end
  end


  # Reschedule the given source for later running.
  #
  # Raises a RuntimeError if the source is not one of the configured sources.
  def reschedule!(source)
    idx = @sources.index(source)
    raise "Source #{source.inspect} is not present in source list!" unless idx
    @schedule.insert!(source.next_run, idx)
    # Nudge the scheduler so it re-reads the schedule instead of sleeping out
    # its previous wait. Thread#wakeup raises ThreadError on a dead thread and
    # there is nothing to wake before #run! has been called, so guard both.
    @scheduler.wakeup if @scheduler && @scheduler.alive?
  end


  # Report a batch of events to all reporters.
  def record!(events)
    # TODO: does this need locking?
    @outputs.each do |output|
      output.write_events events
    end
  end


  # Run collection from the given source in a new thread.
  #
  # Collected events are merged over the global defaults and the source's own
  # attributes before being recorded. Errors raised while collecting or
  # recording are reported on STDERR; the source is rescheduled afterwards
  # either way. Returns the new Thread.
  def collect_events!(source)
    Thread.new do
      begin
        events = source.collect!
        attrs = Solanum.merge_attrs(@defaults, source.attributes)
        events = events.map do |event|
          Solanum.merge_attrs(attrs, event)
        end
        record! events
      rescue => e
        STDERR.puts "Error collecting events from source #{source.type}: #{e}"
      end
      reschedule! source
    end
  end


  # Runs the collection loop. Does not return; the calling thread becomes the
  # scheduler thread that #reschedule! wakes up.
  def run!
    @scheduler = Thread.current
    loop do
      # Determine when next scheduled source should run, and sleep if needed.
      # A nil wait falls back to a one second sleep (presumably an empty
      # schedule - confirm against Solanum::Schedule).
      duration = @schedule.next_wait || 1
      if 0 < duration
        sleep duration
        next
      end

      # Get the next ready source.
      idx = @schedule.pop_ready!
      source = @sources[idx] if idx
      next unless source
      #puts "Source #{source.type} is ready to run!" # DEBUG

      # Start thread to collect and report events.
      collect_events! source
    end
  end

end
|
data/lib/solanum/config.rb
CHANGED
@@ -1,97 +1,90 @@
|
|
1
|
-
require '
|
1
|
+
require 'yaml'
|
2
2
|
|
3
|
-
class Solanum
|
4
|
-
|
3
|
+
class Solanum
  # Configuration loading: reads YAML config files and turns the `sources` and
  # `outputs` entries into instances of the classes they name. Resolved
  # classes (and failed resolutions) are memoized in `@@type_classes`.
  module Config

    # Helper method to clear the type cache.
    def self.clear_type_cache!
      @@type_classes = {}
    end


    # Resolve a type based on a library path.
    #
    # namespace  - kind of type being resolved (this file passes 'source' or
    #              'output').
    # type       - type name from the config. Used for the cache key and,
    #              when lib_path is not given, to derive the library path:
    #              a name containing '/' is used as the path itself,
    #              otherwise "solanum/<namespace>/<type>".
    # lib_path   - optional explicit path to require.
    # class_name - optional explicit '::'-separated class name. When omitted
    #              the class name is derived by capitalizing each segment of
    #              the library path.
    #
    # Returns the resolved class, or nil if the library could not be loaded or
    # the class could not be found. Failures are reported on STDERR and cached
    # so they are only attempted once per "<namespace>:<type>" key.
    def self.resolve_type(namespace, type, lib_path=nil, class_name=nil)
      @@type_classes ||= {}

      type_key = "#{namespace}:#{type}"
      return @@type_classes[type_key] if @@type_classes.key?(type_key)

      @@type_classes[type_key] = load_type_class(type_key, namespace, type, lib_path, class_name)
    end


    # Requires the library for a type and looks up its class. Returns nil
    # (after reporting on STDERR) when either step fails.
    def self.load_type_class(type_key, namespace, type, lib_path, class_name)
      if lib_path.nil?
        # The path can only be derived from a string type name.
        unless type.is_a?(String)
          STDERR.puts "Unable to load code for #{type_key} type: no type name given"
          return nil
        end
        lib_path = type.include?('/') ? type : "solanum/#{namespace}/#{type}"
      end

      cls_path = class_name ? class_name.split('::') : lib_path.split('/').map(&:capitalize)

      begin
        require lib_path
      rescue LoadError => e
        STDERR.puts "Unable to load code for #{type_key} type: #{e}"
        return nil
      end

      begin
        cls_path.inject(Object) { |mod, const_name| mod.const_get(const_name) }
      rescue NameError
        # const_get raises rather than returning nil for a missing constant.
        STDERR.puts "Unable to resolve class #{cls_path.join('::')}"
        nil
      end
    end
    private_class_method :load_type_class


    # Resolves a type config string and constructs a new instance of it. Memoizes
    # the results of loading the class in the `@@type_classes` field.
    #
    # args - config hash for the entry; 'lib_path' and 'class' are used to
    #        resolve the class and the whole hash is passed to its constructor.
    #
    # Returns the new instance, or nil if the class could not be resolved or
    # its constructor raised (both are reported on STDERR).
    def self.construct_type(namespace, type, args)
      cls = resolve_type(namespace, type, args['lib_path'], args['class'])
      if cls.nil?
        STDERR.puts "Skipping construction of failed #{namespace} type #{type}"
        return nil
      end

      begin
        #puts "#{cls}.new(#{args.inspect})" # DEBUG
        cls.new(args)
      rescue => e
        STDERR.puts "Error constructing #{namespace} type #{type}: #{args.inspect} #{e}"
        nil
      end
    end


    # Load the given configuration file. Returns a map with :defaults and
    # initialized :sources and :outputs. Entries which fail to construct are
    # dropped. An empty file yields empty defaults, sources and outputs.
    def self.load_file(path)
      # NOTE(review): on Psych versions before 4, YAML.load can instantiate
      # arbitrary objects; only load config files from trusted locations, or
      # consider YAML.safe_load.
      # YAML.load returns a falsy value for an empty document.
      config = File.open(path) {|f| YAML.load(f) } || {}

      defaults = config['defaults'] || {}

      # Construct sources from config.
      sources = (config['sources'] || []).map do |conf|
        construct_type('source', conf['type'], conf)
      end.compact

      # Construct outputs from config.
      outputs = (config['outputs'] || []).map do |conf|
        construct_type('output', conf['type'], conf)
      end.compact

      {
        defaults: defaults,
        sources: sources,
        outputs: outputs,
      }
    end

  end
end
|