solanum 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +51 -45
- data/bin/solanum +11 -53
- data/lib/solanum.rb +109 -54
- data/lib/solanum/config.rb +67 -74
- data/lib/solanum/output/print.rb +18 -0
- data/lib/solanum/output/riemann.rb +20 -0
- data/lib/solanum/schedule.rb +55 -0
- data/lib/solanum/source.rb +13 -106
- data/lib/solanum/source/certificate.rb +88 -0
- data/lib/solanum/source/cpu.rb +119 -0
- data/lib/solanum/source/diskstats.rb +118 -0
- data/lib/solanum/source/load.rb +44 -0
- data/lib/solanum/source/memory.rb +70 -0
- data/lib/solanum/source/network.rb +65 -0
- data/lib/solanum/source/uptime.rb +28 -0
- data/lib/solanum/util.rb +40 -0
- metadata +31 -16
- checksums.yaml +0 -7
- data/lib/solanum/matcher.rb +0 -70
data/README.md
CHANGED
@@ -1,51 +1,57 @@
|
|
1
1
|
Solanum
|
2
2
|
=======
|
3
3
|
|
4
|
-
This gem provides a
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
##
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
##
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
4
|
+
This gem provides a monitoring daemon which can be configured to collect data
|
5
|
+
from a variety of pluggable sources. The results can be printed to the console
|
6
|
+
or sent to a [Riemann](http://riemann.io/) server. The Riemann output requires the
|
7
|
+
`riemann-client` gem to work.
|
8
|
+
|
9
|
+
|
10
|
+
## Installation
|
11
|
+
|
12
|
+
**TODO**
|
13
|
+
|
14
|
+
|
15
|
+
## Metric Events
|
16
|
+
|
17
|
+
Solanum represents each measurement datapoint as an _event_. Each event must
|
18
|
+
have at minimum a `service` and `metric` with the measurement name and value,
|
19
|
+
respectively. Events may also contain other attributes such as a `state`, `ttl`,
|
20
|
+
`tags`, and so on - see the [Riemann concepts](http://riemann.io/concepts.html)
|
21
|
+
page for more details.
|
22
|
+
|
23
|
+
|
24
|
+
## Configuration
|
25
|
+
|
26
|
+
Solanum is configured using one or more YAML files. These specify common event
|
27
|
+
attributes, sources, and outputs.
|
28
|
+
|
29
|
+
See the [example config](config.yml) in this repo for possible config options.
|
30
|
+
|
31
|
+
### Defaults
|
32
|
+
|
33
|
+
The `defaults` section of the config provides common attributes to apply to
|
34
|
+
every event. This can be used to provide a common TTL, tags, and more.
|
35
|
+
|
36
|
+
### Sources
|
37
|
+
|
38
|
+
A _source_ is a class which extends `Solanum::Source` and implements the
|
39
|
+
`collect!` method to return metric events. Solanum comes with several metric
|
40
|
+
sources built in, including basic host-level monitoring of CPU usage, load,
|
41
|
+
memory, diskstats, network, and more.
|
42
|
+
|
43
|
+
Additional custom sources can be provided, as long as they are on Ruby's load
|
44
|
+
path for the daemon.
|
45
|
+
|
46
|
+
### Outputs
|
47
|
+
|
48
|
+
An _output_ is a destination to report the collected events to. The simplest
|
49
|
+
one is the `print` output, which writes each event to STDOUT. This is useful for
|
50
|
+
debugging, but you probably won't leave it on for deployed daemons.
|
51
|
+
|
52
|
+
The other included choice is the `riemann` output, which sends each event to a
|
53
|
+
Riemann monitoring server.
|
54
|
+
|
49
55
|
|
50
56
|
## License
|
51
57
|
|
data/bin/solanum
CHANGED
@@ -6,16 +6,13 @@ require 'optparse'
|
|
6
6
|
require 'solanum'
|
7
7
|
|
8
8
|
$options = {
|
9
|
-
|
10
|
-
riemann_port: 5555,
|
11
|
-
interval: 5,
|
12
|
-
verbose: false,
|
9
|
+
period: 10,
|
13
10
|
}
|
14
11
|
|
15
12
|
$defaults = {
|
16
|
-
host: %x{hostname}.chomp,
|
13
|
+
host: %x{hostname --fqdn}.chomp,
|
17
14
|
tags: [],
|
18
|
-
ttl:
|
15
|
+
ttl: 60,
|
19
16
|
}
|
20
17
|
|
21
18
|
def fail(msg, code=1)
|
@@ -23,25 +20,18 @@ def fail(msg, code=1)
|
|
23
20
|
exit code
|
24
21
|
end
|
25
22
|
|
26
|
-
def log(msg)
|
27
|
-
puts "%s %s" % [Time.now.strftime("%H:%M:%S"), msg] if $options[:verbose]
|
28
|
-
end
|
29
|
-
|
30
23
|
# Parse command-line options.
|
31
24
|
options = OptionParser.new do |opts|
|
32
|
-
opts.banner = "Usage: #{File.basename($0)} [options] <
|
25
|
+
opts.banner = "Usage: #{File.basename($0)} [options] <config.yml> [config2.yml ...]"
|
33
26
|
opts.separator ""
|
34
27
|
opts.separator "Event Attributes:"
|
35
28
|
opts.on( '--host HOST', "Event hostname (default: #{$defaults[:host]})") {|v| $defaults[:host] = v }
|
36
|
-
opts.on('-a', '--attribute KEY=VAL', "Attribute to add to
|
29
|
+
opts.on('-a', '--attribute KEY=VAL', "Attribute to add to every event (may be given multiple times)") {|attr| k,v = attr.split(/=/); if k and v then $defaults[k.intern] = v end }
|
37
30
|
opts.on('-t', '--tag TAG', "Tag to add to events (may be given multiple times)") {|v| $defaults[:tags] << v }
|
38
|
-
opts.on( '--ttl SECONDS', "Default TTL for events (default: #{$
|
31
|
+
opts.on( '--ttl SECONDS', "Default TTL for events (default: #{$defaults[:ttl]})") {|v| $defaults[:ttl] = v.to_i }
|
39
32
|
opts.separator ""
|
40
33
|
opts.separator "General Options:"
|
41
|
-
opts.on(
|
42
|
-
opts.on( '--riemann-port PORT', "Riemann port (default: #{$options[:riemann_port]})") {|v| $options[:riemann_port] = v.to_i }
|
43
|
-
opts.on('-i', '--interval SECONDS', "Seconds between updates (default: #{$options[:interval]})") {|v| $options[:interval] = v.to_i }
|
44
|
-
opts.on('-v', '--verbose', "Print additional information to stdout") { $options[:verbose] = true }
|
34
|
+
opts.on('-p', '--period SECONDS', "Seconds between updates (default: #{$options[:period]})") {|v| $options[:period] = v.to_i }
|
45
35
|
opts.on('-h', '--help', "Displays usage information") { print opts; exit }
|
46
36
|
end
|
47
37
|
options.parse!
|
@@ -49,46 +39,14 @@ options.parse!
|
|
49
39
|
# Check usage.
|
50
40
|
fail options if ARGV.empty?
|
51
41
|
|
52
|
-
|
53
|
-
|
54
|
-
##### MONITORING CONFIGS #####
|
55
|
-
|
42
|
+
# Construct monitoring system.
|
56
43
|
$solanum = Solanum.new(ARGV)
|
57
44
|
fail "No sources loaded!" if $solanum.sources.empty?
|
58
45
|
|
59
|
-
|
60
|
-
begin
|
61
|
-
require 'riemann/client'
|
62
|
-
rescue LoadError
|
63
|
-
fail "ERROR: could not load Riemann client library! `gem install riemann-client` to enable reporting"
|
64
|
-
end
|
65
|
-
|
66
|
-
$riemann = Riemann::Client.new(host: $options[:riemann_host], port: $options[:riemann_port])
|
67
|
-
end
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
##### REPORT LOOP #####
|
72
|
-
|
46
|
+
# Handle ^C interrupts gracefully.
|
73
47
|
trap "SIGINT" do
|
74
48
|
exit
|
75
49
|
end
|
76
50
|
|
77
|
-
loop
|
78
|
-
|
79
|
-
events = $solanum.build_events($defaults)
|
80
|
-
|
81
|
-
events.each do |event|
|
82
|
-
if $options[:verbose] || $riemann.nil?
|
83
|
-
puts "%-40s %5s (%s) %s" % [
|
84
|
-
event[:service], event[:metric],
|
85
|
-
event[:state].nil? ? "--" : event[:state],
|
86
|
-
event.inspect
|
87
|
-
]
|
88
|
-
end
|
89
|
-
|
90
|
-
$riemann << event if $riemann
|
91
|
-
end
|
92
|
-
|
93
|
-
sleep $options[:interval]
|
94
|
-
end
|
51
|
+
# Scheduling loop.
|
52
|
+
$solanum.run!
|
data/lib/solanum.rb
CHANGED
@@ -1,81 +1,136 @@
|
|
1
|
+
require 'solanum/config'
|
2
|
+
require 'solanum/schedule'
|
3
|
+
require 'thread'
|
4
|
+
|
5
|
+
|
1
6
|
# Class which wraps up an active Solanum monitoring system into an object.
|
2
|
-
#
|
3
|
-
# Author:: Greg Look
|
4
7
|
class Solanum
|
5
|
-
attr_reader :
|
8
|
+
attr_reader :defaults, :sources, :outputs
|
6
9
|
|
7
|
-
|
8
|
-
|
10
|
+
# Merge two event attribute maps together, concatenating tags.
|
11
|
+
def self.merge_attrs(a, b)
|
12
|
+
stringify = lambda do |x|
|
13
|
+
o = {}
|
14
|
+
x.keys.each do |k|
|
15
|
+
o[k.to_s] = x[k]
|
16
|
+
end
|
17
|
+
o
|
18
|
+
end
|
19
|
+
|
20
|
+
if a.nil?
|
21
|
+
stringify[b]
|
22
|
+
elsif b.nil?
|
23
|
+
stringify[a]
|
24
|
+
else
|
25
|
+
a = stringify[a]
|
26
|
+
b = stringify[b]
|
27
|
+
tags = a['tags'] ? a['tags'].dup : []
|
28
|
+
tags.concat(b['tags']) if b['tags']
|
29
|
+
tags.uniq!
|
30
|
+
x = a.dup.merge(b)
|
31
|
+
x['tags'] = tags unless tags.empty?
|
32
|
+
x
|
33
|
+
end
|
34
|
+
end
|
9
35
|
|
10
36
|
|
11
|
-
# Loads the given
|
12
|
-
|
13
|
-
|
37
|
+
# Loads the given configuration file(s) and initializes the system.
|
38
|
+
def initialize(config_paths)
|
39
|
+
@defaults = {tags: []}
|
14
40
|
@sources = []
|
15
|
-
@
|
16
|
-
@metrics = {}
|
41
|
+
@outputs = []
|
17
42
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
43
|
+
# Load and merge files.
|
44
|
+
config_paths.each do |path|
|
45
|
+
conf = Config.load_file(path)
|
46
|
+
|
47
|
+
# merge defaults, update tags
|
48
|
+
@defaults = Solanum.merge_attrs(@defaults, conf[:defaults])
|
49
|
+
|
50
|
+
# sources and outputs are additive
|
51
|
+
@sources.concat(conf[:sources])
|
52
|
+
@outputs.concat(conf[:outputs])
|
53
|
+
end
|
54
|
+
|
55
|
+
# Add default print output.
|
56
|
+
if @outputs.empty?
|
57
|
+
require 'solanum/output/print'
|
58
|
+
@outputs << Solanum::Output::Print.new()
|
26
59
|
end
|
27
60
|
|
61
|
+
@defaults.freeze
|
62
|
+
@outputs.freeze
|
28
63
|
@sources.freeze
|
29
|
-
|
64
|
+
|
65
|
+
@schedule = Solanum::Schedule.new
|
66
|
+
@sources.each_with_index do |source, i|
|
67
|
+
@schedule.insert!(source.next_run, i)
|
68
|
+
end
|
30
69
|
end
|
31
70
|
|
32
71
|
|
33
|
-
#
|
34
|
-
|
35
|
-
|
36
|
-
@
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
metrics.merge(new_metrics)
|
41
|
-
rescue => e
|
42
|
-
STDERR.puts "Error collecting metrics from #{source}: #{e}"
|
43
|
-
metrics
|
72
|
+
# Reschedule the given source for later running.
|
73
|
+
def reschedule!(source)
|
74
|
+
idx = nil
|
75
|
+
@sources.each_with_index do |s, i|
|
76
|
+
if s == source
|
77
|
+
idx = i
|
78
|
+
break
|
44
79
|
end
|
45
80
|
end
|
81
|
+
raise "Source #{source.inspect} is not present in source list!" unless idx
|
82
|
+
@schedule.insert!(source.next_run, idx)
|
83
|
+
@scheduler.wakeup
|
46
84
|
end
|
47
85
|
|
48
86
|
|
49
|
-
#
|
50
|
-
|
51
|
-
|
52
|
-
@
|
53
|
-
|
54
|
-
|
87
|
+
# Report a batch of events to all configured outputs.
|
88
|
+
def record!(events)
|
89
|
+
# TODO: does this need locking?
|
90
|
+
@outputs.each do |output|
|
91
|
+
output.write_events events
|
92
|
+
end
|
93
|
+
end
|
55
94
|
|
56
|
-
state = prototype[:state] ? prototype[:state].call(value) : :ok
|
57
|
-
tags = ((prototype[:tags] || []) + (defaults[:tags] || [])).uniq
|
58
|
-
ttl = prototype[:ttl] || defaults[:ttl]
|
59
95
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
96
|
+
# Run collection from the given source in a new thread.
|
97
|
+
def collect_events!(source)
|
98
|
+
Thread.new do
|
99
|
+
begin
|
100
|
+
events = source.collect!
|
101
|
+
attrs = Solanum.merge_attrs(@defaults, source.attributes)
|
102
|
+
events = events.map do |event|
|
103
|
+
Solanum.merge_attrs(attrs, event)
|
66
104
|
end
|
105
|
+
record! events
|
106
|
+
rescue => e
|
107
|
+
STDERR.puts "Error collecting events from source #{source.type}: #{e}"
|
67
108
|
end
|
109
|
+
reschedule! source
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
68
113
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
114
|
+
# Runs the collection loop.
|
115
|
+
def run!
|
116
|
+
@scheduler = Thread.current
|
117
|
+
loop do
|
118
|
+
# Determine when next scheduled source should run, and sleep if needed.
|
119
|
+
duration = @schedule.next_wait || 1
|
120
|
+
if 0 < duration
|
121
|
+
sleep duration
|
122
|
+
next
|
77
123
|
end
|
78
|
-
|
124
|
+
|
125
|
+
# Get the next ready source.
|
126
|
+
idx = @schedule.pop_ready!
|
127
|
+
source = @sources[idx] if idx
|
128
|
+
next unless source
|
129
|
+
#puts "Source #{source.type} is ready to run!" # DEBUG
|
130
|
+
|
131
|
+
# Start thread to collect and report events.
|
132
|
+
collect_events! source
|
133
|
+
end
|
79
134
|
end
|
80
135
|
|
81
136
|
end
|
data/lib/solanum/config.rb
CHANGED
@@ -1,97 +1,90 @@
|
|
1
|
-
require '
|
1
|
+
require 'yaml'
|
2
2
|
|
3
|
-
class Solanum
|
4
|
-
|
3
|
+
class Solanum
|
4
|
+
module Config
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
instance_eval ::File.readlines(path).join, path, 1
|
11
|
-
|
12
|
-
raise "No sources loaded from monitor script: #{path}" if @sources.empty?
|
6
|
+
# Helper method to clear the type cache.
|
7
|
+
def self.clear_type_cache!
|
8
|
+
@@type_classes = {}
|
13
9
|
end
|
14
10
|
|
15
11
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
# the source with instance_exec.
|
20
|
-
def register_source(source, config=nil)
|
21
|
-
source.instance_exec &config if config
|
22
|
-
@sources << source
|
23
|
-
source
|
24
|
-
end
|
12
|
+
# Resolve a type based on a library path.
|
13
|
+
def self.resolve_type(namespace, type, lib_path=nil, class_name=nil)
|
14
|
+
@@type_classes ||= {}
|
25
15
|
|
16
|
+
type_key = "#{namespace}:#{type}"
|
17
|
+
return @@type_classes[type_key] if @@type_classes.include?(type_key)
|
26
18
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
lib_path ||= type.include?('/') ? type : "solanum/#{namespace}/#{type}"
|
20
|
+
if class_name
|
21
|
+
cls_path = class_name.split('::')
|
22
|
+
else
|
23
|
+
cls_path = lib_path.split('/').map {|w| w.capitalize }
|
24
|
+
end
|
31
25
|
|
26
|
+
begin
|
27
|
+
require lib_path
|
28
|
+
cls = cls_path.inject(Object) do |mod, class_name|
|
29
|
+
mod.const_get(class_name) if mod
|
30
|
+
end
|
31
|
+
STDERR.puts "Unable to resolve class #{cls_path.join('::')}" unless cls
|
32
|
+
@@type_classes[type_key] = cls
|
33
|
+
rescue LoadError => e
|
34
|
+
STDERR.puts "Unable to load code for #{type_key} type: #{e}"
|
35
|
+
@@type_classes[type_key] = nil
|
36
|
+
end
|
32
37
|
|
33
|
-
|
34
|
-
def read(path, &config)
|
35
|
-
register_source Solanum::Source::File.new(path), config
|
38
|
+
@@type_classes[type_key]
|
36
39
|
end
|
37
40
|
|
38
41
|
|
39
|
-
#
|
40
|
-
|
41
|
-
|
42
|
+
# Resolves a type config string and constructs a new instance of it. Memoizes
|
43
|
+
# the results of loading the class in the `@@type_classes` field.
|
44
|
+
def self.construct_type(namespace, type, args)
|
45
|
+
cls = resolve_type(namespace, type, args['lib_path'], args['class'])
|
46
|
+
if cls.nil?
|
47
|
+
STDERR.puts "Skipping construction of failed #{namespace} type #{type}"
|
48
|
+
nil
|
49
|
+
else
|
50
|
+
begin
|
51
|
+
#puts "#{cls}.new(#{args.inspect})" # DEBUG
|
52
|
+
cls.new(args)
|
53
|
+
rescue => e
|
54
|
+
STDERR.puts "Error constructing #{namespace} type #{type}: #{args.inspect} #{e}"
|
55
|
+
nil
|
56
|
+
end
|
57
|
+
end
|
42
58
|
end
|
43
59
|
|
44
60
|
|
45
|
-
#
|
46
|
-
#
|
47
|
-
|
48
|
-
|
49
|
-
@services << [service, prototype]
|
50
|
-
end
|
61
|
+
# Load the given configuration file. Returns a map with initialized :sources
|
62
|
+
# and :outputs, plus the file's :defaults attributes.
|
63
|
+
def self.load_file(path)
|
64
|
+
config = File.open(path) {|f| YAML.load(f) }
|
51
65
|
|
66
|
+
defaults = config['defaults'] || {}
|
52
67
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
# be alternating numeric thresholds and state values to assign if the metric
|
58
|
-
# value exceeds the threshold.
|
59
|
-
#
|
60
|
-
# For example, for an 'availability' metric you often want to warn on low
|
61
|
-
# values. To assign a 'critical' state to values between 0% and 10%,
|
62
|
-
# 'warning' between 10% and 25%, and 'ok' above, use the following:
|
63
|
-
#
|
64
|
-
# thresholds(0.00, :critical, 0.10, :warning, 0.25, :ok)
|
65
|
-
#
|
66
|
-
# For 'usage' metrics it's the inverse, giving low values ok states and
|
67
|
-
# warning about high values:
|
68
|
-
#
|
69
|
-
# thresholds(:ok, 55, :warning, 65, :critical)
|
70
|
-
#
|
71
|
-
def thresholds(*args)
|
72
|
-
default_state = nil
|
73
|
-
default_state = args.shift unless args.first.kind_of? Numeric
|
74
|
-
|
75
|
-
# Check arguments.
|
76
|
-
raise "Thresholds must be paired with state values" unless args.count.even?
|
77
|
-
args.each_slice(2) do |threshold|
|
78
|
-
limit, state = *threshold
|
79
|
-
raise "Limits must be numeric: #{limit}" unless limit.kind_of? Numeric
|
80
|
-
raise "State values must be strings or symbols: #{state}" unless state.instance_of?(String) || state.instance_of?(Symbol)
|
68
|
+
# Construct sources from config.
|
69
|
+
source_configs = config['sources'] || []
|
70
|
+
sources = source_configs.map do |conf|
|
71
|
+
self.construct_type('source', conf['type'], conf)
|
81
72
|
end
|
73
|
+
sources.reject!(&:nil?)
|
82
74
|
|
83
|
-
#
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
if threshold[0] < v
|
88
|
-
state = threshold[1]
|
89
|
-
else
|
90
|
-
break
|
91
|
-
end
|
92
|
-
end
|
93
|
-
state
|
75
|
+
# Construct outputs from config.
|
76
|
+
output_configs = config['outputs'] || []
|
77
|
+
outputs = output_configs.map do |conf|
|
78
|
+
self.construct_type('output', conf['type'], conf)
|
94
79
|
end
|
80
|
+
outputs.reject!(&:nil?)
|
81
|
+
|
82
|
+
{
|
83
|
+
defaults: defaults,
|
84
|
+
sources: sources,
|
85
|
+
outputs: outputs,
|
86
|
+
}
|
95
87
|
end
|
96
88
|
|
97
89
|
end
|
90
|
+
end
|