nagios-herald 0.0.2
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +8 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +11 -0
- data/CONTRIBUTING.md +28 -0
- data/Gemfile +5 -0
- data/LICENSE +21 -0
- data/README.md +94 -0
- data/Rakefile +9 -0
- data/bin/draw_stack_bars +76 -0
- data/bin/dump_nagios_env.sh +25 -0
- data/bin/get_ganglia_graph +82 -0
- data/bin/get_graph +50 -0
- data/bin/get_graphite_graph +58 -0
- data/bin/nagios-herald +6 -0
- data/bin/splunk_alert_frequency +54 -0
- data/contrib/nrpe-plugins/check_cpu_stats.sh +186 -0
- data/contrib/nrpe-plugins/check_disk.sh +34 -0
- data/contrib/nrpe-plugins/check_mem.pl +181 -0
- data/contrib/nrpe-plugins/nrpe-plugin-examples.md +11 -0
- data/docs/config.md +62 -0
- data/docs/example_alerts.md +48 -0
- data/docs/formatters.md +180 -0
- data/docs/helpers.md +12 -0
- data/docs/images/cpu_no_context.png +0 -0
- data/docs/images/cpu_with_context.png +0 -0
- data/docs/images/disk_space_no_context.png +0 -0
- data/docs/images/disk_space_with_context.png +0 -0
- data/docs/images/memory_high_no_context.png +0 -0
- data/docs/images/memory_high_with_context.png +0 -0
- data/docs/images/nagios-herald-formatter-content-example.png +0 -0
- data/docs/images/nagios-herald.png +0 -0
- data/docs/images/stack-bars.png +0 -0
- data/docs/images/vanilla-nagios.png +0 -0
- data/docs/messages.md +16 -0
- data/docs/nagios-config.md +74 -0
- data/docs/tools.md +79 -0
- data/etc/config.yml.example +14 -0
- data/etc/readme.md +2 -0
- data/lib/nagios-herald/config.rb +25 -0
- data/lib/nagios-herald/executor.rb +265 -0
- data/lib/nagios-herald/formatter_loader.rb +82 -0
- data/lib/nagios-herald/formatters/base.rb +524 -0
- data/lib/nagios-herald/formatters/check_cpu.rb +71 -0
- data/lib/nagios-herald/formatters/check_disk.rb +143 -0
- data/lib/nagios-herald/formatters/check_logstash.rb +155 -0
- data/lib/nagios-herald/formatters/check_memory.rb +42 -0
- data/lib/nagios-herald/formatters/example.rb +19 -0
- data/lib/nagios-herald/formatters.rb +1 -0
- data/lib/nagios-herald/helpers/ganglia_graph.rb +99 -0
- data/lib/nagios-herald/helpers/graphite_graph.rb +85 -0
- data/lib/nagios-herald/helpers/logstash_query.rb +125 -0
- data/lib/nagios-herald/helpers/splunk_alert_frequency.rb +170 -0
- data/lib/nagios-herald/helpers/splunk_query.rb +119 -0
- data/lib/nagios-herald/helpers/url_image.rb +76 -0
- data/lib/nagios-herald/helpers.rb +5 -0
- data/lib/nagios-herald/logging.rb +48 -0
- data/lib/nagios-herald/message_loader.rb +40 -0
- data/lib/nagios-herald/messages/base.rb +56 -0
- data/lib/nagios-herald/messages/email.rb +150 -0
- data/lib/nagios-herald/messages/irc.rb +58 -0
- data/lib/nagios-herald/messages/pager.rb +75 -0
- data/lib/nagios-herald/messages.rb +3 -0
- data/lib/nagios-herald/test_helpers/base_test_case.rb +82 -0
- data/lib/nagios-herald/util.rb +45 -0
- data/lib/nagios-herald/version.rb +3 -0
- data/lib/nagios-herald.rb +7 -0
- data/lib/stackbars/__init__.py +0 -0
- data/lib/stackbars/chart_utils.py +25 -0
- data/lib/stackbars/grouped_stackbars.py +97 -0
- data/lib/stackbars/pilfonts/Tahoma.ttf +0 -0
- data/lib/stackbars/pilfonts/aerial.ttf +0 -0
- data/lib/stackbars/pilfonts/arial_black.ttf +0 -0
- data/lib/stackbars/stackbar.py +100 -0
- data/nagios-herald.gemspec +33 -0
- data/test/env_files/check_cpu_idle.CRITICAL +199 -0
- data/test/env_files/check_cpu_iowait.WARNING +199 -0
- data/test/env_files/check_disk.CRITICAL +197 -0
- data/test/env_files/check_disk.CRITICAL_ICINGA +197 -0
- data/test/env_files/check_disk.RECOVERY +197 -0
- data/test/env_files/check_memory.CRITICAL +197 -0
- data/test/env_files/nagios_vars.EXAMPLE +197 -0
- data/test/unit/test_config.rb +31 -0
- data/test/unit/test_executor.rb +65 -0
- data/test/unit/test_formatter_base.rb +131 -0
- data/test/unit/test_formatter_check_cpu_idle_critical.rb +135 -0
- data/test/unit/test_formatter_check_memory.rb +135 -0
- data/test/unit/test_icinga_variables.rb +31 -0
- data/test/unit/test_logging.rb +35 -0
- data/test/unit/test_message_email.rb +69 -0
- data/test/unit/test_message_pager.rb +69 -0
- metadata +204 -0
data/lib/nagios-herald/formatters/check_disk.rb
@@ -0,0 +1,143 @@
+# CheckDisk formatter
+# Colorizes and bolds text generated by the 'check_disk' NRPE check.
+
+module NagiosHerald
+  class Formatter
+    class CheckDisk < NagiosHerald::Formatter
+      include NagiosHerald::Logging
+
+      # Public: Gets information about each of the partitions in the check's output.
+      # Parses the partition name and free space value and percentage
+      #
+      # Expects the check's output looks similar to one of the following cases:
+      # Simple output - ends with :
+      # DISK CRITICAL - free space: / 7002 MB (18% inode=60%): /data 16273093 MB (26% inode=99%):
+      # Long output - delimited by |
+      # DISK CRITICAL - free space: / 7051 MB (18% inode=60%); /data 16733467 MB (27% inode=99%);| /=31220MB;36287;2015;0;40319 /dev/shm=81MB;2236;124;0;2485 /data=44240486MB;54876558;3048697;0;60973954
+      #
+      # input - A string containing partition data to match.
+      #
+      # Returns an array of hash data per partition
+      def get_partitions_data(input)
+        partitions = []
+        space_data = /.*free space:\s*(?<size>[^|:]*)(\||:)/.match(input)
+        if space_data
+          space_str = space_data[:size]
+          splitter = (space_str.count(';') > 0)? ';' : ':'
+          space_str.split(splitter).each do |part|
+            partition_regex = Regexp.new('(?<partition>\S+)\s+(?<free_unit>.*)\s+\((?<free_percent>\d+)\%.*')
+            data = partition_regex.match(part)
+            hash_data = Hash[ data.names.zip( data.captures ) ]
+            partitions << hash_data if hash_data
+          end
+        end
+        return partitions
+      end
+
+      # Public: Generates an image of stack bars for all disk partitions.
+      #
+      # partitions_data - The array of hashes generated by #get_partitions_data
+      #
+      # Returns the filename of the generated image or nil if the image was not generated.
+      def get_partitions_stackedbars_chart(partitions_data)
+        # Sort results by the most full partition
+        partitions_data.sort! { |a,b| a[:free_percent] <=> b[:free_percent] }
+        # generate argument as string
+        volumes_space_str = partitions_data.map {|x| "#{x[:partition]}=#{100 - x[:free_percent].to_i}"}.compact
+        output_file = File.join(@sandbox, "host_status.png")
+        command = ""
+        command += NagiosHerald::Util::get_script_path('draw_stack_bars')
+        command += " --width=500 --output=#{output_file} "
+        command += volumes_space_str.join(" ")
+        %x(#{command})
+        if $? == 0
+          return output_file
+        else
+          return nil
+        end
+      end
+
+      # Public: Overrides Formatter::Base#additional_info.
+      # Calls on methods defined in this class to generate stack bars and download
+      # Ganglia graphs.
+      #
+      # Returns nothing. Updates the formatter content hash.
+      def additional_info
+        section = __method__
+        output = get_nagios_var("NAGIOS_#{@state_type}OUTPUT")
+        add_text(section, "Additional Info:\n #{unescape_text(output)}\n\n") if output
+
+        # Collect partitions data and plot a chart
+        # if the check has recovered, $NAGIOS_SERVICEOUTPUT doesn't contain the data we need to parse for images; just give us the A-OK message
+        if output =~ /DISK OK/
+          add_html(section, %Q(Additional Info:<br><b><font color="green"> #{output}</font><br><br>))
+        else
+          partitions = get_partitions_data(output)
+          partitions_chart = get_partitions_stackedbars_chart(partitions)
+          if partitions_chart
+            add_html(section, "<b>Additional Info</b>:<br> #{output}<br><br>") if output
+            add_attachment partitions_chart
+            add_html(section, %Q(<img src="#{partitions_chart}" width="500" alt="partitions_remaining_space" /><br><br>))
+          else
+            add_html(section, "<b>Additional Info</b>:<br> #{output}<br><br>") if output
+          end
+        end
+      end
+
+      # Public: Overrides Formatter::Base#additional_details.
+      # Calls on methods defined in this class to colorize and bold the `df` output
+      # generated by the check_disk NRPE check.
+      #
+      # Returns nothing. Updates the formatter content hash.
+      def additional_details
+        section = __method__
+        long_output = get_nagios_var("NAGIOS_LONG#{@state_type}OUTPUT")
+        lines = long_output.split('\n') # the "newlines" in this value are literal '\n' strings
+        # if we've been passed threshold information use it to color-format the df output
+        threshold_line = lines.grep( /THRESHOLDS - / ) # THRESHOLDS - WARNING:50%;CRITICAL:40%;
+        threshold_line.each do |line|
+          /WARNING:(?<warning_threshold>\d+)%;CRITICAL:(?<critical_threshold>\d+)%;/ =~ line
+          @warning_threshold = warning_threshold
+          @critical_threshold = critical_threshold
+        end
+
+        # if the thresholds are provided, color me... badd!
+        if @warning_threshold and @critical_threshold
+          output_lines = []
+          output_lines << "<pre>"
+          lines.each do |line|
+            if line =~ /THRESHOLDS/
+              output_lines << line
+              next # just throw this one in unchanged and move along
+            end
+            /(?<percent>\d+)%/ =~ line
+            if defined?( percent ) and !percent.nil?
+              percent_free = 100 - percent.to_i
+              if percent_free <= @critical_threshold.to_i
+                output_line = %Q(<b><font color="red">#{line}</font> Free disk space <font color="red">(#{percent_free}%)</font> is <= CRITICAL threshold (#{@critical_threshold}%).</b>)
+                output_lines << output_line
+              elsif percent_free <= @warning_threshold.to_i
+                output_line = %Q(<b><font color="orange">#{line}</font> Free disk space <font color="orange">(#{percent_free}%)</font> is <= WARNING threshold ( #{@warning_threshold}%).</b>)
+                output_lines << output_line
+              else
+                output_lines << line
+              end
+            else
+              output_lines << line
+            end
+          end
+
+          output_lines << "</pre>"
+          output_string = output_lines.join( "<br>" )
+          add_html(section, "<b>Additional Details</b>:")
+          add_html(section, output_string)
+        else # just spit out what we got from df
+          add_text(section, "Additional Details:\n#{unescape_text(long_output)}\n") if long_output
+          add_html(section, "<b>Additional Details</b>:<br><pre>#{unescape_text(long_output)}</pre><br><br>") if long_output
+        end
+        line_break(section)
+      end
+
+    end
+  end
+end
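As a quick illustration (not part of the gem), this is roughly what `get_partitions_data` returns for the long-form output quoted in the comments above; the named captures come back as string-keyed hashes, which `get_partitions_stackedbars_chart` then feeds to the bundled `draw_stack_bars` script:

```ruby
# Hypothetical call against a CheckDisk formatter instance named `formatter`.
output = 'DISK CRITICAL - free space: / 7051 MB (18% inode=60%); ' \
         '/data 16733467 MB (27% inode=99%);| /=31220MB;36287;2015;0;40319'

formatter.get_partitions_data(output)
# => [
#      {"partition" => "/",     "free_unit" => "7051 MB",     "free_percent" => "18"},
#      {"partition" => "/data", "free_unit" => "16733467 MB", "free_percent" => "27"}
#    ]
```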
data/lib/nagios-herald/formatters/check_logstash.rb
@@ -0,0 +1,155 @@
+# CheckDisk formatter
+# Colorizes and bolds text generated by the 'check_disk' NRPE check.
+
+module NagiosHerald
+  class Formatter
+    class CheckLogstash < NagiosHerald::Formatter
+      include NagiosHerald::Logging
+
+      # Public: Overrides Formatter::Base#additional_info.
+      # Calls on methods defined in this class to generate stack bars and download
+      # Ganglia graphs.
+      #
+      # Returns nothing. Updates the formatter content hash.
+      def additional_info
+        section = __method__ # this defines the section key in the formatter's content hash
+        service_output = get_nagios_var("NAGIOS_SERVICECHECKCOMMAND")
+        command_components = parse_command(service_output)
+
+        # The aggregation level limit for which we can render results
+        agg_level_limit = 3
+
+        logstash_helper = NagiosHerald::Helpers::LogstashQuery.new
+        results = get_logstash_results(logstash_helper, command_components[:query])
+
+        # Handle the case when an exception is thrown inside get_logstash_results
+        if results.empty?
+          add_text(section, "Something went wrong while getting logstash results\n\n")
+          return
+        end
+
+        if results["hits"]["hits"].empty? && results["aggregations"]
+          # We have aggregations
+
+          query_agg_data = logstash_helper.query["aggregations"] || logstash_helper.query["aggs"]
+
+          agg_depth_level = 1 + agg_depth(query_agg_data)
+
+          # We can't cope with more than 3 level deep aggregates
+          if agg_depth_level > agg_level_limit
+            #Add error text to the alert and return straight away
+            add_text(section, "Error - query contains #{agg_depth_level} levels of aggregation - more than #{agg_level_limit} levels are not supported by this plugin\n")
+            return
+          end
+
+          agg_field_name = query_agg_data.keys.first
+
+          html_output = generate_table_from_buckets(results["aggregations"][agg_field_name]["buckets"])
+        else
+          # We have normal search results
+          html_output = generate_html_output(results["hits"]["hits"])
+        end
+
+        add_html(section, html_output)
+      end
+
+      # Public: Overrides Formatter::Base#additional_details.
+      # Calls on methods defined in this class to colorize and bold the `df` output
+      # generated by the check_disk NRPE check.
+      #
+      # Returns nothing. Updates the formatter content hash.
+      def additional_details
+
+      end
+
+      private
+
+      def parse_command(service_command)
+        command_components = service_command.split("!")
+        {
+          :command => command_components[0],
+          :query => command_components[1],
+          :warn_threshold => command_components[2],
+          :crit_threshold => command_components[3],
+          :time_perdiod => command_components[4]
+        }
+      end
+
+      def agg_depth(agg_data)
+        agg_level = 0
+        if agg_data.kind_of?(String)
+          agg_level = agg_data.include?("aggs") || agg_data.include?("aggregations") ? 1 : 0
+        else
+          agg_data.each do |k,v|
+            this_level = k.include?("aggs") || k.include?("aggregations") ? 1 : 0
+            agg_level = this_level + agg_depth(v)
+          end
+        end
+        agg_level
+      end
+
+      def get_logstash_results(logstash_helper, query)
+        begin
+          if query.include?(".json")
+            logstash_helper.query_from_file(query)
+          else
+            logstash_helper.kibana_style_query(query)
+          end
+        rescue Exception => e
+          logger.error "Exception encountered retrieving Logstash Query - #{e.message}"
+          e.backtrace.each do |line|
+            logger.error "#{line}"
+          end
+          return []
+        end
+      end
+
+      def generate_html_output(results)
+        output_prefix = "<table border='1' cellpadding='0' cellspacing='1'>"
+        output_suffix = "</table>"
+
+        headers = "<tr>#{results.first["_source"].keys.map{|h|"<th>#{h}</th>"}.join}</tr>"
+        result_values = results.map{|r|r["_source"]}
+
+        body = result_values.map{|r| "<tr>#{r.map{|k,v|"<td>#{v}</td>"}.join}</tr>"}.join
+
+        output_prefix + headers + body + output_suffix
+      end
+
+      def generate_table_from_buckets(buckets)
+        unique_keys = buckets.map{|b|b.keys}.flatten.uniq
+
+        output_prefix = "<table border='1' cellpadding='0' cellspacing='1'>"
+        output_suffix = "</table>"
+        headers = "<tr>#{unique_keys.map{|h|"<th>#{h}</th>"}.join}</tr>"
+        body = buckets.map do |r|
+          generate_table_from_hash(r)
+        end.join
+        output_prefix + headers + body + output_suffix
+      end
+
+      def generate_table_from_hash(data,add_headers=false)
+        output_prefix = "<table border='1' cellpadding='0' cellspacing='1'>"
+        output_suffix = "</table>"
+        headers = add_headers ? "<tr>#{data.keys.map{|h|"<th>#{h}</th>"}.join}</tr>" : ""
+        body = "<tr>#{data.map do |k,v|
+          if v.kind_of?(Hash)
+            if v.has_key?("buckets")
+              "<td>#{generate_table_from_buckets(v["buckets"])}</td>"
+            else
+              "<td>#{generate_table_from_hash(v,true)}</td>"
+            end
+          else
+            "<td>#{v}</td>"
+          end
+        end.join}</tr>"
+
+        if add_headers
+          output_prefix + headers + body + output_suffix
+        else
+          body
+        end
+      end
+    end
+  end
+end
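The formatter expects the Nagios service check command to carry its arguments `!`-separated; a rough sketch (the command definition below is hypothetical and site-specific) of what `parse_command` produces:

```ruby
# Hypothetical $SERVICECHECKCOMMAND$ value; real definitions live in the Nagios config.
parse_command("check_logstash!'type:apache AND status:500'!50!100!1h")
# => {
#      :command        => "check_logstash",
#      :query          => "'type:apache AND status:500'",
#      :warn_threshold => "50",
#      :crit_threshold => "100",
#      :time_perdiod   => "1h"   # key spelled as in the source above
#    }
```

Queries containing `.json` are treated as saved query files and loaded via `query_from_file`; anything else is run through `kibana_style_query`.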
data/lib/nagios-herald/formatters/check_memory.rb
@@ -0,0 +1,42 @@
+module NagiosHerald
+  class Formatter
+    class CheckMemory < NagiosHerald::Formatter
+      include NagiosHerald::Logging
+
+      # Public: Overrides Formatter::Base#additional_details.
+      # Colorizes the `ps` output returned by the check_mem NRPE check.
+      # The output contains the top n processes by memory utilization similar to:
+      #
+      # TOP 5 PROCESSES BY MEMORY USAGE:
+      # %MEM RSS USER PID COMMAND
+      # 2.4 1231696 larry 6658 tmux
+      # 1.5 777204 moe 32234 tmux/tmux -CC
+      # 0.8 399964 curly 12161 /usr/sbin/gmond
+      # 0.7 384772 shep 1945 /usr/sbin/mysqld --basedir=/usr --datadir=/var/lib/mysql --plugin-dir=/usr/lib64/mysql/plugin --user=mysql --log-error=/var/lib/mysql/mysql.example.com.err --pid-file=/var/lib/mysql/mysql.example.com.pid
+      # 0.7 355148 root 1245 SCREEN
+      #
+      # Returns nothing. Updates the formatter content hash.
+      def additional_details
+        section = __method__
+        long_output = get_nagios_var("NAGIOS_LONG#{@state_type}OUTPUT")
+        lines = long_output.split('\n')
+        html = []
+        html << "<pre>"
+        html << lines[0] # TOP 5 PROCESSES BY MEMORY USAGE:
+        html << lines[1] # %MEM RSS USER PID COMMAND
+        html << "<font color='red'>#{lines[2]}</font>" # Color the first result red...
+        for i in 3..lines.length-1
+          html << "<font color='orange'>#{lines[i]}</font>" # ...and the remainder orange.
+        end
+        html << "</pre>"
+        output_string = html.join( "<br>" )
+        add_html(section, "<b>Additional Details</b>:")
+        add_html(section, output_string)
+        add_text(section, "Additional Details:\n#")
+        add_text(section, "#{unescape_text(long_output)}\n")
+        line_break(section)
+      end

+    end
+  end
+end
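For clarity, a small sketch of the colorization (the process list below is hypothetical): the long output arrives with literal `'\n'` separators, the title and header lines pass through unchanged, the top process is wrapped in red, and every later process line in orange.

```ruby
# Hypothetical $LONGSERVICEOUTPUT$ value; note the literal '\n' separators.
long_output = 'TOP 5 PROCESSES BY MEMORY USAGE:\n' \
              '%MEM RSS USER PID COMMAND\n' \
              '2.4 1231696 larry 6658 tmux\n' \
              '1.5 777204 moe 32234 tmux/tmux -CC'

lines = long_output.split('\n')
# lines[0] and lines[1] are emitted as-is, lines[2] becomes
#   <font color='red'>2.4 1231696 larry 6658 tmux</font>
# and each remaining line becomes
#   <font color='orange'>1.5 777204 moe 32234 tmux/tmux -CC</font>
```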
data/lib/nagios-herald/formatters/example.rb
@@ -0,0 +1,19 @@
+module NagiosHerald
+  class Formatter
+    class Example < NagiosHerald::Formatter
+
+      # ovrerride Formatter::Base#additional_details
+      def additional_details
+        section = __method__ # content section is named after the method
+        html = ""
+        text = ""
+        text += "Example text"
+        html += "Example <b>HTML</b>"
+        add_text(section, text)
+        add_html(section, html)
+        line_break(section) # trailing line break
+      end
+
+    end
+  end
+end
data/lib/nagios-herald/formatters.rb
@@ -0,0 +1 @@
+require 'nagios-herald/formatters/base'
data/lib/nagios-herald/helpers/ganglia_graph.rb
@@ -0,0 +1,99 @@
+require 'chef/search/query'
+
+module NagiosHerald
+  module Helpers
+    class GangliaGraph
+      include NagiosHerald::Logging
+
+      # Public: Initialize a GangliaGraph helper object.
+      #
+      # Returns GangliaGraph helper object.
+      def initialize
+        @ganglia_base_uri = Config.config['servers']['ganglia']
+      end
+
+      # Public: Loads a Chef knife config.
+      # I recognize not everyone uses Chef, or any CM for that matter.
+      # Still, I like having this example here.
+      #
+      # Returns true/false depending on whether a Chef knife config was loaded.
+      def load_knife_config
+        return @knife_config_loaded unless @knife_config_loaded.nil?
+
+        knife_config_file = Config.config['knife_config'] ? Config.config['knife_config'] : '~/.chef/knife.rb'
+        knife_config_file = File.expand_path(knife_config_file)
+        if !File.exist?(knife_config_file)
+          logger.warn("Knife config file not found (#{knife_config_file})")
+          @knife_config_loaded = false
+        else
+          Chef::Config.from_file(knife_config_file)
+          @knife_config_loaded = true
+        end
+        @knife_config_loaded
+      end
+
+      # Public: Get the Ganglia cluster name for a given host.
+      # So, if you're still reading this *and* you use Chef,
+      # I recognize that you may not store the Ganglia cluster name as a node
+      # attribute. If you do, it's probably not called 'cluster_name'...
+      #
+      # host - The name of the host whose Ganglia cluster name we need to look up.
+      #
+      # Returns the Ganglia cluster name for the host.
+      def get_cluster_name_for_host(host)
+        return nil unless load_knife_config
+        query = Chef::Search::Query.new
+        # we're only expecting a single node to be returned --> make sure it's the case!
+        chef_node = query.search('node', "fqdn:#{host}").first.first
+        chef_node.ganglia.cluster_name
+      end
+
+      # Public: Generate the URL required to download a graph of the require metric
+      #
+      # cluster_name - The Ganglia cluster this node belongs to.
+      # host - The hostname of the node we need a metric for.
+      # metric - The name of the Ganglia metric we need.
+      # range - The time period we expect the metric to cover.
+      #
+      # Example
+      #
+      #   get_ganglia_url("Web", "web0001.example.com", "part_max_used", "1day")
+      #
+      # Returns a full Ganglia URL defining the metric graph to download
+      def get_ganglia_url(cluster_name, host, metric, range)
+        return "http://#{@ganglia_base_uri}/graph.php?&c=#{cluster_name}&h=#{host}&m=#{metric}&r=#{range}&z=medium"
+      end
+
+      # Public: Retrieve the Ganglia graphs we desire
+      #
+      # hosts - An array of hosts for which to retrieve metrics.
+      # metric - The name of the Ganglia metric we need.
+      # path - The local path on the host running nagios-herald under which image
+      #        files will be temporarily generated.
+      # range - The time period we expect the metric to cover.
+      #
+      # Example
+      #
+      #   get_graphs([web0001.example.com], "part_max_used", "/tmp/img1234", "1day")
+      #
+      # Returns the local paths of all downloaded images to be attached/inlined with a message.
+      def get_graphs( hosts, metric, path, range )
+        # strip the trailing slash (if it exists) so the components of image_name are clear
+        path = path.sub(/\/$/, "")
+        image_paths = []
+        hosts.each do |host|
+          cluster_name = get_cluster_name_for_host(host)
+          url = get_ganglia_url(cluster_name, host, metric, range)
+          image_path = "#{path}/#{host}-#{metric}.png"
+          success = NagiosHerald::Helpers::UrlImage.download_image(url, image_path)
+          if success
+            image_paths.push( image_path )
+          else
+            logger.warn("No Ganglia graph found for '#{host}' (cluster: '#{cluster_name}') - '#{metric}' in '#{range}'")
+          end
+        end
+        return image_paths
+      end
+    end
+  end
+end
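Given the documented example call, the URL built by `get_ganglia_url` comes out as below; the Ganglia host is hypothetical and is read from `servers.ganglia` in the config:

```ruby
# Assumes Config.config['servers']['ganglia'] == "ganglia.example.com" (hypothetical value).
get_ganglia_url("Web", "web0001.example.com", "part_max_used", "1day")
# => "http://ganglia.example.com/graph.php?&c=Web&h=web0001.example.com&m=part_max_used&r=1day&z=medium"
```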
data/lib/nagios-herald/helpers/graphite_graph.rb
@@ -0,0 +1,85 @@
+require 'securerandom'
+require 'uri'
+
+module NagiosHerald
+  module Helpers
+    class GraphiteGraph
+      include NagiosHerald::Logging
+
+      # Public: Initialize a GraphiteGraph helper object.
+      #
+      # Returns a GraphiteGraph helper object.
+      def initialize
+        # Currently hard-codes the value for optional graphs showing historical
+        # data.
+        @graphite_historical_lookup = '-24h'
+        @image_paths = []
+      end
+
+      # Public: Download a Graphite image.
+      #
+      # url - The Graphite url we'll download as an image.
+      # download_path - The path to where the image will be downloaded.
+      #
+      # Returns nothing. Appends the downloaded image path to @image_paths.
+      def download_image(url, download_path)
+        success = NagiosHerald::Helpers::UrlImage.download_image(url, download_path)
+        if success
+          @image_paths.push(download_path)
+        else
+          logger.warn("Could not download Graphite graph for '#{url}'")
+        end
+      end
+
+      # Public: Retrieve a Graphite graph
+      #
+      # url - A string containing the full URL to get from Graphite.
+      # path - The local path on the host running nagios-herald under which image
+      #        files will be temporarily generated.
+      # show_historical - A boolean that allows one to optionally download a
+      #                   graph showing historical data for comparison.
+      #                   Defaults to false.
+      #
+      # Because this will probably be fed URLs used in Nagios checks, we'll
+      # strip out '&format' and '&rawData' query parameters to ensure we
+      # get an image instead of text/json/csv/etc.
+      #
+      # In cases where the method is called requesting an historical image
+      # we'll strip '&until' and replace the value of '&from' with that of
+      # @graphite_historical_lookup.
+      #
+      # Example
+      #
+      #   get_graph("http://graphite.example.com/render/?target=foo.bar.baz?from=-15min", "/tmp/img1234", true)
+      #
+      # Returns the local path of the downloaded image to be attached/inlined with a message.
+      def get_graph(url, path, show_historical=nil)
+        uri = URI(url)
+        # Strip &rawData parameter.
+        uri.query.gsub!(/&rawData([^&]*)/, '')
+        # Strip the &format parameter.
+        uri.query.gsub!(/&format([^&])*/, '')
+        # Strip the trailing slash from the path.
+        path = path.sub(/\/$/, "")
+        # Generate a random UUID to be used in the image filename.
+        image_uuid = SecureRandom.uuid
+        image_path = "#{path}/#{image_uuid}.png"
+        image_url = uri.to_s
+        download_image(image_url, image_path)
+        if show_historical
+          historical_image_path = "#{path}/#{image_uuid}#{@graphite_historical_lookup}.png"
+          if uri.query =~ /&from/
+            # Replace the &from value.
+            uri.query.gsub!(/from=([^&]*)/, "from=#{@graphite_historical_lookup}")
+          else
+            # Set the &from value.
+            uri.query = "#{uri.query}&from=#{@graphite_historical_lookup}"
+          end
+          historical_url = uri.to_s
+          download_image(historical_url, historical_image_path)
+        end
+        return @image_paths
+      end
+    end
+  end
+end
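A short sketch of the query rewriting in `get_graph`, using a made-up render URL: `&format` (and `&rawData`, if present) is stripped for the primary image, and the historical image reuses the same target with the `from` value replaced by `@graphite_historical_lookup`:

```ruby
# Hypothetical Graphite URL; only the query-string handling is shown.
url = "http://graphite.example.com/render/?target=foo.bar.baz&from=-15min&format=json"

# Primary image is fetched from:
#   http://graphite.example.com/render/?target=foo.bar.baz&from=-15min
# Historical image is fetched from:
#   http://graphite.example.com/render/?target=foo.bar.baz&from=-24h
```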
data/lib/nagios-herald/helpers/logstash_query.rb
@@ -0,0 +1,125 @@
+require 'net/http'
+require 'uri'
+require 'json'
+require 'elasticsearch'
+
+# Query Logstash with arbitrary search criteria
+
+module NagiosHerald
+  module Helpers
+    class LogstashQuery
+
+      attr_reader :query
+
+      # Public: Initialize a new LogstashQuery object.
+      #
+      # query - A string representing the query to send to Logstash.
+      # index - Optional index to specify (else Splunk defaults to all indexes
+      #         available to the authenticated user).
+      # output - The output format we'd like (i.e. csv, json, xml); defaults
+      #          to json.
+      #
+      # Example:
+      #
+      #   NEEDS EXAMPLES
+      #
+      # Returns a new LogstashQuery object.
+      def initialize(options={})
+        today = Time.now.strftime("%Y.%m.%d")
+        @logstash_index = options[:index] ? options[:index] : "logstash-#{today}"
+        @logstash_time_period = options[:time_period] ? options[:time_period] : "1h"
+        @logstash_num_results = Config.config['logstash']['num_results'] ? Config.config['logstash']['num_results'] : 10
+        @logstash_result_truncate = Config.config['logstash']['result_field_trucate'] ? Config.config['logstash']['result_field_trucate'] : nil
+
+        # Pull the Logstash URI, username, and password from the config.
+        logstash_url = Config.config['logstash']['url']
+
+        # Parse the URI.
+        uri = URI.parse(logstash_url)
+        @logstash_host = uri.host
+        @logstash_port = uri.port
+        @logstash_uri = uri.request_uri
+
+        @es = Elasticsearch::Client.new hosts: ["#{@logstash_host}:#{@logstash_port}"], reload_connections: true
+      end
+
+      # Public: Queries Logstash.
+      #
+      # Example:
+      #
+      #   results = logstash_query.query
+      #
+      # Returns the results of the query in the requested format, nil otherwise.
+      def kibana_style_query(query_string)
+
+        # Strip leading and following single quotes from query if present
+        query_string = query_string[1..-1] if query_string[0] == "'"
+        query_string = query_string[0..-2] if query_string[-1] == "'"
+
+        @query = {
+          "from" => 0,
+          "size" => @logstash_num_results,
+          "query" => {
+            "filtered" => {
+              "query" => {
+                "bool" => {
+                  "should" => [
+                    {
+                      "query_string" => {
+                        "query" => "#{query_string}"
+                      }
+                    }
+                  ]
+                }
+              },
+              "filter" => {
+                "bool" => {
+                  "must" => [
+                    {
+                      "match_all" => {}
+                    },
+                    {
+                      "range" => {
+                        "index_timestamp" => {
+                          "from" => "now-#{@logstash_time_period}",
+                          "to" => "now"
+                        }
+                      }
+                    }
+                  ]
+                }
+              }
+            }
+          }
+        }
+        truncate_results(run_logstash_query(@query))
+      end
+
+      def query_from_file(query_file)
+        if File.exists? query_file
+          @query = JSON.parse(File.readlines(query_file).join)
+        else
+          raise "Query file #{query_file} does not exist"
+        end
+
+        truncate_results(run_logstash_query(@query))
+      end
+
+      private
+
+      def run_logstash_query(query_body)
+        begin
+          return @es.search index: @logstash_index, body: query_body
+        rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
+          raise "Elasticsearch doesn't like your query. Please check you escaped it correctly."
+        end
+      end
+
+      def truncate_results(results)
+        results["hits"]["hits"].each{|result|result["_source"].each{|field_name,field_value|result["_source"][field_name] = field_value[0..@logstash_result_truncate]}} if @logstash_result_truncate
+        return results
+      end
+    end
+  end
+end
+
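A hedged usage sketch: the helper pulls its connection settings from the `logstash` section of the nagios-herald config (`url`, `num_results`, and the optional `result_field_trucate`, as read in `#initialize` above), then runs either a Kibana-style query string or a query body loaded from a JSON file. The endpoint and query below are hypothetical.

```ruby
# Hypothetical usage; assumes Config.config['logstash']['url'] points at an
# Elasticsearch endpoint such as "http://logstash-es.example.com:9200".
query   = NagiosHerald::Helpers::LogstashQuery.new
results = query.kibana_style_query("type:apache AND status:500")
results["hits"]["hits"].each { |hit| puts hit["_source"] }

# Or, with a saved Elasticsearch query body on disk (hypothetical path):
# results = query.query_from_file("/etc/nagios-herald/queries/apache_500s.json")
```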