nagios-herald 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +8 -0
  3. data/.travis.yml +9 -0
  4. data/CHANGELOG.md +11 -0
  5. data/CONTRIBUTING.md +28 -0
  6. data/Gemfile +5 -0
  7. data/LICENSE +21 -0
  8. data/README.md +94 -0
  9. data/Rakefile +9 -0
  10. data/bin/draw_stack_bars +76 -0
  11. data/bin/dump_nagios_env.sh +25 -0
  12. data/bin/get_ganglia_graph +82 -0
  13. data/bin/get_graph +50 -0
  14. data/bin/get_graphite_graph +58 -0
  15. data/bin/nagios-herald +6 -0
  16. data/bin/splunk_alert_frequency +54 -0
  17. data/contrib/nrpe-plugins/check_cpu_stats.sh +186 -0
  18. data/contrib/nrpe-plugins/check_disk.sh +34 -0
  19. data/contrib/nrpe-plugins/check_mem.pl +181 -0
  20. data/contrib/nrpe-plugins/nrpe-plugin-examples.md +11 -0
  21. data/docs/config.md +62 -0
  22. data/docs/example_alerts.md +48 -0
  23. data/docs/formatters.md +180 -0
  24. data/docs/helpers.md +12 -0
  25. data/docs/images/cpu_no_context.png +0 -0
  26. data/docs/images/cpu_with_context.png +0 -0
  27. data/docs/images/disk_space_no_context.png +0 -0
  28. data/docs/images/disk_space_with_context.png +0 -0
  29. data/docs/images/memory_high_no_context.png +0 -0
  30. data/docs/images/memory_high_with_context.png +0 -0
  31. data/docs/images/nagios-herald-formatter-content-example.png +0 -0
  32. data/docs/images/nagios-herald.png +0 -0
  33. data/docs/images/stack-bars.png +0 -0
  34. data/docs/images/vanilla-nagios.png +0 -0
  35. data/docs/messages.md +16 -0
  36. data/docs/nagios-config.md +74 -0
  37. data/docs/tools.md +79 -0
  38. data/etc/config.yml.example +14 -0
  39. data/etc/readme.md +2 -0
  40. data/lib/nagios-herald/config.rb +25 -0
  41. data/lib/nagios-herald/executor.rb +265 -0
  42. data/lib/nagios-herald/formatter_loader.rb +82 -0
  43. data/lib/nagios-herald/formatters/base.rb +524 -0
  44. data/lib/nagios-herald/formatters/check_cpu.rb +71 -0
  45. data/lib/nagios-herald/formatters/check_disk.rb +143 -0
  46. data/lib/nagios-herald/formatters/check_logstash.rb +155 -0
  47. data/lib/nagios-herald/formatters/check_memory.rb +42 -0
  48. data/lib/nagios-herald/formatters/example.rb +19 -0
  49. data/lib/nagios-herald/formatters.rb +1 -0
  50. data/lib/nagios-herald/helpers/ganglia_graph.rb +99 -0
  51. data/lib/nagios-herald/helpers/graphite_graph.rb +85 -0
  52. data/lib/nagios-herald/helpers/logstash_query.rb +125 -0
  53. data/lib/nagios-herald/helpers/splunk_alert_frequency.rb +170 -0
  54. data/lib/nagios-herald/helpers/splunk_query.rb +119 -0
  55. data/lib/nagios-herald/helpers/url_image.rb +76 -0
  56. data/lib/nagios-herald/helpers.rb +5 -0
  57. data/lib/nagios-herald/logging.rb +48 -0
  58. data/lib/nagios-herald/message_loader.rb +40 -0
  59. data/lib/nagios-herald/messages/base.rb +56 -0
  60. data/lib/nagios-herald/messages/email.rb +150 -0
  61. data/lib/nagios-herald/messages/irc.rb +58 -0
  62. data/lib/nagios-herald/messages/pager.rb +75 -0
  63. data/lib/nagios-herald/messages.rb +3 -0
  64. data/lib/nagios-herald/test_helpers/base_test_case.rb +82 -0
  65. data/lib/nagios-herald/util.rb +45 -0
  66. data/lib/nagios-herald/version.rb +3 -0
  67. data/lib/nagios-herald.rb +7 -0
  68. data/lib/stackbars/__init__.py +0 -0
  69. data/lib/stackbars/chart_utils.py +25 -0
  70. data/lib/stackbars/grouped_stackbars.py +97 -0
  71. data/lib/stackbars/pilfonts/Tahoma.ttf +0 -0
  72. data/lib/stackbars/pilfonts/aerial.ttf +0 -0
  73. data/lib/stackbars/pilfonts/arial_black.ttf +0 -0
  74. data/lib/stackbars/stackbar.py +100 -0
  75. data/nagios-herald.gemspec +33 -0
  76. data/test/env_files/check_cpu_idle.CRITICAL +199 -0
  77. data/test/env_files/check_cpu_iowait.WARNING +199 -0
  78. data/test/env_files/check_disk.CRITICAL +197 -0
  79. data/test/env_files/check_disk.CRITICAL_ICINGA +197 -0
  80. data/test/env_files/check_disk.RECOVERY +197 -0
  81. data/test/env_files/check_memory.CRITICAL +197 -0
  82. data/test/env_files/nagios_vars.EXAMPLE +197 -0
  83. data/test/unit/test_config.rb +31 -0
  84. data/test/unit/test_executor.rb +65 -0
  85. data/test/unit/test_formatter_base.rb +131 -0
  86. data/test/unit/test_formatter_check_cpu_idle_critical.rb +135 -0
  87. data/test/unit/test_formatter_check_memory.rb +135 -0
  88. data/test/unit/test_icinga_variables.rb +31 -0
  89. data/test/unit/test_logging.rb +35 -0
  90. data/test/unit/test_message_email.rb +69 -0
  91. data/test/unit/test_message_pager.rb +69 -0
  92. metadata +204 -0
@@ -0,0 +1,143 @@
1
+ # CheckDisk formatter
2
+ # Colorizes and bolds text generated by the 'check_disk' NRPE check.
3
+
4
+ module NagiosHerald
5
+ class Formatter
6
+ class CheckDisk < NagiosHerald::Formatter
7
+ include NagiosHerald::Logging
8
+
9
+ # Public: Gets information about each of the partitions in the check's output.
10
+ # Parses the partition name and free space value and percentage
11
+ #
12
+ # Expects the check's output looks similar to one of the following cases:
13
+ # Simple output - ends with :
14
+ # DISK CRITICAL - free space: / 7002 MB (18% inode=60%): /data 16273093 MB (26% inode=99%):
15
+ # Long output - delimited by |
16
+ # DISK CRITICAL - free space: / 7051 MB (18% inode=60%); /data 16733467 MB (27% inode=99%);| /=31220MB;36287;2015;0;40319 /dev/shm=81MB;2236;124;0;2485 /data=44240486MB;54876558;3048697;0;60973954
17
+ #
18
+ # input - A string containing partition data to match.
19
+ #
20
+ # Returns an array of hash data per partition
21
# Parses the "free space:" portion of check_disk output into one hash per
# partition. Handles both the ';'-delimited form (followed by '|' and
# perfdata) and the ':'-delimited form that has no perfdata.
#
# input - A String containing the check output (nil is treated as empty).
#
# Returns an Array of Hashes with the String keys "partition", "free_unit"
# and "free_percent" (all values are Strings). Fragments that do not look
# like partition data are skipped; an empty Array is returned when nothing
# could be parsed.
def get_partitions_data(input)
  # Capture everything after "free space:" up to the perfdata delimiter, or
  # to the end of the string when there is no perfdata.
  space_data = /free space:\s*(?<size>[^|]*)/.match(input.to_s)
  return [] unless space_data

  space_str = space_data[:size]
  splitter = space_str.include?(';') ? ';' : ':'
  partition_regex = /(?<partition>\S+)\s+(?<free_unit>.*)\s+\((?<free_percent>\d+)\%.*/

  space_str.split(splitter).each_with_object([]) do |part, partitions|
    data = partition_regex.match(part)
    next unless data # not partition data (e.g. trailing whitespace or noise)

    partitions << Hash[data.names.zip(data.captures)]
  end
end
36
+
37
+ # Public: Generates an image of stack bars for all disk partitions.
38
+ #
39
+ # partitions_data - The array of hashes generated by #get_partitions_data
40
+ #
41
+ # Returns the filename of the generated image or nil if the image was not generated.
42
# Renders a stacked-bar chart of used space per partition by running the
# 'draw_stack_bars' script.
#
# partitions_data - Array of Hashes with "partition" and "free_percent"
#                   entries; String and Symbol keys are both accepted.
#                   The array is not modified.
#
# Returns the path of the generated image inside @sandbox, or nil when
# there is nothing to plot or the script did not exit successfully.
def get_partitions_stackedbars_chart(partitions_data)
  return nil if partitions_data.nil? || partitions_data.empty?

  # The partition hashes may be keyed by String or by Symbol.
  read = lambda { |entry, key| entry[key] || entry[key.to_s] }

  # Most full partition first; compare numerically, not as strings.
  ordered = partitions_data.sort_by { |entry| read.call(entry, :free_percent).to_i }
  volume_args = ordered.map do |entry|
    "#{read.call(entry, :partition)}=#{100 - read.call(entry, :free_percent).to_i}"
  end

  output_file = File.join(@sandbox, "host_status.png")
  # NOTE(review): assumes get_script_path returns a bare executable path
  # (no embedded arguments) - confirm against NagiosHerald::Util.
  script = NagiosHerald::Util::get_script_path('draw_stack_bars')

  # Argument-vector form: no shell is involved, so partition names taken
  # from the check output cannot be interpreted as shell syntax.
  generated = system(script, "--width=500", "--output=#{output_file}", *volume_args, :out => File::NULL)
  generated ? output_file : nil
end
59
+
60
+ # Public: Overrides Formatter::Base#additional_info.
61
+ # Calls on methods defined in this class to parse the check output and
62
+ # generate a stack-bar chart of partition usage.
63
+ #
64
+ # Returns nothing. Updates the formatter content hash.
65
# Adds the check output to the additional_info section as text and HTML.
# For non-recovery output it also tries to attach and inline a stack-bar
# chart of partition usage.
#
# Returns nothing. Updates the formatter content hash.
def additional_info
  section = __method__
  output = get_nagios_var("NAGIOS_#{@state_type}OUTPUT")
  return unless output # nothing to report and nothing to chart

  add_text(section, "Additional Info:\n #{unescape_text(output)}\n\n")

  # A recovered check carries no partition data to chart; just show the
  # A-OK message.
  if output =~ /DISK OK/
    add_html(section, %Q(Additional Info:<br><b><font color="green"> #{output}</font></b><br><br>))
    return
  end

  add_html(section, "<b>Additional Info</b>:<br> #{output}<br><br>")

  partitions = get_partitions_data(output)
  partitions_chart = get_partitions_stackedbars_chart(partitions)
  return unless partitions_chart # chart generation failed; text/HTML only

  add_attachment partitions_chart
  add_html(section, %Q(<img src="#{partitions_chart}" width="500" alt="partitions_remaining_space" /><br><br>))
end
86
+
87
+ # Public: Overrides Formatter::Base#additional_details.
88
+ # Calls on methods defined in this class to colorize and bold the `df` output
89
+ # generated by the check_disk NRPE check.
90
+ #
91
+ # Returns nothing. Updates the formatter content hash.
92
# Adds the long check output (the `df` listing) to the additional_details
# section. When the output contains a line such as
# "THRESHOLDS - WARNING:50%;CRITICAL:40%;", each df line whose free space
# (100 minus the first "<n>%" on the line) is at or below a threshold is
# bolded and coloured in the HTML output.
#
# Returns nothing. Updates the formatter content hash.
def additional_details
  section = __method__
  long_output = get_nagios_var("NAGIOS_LONG#{@state_type}OUTPUT")
  if long_output.nil?
    # No long output was supplied; keep the trailing line break only.
    line_break(section)
    return
  end

  lines = long_output.split('\n') # the "newlines" in this value are literal '\n' strings

  # If threshold information was passed, remember it for colouring.
  lines.grep(/THRESHOLDS - /).each do |line|
    thresholds = /WARNING:(?<warning_threshold>\d+)%;CRITICAL:(?<critical_threshold>\d+)%;/.match(line)
    @warning_threshold = thresholds && thresholds[:warning_threshold]
    @critical_threshold = thresholds && thresholds[:critical_threshold]
  end

  # The plain-text rendering is the same whether or not we can colourise.
  add_text(section, "Additional Details:\n#{unescape_text(long_output)}\n")

  if @warning_threshold && @critical_threshold
    critical = @critical_threshold.to_i
    warning = @warning_threshold.to_i

    output_lines = lines.map do |line|
      # The THRESHOLDS line itself contains percentages; pass it through unchanged.
      used = (line =~ /THRESHOLDS/) ? nil : line[/(\d+)%/, 1]
      next line unless used

      percent_free = 100 - used.to_i
      if percent_free <= critical
        %Q(<b><font color="red">#{line}</font> Free disk space <font color="red">(#{percent_free}%)</font> is <= CRITICAL threshold (#{@critical_threshold}%).</b>)
      elsif percent_free <= warning
        %Q(<b><font color="orange">#{line}</font> Free disk space <font color="orange">(#{percent_free}%)</font> is <= WARNING threshold ( #{@warning_threshold}%).</b>)
      else
        line
      end
    end

    add_html(section, "<b>Additional Details</b>:")
    add_html(section, (["<pre>"] + output_lines + ["</pre>"]).join("<br>"))
  else
    # No thresholds: just spit out what we got from df.
    add_html(section, "<b>Additional Details</b>:<br><pre>#{unescape_text(long_output)}</pre><br><br>")
  end
  line_break(section)
end
140
+
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,155 @@
1
+ # CheckLogstash formatter
2
+ # Renders the results of the Logstash query behind an alert as HTML tables.
3
+
4
+ module NagiosHerald
5
+ class Formatter
6
+ class CheckLogstash < NagiosHerald::Formatter
7
+ include NagiosHerald::Logging
8
+
9
+ # Public: Overrides Formatter::Base#additional_info.
10
+ # Queries Logstash via the LogstashQuery helper and renders the hits or
11
+ # aggregation buckets as an HTML table.
12
+ #
13
+ # Returns nothing. Updates the formatter content hash.
14
# Re-runs the Logstash query named in the service check command and adds
# the outcome to the additional_info section: an HTML table of hits, an
# HTML table of aggregation buckets, or a short text note when the query
# failed, returned nothing, or nests aggregations too deeply to render.
#
# Returns nothing. Updates the formatter content hash.
def additional_info
  section = __method__ # this defines the section key in the formatter's content hash
  check_command = get_nagios_var("NAGIOS_SERVICECHECKCOMMAND")
  # to_s: a missing variable becomes an empty command, which ends up on the
  # "something went wrong" path below.
  command_components = parse_command(check_command.to_s)

  # The aggregation level limit for which we can render results
  agg_level_limit = 3

  logstash_helper = NagiosHerald::Helpers::LogstashQuery.new
  results = get_logstash_results(logstash_helper, command_components[:query])

  # get_logstash_results returns an empty collection when the query raised.
  if results.nil? || results.empty?
    add_text(section, "Something went wrong while getting logstash results\n\n")
    return
  end

  hits = (results["hits"] || {})["hits"] || []
  aggregations = results["aggregations"]

  if hits.empty? && aggregations
    # We have aggregations
    query_agg_data = logstash_helper.query["aggregations"] || logstash_helper.query["aggs"]
    agg_depth_level = 1 + agg_depth(query_agg_data)

    # We can't cope with more than 3 level deep aggregates
    if agg_depth_level > agg_level_limit
      add_text(section, "Error - query contains #{agg_depth_level} levels of aggregation - more than #{agg_level_limit} levels are not supported by this plugin\n")
      return
    end

    agg_field_name = query_agg_data.keys.first
    html_output = generate_table_from_buckets(aggregations[agg_field_name]["buckets"])
  elsif hits.empty?
    # Neither hits nor aggregations: there is nothing to tabulate.
    add_text(section, "No Logstash results were returned for this query\n\n")
    return
  else
    # We have normal search results
    html_output = generate_html_output(hits)
  end

  add_html(section, html_output)
end
55
+
56
+ # Public: Overrides Formatter::Base#additional_details.
57
+ # This formatter adds nothing to this section, so the method is
58
+ # intentionally empty.
59
+ #
60
+ # Returns nothing. Updates the formatter content hash.
61
+ def additional_details
62
+ # Intentionally left empty: this formatter adds no content in additional_details.
63
+ end
64
+
65
+ private
66
+
67
# Splits a check command string of the form
# "command!query!warn!crit!time_period" into its components.
#
# service_command - The command String (nil is treated as empty).
#
# Returns a Hash with the keys :command, :query, :warn_threshold,
# :crit_threshold and :time_period. Components that are absent are nil.
# The misspelled :time_perdiod key is kept as an alias for existing callers.
#
# NOTE(review): a query that itself contains '!' will be split in the
# wrong place.
def parse_command(service_command)
  command, query, warn_threshold, crit_threshold, time_period = service_command.to_s.split("!")
  {
    :command        => command,
    :query          => query,
    :warn_threshold => warn_threshold,
    :crit_threshold => crit_threshold,
    :time_period    => time_period,
    :time_perdiod   => time_period # legacy (misspelled) key
  }
end
77
+
78
# Counts how many levels of nested aggregations a query's aggregation
# definition contains, i.e. the largest number of "aggs"/"aggregations"
# keys found along any single path through the structure.
#
# agg_data - The parsed aggregation definition (normally a Hash). Scalars
#            and nil contribute no depth.
#
# Returns an Integer (0 when there is no nested aggregation).
def agg_depth(agg_data)
  case agg_data
  when Hash
    depths = agg_data.map do |key, value|
      # Only the exact DSL keys open a new level; an aggregation that is
      # merely *named* e.g. "host_aggs" does not.
      level = %w[aggs aggregations].include?(key.to_s) ? 1 : 0
      level + agg_depth(value)
    end
    depths.max || 0 # deepest sibling wins, independent of key order
  when Array
    agg_data.map { |item| agg_depth(item) }.max || 0
  else
    0 # strings, numbers, booleans and nil are leaves
  end
end
90
+
91
# Runs the query through the given helper: as a query file when the query
# text contains ".json", otherwise as a Kibana-style query string.
#
# logstash_helper - Object responding to query_from_file and
#                   kibana_style_query.
# query           - The query String (or query file path).
#
# Returns whatever the helper returns, or an empty Array when the helper
# raised a StandardError (the error and its backtrace are logged). Signals
# and exit requests are deliberately not rescued.
def get_logstash_results(logstash_helper, query)
  if query.include?(".json")
    logstash_helper.query_from_file(query)
  else
    logstash_helper.kibana_style_query(query)
  end
rescue StandardError => e
  logger.error "Exception encountered retrieving Logstash Query - #{e.message}"
  Array(e.backtrace).each do |line|
    logger.error "#{line}"
  end
  []
end
106
+
107
# Renders search hits as an HTML table with one row per hit and one column
# per "_source" field.
#
# results - Array of hit Hashes, each carrying a "_source" Hash.
#
# Returns the table as a String. Columns are the union of all field names
# in first-seen order; a hit lacking a field gets an empty cell, so values
# always sit under their own header. An empty result list yields a table
# with an empty header row.
#
# NOTE(review): field names and values are interpolated without HTML
# escaping.
def generate_html_output(results)
  sources = Array(results).map { |hit| hit["_source"] || {} }
  columns = sources.flat_map(&:keys).uniq

  headers = "<tr>#{columns.map { |name| "<th>#{name}</th>" }.join}</tr>"
  body = sources.map do |source|
    "<tr>#{columns.map { |name| "<td>#{source[name]}</td>" }.join}</tr>"
  end.join

  "<table border='1' cellpadding='0' cellspacing='1'>" + headers + body + "</table>"
end
118
+
119
# Renders aggregation buckets as an HTML table: one header row built from
# the union of all bucket keys, then one row per bucket.
#
# buckets - Array of bucket Hashes (nil is treated as no buckets).
#
# Returns the table as a String. Cells are emitted in header order, and a
# bucket lacking a key gets an empty cell, so values line up with headers.
def generate_table_from_buckets(buckets)
  buckets ||= []
  unique_keys = buckets.flat_map(&:keys).uniq

  headers = "<tr>#{unique_keys.map { |name| "<th>#{name}</th>" }.join}</tr>"
  body = buckets.map do |bucket|
    "<tr>#{unique_keys.map { |name| render_table_cell(bucket[name]) }.join}</tr>"
  end.join

  "<table border='1' cellpadding='0' cellspacing='1'>" + headers + body + "</table>"
end

# Renders a Hash as a single table row, or as a complete one-row table
# with a header row when add_headers is true.
#
# data        - The Hash to render; nested Hashes become nested tables.
# add_headers - When true, wrap the row in <table> with a header row.
#
# Returns the HTML String.
def generate_table_from_hash(data,add_headers=false)
  row = "<tr>#{data.map { |_name, value| render_table_cell(value) }.join}</tr>"
  return row unless add_headers

  headers = "<tr>#{data.keys.map { |name| "<th>#{name}</th>" }.join}</tr>"
  "<table border='1' cellpadding='0' cellspacing='1'>" + headers + row + "</table>"
end

# Renders one value as a <td> cell. A Hash with a "buckets" key becomes a
# nested bucket table, any other Hash a nested one-row table, and anything
# else is interpolated as-is.
def render_table_cell(value)
  content =
    if value.kind_of?(Hash)
      if value.has_key?("buckets")
        generate_table_from_buckets(value["buckets"])
      else
        generate_table_from_hash(value, true)
      end
    else
      value
    end
  "<td>#{content}</td>"
end
153
+ end
154
+ end
155
+ end
@@ -0,0 +1,42 @@
1
+ module NagiosHerald
2
+ class Formatter
3
+ class CheckMemory < NagiosHerald::Formatter
4
+ include NagiosHerald::Logging
5
+
6
+ # Public: Overrides Formatter::Base#additional_details.
7
+ # Colorizes the `ps` output returned by the check_mem NRPE check.
8
+ # The output contains the top n processes by memory utilization similar to:
9
+ #
10
+ # TOP 5 PROCESSES BY MEMORY USAGE:
11
+ # %MEM RSS USER PID COMMAND
12
+ # 2.4 1231696 larry 6658 tmux
13
+ # 1.5 777204 moe 32234 tmux/tmux -CC
14
+ # 0.8 399964 curly 12161 /usr/sbin/gmond
15
+ # 0.7 384772 shep 1945 /usr/sbin/mysqld --basedir=/usr --datadir=/var/lib/mysql --plugin-dir=/usr/lib64/mysql/plugin --user=mysql --log-error=/var/lib/mysql/mysql.example.com.err --pid-file=/var/lib/mysql/mysql.example.com.pid
16
+ # 0.7 355148 root 1245 SCREEN
17
+ #
18
+ # Returns nothing. Updates the formatter content hash.
19
# Adds the `ps` listing from the long check output to the
# additional_details section. In the HTML version the first two lines
# (title and column header) are left plain, the first process line is
# coloured red and all remaining lines orange.
#
# Returns nothing. Updates the formatter content hash.
def additional_details
  section = __method__
  long_output = get_nagios_var("NAGIOS_LONG#{@state_type}OUTPUT")
  if long_output.nil?
    # No long output was supplied; keep the trailing line break only.
    line_break(section)
    return
  end

  # The "newlines" in this value are literal '\n' strings.
  title, column_header, top_process, *other_processes = long_output.split('\n')

  html = ["<pre>", title, column_header]
  html << "<font color='red'>#{top_process}</font>"                              # Color the first result red...
  html.concat(other_processes.map { |line| "<font color='orange'>#{line}</font>" }) # ...and the remainder orange.
  html << "</pre>"

  add_html(section, "<b>Additional Details</b>:")
  add_html(section, html.join("<br>"))
  add_text(section, "Additional Details:\n")
  add_text(section, "#{unescape_text(long_output)}\n")
  line_break(section)
end
39
+
40
+ end
41
+ end
42
+ end
@@ -0,0 +1,19 @@
1
+ module NagiosHerald
2
+ class Formatter
3
+ class Example < NagiosHerald::Formatter
4
+
5
+ # override Formatter::Base#additional_details
6
# Minimal example: contributes a fixed text snippet and a fixed HTML
# snippet to the section named after this method, then a line break.
def additional_details
  section = __method__ # the content section shares this method's name
  add_text(section, "Example text")
  add_html(section, "Example <b>HTML</b>")
  line_break(section) # trailing line break
end
16
+
17
+ end
18
+ end
19
+ end
@@ -0,0 +1 @@
1
+ require 'nagios-herald/formatters/base'
@@ -0,0 +1,99 @@
1
+ require 'chef/search/query'
2
+
3
+ module NagiosHerald
4
+ module Helpers
5
+ class GangliaGraph
6
+ include NagiosHerald::Logging
7
+
8
+ # Public: Initialize a GangliaGraph helper object.
9
+ #
10
+ # Returns GangliaGraph helper object.
11
+ def initialize
12
+ @ganglia_base_uri = Config.config['servers']['ganglia'] # interpolated right after "http://" by get_ganglia_url
13
+ end
14
+
15
+ # Public: Loads a Chef knife config.
16
+ # I recognize not everyone uses Chef, or any CM for that matter.
17
+ # Still, I like having this example here.
18
+ #
19
+ # Returns true/false depending on whether a Chef knife config was loaded.
20
# Loads the Chef knife configuration once and remembers the outcome in
# @knife_config_loaded. The file comes from the 'knife_config' setting,
# falling back to '~/.chef/knife.rb'.
#
# Returns true when the file exists and was handed to Chef::Config,
# false (after logging a warning) when it does not exist.
def load_knife_config
  return @knife_config_loaded unless @knife_config_loaded.nil?

  knife_config_file = File.expand_path(Config.config['knife_config'] || '~/.chef/knife.rb')
  if File.exist?(knife_config_file)
    Chef::Config.from_file(knife_config_file)
    @knife_config_loaded = true
  else
    logger.warn("Knife config file not found (#{knife_config_file})")
    @knife_config_loaded = false
  end
end
34
+
35
+ # Public: Get the Ganglia cluster name for a given host.
36
+ # So, if you're still reading this *and* you use Chef,
37
+ # I recognize that you may not store the Ganglia cluster name as a node
38
+ # attribute. If you do, it's probably not called 'cluster_name'...
39
+ #
40
+ # host - The name of the host whose Ganglia cluster name we need to look up.
41
+ #
42
+ # Returns the Ganglia cluster name for the host.
43
# Looks up the host in Chef (by fqdn) and reads the Ganglia cluster name
# from the node's ganglia.cluster_name attribute.
#
# host - The name of the host whose Ganglia cluster name we need.
#
# Returns the cluster name, or nil when the knife config could not be
# loaded or the search returned no node (a warning is logged in the latter
# case). When several nodes match, the first one is used and a warning is
# logged.
def get_cluster_name_for_host(host)
  return nil unless load_knife_config

  query = Chef::Search::Query.new
  # The first element of the search result is the list of matching nodes.
  nodes = Array(query.search('node', "fqdn:#{host}").first)
  if nodes.empty?
    logger.warn("No Chef node found for host '#{host}'")
    return nil
  end
  # We only expect a single node; say so when that is not the case.
  logger.warn("Found #{nodes.length} Chef nodes for host '#{host}'; using the first") if nodes.length > 1

  nodes.first.ganglia.cluster_name
end
50
+
51
+ # Public: Generate the URL required to download a graph of the required metric
52
+ #
53
+ # cluster_name - The Ganglia cluster this node belongs to.
54
+ # host - The hostname of the node we need a metric for.
55
+ # metric - The name of the Ganglia metric we need.
56
+ # range - The time period we expect the metric to cover.
57
+ #
58
+ # Example
59
+ #
60
+ # get_ganglia_url("Web", "web0001.example.com", "part_max_used", "1day")
61
+ #
62
+ # Returns a full Ganglia URL defining the metric graph to download
63
+ def get_ganglia_url(cluster_name, host, metric, range)
64
+ return "http://#{@ganglia_base_uri}/graph.php?&c=#{cluster_name}&h=#{host}&m=#{metric}&r=#{range}&z=medium" # NOTE(review): arguments are interpolated verbatim (no URL-encoding); z=medium is fixed
65
+ end
66
+
67
+ # Public: Retrieve the Ganglia graphs we desire
68
+ #
69
+ # hosts - An array of hosts for which to retrieve metrics.
70
+ # metric - The name of the Ganglia metric we need.
71
+ # path - The local path on the host running nagios-herald under which image
72
+ # files will be temporarily generated.
73
+ # range - The time period we expect the metric to cover.
74
+ #
75
+ # Example
76
+ #
77
+ # get_graphs(["web0001.example.com"], "part_max_used", "/tmp/img1234", "1day")
78
+ #
79
+ # Returns the local paths of all downloaded images to be attached/inlined with a message.
80
# Downloads one Ganglia graph per host into the given directory, naming
# each file "<host>-<metric>.png".
#
# hosts  - An array of hosts for which to retrieve metrics.
# metric - The name of the Ganglia metric we need.
# path   - Local directory for the images (one trailing slash is ignored).
# range  - The time period we expect the metric to cover.
#
# Returns the local paths of the images that downloaded successfully; a
# warning is logged for every host whose graph could not be fetched.
def get_graphs( hosts, metric, path, range )
  # Drop a trailing slash so the pieces of the image name stay clear.
  base_dir = path.sub(/\/$/, "")

  hosts.each_with_object([]) do |host, image_paths|
    cluster_name = get_cluster_name_for_host(host)
    url = get_ganglia_url(cluster_name, host, metric, range)
    image_path = "#{base_dir}/#{host}-#{metric}.png"

    if NagiosHerald::Helpers::UrlImage.download_image(url, image_path)
      image_paths.push( image_path )
    else
      logger.warn("No Ganglia graph found for '#{host}' (cluster: '#{cluster_name}') - '#{metric}' in '#{range}'")
    end
  end
end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,85 @@
1
+ require 'securerandom'
2
+ require 'uri'
3
+
4
+ module NagiosHerald
5
+ module Helpers
6
+ class GraphiteGraph
7
+ include NagiosHerald::Logging
8
+
9
+ # Public: Initialize a GraphiteGraph helper object.
10
+ #
11
+ # Returns a GraphiteGraph helper object.
12
+ def initialize
13
+ # Currently hard-codes the value for optional graphs showing historical
14
+ # data.
15
+ @graphite_historical_lookup = '-24h' # used by get_graph as the 'from' value and as the filename suffix of the historical image
16
+ @image_paths = [] # local paths of successfully downloaded images; appended to by download_image
17
+ end
18
+
19
+ # Public: Download a Graphite image.
20
+ #
21
+ # url - The Graphite url we'll download as an image.
22
+ # download_path - The path to where the image will be downloaded.
23
+ #
24
+ # Returns nothing. Appends the downloaded image path to @image_paths.
25
# Downloads the image behind a Graphite URL to download_path. On success
# the path is recorded in @image_paths; on failure a warning is logged.
def download_image(url, download_path)
  unless NagiosHerald::Helpers::UrlImage.download_image(url, download_path)
    return logger.warn("Could not download Graphite graph for '#{url}'")
  end

  @image_paths.push(download_path)
end
33
+
34
+ # Public: Retrieve a Graphite graph
35
+ #
36
+ # url - A string containing the full URL to get from Graphite.
37
+ # path - The local path on the host running nagios-herald under which image
38
+ # files will be temporarily generated.
39
+ # show_historical - A boolean that allows one to optionally download a
40
+ # graph showing historical data for comparison.
41
+ # Defaults to false.
42
+ #
43
+ # Because this will probably be fed URLs used in Nagios checks, we'll
44
+ # strip out '&format' and '&rawData' query parameters to ensure we
45
+ # get an image instead of text/json/csv/etc.
46
+ #
47
+ # In cases where the method is called requesting an historical image
48
+ # we'll strip '&until' and replace the value of '&from' with that of
49
+ # @graphite_historical_lookup.
50
+ #
51
+ # Example
52
+ #
53
+ # get_graph("http://graphite.example.com/render/?target=foo.bar.baz&from=-15min", "/tmp/img1234", true)
54
+ #
55
+ # Returns the local path of the downloaded image to be attached/inlined with a message.
56
# Downloads a Graphite graph, and optionally a second "historical" graph
# of the same targets, as PNG files named after a random UUID.
#
# url             - Full Graphite URL. The 'rawData' and 'format' query
#                   parameters are removed so an image is returned.
# path            - Local directory for the images (one trailing slash is
#                   ignored).
# show_historical - When truthy, also download a graph whose 'from' is
#                   @graphite_historical_lookup and whose 'until' is
#                   removed; that file gets the lookup value as a filename
#                   suffix.
#
# Query parameters are matched by exact name wherever they appear in the
# query string, and a URL without a query string is accepted.
#
# Returns @image_paths (the paths downloaded so far by this helper).
def get_graph(url, path, show_historical=nil)
  uri = URI(url)
  name_of = lambda { |param| param.split('=', 2).first }

  # Work on the raw "name=value" pairs so that already-encoded target
  # expressions are passed through untouched.
  params = uri.query.to_s.split('&').reject(&:empty?)
  params = params.reject { |param| %w[rawData format].include?(name_of.call(param)) }

  # Strip the trailing slash from the path.
  path = path.sub(/\/$/, "")
  # A random UUID ties the regular and the historical image together.
  image_uuid = SecureRandom.uuid

  uri.query = params.empty? ? nil : params.join('&')
  download_image(uri.to_s, "#{path}/#{image_uuid}.png")

  if show_historical
    historical_from = "from=#{@graphite_historical_lookup}"
    from_replaced = false
    historical_params = params.reject { |param| name_of.call(param) == 'until' }.map do |param|
      next param unless name_of.call(param) == 'from'
      from_replaced = true
      historical_from
    end
    historical_params << historical_from unless from_replaced

    uri.query = historical_params.join('&')
    download_image(uri.to_s, "#{path}/#{image_uuid}#{@graphite_historical_lookup}.png")
  end

  @image_paths
end
83
+ end
84
+ end
85
+ end
@@ -0,0 +1,125 @@
1
+ require 'net/http'
2
+ require 'uri'
3
+ require 'json'
4
+ require 'elasticsearch'
5
+
6
+ # Query Logstash with arbitrary search criteria
7
+
8
+ module NagiosHerald
9
+ module Helpers
10
+ class LogstashQuery
11
+
12
+ attr_reader :query
13
+
14
+ # Public: Initialize a new LogstashQuery object.
15
+ #
16
+ # options - A Hash of optional settings (default: {}):
17
+ # :index - The index to search (defaults to today's
18
+ # "logstash-YYYY.MM.DD" index).
19
+ # :time_period - How far back kibana_style_query looks (defaults
20
+ # to "1h").
21
+ #
22
+ # Example:
23
+ #
24
+ # NEEDS EXAMPLES
25
+ #
26
+ # Returns a new LogstashQuery object.
27
# Builds the Elasticsearch client and query defaults.
#
# options - Hash of optional settings:
#           :index       - index to search (default: "logstash-<today>",
#                          formatted as YYYY.MM.DD).
#           :time_period - look-back window (default: "1h").
#
# Reads 'num_results' (default 10), the result truncation length and 'url'
# from the 'logstash' section of the configuration. The truncation length
# is taken from 'result_field_truncate'; the historical misspelling
# 'result_field_trucate' is still honoured.
def initialize(options={})
  logstash_config = Config.config['logstash']

  @logstash_index = options[:index] || "logstash-#{Time.now.strftime("%Y.%m.%d")}"
  @logstash_time_period = options[:time_period] || "1h"
  @logstash_num_results = logstash_config['num_results'] || 10
  @logstash_result_truncate = logstash_config['result_field_truncate'] || logstash_config['result_field_trucate']

  # Split the configured Logstash (Elasticsearch) URL into its parts.
  uri = URI.parse(logstash_config['url'])
  @logstash_host = uri.host
  @logstash_port = uri.port
  @logstash_uri = uri.request_uri

  @es = Elasticsearch::Client.new hosts: ["#{@logstash_host}:#{@logstash_port}"], reload_connections: true
end
45
+
46
+ # Public: Queries Logstash.
47
+ #
48
+ # Example:
49
+ #
50
+ # results = logstash_query.kibana_style_query("status:500")
51
+ #
52
+ # Returns the results of the query in the requested format, nil otherwise.
53
# Builds a "filtered" query from a Kibana-style query string, restricted
# to the last @logstash_time_period via the index_timestamp field and
# capped at @logstash_num_results hits, stores it in @query and runs it.
#
# query_string - The query String; one leading and one trailing single
#                quote are removed if present.
#
# Returns the query results after truncate_results has been applied.
def kibana_style_query(query_string)
  unquoted = query_string
  unquoted = unquoted[1..-1] if unquoted[0] == "'"
  unquoted = unquoted[0..-2] if unquoted[-1] == "'"

  text_match = { "query_string" => { "query" => "#{unquoted}" } }
  time_window = {
    "range" => {
      "index_timestamp" => {
        "from" => "now-#{@logstash_time_period}",
        "to" => "now"
      }
    }
  }

  @query = {
    "from" => 0,
    "size" => @logstash_num_results,
    "query" => {
      "filtered" => {
        "query" => { "bool" => { "should" => [text_match] } },
        "filter" => { "bool" => { "must" => [{ "match_all" => {} }, time_window] } }
      }
    }
  }

  truncate_results(run_logstash_query(@query))
end
97
+
98
# Loads a JSON query body from a file, stores it in @query and runs it.
#
# query_file - Path of the file containing the JSON query.
#
# Returns the query results after truncate_results has been applied.
# Raises RuntimeError when the file does not exist.
def query_from_file(query_file)
  # File.exist? rather than File.exists?, which newer Rubies no longer provide.
  raise "Query file #{query_file} does not exist" unless File.exist?(query_file)

  @query = JSON.parse(File.read(query_file))
  truncate_results(run_logstash_query(@query))
end
107
+
108
+ private
109
+
110
# Sends the query body to Elasticsearch against @logstash_index.
#
# query_body - The query Hash to use as the search body.
#
# Returns the search response.
# Raises RuntimeError when Elasticsearch rejects the query as a bad
# request; the message includes the detail reported by Elasticsearch.
def run_logstash_query(query_body)
  @es.search index: @logstash_index, body: query_body
rescue Elasticsearch::Transport::Transport::Errors::BadRequest => e
  raise "Elasticsearch doesn't like your query. Please check you escaped it correctly. (#{e.message})"
end
117
+
118
# Shortens every String field of every hit's "_source" to at most
# @logstash_result_truncate characters, in place. Non-String values
# (numbers, arrays, nested hashes, nil) are left untouched.
#
# results - The search response Hash (expects results["hits"]["hits"]).
#
# Returns the same results object. Does nothing when no truncation length
# is configured.
def truncate_results(results)
  return results unless @logstash_result_truncate

  limit = @logstash_result_truncate.to_i
  results["hits"]["hits"].each do |hit|
    source = hit["_source"]
    source.each do |field_name, field_value|
      # [0, limit] keeps exactly `limit` characters.
      source[field_name] = field_value[0, limit] if field_value.kind_of?(String)
    end
  end
  results
end
122
+ end
123
+ end
124
+ end
125
+