malevich 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +20 -0
  3. data/Gemfile +2 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/bin/malevich +38 -0
  8. data/example/cpu.rb +31 -0
  9. data/example/disk.rb +59 -0
  10. data/example/disk_stat.rb +28 -0
  11. data/example/dns_check.rb +7 -0
  12. data/example/exim.rb +15 -0
  13. data/example/find_files.rb +21 -0
  14. data/example/http.rb +25 -0
  15. data/example/iptables.rb +27 -0
  16. data/example/la.rb +21 -0
  17. data/example/mdadm.rb +44 -0
  18. data/example/megacli.rb +13 -0
  19. data/example/memory.rb +31 -0
  20. data/example/net.rb +25 -0
  21. data/example/net_stat.rb +25 -0
  22. data/example/nginx.rb +22 -0
  23. data/example/ntp.rb +15 -0
  24. data/example/pgsql.rb +71 -0
  25. data/example/runit.rb +48 -0
  26. data/example/status_file.rb +17 -0
  27. data/example/tw_cli.rb +17 -0
  28. data/lib/malevich.rb +59 -0
  29. data/lib/malevich/dsl.rb +78 -0
  30. data/lib/malevich/init.rb +17 -0
  31. data/lib/malevich/loader.rb +80 -0
  32. data/lib/malevich/monitor.rb +40 -0
  33. data/lib/malevich/plugin.rb +70 -0
  34. data/lib/malevich/plugin/error.rb +12 -0
  35. data/lib/malevich/plugin/event.rb +68 -0
  36. data/lib/malevich/plugin/http.rb +25 -0
  37. data/lib/malevich/plugin/init.rb +5 -0
  38. data/lib/malevich/plugin/shell_out.rb +28 -0
  39. data/lib/malevich/plugin/time.rb +9 -0
  40. data/lib/malevich/responders/error.rb +30 -0
  41. data/lib/malevich/responders/http.rb +46 -0
  42. data/lib/malevich/responders/init.rb +4 -0
  43. data/lib/malevich/responders/riemann.rb +59 -0
  44. data/lib/malevich/responders/udp.rb +59 -0
  45. data/lib/malevich/version.rb +3 -0
  46. data/malevich.gemspec +32 -0
  47. data/test/plugin_helpers_spec.rb +6 -0
  48. metadata +219 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1b68e4d209f800c0c2a08ce6166a01fe4c3f9dd
4
+ data.tar.gz: 0a28fab3e6992d1a6f29b2cc88c4bb3d020fd566
5
+ SHA512:
6
+ metadata.gz: d724ea316def8f81ea864a15a240dce95eb0adc2bcf3498317d8eab48b0cdafc7b0ac0da99500a0754875e37e4e07c452924ef8296bb05f155812fc09c7eb696
7
+ data.tar.gz: 04f3e20dcc6334742a56494c9618c331d0c2b91337dece4ddf29af2d0981d3987888abb63c4146c3c53d67660f7f8e03736691350fbe5894e765f8f53cd1a838
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
19
+ vendor
20
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Vasiliev Dmitry
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Malevich
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'malevich'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install malevich
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
data/bin/malevich ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ lib = File.expand_path('../lib', __FILE__)
5
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
+
7
+ require 'malevich'
8
+ require 'trollop'
9
+
10
+ cmd = Trollop::options do
11
+ version "Malevich #{Malevich::VERSION}"
12
+ banner <<-EOS
13
+ Malevich is tool for monitoring.
14
+
15
+ Usage:
16
+ malevich [options]
17
+ where [options] are:
18
+ EOS
19
+
20
+ opt :config, 'Config file', :default => '/etc/malevich/config.yml'
21
+ opt :'log-level', 'Log level', :default => 'DEBUG'
22
+ opt :'riemann-host', 'Riemann server', :short => '-r', :default => '127.0.0.1:5555', :multi => true
23
+ opt :'riemann-tcp', 'Use tcp to connect riemann', :default => true
24
+ opt :tags, 'Set tags', :short => '-t', :multi => true
25
+ opt :plugins, 'Directory for plugins', :short => '-d', :default => '/usr/share/malevich/plugins', :multi => true
26
+ opt :'http-responder', 'Bind http api', :short => '-h', :default => '0.0.0.0:55755'
27
+ opt :'udp-responder', 'Bind udp api', :short => '-u', :default => '127.0.0.1:55955'
28
+ opt :'test', 'Test plugin file', :type => :string
29
+ end
30
+
31
+ malevich.cmd.deep_merge!(cmd)
32
+ #malevich.cmd[:pwd] = Dir.pwd
33
+ malevich.log_level = cmd[:'log-level']
34
+ malevich.test_plugin(cmd[:'test'])
35
+ malevich.load_plugins(cmd[:'plugins'], cmd[:'config'])
36
+ malevich.load_responders
37
+ malevich.monitor.run!
38
+
data/example/cpu.rb ADDED
@@ -0,0 +1,31 @@
1
+ always_start true
2
+ interval 60
3
+ warning 70
4
+ critical 85
5
+
6
+ settings :per_process, false
7
+
8
+ collect "linux" do
9
+ @old_cpu ||= {}
10
+ File.read('/proc/stat').each_line do |cpu_line|
11
+ cpu_number = cpu_line.scan(/cpu(\d+|\s)\s+/)
12
+ next if cpu_number.empty?
13
+ cpu_number = cpu_number[0][0] == ' ' ? '_total' : cpu_number[0][0]
14
+ cpu_line[/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
15
+ _, u2, n2, s2, i2 = [$1, $2, $3, $4, $5].map { |e| e.to_i }
16
+ unless @old_cpu[cpu_number].nil?
17
+ u1, n1, s1, i1 = @old_cpu[cpu_number]
18
+ used = (u2+n2+s2) - (u1+n1+s1)
19
+ total = used + i2-i1
20
+ fraction = used.to_f / total
21
+ end
22
+ @old_cpu[cpu_number] = [u2, n2, s2, i2]
23
+ service = "cpu usage cpu#{cpu_number}"
24
+ description = "Cpu#{cpu_number} usage"
25
+ if cpu_number == '_total'
26
+ event(:service => service, :metric => fraction, :description => description)
27
+ else
28
+ event(:service => service, :metric => fraction, :description => description, :state => 'ok')
29
+ end
30
+ end
31
+ end
data/example/disk.rb ADDED
@@ -0,0 +1,59 @@
1
+ require 'sys/filesystem'
2
+
3
+ always_start true
4
+ interval 60
5
+
6
+ warning 70
7
+ critical 85
8
+
9
+ settings :not_monit_fs_4_size, %w(sysfs nfs devpts squashfs proc devtmpfs)
10
+ settings :monit_fs_4_fstab, %w(ext2 ext3 ext4 xfs tmpfs)
11
+ settings :not_monit_device_4_fstab, %w(none)
12
+ settings :not_monit_point_4_fstab, %w(/lib/init/rw /dev/shm /dev)
13
+ settings :check_fstab, true
14
+
15
+ collect "linux" do
16
+
17
+ def get_monit_points_for_size
18
+ monit_points = []
19
+ File.open('/proc/mounts', 'r') do |file|
20
+ while line = file.gets
21
+ mtab = line.split(/\s+/)
22
+ monit_points << mtab[1] unless settings.not_monit_fs_4_size.include? mtab[2]
23
+ end
24
+ end
25
+ monit_points
26
+ end
27
+
28
+ def get_monit_points_for_fstab
29
+ monit_points = []
30
+ File.open('/proc/mounts', 'r') do |file|
31
+ while line = file.gets
32
+ mtab = line.split(/\s+/)
33
+ if settings.monit_fs_4_fstab.include?(mtab[2]) &&
34
+ !settings.not_monit_point_4_fstab.include?(mtab[1]) &&
35
+ !settings.not_monit_device_4_fstab.include?(mtab[0])
36
+ monit_points << mtab[1]
37
+ end
38
+ end
39
+ end
40
+ monit_points
41
+ end
42
+
43
+ get_monit_points_for_size.each do |point|
44
+ point_stat = Sys::Filesystem.stat(point)
45
+ human_point = point == '/' ? '/root' : point
46
+ human_point = human_point.gsub(/^\//, '').gsub(/\//, '_')
47
+ event(:service => "disk #{human_point} % block", :description => "Disk usage #{point}, %", :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100) unless point_stat.blocks == 0
48
+ event(:service => "disk #{human_point} % inode", :description => "Disk usage #{point}, inodes %", :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100) unless point_stat.files == 0
49
+ event(:service => "disk #{human_point} abs free", :description => "Disk free #{point}, B", :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
50
+ event(:service => "disk #{human_point} abs total", :description => "Disk space #{point}, B", :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
51
+ end
52
+
53
+ fstab = File.read('/etc/fstab').split("\n").delete_if { |x| x.strip.match(/^#/) }
54
+ fstab = fstab.join("\n")
55
+ get_monit_points_for_fstab.each do |point|
56
+ event(:service => "disk #{point} fstab entry", :description => "Mount point #{point} not matched in /etc/fstab", :state => 'critical') unless fstab.match(/#{point}(\s|\/\s)/)
57
+ end if settings.check_fstab
58
+
59
+ end
data/example/disk_stat.rb ADDED
@@ -0,0 +1,28 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ settings :words, %w( 'reads reqs', 'reads merged', 'reads sector', 'reads time',
5
+ 'writes reqs', 'writes merged', 'writes sector', 'writes time',
6
+ 'io reqs', 'io time', 'io weighted' )
7
+
8
+ settings :filter, %w('reads reqs', 'writes reqs')
9
+
10
+ run_if "linux" do
11
+ File.exists? '/proc/diskstats'
12
+ end
13
+
14
+ collect "linux" do
15
+ f = File.read('/proc/diskstats')
16
+ f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |_, line|
17
+ if line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/
18
+ dev = $1
19
+ values = $2.split(/\s+/).map { |str| str.to_i }
20
+ next if !!(dev.match /\d+$/ || !(dev.match =~ /^xvd/))
21
+ settings.filter.each do |filter|
22
+ event(:service => "diskstat #{dev} #{filter}", :metric => values[settings.words.index(filter)].to_f/interval, :diff => true)
23
+ end
24
+ iops = values[settings.words.index('reads reqs')].to_i + values[settings.words.index('writes reqs')].to_i
25
+ event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
26
+ end
27
+ end
28
+ end
data/example/dns_check.rb ADDED
@@ -0,0 +1,7 @@
1
+ interval 60
2
+
3
+ collect "linux", "mac_os_x" do
4
+ event(
5
+ :state => Resolv::DNS.new.getresources(ohai[:fqdn], Resolv::DNS::Resource::IN::A).count == 1,
6
+ :description => "Check resolv self FQDN")
7
+ end
data/example/exim.rb ADDED
@@ -0,0 +1,15 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ warning 5
5
+ critical 20
6
+ settings :exim, '/usr/sbin/exim'
7
+
8
+ run_if do
9
+ File.exists? settings.exim
10
+ end
11
+
12
+ collect do
13
+ event(:service => 'exim', :metric => shell(settings.exim, ' -bpc').to_i,
14
+ :description => 'Exim: count frozen mails')
15
+ end
data/example/find_files.rb ADDED
@@ -0,0 +1,21 @@
1
+ interval 60
2
+
3
+ warning 5
4
+
5
+ settings :file_mask, '.*'
6
+ settings :dir, '/tmp/dir'
7
+ settings :age, 24 * 60 * 60
8
+
9
+ collect do
10
+ if File.directory?(settings.dir)
11
+ count_files = 0
12
+ file_mask = Regexp.new(settings.file_mask)
13
+ Find.find(settings.dir).each do |file|
14
+ next unless File.file? file
15
+ next unless file_mask.match file
16
+ next unless Time.now.to_i - settings.age > File.new(file).mtime.to_i
17
+ count_files += 1
18
+ end
19
+ event(:service => "find files #{settings.dir}", :metric => count_files, :description => "Count files in #{settings.dir}")
20
+ end
21
+ end
data/example/http.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+ # Probes settings.url with curl and reports the HTTP status code it prints.
3
+ settings :http_code, 200
4
+ settings :http_method, 'GET'
5
+ settings :connect_timeout, 5
6
+ settings :retry, 0
7
+ settings :retry_delay, 0
8
+ settings :max_time, 10
9
+ settings :insecure, false
10
+ settings :url, 'http://127.0.0.1:80'
11
+ settings :service, 'http check'
12
+
13
+ collect "linux", "mac_os_x" do
14
+ # Build the curl command line once and reuse it on later runs.
15
+ @cmd ||= begin
16
+ "curl -X#{settings.http_method} -s --connect-timeout #{settings.connect_timeout}" +
17
+ " #{'--insecure' if settings.insecure} " +
18
+ " -w '%{http_code}\\n' --retry #{settings.retry} --retry-delay #{settings.retry_delay}" +
19
+ " --max-time #{settings.max_time} --fail #{settings.url} -o /dev/null"
20
+ end
21
+ # The integer read from curl's stdout is the metric; state is true only when it equals settings.http_code.
22
+ out = shell_out(@cmd).stdout.to_i
23
+ event(:service => settings.service, :metric => out, :description => "http code: #{out}", :state => out == settings.http_code)
24
+
25
+ end
data/example/iptables.rb ADDED
@@ -0,0 +1,27 @@
1
+ interval 60
2
+ settings :rule_file, '/etc/network/iptables'
3
+ always_start true
4
+
5
+ run_if "linux" do
6
+ File.exists? settings.rule_file
7
+ end
8
+
9
+ collect "linux" do
10
+
11
+ def delete_counters(str)
12
+ str.gsub(/\[\d+\:\d+\]/, '').strip
13
+ end
14
+
15
+ current_rules = shell_out!('iptables-save').stdout.split("\n").map do |x|
16
+ x[0] == '#' ? nil : delete_counters(x)
17
+ end.compact.join("\n")
18
+ saved_rules = File.read(settings.rules_file).split("\n").map do |x|
19
+ x[0] == '#' ? nil : delete_counters(x) # delete counters and comments
20
+ end.compact.join("\n")
21
+
22
+ event(
23
+ :service => "iptables #{settings.rule_file}",
24
+ :state => current_rules == saved_rules,
25
+ :description => "iptables rules different between file: #{settings.rule_file} and iptables-save"
26
+ )
27
+ end
data/example/la.rb ADDED
@@ -0,0 +1,21 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ state :warning, 4
5
+ state :critical, 50
6
+
7
+ collect "mac_os_x" do
8
+ event(
9
+ :metric => shell("sysctl -n vm.loadavg | awk '{print $2}'").to_f,
10
+ :description => 'LA averaged over 1 minute',
11
+ :service => 'la la_1'
12
+ )
13
+ end
14
+
15
+ collect "linux" do
16
+ event(
17
+ :metric => File.read('/proc/loadavg').scan(/[\d\.]+/)[0].to_f,
18
+ :description => 'LA averaged over 1 minute',
19
+ :service => 'la la_1'
20
+ )
21
+ end
data/example/mdadm.rb ADDED
@@ -0,0 +1,44 @@
1
+ interval 60
2
+ critical 1
3
+
4
+ run_if "linux" do
5
+ File.exists? '/proc/mdstat'
6
+ end
7
+
8
+ collect "linux" do
9
+
10
+ def rm_bracket(text)
11
+ text.gsub('[', '').gsub(']', '')
12
+ end
13
+
14
+ def status_well?(text)
15
+ text.gsub(/U/, '').empty?
16
+ end
17
+
18
+ def get_failed_parts (device)
19
+ begin
20
+ failed_parts = []
21
+ Dir["/sys/block/#{device}/md/dev-*"].each do |p|
22
+ state = File.read("#{p}/state").strip
23
+ next unless state != 'in_sync'
24
+ p.gsub!(/.+\/dev-/, '')
25
+ failed_parts << "#{p} (#{state})"
26
+ end
27
+ failed_parts.join(', ')
28
+ rescue
29
+ nil
30
+ end
31
+ end
32
+
33
+
34
+ mdstat = File.read('/proc/mdstat').split("\n")
35
+ mdstat.each_with_index do |line, index|
36
+ next unless line.include?('blocks')
37
+ device = file[index-1].split(':')[0].strip
38
+ mdstatus = rm_bracket(line.split(' ').last) # UUU
39
+ next if status_well?(mdstatus) # пропускаем все збс
40
+ next if mdstatus == settings.states.send(device).to_s # disabled in config
41
+ event(:service => "mdadm #{device}", :metric => 1, :description => "mdadm failed device #{device}: #{get_failed_parts(device)}")
42
+ end
43
+
44
+ end
data/example/megacli.rb ADDED
@@ -0,0 +1,13 @@
1
+ interval 180
2
+ always_start true
3
+
4
+ critical 1
5
+ settings :cmd, 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''
6
+
7
+ run_if "linux" do
8
+ File.exists? '/usr/bin/megacli'
9
+ end
10
+
11
+ collect "linux" do
12
+ event(:metric => shell!(settings.cmd).to_i, :description => 'MegaCli status')
13
+ end
data/example/memory.rb ADDED
@@ -0,0 +1,31 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ critical 70
5
+ warning 85
6
+
7
+ collect "linux" do
8
+ m = File.read('/proc/meminfo').split(/\n/).inject({}) do |info, line|
9
+ x = line.split(/:?\s+/)
10
+ info[x[0]] = x[1].to_i
11
+ info
12
+ end
13
+
14
+ free = m['MemFree'].to_i * 1024
15
+ cached = m['Cached'].to_i * 1024
16
+ buffers = m['Buffers'].to_i * 1024
17
+ total = m['MemTotal'].to_i * 1024
18
+ used = total - free
19
+ free_bc = free + buffers + cached
20
+ fraction = 1 - (free_bc.to_f / total)
21
+ swap_fraction = m['SwapTotal'] == 0 ? 0 : 1 - m['SwapFree'].to_f/m['SwapTotal']
22
+
23
+ event(:service => 'memory % free', :description => 'Memory usage, %', :metric => fraction.round(2) * 100)
24
+ event(:service => 'memory % swap', :description => 'Swap usage, %', :metric => swap_fraction.round(2) * 100)
25
+ event(:service => 'memory abs free', :description => 'Memory free (kB)', :metric => free, :state => 'ok')
26
+ event(:service => 'memory abs total', :description => 'Memory total (kB)', :metric => total, :state => 'ok')
27
+ event(:service => 'memory abs cached', :description => 'Memory usage, cached (kB)', :metric => cached, :state => 'ok')
28
+ event(:service => 'memory abs buffers', :description => 'Memory usage, buffers (kB)', :metric => buffers, :state => 'ok')
29
+ event(:service => 'memory abs used', :description => 'Memory usage, used (kB)', :metric => used, :state => 'ok')
30
+ event(:service => 'memory abs free_bc', :description => 'Memory usage with cache and buffers (kB)', :metric => free_bc, :state => 'ok')
31
+ end