malevich 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +20 -0
  3. data/Gemfile +2 -0
  4. data/LICENSE.txt +22 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/bin/malevich +38 -0
  8. data/example/cpu.rb +31 -0
  9. data/example/disk.rb +59 -0
  10. data/example/disk_stat.rb +28 -0
  11. data/example/dns_check.rb +7 -0
  12. data/example/exim.rb +15 -0
  13. data/example/find_files.rb +21 -0
  14. data/example/http.rb +25 -0
  15. data/example/iptables.rb +27 -0
  16. data/example/la.rb +21 -0
  17. data/example/mdadm.rb +44 -0
  18. data/example/megacli.rb +13 -0
  19. data/example/memory.rb +31 -0
  20. data/example/net.rb +25 -0
  21. data/example/net_stat.rb +25 -0
  22. data/example/nginx.rb +22 -0
  23. data/example/ntp.rb +15 -0
  24. data/example/pgsql.rb +71 -0
  25. data/example/runit.rb +48 -0
  26. data/example/status_file.rb +17 -0
  27. data/example/tw_cli.rb +17 -0
  28. data/lib/malevich.rb +59 -0
  29. data/lib/malevich/dsl.rb +78 -0
  30. data/lib/malevich/init.rb +17 -0
  31. data/lib/malevich/loader.rb +80 -0
  32. data/lib/malevich/monitor.rb +40 -0
  33. data/lib/malevich/plugin.rb +70 -0
  34. data/lib/malevich/plugin/error.rb +12 -0
  35. data/lib/malevich/plugin/event.rb +68 -0
  36. data/lib/malevich/plugin/http.rb +25 -0
  37. data/lib/malevich/plugin/init.rb +5 -0
  38. data/lib/malevich/plugin/shell_out.rb +28 -0
  39. data/lib/malevich/plugin/time.rb +9 -0
  40. data/lib/malevich/responders/error.rb +30 -0
  41. data/lib/malevich/responders/http.rb +46 -0
  42. data/lib/malevich/responders/init.rb +4 -0
  43. data/lib/malevich/responders/riemann.rb +59 -0
  44. data/lib/malevich/responders/udp.rb +59 -0
  45. data/lib/malevich/version.rb +3 -0
  46. data/malevich.gemspec +32 -0
  47. data/test/plugin_helpers_spec.rb +6 -0
  48. metadata +219 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f1b68e4d209f800c0c2a08ce6166a01fe4c3f9dd
4
+ data.tar.gz: 0a28fab3e6992d1a6f29b2cc88c4bb3d020fd566
5
+ SHA512:
6
+ metadata.gz: d724ea316def8f81ea864a15a240dce95eb0adc2bcf3498317d8eab48b0cdafc7b0ac0da99500a0754875e37e4e07c452924ef8296bb05f155812fc09c7eb696
7
+ data.tar.gz: 04f3e20dcc6334742a56494c9618c331d0c2b91337dece4ddf29af2d0981d3987888abb63c4146c3c53d67660f7f8e03736691350fbe5894e765f8f53cd1a838
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ .idea
19
+ vendor
20
+ .bundle
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org' # resolve gems from the public RubyGems index
2
+ gemspec # dependencies are declared in the gem's .gemspec rather than listed here
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Vasiliev Dmitry
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,29 @@
1
+ # Malevich
2
+
3
+ TODO: Write a gem description
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ gem 'malevich'
10
+
11
+ And then execute:
12
+
13
+ $ bundle
14
+
15
+ Or install it yourself as:
16
+
17
+ $ gem install malevich
18
+
19
+ ## Usage
20
+
21
+ TODO: Write usage instructions here
22
+
23
+ ## Contributing
24
+
25
+ 1. Fork it
26
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
27
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
28
+ 4. Push to the branch (`git push origin my-new-feature`)
29
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks' # pulls in the gem packaging Rake tasks provided by Bundler
data/bin/malevich ADDED
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env ruby
2
+ # coding: utf-8
3
+
4
+ lib = File.expand_path('../lib', __FILE__)
5
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
6
+
7
+ require 'malevich'
8
+ require 'trollop'
9
+
10
+ cmd = Trollop::options do
11
+ version "Malevich #{Malevich::VERSION}"
12
+ banner <<-EOS
13
+ Malevich is tool for monitoring.
14
+
15
+ Usage:
16
+ malevich [options]
17
+ where [options] are:
18
+ EOS
19
+
20
+ opt :config, 'Config file', :default => '/etc/malevich/config.yml'
21
+ opt :'log-level', 'Log level', :default => 'DEBUG'
22
+ opt :'riemann-host', 'Riemann server', :short => '-r', :default => '127.0.0.1:5555', :multi => true
23
+ opt :'riemann-tcp', 'Use tcp to connect riemann', :default => true
24
+ opt :tags, 'Set tags', :short => '-t', :multi => true
25
+ opt :plugins, 'Directory for plugins', :short => '-d', :default => '/usr/share/malevich/plugins', :multi => true
26
+ opt :'http-responder', 'Bind http api', :short => '-h', :default => '0.0.0.0:55755'
27
+ opt :'udp-responder', 'Bind udp api', :short => '-u', :default => '127.0.0.1:55955'
28
+ opt :'test', 'Test plugin file', :type => :string
29
+ end
30
+
31
+ malevich.cmd.deep_merge!(cmd)
32
+ #malevich.cmd[:pwd] = Dir.pwd
33
+ malevich.log_level = cmd[:'log-level']
34
+ malevich.test_plugin(cmd[:'test'])
35
+ malevich.load_plugins(cmd[:'plugins'], cmd[:'config'])
36
+ malevich.load_responders
37
+ malevich.monitor.run!
38
+
data/example/cpu.rb ADDED
@@ -0,0 +1,31 @@
1
+ always_start true
2
+ interval 60
3
+ warning 70
4
+ critical 85
5
+
6
+ settings :per_process, false
7
+
8
+ collect "linux" do
9
+ @old_cpu ||= {}
10
+ File.read('/proc/stat').each_line do |cpu_line|
11
+ cpu_number = cpu_line.scan(/cpu(\d+|\s)\s+/)
12
+ next if cpu_number.empty?
13
+ cpu_number = cpu_number[0][0] == ' ' ? '_total' : cpu_number[0][0]
14
+ cpu_line[/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
15
+ _, u2, n2, s2, i2 = [$1, $2, $3, $4, $5].map { |e| e.to_i }
16
+ unless @old_cpu[cpu_number].nil?
17
+ u1, n1, s1, i1 = @old_cpu[cpu_number]
18
+ used = (u2+n2+s2) - (u1+n1+s1)
19
+ total = used + i2-i1
20
+ fraction = used.to_f / total
21
+ end
22
+ @old_cpu[cpu_number] = [u2, n2, s2, i2]
23
+ service = "cpu usage cpu#{cpu_number}"
24
+ description = "Cpu#{cpu_number} usage"
25
+ if cpu_number == '_total'
26
+ event(:service => service, :metric => fraction, :description => description)
27
+ else
28
+ event(:service => service, :metric => fraction, :description => description, :state => 'ok')
29
+ end
30
+ end
31
+ end
data/example/disk.rb ADDED
@@ -0,0 +1,59 @@
1
+ require 'sys/filesystem'
2
+
3
+ always_start true
4
+ interval 60
5
+
6
+ warning 70
7
+ critical 85
8
+
9
+ settings :not_monit_fs_4_size, %w(sysfs nfs devpts squashfs proc devtmpfs)
10
+ settings :monit_fs_4_fstab, %w(ext2 ext3 ext4 xfs tmpfs)
11
+ settings :not_monit_device_4_fstab, %w(none)
12
+ settings :not_monit_point_4_fstab, %w(/lib/init/rw /dev/shm /dev)
13
+ settings :check_fstab, true
14
+
15
+ collect "linux" do
16
+
17
+ def get_monit_points_for_size
18
+ monit_points = []
19
+ File.open('/proc/mounts', 'r') do |file|
20
+ while line = file.gets
21
+ mtab = line.split(/\s+/)
22
+ monit_points << mtab[1] unless settings.not_monit_fs_4_size.include? mtab[2]
23
+ end
24
+ end
25
+ monit_points
26
+ end
27
+
28
+ def get_monit_points_for_fstab
29
+ monit_points = []
30
+ File.open('/proc/mounts', 'r') do |file|
31
+ while line = file.gets
32
+ mtab = line.split(/\s+/)
33
+ if settings.monit_fs_4_fstab.include?(mtab[2]) &&
34
+ !settings.not_monit_point_4_fstab.include?(mtab[1]) &&
35
+ !settings.not_monit_device_4_fstab.include?(mtab[0])
36
+ monit_points << mtab[1]
37
+ end
38
+ end
39
+ end
40
+ monit_points
41
+ end
42
+
43
+ get_monit_points_for_size.each do |point|
44
+ point_stat = Sys::Filesystem.stat(point)
45
+ human_point = point == '/' ? '/root' : point
46
+ human_point = human_point.gsub(/^\//, '').gsub(/\//, '_')
47
+ event(:service => "disk #{human_point} % block", :description => "Disk usage #{point}, %", :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100) unless point_stat.blocks == 0
48
+ event(:service => "disk #{human_point} % inode", :description => "Disk usage #{point}, inodes %", :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100) unless point_stat.files == 0
49
+ event(:service => "disk #{human_point} abs free", :description => "Disk free #{point}, B", :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
50
+ event(:service => "disk #{human_point} abs total", :description => "Disk space #{point}, B", :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
51
+ end
52
+
53
+ fstab = File.read('/etc/fstab').split("\n").delete_if { |x| x.strip.match(/^#/) }
54
+ fstab = fstab.join("\n")
55
+ get_monit_points_for_fstab.each do |point|
56
+ event(:service => "disk #{point} fstab entry", :description => "Mount point #{point} not matched in /etc/fstab", :state => 'critical') unless fstab.match(/#{point}(\s|\/\s)/)
57
+ end if settings.check_fstab
58
+
59
+ end
data/example/disk_stat.rb ADDED
@@ -0,0 +1,28 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ settings :words, %w( 'reads reqs', 'reads merged', 'reads sector', 'reads time',
5
+ 'writes reqs', 'writes merged', 'writes sector', 'writes time',
6
+ 'io reqs', 'io time', 'io weighted' )
7
+
8
+ settings :filter, %w('reads reqs', 'writes reqs')
9
+
10
+ run_if "linux" do
11
+ File.exists? '/proc/diskstats'
12
+ end
13
+
14
+ collect "linux" do
15
+ f = File.read('/proc/diskstats')
16
+ f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |_, line|
17
+ if line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/
18
+ dev = $1
19
+ values = $2.split(/\s+/).map { |str| str.to_i }
20
+ next if !!(dev.match /\d+$/ || !(dev.match =~ /^xvd/))
21
+ settings.filter.each do |filter|
22
+ event(:service => "diskstat #{dev} #{filter}", :metric => values[settings.words.index(filter)].to_f/interval, :diff => true)
23
+ end
24
+ iops = values[settings.words.index('reads reqs')].to_i + values[settings.words.index('writes reqs')].to_i
25
+ event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
26
+ end
27
+ end
28
+ end
data/example/dns_check.rb ADDED
@@ -0,0 +1,7 @@
1
+ interval 60
2
+ # Checks that this host's own FQDN resolves through DNS.
3
+ collect "linux", "mac_os_x" do
4
+ event(
5
+ :state => Resolv::DNS.new.getresources(ohai[:fqdn], Resolv::DNS::Resource::IN::A).count == 1, # true only when exactly one A record comes back
6
+ :description => "Check resolv self FQDN")
7
+ end
data/example/exim.rb ADDED
@@ -0,0 +1,15 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ warning 5
5
+ critical 20
6
+ settings :exim, '/usr/sbin/exim'
7
+
8
+ run_if do
9
+ File.exists? settings.exim
10
+ end
11
+
12
+ collect do
13
+ event(:service => 'exim', :metric => shell(settings.exim, ' -bpc').to_i,
14
+ :description => 'Exim: count frozen mails')
15
+ end
data/example/find_files.rb ADDED
@@ -0,0 +1,21 @@
1
+ interval 60
2
+
3
+ warning 5
4
+
5
+ settings :file_mask, '.*'
6
+ settings :dir, '/tmp/dir'
7
+ settings :age, 24 * 60 * 60
8
+
9
+ collect do
10
+ if File.directory?(settings.dir)
11
+ count_files = 0
12
+ file_mask = Regexp.new(settings.file_mask)
13
+ Find.find(settings.dir).each do |file|
14
+ next unless File.file? file
15
+ next unless file_mask.match file
16
+ next unless Time.now.to_i - settings.age > File.new(file).mtime.to_i
17
+ count_files += 1
18
+ end
19
+ event(:service => "find files #{settings.dir}", :metric => count_files, :description => "Count files in #{settings.dir}")
20
+ end
21
+ end
data/example/http.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+
3
+ settings :http_code, 200
4
+ settings :http_method, 'GET'
5
+ settings :connect_timeout, 5
6
+ settings :retry, 0
7
+ settings :retry_delay, 0
8
+ settings :max_time, 10
9
+ settings :insecure, false
10
+ settings :url, 'http://127.0.0.1:80'
11
+ settings :service, 'http check'
12
+
13
+ collect "linux", "mac_os_x" do
14
+
15
+ @cmd ||= begin
16
+ "curl -X#{settings.http_method} -s --connect-timeout #{settings.connect_timeout}" +
17
+ " #{'--insecure' if settings.insecure} " +
18
+ " -w '%{http_code}\\n' --retry #{settings.retry} --retry-delay #{settings.retry_delay}" +
19
+ " --max-time #{settings.max_time} --fail #{settings.url} -o /dev/null"
20
+ end
21
+
22
+ out = shell_out(@cmd).stdout.to_i
23
+ event(:service => settings.service, :metric => out, :description => "http code: #{out}", :state => out == settings.http_code)
24
+
25
+ end
data/example/iptables.rb ADDED
@@ -0,0 +1,27 @@
1
+ interval 60
2
+ settings :rule_file, '/etc/network/iptables'
3
+ always_start true
4
+
5
+ run_if "linux" do
6
+ File.exists? settings.rule_file
7
+ end
8
+
9
+ collect "linux" do
10
+
11
+ def delete_counters(str)
12
+ str.gsub(/\[\d+\:\d+\]/, '').strip
13
+ end
14
+
15
+ current_rules = shell_out!('iptables-save').stdout.split("\n").map do |x|
16
+ x[0] == '#' ? nil : delete_counters(x)
17
+ end.compact.join("\n")
18
+ saved_rules = File.read(settings.rules_file).split("\n").map do |x|
19
+ x[0] == '#' ? nil : delete_counters(x) # delete counters and comments
20
+ end.compact.join("\n")
21
+
22
+ event(
23
+ :service => "iptables #{settings.rule_file}",
24
+ :state => current_rules == saved_rules,
25
+ :description => "iptables rules different between file: #{settings.rule_file} and iptables-save"
26
+ )
27
+ end
data/example/la.rb ADDED
@@ -0,0 +1,21 @@
1
+ always_start true
2
+ interval 60
3
+ # Reports the first load-average figure under the service name 'la la_1', with one collector per platform.
4
+ state :warning, 4 # NOTE(review): the other example plugins declare thresholds with `warning N` / `critical N` -- confirm this form
5
+ state :critical, 50
6
+
7
+ collect "mac_os_x" do
8
+ event(
9
+ :metric => shell("sysctl -n vm.loadavg | awk '{print $2}'").to_f, # second whitespace-separated field of the sysctl output
10
+ :description => 'LA averaged over 1 minute',
11
+ :service => 'la la_1'
12
+ )
13
+ end
14
+
15
+ collect "linux" do
16
+ event(
17
+ :metric => File.read('/proc/loadavg').scan(/[\d\.]+/)[0].to_f, # first number in /proc/loadavg
18
+ :description => 'LA averaged over 1 minute',
19
+ :service => 'la la_1'
20
+ )
21
+ end
data/example/mdadm.rb ADDED
@@ -0,0 +1,44 @@
1
+ interval 60
2
+ critical 1
3
+
4
+ run_if "linux" do
5
+ File.exists? '/proc/mdstat'
6
+ end
7
+
8
+ collect "linux" do
9
+
10
+ def rm_bracket(text)
11
+ text.gsub('[', '').gsub(']', '')
12
+ end
13
+
14
+ def status_well?(text)
15
+ text.gsub(/U/, '').empty?
16
+ end
17
+
18
+ def get_failed_parts (device)
19
+ begin
20
+ failed_parts = []
21
+ Dir["/sys/block/#{device}/md/dev-*"].each do |p|
22
+ state = File.read("#{p}/state").strip
23
+ next unless state != 'in_sync'
24
+ p.gsub!(/.+\/dev-/, '')
25
+ failed_parts << "#{p} (#{state})"
26
+ end
27
+ failed_parts.join(', ')
28
+ rescue
29
+ nil
30
+ end
31
+ end
32
+
33
+
34
+ mdstat = File.read('/proc/mdstat').split("\n")
35
+ mdstat.each_with_index do |line, index|
36
+ next unless line.include?('blocks')
37
+ device = file[index-1].split(':')[0].strip
38
+ mdstatus = rm_bracket(line.split(' ').last) # UUU
39
+ next if status_well?(mdstatus) # пропускаем все збс
40
+ next if mdstatus == settings.states.send(device).to_s # disabled in config
41
+ event(:service => "mdadm #{device}", :metric => 1, :description => "mdadm failed device #{device}: #{get_failed_parts(device)}")
42
+ end
43
+
44
+ end
data/example/megacli.rb ADDED
@@ -0,0 +1,13 @@
1
+ interval 180
2
+ always_start true
3
+
4
+ critical 1
5
+ settings :cmd, 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''
6
+
7
+ run_if "linux" do
8
+ File.exists? '/usr/bin/megacli'
9
+ end
10
+
11
+ collect "linux" do
12
+ event(:metric => shell!(settings.cmd).to_i, :description => 'MegaCli status')
13
+ end
data/example/memory.rb ADDED
@@ -0,0 +1,31 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ critical 70
5
+ warning 85
6
+
7
+ collect "linux" do
8
+ m = File.read('/proc/meminfo').split(/\n/).inject({}) do |info, line|
9
+ x = line.split(/:?\s+/)
10
+ info[x[0]] = x[1].to_i
11
+ info
12
+ end
13
+
14
+ free = m['MemFree'].to_i * 1024
15
+ cached = m['Cached'].to_i * 1024
16
+ buffers = m['Buffers'].to_i * 1024
17
+ total = m['MemTotal'].to_i * 1024
18
+ used = total - free
19
+ free_bc = free + buffers + cached
20
+ fraction = 1 - (free_bc.to_f / total)
21
+ swap_fraction = m['SwapTotal'] == 0 ? 0 : 1 - m['SwapFree'].to_f/m['SwapTotal']
22
+
23
+ event(:service => 'memory % free', :description => 'Memory usage, %', :metric => fraction.round(2) * 100)
24
+ event(:service => 'memory % swap', :description => 'Swap usage, %', :metric => swap_fraction.round(2) * 100)
25
+ event(:service => 'memory abs free', :description => 'Memory free (kB)', :metric => free, :state => 'ok')
26
+ event(:service => 'memory abs total', :description => 'Memory total (kB)', :metric => total, :state => 'ok')
27
+ event(:service => 'memory abs cached', :description => 'Memory usage, cached (kB)', :metric => cached, :state => 'ok')
28
+ event(:service => 'memory abs buffers', :description => 'Memory usage, buffers (kB)', :metric => buffers, :state => 'ok')
29
+ event(:service => 'memory abs used', :description => 'Memory usage, used (kB)', :metric => used, :state => 'ok')
30
+ event(:service => 'memory abs free_bc', :description => 'Memory usage with cache and buffers (kB)', :metric => free_bc, :state => 'ok')
31
+ end