kurchatov 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +111 -0
  7. data/Rakefile +1 -0
  8. data/Vagrantfile +16 -0
  9. data/bin/kurchatov +6 -0
  10. data/examples/check_file_contains.rb +14 -0
  11. data/examples/count_proc.rb +14 -0
  12. data/examples/cpu.rb +29 -0
  13. data/examples/disk.rb +56 -0
  14. data/examples/disk_stat.rb +28 -0
  15. data/examples/dns_check.rb +5 -0
  16. data/examples/exim.rb +12 -0
  17. data/examples/file_age.rb +11 -0
  18. data/examples/find_files.rb +21 -0
  19. data/examples/http.rb +25 -0
  20. data/examples/iptables.rb +27 -0
  21. data/examples/la.rb +10 -0
  22. data/examples/mdadm.rb +43 -0
  23. data/examples/megacli.rb +12 -0
  24. data/examples/memory.rb +28 -0
  25. data/examples/net.rb +25 -0
  26. data/examples/net_stat.rb +25 -0
  27. data/examples/nfs.rb +9 -0
  28. data/examples/nginx.rb +22 -0
  29. data/examples/nginx_500.rb +48 -0
  30. data/examples/ntp.rb +15 -0
  31. data/examples/openfiles.rb +6 -0
  32. data/examples/pgsql.rb +67 -0
  33. data/examples/ping_icmp.rb +12 -0
  34. data/examples/ping_tcp.rb +14 -0
  35. data/examples/proc_mem.rb +24 -0
  36. data/examples/process_usage.rb +15 -0
  37. data/examples/rabbitmq.rb +16 -0
  38. data/examples/runit.rb +47 -0
  39. data/examples/sidekiq.rb +21 -0
  40. data/examples/sidekiq_queue_state.rb +9 -0
  41. data/examples/status_file.rb +14 -0
  42. data/examples/tw_cli.rb +17 -0
  43. data/examples/uptime.rb +14 -0
  44. data/kurchatov.gemspec +28 -0
  45. data/lib/kurchatov/application.rb +154 -0
  46. data/lib/kurchatov/config.rb +14 -0
  47. data/lib/kurchatov/log.rb +9 -0
  48. data/lib/kurchatov/mashie.rb +152 -0
  49. data/lib/kurchatov/mixin/command.rb +31 -0
  50. data/lib/kurchatov/mixin/event.rb +63 -0
  51. data/lib/kurchatov/mixin/http.rb +21 -0
  52. data/lib/kurchatov/mixin/init.rb +6 -0
  53. data/lib/kurchatov/mixin/ohai.rb +22 -0
  54. data/lib/kurchatov/mixin/queue.rb +14 -0
  55. data/lib/kurchatov/monitor.rb +62 -0
  56. data/lib/kurchatov/plugin/config.rb +68 -0
  57. data/lib/kurchatov/plugin/dsl.rb +81 -0
  58. data/lib/kurchatov/plugin/riemann.rb +54 -0
  59. data/lib/kurchatov/plugin.rb +15 -0
  60. data/lib/kurchatov/queue.rb +28 -0
  61. data/lib/kurchatov/responders/http.rb +36 -0
  62. data/lib/kurchatov/responders/init.rb +3 -0
  63. data/lib/kurchatov/responders/riemann.rb +46 -0
  64. data/lib/kurchatov/responders/udp.rb +32 -0
  65. data/lib/kurchatov/riemann/client.rb +49 -0
  66. data/lib/kurchatov/riemann/event.rb +42 -0
  67. data/lib/kurchatov/riemann/message.rb +18 -0
  68. data/lib/kurchatov/version.rb +3 -0
  69. data/lib/kurchatov.rb +3 -0
  70. data/lib/ohai/plugins/darwin/hostname.rb +22 -0
  71. data/lib/ohai/plugins/darwin/platform.rb +38 -0
  72. data/lib/ohai/plugins/hostname.rb +27 -0
  73. data/lib/ohai/plugins/linux/hostname.rb +26 -0
  74. data/lib/ohai/plugins/linux/platform.rb +113 -0
  75. data/lib/ohai/plugins/linux/virtualization.rb +125 -0
  76. data/lib/ohai/plugins/os.rb +53 -0
  77. data/lib/ohai/plugins/platform.rb +28 -0
  78. data/lib/ohai/plugins/virtualization.rb +86 -0
  79. data/lib/ohai/plugins/windows/hostname.rb +33 -0
  80. data/lib/ohai/plugins/windows/platform.rb +27 -0
  81. data/tests/run.sh +55 -0
  82. metadata +209 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 37b3d24113a19c945cc3871050111cce3ebc8d96
4
+ data.tar.gz: 6a1ac341e371c41671f14081e2cee4f61ae2ec3b
5
+ SHA512:
6
+ metadata.gz: e106b36d33622926e71b9bc9ec1c757cef4c71221c75582001a84faeacccd7c0acb80754935b3f20c8234fed66a39ef90a2d799524599d229941a84bdd0ea918
7
+ data.tar.gz: 04d7c0362e7e7836ff88b4da38a12780615c1ff1e99c932ede53d603dd1ac1d2310d88642d802a0b5fd67ce042ce0ad8f8cb2276de26abd2d76b38860f739c3c
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .vendor
7
+ .vagrant
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ tmp
18
+ dpkg
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language:
2
+ - 'ruby'
3
+ rvm:
4
+ - '1.9.3'
5
+ script: "./tests/run.sh"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kurchatov.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Vasiliev Dmitry
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,111 @@
1
+ [![Build Status](https://travis-ci.org/vadv/kurchatov.png)](https://travis-ci.org/vadv/kurchatov)
2
+
3
+ # Kurchatov
4
+
5
+ Перед вами гем для мониторинга с помощью [riemann](http://riemann.io).
6
+ Я люблю [chef](http://www.getchef.com) и [ohai](http://docs.opscode.com/ohai.html),
7
+ поэтому здесь есть немного первого и немного второго.
8
+
9
+ Юзкейз таков:
10
+ * Kurchatov попадает в среду (окружение, приложения), которую не знает, и изучает её с помощью ohai
11
+ * Решает какие плагины запускать
12
+ * Отсылает сообщения на riemann-хост с присвоенными статусами
13
+
14
+
15
+ ## DSL
16
+
17
+ Решено использовать dsl для написания плагинов, плагин выглядит так:
18
+ ```ruby
19
+ name "человеко читаемое имя" # по дефолту basename файла
20
+ interval 60 # с какой периодичностью будет запускаться плагин
21
+ always_start true # плагину не нужны дополнительные настройки
22
+
23
+ default[:nginx][:file] = "/etc/nginx/nginx.conf"
24
+ default[:nginx][:cmd] = "nginx -t" # дефолтные значение для Mashie: 'plugin'
25
+ default[:nginx][:url] = "http://127.0.0.1:133233/status" # данные значения смержаться со значениями
26
+ # полученными из конфига
27
+
28
+ run_if :os => 'linux' do # по умолчанию разрешено запускать все и везде
29
+ File.exists? plugin.file # plugin - не что иное как проставленые значения из default
30
+ # доступно обращение plugin[:file], plugin["file"], plugin.file
31
+ end
32
+
33
+ collect :web_some_platform => true, :os => 'linux' do # значение полученные через ohai,
34
+ # collect включиться ohai[:web_some_platform] == true и
35
+ # для ohai[:os] == 'linux'
36
+ metric = rest_get(default[:nginx][:url]).split("\n").first.split("Active connections:").last.to_i
37
+ event(
38
+ :service => "nginx active connections", # по дефолту name, если редиректим в graphite
39
+ :metric => metric, # то service будет ключем для url
40
+ :warning => 10,
41
+ :critical => 20,
42
+ :diff => true, # говорим что запоминать предыдущие значения и если разница между новым и старым
43
+ # меньше warning - получим статус 'ok', больше critical - 'critical' и так далее
44
+ # без :diff мы будем считать честные значения
45
+ # для того чтобы посчитать RPS мы просто делим метрику на interval
46
+ :description => "Что-то для человека-монитора" # допустимо сокращения :desc
47
+ )
48
+
49
+ event(
50
+ :service => "nginx test config #{plugin.file}", # сервис должен быть человекочитаемым но уникальным!
51
+ :state => shell_out("#{ohai[:nginx][:cmd]}").exitstatus == 0 # если :state == true стейт "ok", иначе - "critical"
52
+ # shell_out! - сгенерит exception и riemann уйдет сообщение об ошибке
53
+ # в плагине, также доступен просто shell() - он вернет только stdout и
54
+ # действует как shell_out!
55
+ :desc => "Ой, конфиг не валидный, наверно nginx -t его испортил :("
56
+ )
57
+
58
+ end
59
+
60
+ ```
61
+ Если плагин отправил event, это не означает что он попадает на riemann-server:
62
+ * Эвенты группируются и отсылаются асинхронно пачками (все что накопилось за `Kurchatov::Responders::Riemann::FLUSH_INTERVAL` по дефолту 0.5 секунд)
63
+ * При отсутствии метрики событие с `:state == "ok"` во второй и последующие разы отсылаться не будет
64
+
65
+ Больше примеров вы найдете [тут](https://github.com/vadv/kurchatov/tree/master/examples).
66
+
67
+ ## OHAI
68
+
69
+ И в африке ohai. Минимальный пример:
70
+ ```ruby
71
+ provides "postgres"
72
+ postgres Mash.new
73
+ cmd = "psql -U postgres -tqc 'select version()'"
74
+ status, stdout, stderr = run_command(:command => cmd)
75
+ postgres[:version] = stdout.strip
76
+ ```
77
+
78
+ ## Config
79
+
80
+ Это обычный yml-файл с настройками плагинов, eго удобно генерить chef'ом :)
81
+ ```yaml
82
+ plugin name:
83
+ settings name:
84
+ - 'bla-bla'
85
+ ```
86
+
87
+ Есть небольшая магия, для того чтобы использовать плагин как провайдер (например следить за определенными портами):
88
+ ```yaml
89
+ web watcher:
90
+ - url: http://localhost/ # создастся plugin с name == 'web watcher_0'
91
+ status: 302
92
+ - url: https://localhost/login # новый плагин name == 'web watcher_1'
93
+ status: 200
94
+ ua: Mozilla
95
+ robots txt watcher: # новый плагин name == 'robots txt watcher'
96
+ parent: web watcher
97
+ url: https://localhost/robots.txt
98
+ status: 404
99
+ ua: ^Yandex
100
+ ```
101
+
102
+ ## Почему велосипед
103
+
104
+ Удобно писать плагины, использовать 1 процесс, 1 коннект, и проч.
105
+
106
+ Мне не нравится официальная реализация [riemann-client](https://github.com/aphyr/riemann-ruby-client),
107
+ она течет и создает много ненужных *конкретно* для меня полей для протобуфа (но все равно спасибо [aphyr](http://aphyr.com) за
108
+ прекрасный сервер :) ), так что вы тут не найдете search и udp.
109
+
110
+ Упор сделан на потребление памяти (эх, ruby), поэтому все на тредах и на данный момент на 1.9.3 вы можете получить 8Mb RES.
111
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/Vagrantfile ADDED
@@ -0,0 +1,16 @@
1
+ # -*- mode: ruby -*-
2
+ # vi: set ft=ruby :
3
+
4
+ Vagrant.configure("2") do |config|
5
+
6
+ config.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box"
7
+ config.vm.box = "precise64_kurchatov_gem"
8
+ config.ssh.forward_agent = true
9
+
10
+ config.vm.provider :vmware_fusion do |vmware|
11
+ vmware.vm.vmx["memsize"] = "2048"
12
+ vmware.vm.gui = false
13
+ vmware.box_url = "http://files.vagrantup.com/precise64_vmware.box"
14
+ end
15
+
16
+ end
data/bin/kurchatov ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require "kurchatov/application" # load the application code
5
+
6
+ Kurchatov::Aplication.new.run # CLI entry point: instantiate and run. NOTE(review): "Aplication" has a single "p" — confirm it matches the constant defined in kurchatov/application.rb
@@ -0,0 +1,14 @@
1
+ interval 60 # run period of this plugin (presumably seconds — confirm)
2
+
3
+ default[:base_uri] = 'http://localhost/check' # URL whose body is fetched
4
+ default[:expression] = 'ERROR' # substring looked up in the fetched body
5
+ default[:contains] = false # Contains or not expression
6
+ default[:service] = "check_file_contains" # prefix of the emitted service name
7
+
8
+ collect do # fetches the URL and emits a single event
9
+ event(
10
+ :service => "#{plugin.service} #{plugin.base_uri} #{plugin.expression}",
11
+ :description => "#{plugin.base_uri} contains #{plugin.expression}",
12
+ :metric => rest_get(plugin.base_uri).include?(plugin.expression) == plugin.contains # true when "body includes expression" equals the :contains flag. NOTE(review): a boolean is passed as :metric, while other plugins pass booleans as :state — confirm
13
+ )
14
+ end
@@ -0,0 +1,14 @@
1
+ interval 60 # run period of this plugin (presumably seconds — confirm)
2
+ default[:proc] = 'ruby' # pattern matched against each process command line
3
+
4
+ collect :os => 'linux' do # counts processes whose /proc/<pid>/cmdline matches plugin.proc
5
+ count = 0
6
+ Dir['/proc/[0-9]*/cmdline'].each { |p| count += 1 if File.read(p) =~ /#{plugin.proc}/ } # plugin.proc is interpolated as a regular expression, not a literal string
7
+ event(
8
+ :service => "count proc #{plugin.proc}",
9
+ :metric => count,
10
+ :description => "count proc #{plugin.proc}, count: #{count}",
11
+ :warning => 5,
12
+ :critical => 20
13
+ )
14
+ end
data/examples/cpu.rb ADDED
@@ -0,0 +1,29 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:per_process] = false
5
+
6
+ collect :os => 'linux' do
7
+ @old_cpu ||= {}
8
+ File.read('/proc/stat').each_line do |cpu_line|
9
+ cpu_number = cpu_line.scan(/cpu(\d+|\s)\s+/)
10
+ next if cpu_number.empty?
11
+ cpu_number = cpu_number[0][0] == ' ' ? '_total' : cpu_number[0][0]
12
+ cpu_line[/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
13
+ _, u2, n2, s2, i2 = [$1, $2, $3, $4, $5].map { |e| e.to_i }
14
+ unless @old_cpu[cpu_number].nil?
15
+ u1, n1, s1, i1 = @old_cpu[cpu_number]
16
+ used = (u2+n2+s2) - (u1+n1+s1)
17
+ total = used + i2-i1
18
+ fraction = used.to_f / total
19
+ end
20
+ @old_cpu[cpu_number] = [u2, n2, s2, i2]
21
+ service = "cpu usage cpu#{cpu_number}"
22
+ description = "Cpu#{cpu_number} usage"
23
+ if cpu_number == '_total'
24
+ event(:service => service, :metric => fraction, :desc => description, :warning => 70, :critical => 58)
25
+ else
26
+ event(:service => service, :metric => fraction, :desc => description, :state => 'ok')
27
+ end
28
+ end
29
+ end
data/examples/disk.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'sys/filesystem' # provides Sys::Filesystem.stat used below
2
+
3
+ always_start true
4
+ interval 60
5
+
6
+ default[:not_monit_fs_4_size] = %w(sysfs nfs devpts squashfs proc devtmpfs) # filesystem types excluded from the size/inode checks
7
+ default[:monit_fs_4_fstab] = %w(ext2 ext3 ext4 xfs tmpfs) # filesystem types included in the fstab check
8
+ default[:not_monit_device_4_fstab] = %w(none) # devices excluded from the fstab check
9
+ default[:not_monit_point_4_fstab] = %w(/lib/init/rw /dev/shm /dev) # mount points excluded from the fstab check
10
+ default[:check_fstab] = true # toggles the fstab check at the end of collect
11
+
12
+ collect :os => 'linux' do # reports usage per mount point and mounted filesystems missing from /etc/fstab
13
+
14
+ def get_monit_points_for_size # mount points from /proc/mounts whose fs type is not excluded
15
+ monit_points = []
16
+ File.open('/proc/mounts', 'r') do |file|
17
+ while line = file.gets
18
+ mtab = line.split(/\s+/) # fields used: [0] device, [1] mount point, [2] fs type
19
+ monit_points << mtab[1] unless plugin.not_monit_fs_4_size.include? mtab[2]
20
+ end
21
+ end
22
+ monit_points
23
+ end
24
+
25
+ def get_monit_points_for_fstab # mount points from /proc/mounts that must also be present in /etc/fstab
26
+ monit_points = []
27
+ File.open('/proc/mounts', 'r') do |file|
28
+ while line = file.gets
29
+ mtab = line.split(/\s+/)
30
+ if plugin.monit_fs_4_fstab.include?(mtab[2]) &&
31
+ !plugin.not_monit_point_4_fstab.include?(mtab[1]) &&
32
+ !plugin.not_monit_device_4_fstab.include?(mtab[0])
33
+ monit_points << mtab[1]
34
+ end
35
+ end
36
+ end
37
+ monit_points
38
+ end
39
+
40
+ get_monit_points_for_size.each do |point|
41
+ point_stat = Sys::Filesystem.stat(point)
42
+ human_point = point == '/' ? '/root' : point # "/" is reported as "root"
43
+ human_point = human_point.gsub(/^\//, '').gsub(/\//, '_') # strip the leading slash, turn the remaining slashes into underscores
44
+ event(:warning => 70, :critical => 85, :service => "disk #{human_point} % block", :desc => "Disk usage #{point}, %", :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100) unless point_stat.blocks == 0
45
+ event(:warning => 70, :critical => 85, :service => "disk #{human_point} % inode", :desc => "Disk usage #{point}, inodes %", :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100) unless point_stat.files == 0
46
+ event(:service => "disk #{human_point} abs free", :desc => "Disk free #{point}, B", :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
47
+ event(:service => "disk #{human_point} abs total", :desc => "Disk space #{point}, B", :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
48
+ end
49
+
50
+ fstab = File.read('/etc/fstab').split("\n").delete_if { |x| x.strip.match(/^#/) } # drop commented-out lines
51
+ fstab = fstab.join("\n")
52
+ get_monit_points_for_fstab.each do |point|
53
+ event(:service => "disk #{point} fstab entry", :desc => "Mount point #{point} not matched in /etc/fstab", :state => 'critical') unless fstab.match(/#{point}(\s|\/\s)/) # NOTE(review): point is interpolated into the regexp unescaped and unanchored — confirm this is acceptable
54
+ end if plugin.check_fstab
55
+
56
+ end
@@ -0,0 +1,28 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:words] = [ 'reads reqs', 'reads merged', 'reads sector', 'reads time', # names for the numeric columns following the device name, in order
5
+ 'writes reqs', 'writes merged', 'writes sector', 'writes time',
6
+ 'io reqs', 'io time', 'io weighted' ]
7
+
8
+ default[:filter] = [ 'reads reqs', 'writes reqs' ] # subset of :words that is actually reported
9
+
10
+ run_if do
11
+ File.exists? '/proc/diskstats'
12
+ end
13
+
14
+ collect :os => "linux" do # emits per-device rate events computed from /proc/diskstats
15
+ f = File.read('/proc/diskstats')
16
+ f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |_, line| # inject is used only for iteration; the accumulator is ignored
17
+ if line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/ # two leading numeric columns, then device name, then the counters
18
+ dev = $1
19
+ values = $2.split(/\s+/).map { |str| str.to_i }
20
+ next if !!(dev.match /\d+$/ || !(dev.match =~ /^xvd/)) # NOTE(review): this appears to parse as dev.match(/\d+$/ || ...), so the xvd clause is never evaluated and every device name ending in a digit is skipped — confirm the intent
21
+ plugin.filter.each do |filter|
22
+ event(:service => "diskstat #{dev} #{filter}", :metric => values[plugin.words.index(filter)].to_f/interval, :diff => true)
23
+ end
24
+ iops = values[plugin.words.index('reads reqs')].to_i + values[plugin.words.index('writes reqs')].to_i # reads + writes requests
25
+ event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,5 @@
1
+ collect do # checks that the host's own FQDN resolves; NOTE(review): Resolv is not required in this file — presumably loaded elsewhere, confirm
2
+ event(
3
+ :state => Resolv::DNS.new.getresources(ohai[:fqdn], Resolv::DNS::Resource::IN::A).count == 1, # true only when exactly one A record is returned
4
+ :desc => "Check resolv self FQDN")
5
+ end
data/examples/exim.rb ADDED
@@ -0,0 +1,12 @@
1
+ always_start true
2
+ interval 60
3
+ default[:exim] = '/usr/sbin/exim' # path to the exim binary
4
+
5
+ run_if do # the plugin is only run when the binary exists
6
+ File.exists? plugin.exim
7
+ end
8
+
9
+ collect do
10
+ event(:service => 'exim', :metric => shell(plugin.exim, ' -bpc').to_i, # NOTE(review): the description says "frozen mails" — confirm that is what `-bpc` counts
11
+ :desc => 'Exim: count frozen mails', :warning => 5, :critical => 20)
12
+ end
@@ -0,0 +1,11 @@
1
+ default[:file] = '/notexists' # file whose modification age is reported
2
+
3
+ collect do
4
+ event(
5
+ :service => "#{name} #{plugin.file}",
6
+ :metric => (unixnow - File.stat(plugin.file).mtime.to_i).abs.to_f/60, # absolute difference to mtime divided by 60; presumably minutes, assuming unixnow is epoch seconds — confirm
7
+ :description => "File #{plugin.file} age",
8
+ :warning => 1,
9
+ :critical => 5
10
+ )
11
+ end
@@ -0,0 +1,21 @@
1
+ interval 60
2
+
3
+ warning 5 # plugin-level DSL call; presumably the default warning threshold for events — confirm
4
+
5
+ default[:file_mask] = '.*' # regular expression matched against each path
6
+ default[:dir] = '/tmp/dir' # directory that is walked recursively
7
+ default[:age] = 24 * 60 * 60 # age limit subtracted from Time.now.to_i, i.e. seconds
8
+
9
+ collect do # counts regular files under plugin.dir that match the mask and are older than plugin.age
10
+ if File.directory?(plugin.dir) # nothing is emitted when the directory does not exist
11
+ count_files = 0
12
+ file_mask = Regexp.new(plugin.file_mask)
13
+ Find.find(plugin.dir).each do |file| # NOTE(review): Find is not required in this file — presumably loaded elsewhere, confirm
14
+ next unless File.file? file
15
+ next unless file_mask.match file
16
+ next unless Time.now.to_i - plugin.age > File.new(file).mtime.to_i # NOTE(review): File.new opens the file without closing it; File.mtime would avoid that
17
+ count_files += 1
18
+ end
19
+ event(:service => "find files #{plugin.dir}", :metric => count_files, :description => "Count files in #{plugin.dir}")
20
+ end
21
+ end
data/examples/http.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+
3
+ default[:http_code] = 200 # expected response code
4
+ default[:http_method] = 'GET'
5
+ default[:connect_timeout] = 5
6
+ default[:retry] = 0
7
+ default[:retry_delay] = 0
8
+ default[:max_time] = 10
9
+ default[:insecure] = false # adds curl's --insecure flag when true
10
+ default[:url] = 'http://127.0.0.1:80'
11
+ default[:service] = 'http check'
12
+
13
+ collect do # runs curl against plugin.url and compares the printed HTTP code with the expected one
14
+
15
+ @cmd ||= begin # the command line is built once and reused on later runs
16
+ "curl -X#{plugin.http_method} -s --connect-timeout #{plugin.connect_timeout}" +
17
+ " #{'--insecure' if plugin.insecure} " +
18
+ " -w '%{http_code}\\n' --retry #{plugin.retry} --retry-delay #{plugin.retry_delay}" +
19
+ " --max-time #{plugin.max_time} --fail #{plugin.url} -o /dev/null"
20
+ end
21
+
22
+ out = shell_out(@cmd).stdout.to_i # the body goes to /dev/null, so stdout carries only the -w output
23
+ event(:service => plugin.service, :metric => out, :description => "http code: #{out}", :state => out == plugin.http_code)
24
+
25
+ end
@@ -0,0 +1,27 @@
1
+ interval 60
2
+ default[:rule_file] = '/etc/network/iptables'
3
+ always_start true
4
+
5
+ run_if do
6
+ File.exists? plugin.rule_file
7
+ end
8
+
9
+ collect do
10
+
11
+ def delete_counters(str)
12
+ str.gsub(/\[\d+\:\d+\]/, '').strip
13
+ end
14
+
15
+ current_rules = shell_out!('iptables-save').stdout.split("\n").map do |x|
16
+ x[0] == '#' ? nil : delete_counters(x)
17
+ end.compact.join("\n")
18
+ saved_rules = File.read(plugin.rules_file).split("\n").map do |x|
19
+ x[0] == '#' ? nil : delete_counters(x) # delete counters and comments
20
+ end.compact.join("\n")
21
+
22
+ event(
23
+ :service => "iptables #{plugin.rule_file}",
24
+ :state => current_rules == saved_rules,
25
+ :description => "iptables rules different between file: #{plugin.rule_file} and iptables-save"
26
+ )
27
+ end
data/examples/la.rb ADDED
@@ -0,0 +1,10 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ collect :os => "linux" do # reports the system load average
5
+ event(
6
+ :metric => File.read('/proc/loadavg').scan(/[\d\.]+/)[0].to_f, # first number in /proc/loadavg
7
+ :desc => 'LA averaged over 1 minute',
8
+ :service => 'la la_1'
9
+ )
10
+ end
data/examples/mdadm.rb ADDED
@@ -0,0 +1,43 @@
1
+ interval 60
2
+
3
+ run_if do
4
+ File.exists? '/proc/mdstat'
5
+ end
6
+
7
+ collect do
8
+
9
+ def rm_bracket(text)
10
+ text.gsub('[', '').gsub(']', '')
11
+ end
12
+
13
+ def status_well?(text)
14
+ text.gsub(/U/, '').empty?
15
+ end
16
+
17
+ def get_failed_parts (device)
18
+ begin
19
+ failed_parts = []
20
+ Dir["/sys/block/#{device}/md/dev-*"].each do |p|
21
+ state = File.read("#{p}/state").strip
22
+ next unless state != 'in_sync'
23
+ p.gsub!(/.+\/dev-/, '')
24
+ failed_parts << "#{p} (#{state})"
25
+ end
26
+ failed_parts.join(', ')
27
+ rescue
28
+ nil
29
+ end
30
+ end
31
+
32
+
33
+ mdstat = File.read('/proc/mdstat').split("\n")
34
+ mdstat.each_with_index do |line, index|
35
+ next unless line.include?('blocks')
36
+ device = file[index-1].split(':')[0].strip
37
+ mdstatus = rm_bracket(line.split(' ').last) # UUU
38
+ next if status_well?(mdstatus) # пропускаем все збс
39
+ next if mdstatus == plugin[states][device].to_s # disabled in config
40
+ event(:service => "mdadm #{device}", :state => 'critical', :desc => "mdadm failed device #{device}: #{get_failed_parts(device)}")
41
+ end
42
+
43
+ end
@@ -0,0 +1,12 @@
1
+ interval 180
2
+ always_start true
3
+
4
+ default[:cmd] = 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''
5
+
6
+ run_if do
7
+ File.exists? '/usr/bin/megacli'
8
+ end
9
+
10
+ collect do
11
+ event(:metric => shell(settings.cmd).to_i > 0, :description => 'MegaCli status')
12
+ end
@@ -0,0 +1,28 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ collect :os => 'linux' do
5
+ m = File.read('/proc/meminfo').split(/\n/).inject({}) do |info, line|
6
+ x = line.split(/:?\s+/)
7
+ info[x[0]] = x[1].to_i
8
+ info
9
+ end
10
+
11
+ free = m['MemFree'].to_i * 1024
12
+ cached = m['Cached'].to_i * 1024
13
+ buffers = m['Buffers'].to_i * 1024
14
+ total = m['MemTotal'].to_i * 1024
15
+ used = total - free
16
+ free_bc = free + buffers + cached
17
+ fraction = 1 - (free_bc.to_f / total)
18
+ swap_fraction = m['SwapTotal'] == 0 ? 0 : 1 - m['SwapFree'].to_f/m['SwapTotal']
19
+
20
+ event(:service => 'memory % free', :desc => 'Memory usage, %', :metric => fraction.round(2) * 100, :critical => 85, :warning => 75)
21
+ event(:service => 'memory % swap', :desc => 'Swap usage, %', :metric => swap_fraction.round(2) * 100, :critical => 85, :warning => 75)
22
+ event(:service => 'memory abs free', :desc => 'Memory free (kB)', :metric => free, :state => 'ok')
23
+ event(:service => 'memory abs total', :desc => 'Memory total (kB)', :metric => total, :state => 'ok')
24
+ event(:service => 'memory abs cached', :desc => 'Memory usage, cached (kB)', :metric => cached, :state => 'ok')
25
+ event(:service => 'memory abs buffers', :desc => 'Memory usage, buffers (kB)', :metric => buffers, :state => 'ok')
26
+ event(:service => 'memory abs used', :desc => 'Memory usage, used (kB)', :metric => used, :state => 'ok')
27
+ event(:service => 'memory abs free_bc', :desc => 'Memory usage with cache and buffers (kB)', :metric => free_bc, :state => 'ok')
28
+ end
data/examples/net.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ default[:include_alias] = false
5
+ default[:filter] = [ 'rx bytes', 'rx errs', 'rx drop', 'tx bytes', 'tx errs', 'tx drop' ]
6
+ default[:words] = [ 'rx bytes', 'rx packets', 'rx errs', 'rx drop', 'rx fifo', 'rx frame',
7
+ 'rx compressed', 'rx multicast', 'tx bytes', 'tx packets', 'tx drops',
8
+ 'tx fifo', 'tx colls', 'tx carrier', 'tx compressed' ]
9
+
10
+ collect :os => "linux" do
11
+ File.read('/proc/net/dev').each_line do |line|
12
+ iface = line.split(':')[0].strip
13
+ iface.gsub!(/\./, '_')
14
+ next if (iface =~ /\./ && !plugin.include_alias)
15
+ next unless line =~ /(\w*)\:\s*([\s\d]+)\s*/
16
+ plugin.words.map do |service|
17
+ service
18
+ end.zip(
19
+ $2.split(/\s+/).map { |str| str.to_i }
20
+ ).each do |service, value|
21
+ next unless plugin.filter.include? service
22
+ event(:service => "net #{iface} #{service}", :metric => value.to_f/interval, :diff => true)
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,25 @@
1
+ interval 5
2
+ default[:ports] = [80, 3994] # local source ports whose established connections are counted
3
+
4
+ collect do # counts established TCP/IPv4 connections on plugin.ports via `ss`
5
+
6
+ filter = nil
7
+ plugin.ports.each do |port| # builds "\( src *:80 or src *:3994"
8
+ if filter == nil
9
+ filter = "\\( src *:#{port}"
10
+ else
11
+ filter += " or src *:#{port}"
12
+ end
13
+ end
14
+ filter += " \\) and not dst 127.0.0.1:*" # NOTE(review): filter is still nil here when plugin.ports is empty — confirm that case cannot occur
15
+ cmd = 'ss -t -4 -n state established ' + filter + ' | wc -l'
16
+
17
+ count = shell!(cmd).to_i - 1 # presumably subtracts the header line printed by ss — confirm
18
+
19
+ event(
20
+ :service => "netstat tcp #{plugin.ports.join(', ')}",
21
+ :metric => count,
22
+ :description => "count established connects: #{count} to ports #{plugin.ports.join(', ')}"
23
+ )
24
+
25
+ end
data/examples/nfs.rb ADDED
@@ -0,0 +1,9 @@
1
+ interval 60
2
+ default[:file] = '/tmp/file'
3
+
4
+ collect do
5
+ event(
6
+ :state => system("test -f #{file.file}"),
7
+ :desc => "Check file #{file.file}"
8
+ )
9
+ end
data/examples/nginx.rb ADDED
@@ -0,0 +1,22 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:file] = '/etc/nginx/sites-enabled/status' # the plugin only runs when this file exists
5
+ default[:url] = 'http://127.0.0.1:11311/status' # status page that is fetched
6
+ default[:nginx_status_1] = %W(accepts handled requests) # names for the numbers on the third line of the response
7
+ default[:nginx_status_2] = %W(reading writing waiting) # names for the numbers on the fourth line of the response
8
+
9
+ run_if do
10
+ File.exists? plugin.file
11
+ end
12
+
13
+ collect :os => "linux" do # parses the status page and emits one event per counter
14
+ lines = http_get(plugin.url).split("\n") # NOTE(review): check_file_contains fetches with rest_get — confirm http_get is also provided
15
+ lines[2].scan(/\d+/).each_with_index do |value, index|
16
+ event(:service => "nginx #{plugin.nginx_status_1[index]}", :metric => value.to_f/interval, :diff => true)
17
+ end
18
+ event(:service => 'nginx active', :metric => lines[0].split(':')[1].strip.to_i) # number after the colon on the first line
19
+ lines[3].scan(/\d+/).each_with_index do |value, index|
20
+ event(:service => "nginx #{plugin.nginx_status_2[index]}", :metric => value.to_i)
21
+ end
22
+ end