kurchatov 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +18 -0
  3. data/.travis.yml +5 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +111 -0
  7. data/Rakefile +1 -0
  8. data/Vagrantfile +16 -0
  9. data/bin/kurchatov +6 -0
  10. data/examples/check_file_contains.rb +14 -0
  11. data/examples/count_proc.rb +14 -0
  12. data/examples/cpu.rb +29 -0
  13. data/examples/disk.rb +56 -0
  14. data/examples/disk_stat.rb +28 -0
  15. data/examples/dns_check.rb +5 -0
  16. data/examples/exim.rb +12 -0
  17. data/examples/file_age.rb +11 -0
  18. data/examples/find_files.rb +21 -0
  19. data/examples/http.rb +25 -0
  20. data/examples/iptables.rb +27 -0
  21. data/examples/la.rb +10 -0
  22. data/examples/mdadm.rb +43 -0
  23. data/examples/megacli.rb +12 -0
  24. data/examples/memory.rb +28 -0
  25. data/examples/net.rb +25 -0
  26. data/examples/net_stat.rb +25 -0
  27. data/examples/nfs.rb +9 -0
  28. data/examples/nginx.rb +22 -0
  29. data/examples/nginx_500.rb +48 -0
  30. data/examples/ntp.rb +15 -0
  31. data/examples/openfiles.rb +6 -0
  32. data/examples/pgsql.rb +67 -0
  33. data/examples/ping_icmp.rb +12 -0
  34. data/examples/ping_tcp.rb +14 -0
  35. data/examples/proc_mem.rb +24 -0
  36. data/examples/process_usage.rb +15 -0
  37. data/examples/rabbitmq.rb +16 -0
  38. data/examples/runit.rb +47 -0
  39. data/examples/sidekiq.rb +21 -0
  40. data/examples/sidekiq_queue_state.rb +9 -0
  41. data/examples/status_file.rb +14 -0
  42. data/examples/tw_cli.rb +17 -0
  43. data/examples/uptime.rb +14 -0
  44. data/kurchatov.gemspec +28 -0
  45. data/lib/kurchatov/application.rb +154 -0
  46. data/lib/kurchatov/config.rb +14 -0
  47. data/lib/kurchatov/log.rb +9 -0
  48. data/lib/kurchatov/mashie.rb +152 -0
  49. data/lib/kurchatov/mixin/command.rb +31 -0
  50. data/lib/kurchatov/mixin/event.rb +63 -0
  51. data/lib/kurchatov/mixin/http.rb +21 -0
  52. data/lib/kurchatov/mixin/init.rb +6 -0
  53. data/lib/kurchatov/mixin/ohai.rb +22 -0
  54. data/lib/kurchatov/mixin/queue.rb +14 -0
  55. data/lib/kurchatov/monitor.rb +62 -0
  56. data/lib/kurchatov/plugin/config.rb +68 -0
  57. data/lib/kurchatov/plugin/dsl.rb +81 -0
  58. data/lib/kurchatov/plugin/riemann.rb +54 -0
  59. data/lib/kurchatov/plugin.rb +15 -0
  60. data/lib/kurchatov/queue.rb +28 -0
  61. data/lib/kurchatov/responders/http.rb +36 -0
  62. data/lib/kurchatov/responders/init.rb +3 -0
  63. data/lib/kurchatov/responders/riemann.rb +46 -0
  64. data/lib/kurchatov/responders/udp.rb +32 -0
  65. data/lib/kurchatov/riemann/client.rb +49 -0
  66. data/lib/kurchatov/riemann/event.rb +42 -0
  67. data/lib/kurchatov/riemann/message.rb +18 -0
  68. data/lib/kurchatov/version.rb +3 -0
  69. data/lib/kurchatov.rb +3 -0
  70. data/lib/ohai/plugins/darwin/hostname.rb +22 -0
  71. data/lib/ohai/plugins/darwin/platform.rb +38 -0
  72. data/lib/ohai/plugins/hostname.rb +27 -0
  73. data/lib/ohai/plugins/linux/hostname.rb +26 -0
  74. data/lib/ohai/plugins/linux/platform.rb +113 -0
  75. data/lib/ohai/plugins/linux/virtualization.rb +125 -0
  76. data/lib/ohai/plugins/os.rb +53 -0
  77. data/lib/ohai/plugins/platform.rb +28 -0
  78. data/lib/ohai/plugins/virtualization.rb +86 -0
  79. data/lib/ohai/plugins/windows/hostname.rb +33 -0
  80. data/lib/ohai/plugins/windows/platform.rb +27 -0
  81. data/tests/run.sh +55 -0
  82. metadata +209 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 37b3d24113a19c945cc3871050111cce3ebc8d96
4
+ data.tar.gz: 6a1ac341e371c41671f14081e2cee4f61ae2ec3b
5
+ SHA512:
6
+ metadata.gz: e106b36d33622926e71b9bc9ec1c757cef4c71221c75582001a84faeacccd7c0acb80754935b3f20c8234fed66a39ef90a2d799524599d229941a84bdd0ea918
7
+ data.tar.gz: 04d7c0362e7e7836ff88b4da38a12780615c1ff1e99c932ede53d603dd1ac1d2310d88642d802a0b5fd67ce042ce0ad8f8cb2276de26abd2d76b38860f739c3c
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ .vendor
7
+ .vagrant
8
+ Gemfile.lock
9
+ InstalledFiles
10
+ _yardoc
11
+ coverage
12
+ doc/
13
+ lib/bundler/man
14
+ pkg
15
+ rdoc
16
+ spec/reports
17
+ tmp
18
+ dpkg
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ language:
2
+ - 'ruby'
3
+ rvm:
4
+ - '1.9.3'
5
+ script: "./tests/run.sh"
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in kurchatov.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2014 Vasiliev Dmitry
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,111 @@
1
+ [![Build Status](https://travis-ci.org/vadv/kurchatov.png)](https://travis-ci.org/vadv/kurchatov)
2
+
3
+ # Kurchatov
4
+
5
+ Перед вами гем для мониторинга с помощью [riemann](http://riemann.io).
6
+ Я люблю [chef](http://www.getchef.com) и [ohai](http://docs.opscode.com/ohai.html),
7
+ поэтому здесь есть немного первого и немного второго.
8
+
9
+ Юзкейз таков:
10
+ * Kurchatov попадает в среду (окружение, приложения) которую не знает, и изучает ее с помощью ohai
11
+ * Решает какие плагины запускать
12
+ * Отсылает сообщения на riemann-хост с присвоенными статусами
13
+
14
+
15
+ ## DSL
16
+
17
+ Решено использовать dsl для написания плагинов, плагин выглядит так:
18
+ ```ruby
19
+ name "человеко читаемое имя" # по дефолту basename файла
20
+ interval 60 # с какой периодичностью будет запускаться плагин
21
+ always_start true # плагину не нужны дополнительные настройки
22
+
23
+ default[:nginx][:file] = "/etc/nginx/nginx.conf"
24
+ default[:nginx][:cmd] = "nginx -t" # дефолтные значение для Mashie: 'plugin'
25
+ default[:nginx][:url] = "http://127.0.0.1:11311/status" # данные значения смержатся со значениями
26
+ # полученными из конфига
27
+
28
+ run_if :os => 'linux' do # по умолчанию разрешено запускать все и везде
29
+ File.exists? plugin.file # plugin - не что иное как проставленые значения из default
30
+ # доступно обращение plugin[:file], plugin["file"], plugin.file
31
+ end
32
+
33
+ collect :web_some_platform => true, :os => 'linux' do # значение полученные через ohai,
34
+ # collect включится при ohai[:web_some_platform] == true и
35
+ # для ohai[:os] == 'linux'
36
+ metric = rest_get(default[:nginx][:url]).split("\n").first.split("Active connections:").last.to_i
37
+ event(
38
+ :service => "nginx active connections", # по дефолту name, если редиректим в graphite
39
+ :metric => metric, # то service будет ключем для url
40
+ :warning => 10,
41
+ :critical => 20,
42
+ :diff => true, # говорим что запоминать предыдущие значения и если разница между новым и старым
43
+ # меньше warning - получим статус 'ok', больше critical - 'critical' и так далее
44
+ # без :diff мы будем считать честные значения
45
+ # для того чтобы посчитать RPS мы просто делим метрику на interval
46
+ :description => "Что-то для человека-монитора" # допустимо сокращения :desc
47
+ )
48
+
49
+ event(
50
+ :service => "nginx test config #{plugin.file}", # сервис должен быть человекочитаемым но уникальным!
51
+ :state => shell_out("#{ohai[:nginx][:cmd]}").exitstatus == 0, # если :state == true стейт "ok", иначе - "critical"
52
+ # shell_out! - сгенерит exception и riemann уйдет сообщение об ошибке
53
+ # в плагине, также доступен просто shell() - он вернет только stdout и
54
+ # действует как shell_out!
55
+ :desc => "Ой, конфиг не валидный, наверно nginx -t его испортил :("
56
+ )
57
+
58
+ end
59
+
60
+ ```
61
+ Если плагин отправил event, это не означает что он попадает на riemann-server:
62
+ * Эвенты группируются и отсылаются асинхронно пачками (все что накопилось за `Kurchatov::Responders::Riemann::FLUSH_INTERVAL` по дефолту 0.5 секунд)
63
+ * При отсутствии метрики второй и последующий раз `:state == "ok"` не будет отсылаться
64
+
65
+ Больше примеров вы найдете [тут](https://github.com/vadv/kurchatov/tree/master/examples).
66
+
67
+ ## OHAI
68
+
69
+ И в африке ohai. Минимальный пример:
70
+ ```ruby
71
+ provides "postgres"
72
+ postgres Mash.new
73
+ cmd = "psql -U postgres -tqc 'select version()'"
74
+ status, stdout, stderr = run_command(:command => cmd)
75
+ postgres[:version] = stdout.strip
76
+ ```
77
+
78
+ ## Config
79
+
80
+ Это обычный yml-файл с настройками плагинов, eго удобно генерить chef'ом :)
81
+ ```yaml
82
+ plugin name:
83
+ settings name:
84
+ - 'bla-bla'
85
+ ```
86
+
87
+ Есть небольшая магия, для того чтобы использовать плагин как провайдер (например следить за определенными портами):
88
+ ```yaml
89
+ web watcher:
90
+ - url: http://localhost/ # создастся plugin с name == 'web watcher_0'
91
+ status: 302
92
+ - url: https://localhost/login # новый плагин name == 'web watcher_1'
93
+ status: 200
94
+ ua: Mozilla
95
+ robots txt watcher: # новый плагин name == 'robots txt watcher'
96
+ parent: web watcher
97
+ url: https://localhost/robots.txt
98
+ status: 404
99
+ ua: ^Yandex
100
+ ```
101
+
102
+ ## Почему велосипед
103
+
104
+ Удобно писать плагины, использовать 1 процесс, 1 коннект, и проч.
105
+
106
+ Мне не нравится официальная реализация [riemann-client](https://github.com/aphyr/riemann-ruby-client),
107
+ она течет и создает много ненужных *конкретно* для меня полей для протобуфа (но все равно спасибо [aphyr](http://aphyr.com) за
108
+ прекрасный сервер :) ), так что вы тут не найдете search и udp.
109
+
110
+ Упор сделан на потребление памяти (эх, ruby), поэтому все на тредах и на данный момент на 1.9.3 вы можете получить 8Mb RES.
111
+
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/Vagrantfile ADDED
@@ -0,0 +1,16 @@
1
+ # -*- mode: ruby -*-
2
+ # vi: set ft=ruby :
3
+
4
+ Vagrant.configure("2") do |config|
5
+
6
+ config.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box"
7
+ config.vm.box = "precise64_kurchatov_gem"
8
+ config.ssh.forward_agent = true
9
+
10
+ config.vm.provider :vmware_fusion do |vmware|
11
+ vmware.vm.vmx["memsize"] = "2048"
12
+ vmware.vm.gui = false
13
+ vmware.box_url = "http://files.vagrantup.com/precise64_vmware.box"
14
+ end
15
+
16
+ end
data/bin/kurchatov ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require "kurchatov/application"
5
+
6
+ Kurchatov::Aplication.new.run # NOTE(review): "Aplication" may be a typo of "Application" - confirm against the class defined in kurchatov/application.rb
@@ -0,0 +1,14 @@
1
+ interval 60
2
+
3
+ default[:base_uri] = 'http://localhost/check'
4
+ default[:expression] = 'ERROR'
5
+ default[:contains] = false # Contains or not expression
6
+ default[:service] = "check_file_contains"
7
+
8
+ collect do
9
+ event(
10
+ :service => "#{plugin.service} #{plugin.base_uri} #{plugin.expression}",
11
+ :description => "#{plugin.base_uri} contains #{plugin.expression}",
12
+ :metric => rest_get(plugin.base_uri).include?(plugin.expression) == plugin.contains
13
+ )
14
+ end
@@ -0,0 +1,14 @@
1
+ interval 60
2
+ default[:proc] = 'ruby'
3
+
4
+ collect :os => 'linux' do
5
+ count = 0
6
+ Dir['/proc/[0-9]*/cmdline'].each { |p| count += 1 if File.read(p) =~ /#{plugin.proc}/ }
7
+ event(
8
+ :service => "count proc #{plugin.proc}",
9
+ :metric => count,
10
+ :description => "count proc #{plugin.proc}, count: #{count}",
11
+ :warning => 5,
12
+ :critical => 20
13
+ )
14
+ end
data/examples/cpu.rb ADDED
@@ -0,0 +1,29 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:per_process] = false
5
+
6
+ collect :os => 'linux' do
7
+ @old_cpu ||= {}
8
+ File.read('/proc/stat').each_line do |cpu_line|
9
+ cpu_number = cpu_line.scan(/cpu(\d+|\s)\s+/)
10
+ next if cpu_number.empty?
11
+ cpu_number = cpu_number[0][0] == ' ' ? '_total' : cpu_number[0][0]
12
+ cpu_line[/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/]
13
+ _, u2, n2, s2, i2 = [$1, $2, $3, $4, $5].map { |e| e.to_i }
14
+ unless @old_cpu[cpu_number].nil?
15
+ u1, n1, s1, i1 = @old_cpu[cpu_number]
16
+ used = (u2+n2+s2) - (u1+n1+s1)
17
+ total = used + i2-i1
18
+ fraction = used.to_f / total
19
+ end
20
+ @old_cpu[cpu_number] = [u2, n2, s2, i2]
21
+ service = "cpu usage cpu#{cpu_number}"
22
+ description = "Cpu#{cpu_number} usage"
23
+ if cpu_number == '_total'
24
+ event(:service => service, :metric => fraction, :desc => description, :warning => 70, :critical => 58)
25
+ else
26
+ event(:service => service, :metric => fraction, :desc => description, :state => 'ok')
27
+ end
28
+ end
29
+ end
data/examples/disk.rb ADDED
@@ -0,0 +1,56 @@
1
+ require 'sys/filesystem'
2
+
3
+ always_start true
4
+ interval 60
5
+
6
+ default[:not_monit_fs_4_size] = %w(sysfs nfs devpts squashfs proc devtmpfs)
7
+ default[:monit_fs_4_fstab] = %w(ext2 ext3 ext4 xfs tmpfs)
8
+ default[:not_monit_device_4_fstab] = %w(none)
9
+ default[:not_monit_point_4_fstab] = %w(/lib/init/rw /dev/shm /dev)
10
+ default[:check_fstab] = true
11
+
12
+ collect :os => 'linux' do
13
+
14
+ def get_monit_points_for_size
15
+ monit_points = []
16
+ File.open('/proc/mounts', 'r') do |file|
17
+ while line = file.gets
18
+ mtab = line.split(/\s+/)
19
+ monit_points << mtab[1] unless plugin.not_monit_fs_4_size.include? mtab[2]
20
+ end
21
+ end
22
+ monit_points
23
+ end
24
+
25
+ def get_monit_points_for_fstab
26
+ monit_points = []
27
+ File.open('/proc/mounts', 'r') do |file|
28
+ while line = file.gets
29
+ mtab = line.split(/\s+/)
30
+ if plugin.monit_fs_4_fstab.include?(mtab[2]) &&
31
+ !plugin.not_monit_point_4_fstab.include?(mtab[1]) &&
32
+ !plugin.not_monit_device_4_fstab.include?(mtab[0])
33
+ monit_points << mtab[1]
34
+ end
35
+ end
36
+ end
37
+ monit_points
38
+ end
39
+
40
+ get_monit_points_for_size.each do |point|
41
+ point_stat = Sys::Filesystem.stat(point)
42
+ human_point = point == '/' ? '/root' : point
43
+ human_point = human_point.gsub(/^\//, '').gsub(/\//, '_')
44
+ event(:warning => 70, :critical => 85, :service => "disk #{human_point} % block", :desc => "Disk usage #{point}, %", :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100) unless point_stat.blocks == 0
45
+ event(:warning => 70, :critical => 85, :service => "disk #{human_point} % inode", :desc => "Disk usage #{point}, inodes %", :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100) unless point_stat.files == 0
46
+ event(:service => "disk #{human_point} abs free", :desc => "Disk free #{point}, B", :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
47
+ event(:service => "disk #{human_point} abs total", :desc => "Disk space #{point}, B", :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
48
+ end
49
+
50
+ fstab = File.read('/etc/fstab').split("\n").delete_if { |x| x.strip.match(/^#/) }
51
+ fstab = fstab.join("\n")
52
+ get_monit_points_for_fstab.each do |point|
53
+ event(:service => "disk #{point} fstab entry", :desc => "Mount point #{point} not matched in /etc/fstab", :state => 'critical') unless fstab.match(/#{point}(\s|\/\s)/)
54
+ end if plugin.check_fstab
55
+
56
+ end
@@ -0,0 +1,28 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:words] = [ 'reads reqs', 'reads merged', 'reads sector', 'reads time',
5
+ 'writes reqs', 'writes merged', 'writes sector', 'writes time',
6
+ 'io reqs', 'io time', 'io weighted' ]
7
+
8
+ default[:filter] = [ 'reads reqs', 'writes reqs' ]
9
+
10
+ run_if do
11
+ File.exists? '/proc/diskstats'
12
+ end
13
+
14
+ collect :os => "linux" do
15
+ f = File.read('/proc/diskstats')
16
+ f.split("\n").reject { |d| d =~ /(ram|loop)/ }.inject({}) do |_, line|
17
+ if line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/
18
+ dev = $1
19
+ values = $2.split(/\s+/).map { |str| str.to_i }
20
+ next if !!(dev.match /\d+$/ || !(dev.match =~ /^xvd/))
21
+ plugin.filter.each do |filter|
22
+ event(:service => "diskstat #{dev} #{filter}", :metric => values[plugin.words.index(filter)].to_f/interval, :diff => true)
23
+ end
24
+ iops = values[plugin.words.index('reads reqs')].to_i + values[plugin.words.index('writes reqs')].to_i
25
+ event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,5 @@
1
+ collect do
2
+ event(
3
+ :state => Resolv::DNS.new.getresources(ohai[:fqdn], Resolv::DNS::Resource::IN::A).count == 1,
4
+ :desc => "Check resolv self FQDN")
5
+ end
data/examples/exim.rb ADDED
@@ -0,0 +1,12 @@
1
+ always_start true
2
+ interval 60
3
+ default[:exim] = '/usr/sbin/exim'
4
+
5
+ run_if do
6
+ File.exists? plugin.exim
7
+ end
8
+
9
+ collect do
10
+ event(:service => 'exim', :metric => shell(plugin.exim, ' -bpc').to_i,
11
+ :desc => 'Exim: count frozen mails', :warning => 5, :critical => 20)
12
+ end
@@ -0,0 +1,11 @@
1
+ default[:file] = '/notexists'
2
+
3
+ collect do
4
+ event(
5
+ :service => "#{name} #{plugin.file}",
6
+ :metric => (unixnow - File.stat(plugin.file).mtime.to_i).abs.to_f/60,
7
+ :description => "File #{plugin.file} age",
8
+ :warning => 1,
9
+ :critical => 5
10
+ )
11
+ end
@@ -0,0 +1,21 @@
1
+ interval 60
2
+
3
+ warning 5
4
+
5
+ default[:file_mask] = '.*'
6
+ default[:dir] = '/tmp/dir'
7
+ default[:age] = 24 * 60 * 60
8
+
9
+ collect do
10
+ if File.directory?(plugin.dir)
11
+ count_files = 0
12
+ file_mask = Regexp.new(plugin.file_mask)
13
+ Find.find(plugin.dir).each do |file|
14
+ next unless File.file? file
15
+ next unless file_mask.match file
16
+ next unless Time.now.to_i - plugin.age > File.new(file).mtime.to_i
17
+ count_files += 1
18
+ end
19
+ event(:service => "find files #{plugin.dir}", :metric => count_files, :description => "Count files in #{plugin.dir}")
20
+ end
21
+ end
data/examples/http.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+
3
+ default[:http_code] = 200
4
+ default[:http_method] = 'GET'
5
+ default[:connect_timeout] = 5
6
+ default[:retry] = 0
7
+ default[:retry_delay] = 0
8
+ default[:max_time] = 10
9
+ default[:insecure] = false
10
+ default[:url] = 'http://127.0.0.1:80'
11
+ default[:service] = 'http check'
12
+
13
+ collect do
14
+
15
+ @cmd ||= begin
16
+ "curl -X#{plugin.http_method} -s --connect-timeout #{plugin.connect_timeout}" +
17
+ " #{'--insecure' if plugin.insecure} " +
18
+ " -w '%{http_code}\\n' --retry #{plugin.retry} --retry-delay #{plugin.retry_delay}" +
19
+ " --max-time #{plugin.max_time} --fail #{plugin.url} -o /dev/null"
20
+ end
21
+
22
+ out = shell_out(@cmd).stdout.to_i
23
+ event(:service => plugin.service, :metric => out, :description => "http code: #{out}", :state => out == plugin.http_code)
24
+
25
+ end
@@ -0,0 +1,27 @@
1
+ interval 60
2
+ default[:rule_file] = '/etc/network/iptables'
3
+ always_start true
4
+
5
+ run_if do
6
+ File.exists? plugin.rule_file
7
+ end
8
+
9
+ collect do
10
+
11
+ def delete_counters(str)
12
+ str.gsub(/\[\d+\:\d+\]/, '').strip
13
+ end
14
+
15
+ current_rules = shell_out!('iptables-save').stdout.split("\n").map do |x|
16
+ x[0] == '#' ? nil : delete_counters(x)
17
+ end.compact.join("\n")
18
+ saved_rules = File.read(plugin.rules_file).split("\n").map do |x|
19
+ x[0] == '#' ? nil : delete_counters(x) # delete counters and comments
20
+ end.compact.join("\n")
21
+
22
+ event(
23
+ :service => "iptables #{plugin.rule_file}",
24
+ :state => current_rules == saved_rules,
25
+ :description => "iptables rules different between file: #{plugin.rule_file} and iptables-save"
26
+ )
27
+ end
data/examples/la.rb ADDED
@@ -0,0 +1,10 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ collect :os => "linux" do
5
+ event(
6
+ :metric => File.read('/proc/loadavg').scan(/[\d\.]+/)[0].to_f,
7
+ :desc => 'LA averaged over 1 minute',
8
+ :service => 'la la_1'
9
+ )
10
+ end
data/examples/mdadm.rb ADDED
@@ -0,0 +1,43 @@
1
+ interval 60
2
+
3
+ run_if do
4
+ File.exists? '/proc/mdstat'
5
+ end
6
+
7
+ collect do
8
+
9
+ def rm_bracket(text)
10
+ text.gsub('[', '').gsub(']', '')
11
+ end
12
+
13
+ def status_well?(text)
14
+ text.gsub(/U/, '').empty?
15
+ end
16
+
17
+ def get_failed_parts (device)
18
+ begin
19
+ failed_parts = []
20
+ Dir["/sys/block/#{device}/md/dev-*"].each do |p|
21
+ state = File.read("#{p}/state").strip
22
+ next unless state != 'in_sync'
23
+ p.gsub!(/.+\/dev-/, '')
24
+ failed_parts << "#{p} (#{state})"
25
+ end
26
+ failed_parts.join(', ')
27
+ rescue
28
+ nil
29
+ end
30
+ end
31
+
32
+
33
+ mdstat = File.read('/proc/mdstat').split("\n")
34
+ mdstat.each_with_index do |line, index|
35
+ next unless line.include?('blocks')
36
+ device = file[index-1].split(':')[0].strip
37
+ mdstatus = rm_bracket(line.split(' ').last) # UUU
38
+ next if status_well?(mdstatus) # пропускаем все збс
39
+ next if mdstatus == plugin[states][device].to_s # disabled in config
40
+ event(:service => "mdadm #{device}", :state => 'critical', :desc => "mdadm failed device #{device}: #{get_failed_parts(device)}")
41
+ end
42
+
43
+ end
@@ -0,0 +1,12 @@
1
+ interval 180
2
+ always_start true
3
+
4
+ default[:cmd] = 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''
5
+
6
+ run_if do
7
+ File.exists? '/usr/bin/megacli'
8
+ end
9
+
10
+ collect do
11
+ event(:metric => shell(settings.cmd).to_i > 0, :description => 'MegaCli status')
12
+ end
@@ -0,0 +1,28 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ collect :os => 'linux' do
5
+ m = File.read('/proc/meminfo').split(/\n/).inject({}) do |info, line|
6
+ x = line.split(/:?\s+/)
7
+ info[x[0]] = x[1].to_i
8
+ info
9
+ end
10
+
11
+ free = m['MemFree'].to_i * 1024
12
+ cached = m['Cached'].to_i * 1024
13
+ buffers = m['Buffers'].to_i * 1024
14
+ total = m['MemTotal'].to_i * 1024
15
+ used = total - free
16
+ free_bc = free + buffers + cached
17
+ fraction = 1 - (free_bc.to_f / total)
18
+ swap_fraction = m['SwapTotal'] == 0 ? 0 : 1 - m['SwapFree'].to_f/m['SwapTotal']
19
+
20
+ event(:service => 'memory % free', :desc => 'Memory usage, %', :metric => fraction.round(2) * 100, :critical => 85, :warning => 75)
21
+ event(:service => 'memory % swap', :desc => 'Swap usage, %', :metric => swap_fraction.round(2) * 100, :critical => 85, :warning => 75)
22
+ event(:service => 'memory abs free', :desc => 'Memory free (kB)', :metric => free, :state => 'ok')
23
+ event(:service => 'memory abs total', :desc => 'Memory total (kB)', :metric => total, :state => 'ok')
24
+ event(:service => 'memory abs cached', :desc => 'Memory usage, cached (kB)', :metric => cached, :state => 'ok')
25
+ event(:service => 'memory abs buffers', :desc => 'Memory usage, buffers (kB)', :metric => buffers, :state => 'ok')
26
+ event(:service => 'memory abs used', :desc => 'Memory usage, used (kB)', :metric => used, :state => 'ok')
27
+ event(:service => 'memory abs free_bc', :desc => 'Memory usage with cache and buffers (kB)', :metric => free_bc, :state => 'ok')
28
+ end
data/examples/net.rb ADDED
@@ -0,0 +1,25 @@
1
+ interval 60
2
+ always_start true
3
+
4
+ default[:include_alias] = false
5
+ default[:filter] = [ 'rx bytes', 'rx errs', 'rx drop', 'tx bytes', 'tx errs', 'tx drop' ]
6
+ default[:words] = [ 'rx bytes', 'rx packets', 'rx errs', 'rx drop', 'rx fifo', 'rx frame',
7
+ 'rx compressed', 'rx multicast', 'tx bytes', 'tx packets', 'tx drops',
8
+ 'tx fifo', 'tx colls', 'tx carrier', 'tx compressed' ]
9
+
10
+ collect :os => "linux" do
11
+ File.read('/proc/net/dev').each_line do |line|
12
+ iface = line.split(':')[0].strip
13
+ iface.gsub!(/\./, '_')
14
+ next if (iface =~ /\./ && !plugin.include_alias)
15
+ next unless line =~ /(\w*)\:\s*([\s\d]+)\s*/
16
+ plugin.words.map do |service|
17
+ service
18
+ end.zip(
19
+ $2.split(/\s+/).map { |str| str.to_i }
20
+ ).each do |service, value|
21
+ next unless plugin.filter.include? service
22
+ event(:service => "net #{iface} #{service}", :metric => value.to_f/interval, :diff => true)
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,25 @@
1
+ interval 5
2
+ default[:ports] = [80, 3994]
3
+
4
+ collect do
5
+
6
+ filter = nil
7
+ plugin.ports.each do |port|
8
+ if filter == nil
9
+ filter = "\\( src *:#{port}"
10
+ else
11
+ filter += " or src *:#{port}"
12
+ end
13
+ end
14
+ filter += " \\) and not dst 127.0.0.1:*"
15
+ cmd = 'ss -t -4 -n state established ' + filter + ' | wc -l'
16
+
17
+ count = shell!(cmd).to_i - 1
18
+
19
+ event(
20
+ :service => "netstat tcp #{plugin.ports.join(', ')}",
21
+ :metric => count,
22
+ :description => "count established connects: #{count} to ports #{plugin.ports.join(', ')}"
23
+ )
24
+
25
+ end
data/examples/nfs.rb ADDED
@@ -0,0 +1,9 @@
1
+ interval 60
2
+ default[:file] = '/tmp/file'
3
+
4
+ collect do
5
+ event(
6
+ :state => system("test -f #{file.file}"),
7
+ :desc => "Check file #{file.file}"
8
+ )
9
+ end
data/examples/nginx.rb ADDED
@@ -0,0 +1,22 @@
1
+ always_start true
2
+ interval 60
3
+
4
+ default[:file] = '/etc/nginx/sites-enabled/status'
5
+ default[:url] = 'http://127.0.0.1:11311/status'
6
+ default[:nginx_status_1] = %W(accepts handled requests)
7
+ default[:nginx_status_2] = %W(reading writing waiting)
8
+
9
+ run_if do
10
+ File.exists? plugin.file
11
+ end
12
+
13
+ collect :os => "linux" do
14
+ lines = http_get(plugin.url).split("\n")
15
+ lines[2].scan(/\d+/).each_with_index do |value, index|
16
+ event(:service => "nginx #{plugin.nginx_status_1[index]}", :metric => value.to_f/interval, :diff => true)
17
+ end
18
+ event(:service => 'nginx active', :metric => lines[0].split(':')[1].strip.to_i)
19
+ lines[3].scan(/\d+/).each_with_index do |value, index|
20
+ event(:service => "nginx #{plugin.nginx_status_2[index]}", :metric => value.to_i)
21
+ end
22
+ end