kurchatov 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +111 -0
- data/Rakefile +1 -0
- data/Vagrantfile +16 -0
- data/bin/kurchatov +6 -0
- data/examples/check_file_contains.rb +14 -0
- data/examples/count_proc.rb +14 -0
- data/examples/cpu.rb +29 -0
- data/examples/disk.rb +56 -0
- data/examples/disk_stat.rb +28 -0
- data/examples/dns_check.rb +5 -0
- data/examples/exim.rb +12 -0
- data/examples/file_age.rb +11 -0
- data/examples/find_files.rb +21 -0
- data/examples/http.rb +25 -0
- data/examples/iptables.rb +27 -0
- data/examples/la.rb +10 -0
- data/examples/mdadm.rb +43 -0
- data/examples/megacli.rb +12 -0
- data/examples/memory.rb +28 -0
- data/examples/net.rb +25 -0
- data/examples/net_stat.rb +25 -0
- data/examples/nfs.rb +9 -0
- data/examples/nginx.rb +22 -0
- data/examples/nginx_500.rb +48 -0
- data/examples/ntp.rb +15 -0
- data/examples/openfiles.rb +6 -0
- data/examples/pgsql.rb +67 -0
- data/examples/ping_icmp.rb +12 -0
- data/examples/ping_tcp.rb +14 -0
- data/examples/proc_mem.rb +24 -0
- data/examples/process_usage.rb +15 -0
- data/examples/rabbitmq.rb +16 -0
- data/examples/runit.rb +47 -0
- data/examples/sidekiq.rb +21 -0
- data/examples/sidekiq_queue_state.rb +9 -0
- data/examples/status_file.rb +14 -0
- data/examples/tw_cli.rb +17 -0
- data/examples/uptime.rb +14 -0
- data/kurchatov.gemspec +28 -0
- data/lib/kurchatov/application.rb +154 -0
- data/lib/kurchatov/config.rb +14 -0
- data/lib/kurchatov/log.rb +9 -0
- data/lib/kurchatov/mashie.rb +152 -0
- data/lib/kurchatov/mixin/command.rb +31 -0
- data/lib/kurchatov/mixin/event.rb +63 -0
- data/lib/kurchatov/mixin/http.rb +21 -0
- data/lib/kurchatov/mixin/init.rb +6 -0
- data/lib/kurchatov/mixin/ohai.rb +22 -0
- data/lib/kurchatov/mixin/queue.rb +14 -0
- data/lib/kurchatov/monitor.rb +62 -0
- data/lib/kurchatov/plugin/config.rb +68 -0
- data/lib/kurchatov/plugin/dsl.rb +81 -0
- data/lib/kurchatov/plugin/riemann.rb +54 -0
- data/lib/kurchatov/plugin.rb +15 -0
- data/lib/kurchatov/queue.rb +28 -0
- data/lib/kurchatov/responders/http.rb +36 -0
- data/lib/kurchatov/responders/init.rb +3 -0
- data/lib/kurchatov/responders/riemann.rb +46 -0
- data/lib/kurchatov/responders/udp.rb +32 -0
- data/lib/kurchatov/riemann/client.rb +49 -0
- data/lib/kurchatov/riemann/event.rb +42 -0
- data/lib/kurchatov/riemann/message.rb +18 -0
- data/lib/kurchatov/version.rb +3 -0
- data/lib/kurchatov.rb +3 -0
- data/lib/ohai/plugins/darwin/hostname.rb +22 -0
- data/lib/ohai/plugins/darwin/platform.rb +38 -0
- data/lib/ohai/plugins/hostname.rb +27 -0
- data/lib/ohai/plugins/linux/hostname.rb +26 -0
- data/lib/ohai/plugins/linux/platform.rb +113 -0
- data/lib/ohai/plugins/linux/virtualization.rb +125 -0
- data/lib/ohai/plugins/os.rb +53 -0
- data/lib/ohai/plugins/platform.rb +28 -0
- data/lib/ohai/plugins/virtualization.rb +86 -0
- data/lib/ohai/plugins/windows/hostname.rb +33 -0
- data/lib/ohai/plugins/windows/platform.rb +27 -0
- data/tests/run.sh +55 -0
- metadata +209 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 37b3d24113a19c945cc3871050111cce3ebc8d96
|
4
|
+
data.tar.gz: 6a1ac341e371c41671f14081e2cee4f61ae2ec3b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e106b36d33622926e71b9bc9ec1c757cef4c71221c75582001a84faeacccd7c0acb80754935b3f20c8234fed66a39ef90a2d799524599d229941a84bdd0ea918
|
7
|
+
data.tar.gz: 04d7c0362e7e7836ff88b4da38a12780615c1ff1e99c932ede53d603dd1ac1d2310d88642d802a0b5fd67ce042ce0ad8f8cb2276de26abd2d76b38860f739c3c
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Vasiliev Dmitry
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
[![Build Status](https://travis-ci.org/vadv/kurchatov.png)](https://travis-ci.org/vadv/kurchatov)
|
2
|
+
|
3
|
+
# Kurchatov
|
4
|
+
|
5
|
+
Перед вами гем для мониторинга с помощью [riemann](http://riemann.io).
|
6
|
+
Я люблю [chef](http://www.getchef.com) и [ohai](http://docs.opscode.com/ohai.html),
|
7
|
+
поэтому здесь есть немного первого и немного второго.
|
8
|
+
|
9
|
+
Юзкейз таков:
|
10
|
+
* Kurchatov попадает в среду (окружение, приложения) которую не знает, и изучает ее с помощью ohai
|
11
|
+
* Решает какие плагины запускать
|
12
|
+
* Отсылает сообщения на riemann-хост с присвоенными статусами
|
13
|
+
|
14
|
+
|
15
|
+
## DSL
|
16
|
+
|
17
|
+
Решено использовать dsl для написания плагинов, плагин выглядит так:
|
18
|
+
```ruby
|
19
|
+
name "человеко читаемое имя" # по дефолту basename файла
|
20
|
+
interval 60 # с какой периодичностью будет запускаться плагин
|
21
|
+
always_start true # плагину не нужны дополнительные настройки
|
22
|
+
|
23
|
+
default[:nginx][:file] = "/etc/nginx/nginx.conf"
|
24
|
+
default[:nginx][:cmd] = "nginx -t" # дефолтные значение для Mashie: 'plugin'
|
25
|
+
default[:nginx][:url] = "http://127.0.0.1:133233/status" # данные значения смержатся со значениями
|
26
|
+
# полученными из конфига
|
27
|
+
|
28
|
+
run_if :os => 'linux' do # по умолчанию разрешено запускать все и везде
|
29
|
+
File.exists? plugin.file # plugin - не что иное как проставленые значения из default
|
30
|
+
# доступно обращение plugin[:file], plugin["file"], plugin.file
|
31
|
+
end
|
32
|
+
|
33
|
+
collect :web_some_platform => true, :os => 'linux' do # значение полученные через ohai,
|
34
|
+
# collect включиться ohai[:web_some_platform] == true и
|
35
|
+
# для ohai[:os] == 'linux'
|
36
|
+
metric = rest_get(default[:nginx][:url]).split("\n").first.split("Active connections:").last.to_i
|
37
|
+
event(
|
38
|
+
:service => "nginx active connections", # по дефолту name, если редиректим в graphite
|
39
|
+
:metric => metric, # то service будет ключем для url
|
40
|
+
:warning => 10,
|
41
|
+
:critical => 20,
|
42
|
+
:diff => true, # говорим что запоминать предыдущие значения и если разница между новым и старым
|
43
|
+
# меньше warning - получим статус 'ok', больше critical - 'critical' и так далее
|
44
|
+
# без :diff мы будем считать честные значения
|
45
|
+
# для того чтобы посчитать RPS мы просто делим метрику на interval
|
46
|
+
:description => "Что-то для человека-монитора" # допустимо сокращения :desc
|
47
|
+
)
|
48
|
+
|
49
|
+
event(
|
50
|
+
:service => "nginx test config #{plugin.file}", # сервис должен быть человекочитаемым но уникальным!
|
51
|
+
:state => shell_out("#{ohai[:nginx][:cmd]}").exitstatus == 0 # если :state == true стейт "ok", иначе - "critical"
|
52
|
+
# shell_out! - сгенерит exception и riemann уйдет сообщение об ошибке
|
53
|
+
# в плагине, также доступен просто shell() - он вернет только stdout и
|
54
|
+
# действует как shell_out!
|
55
|
+
:desc => "Ой, конфиг не валидный, наверно nginx -t его испортил :("
|
56
|
+
)
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
```
|
61
|
+
Если плагин отправил event, это не означает что он попадает на riemann-server:
|
62
|
+
* Эвенты группируются и отсылаются асинхронно пачками (все что накопилось за `Kurchatov::Responders::Riemann::FLUSH_INTERVAL` по дефолту 0.5 секунд)
|
63
|
+
* При отсутствии метрики во второй и последующие разы `:state == "ok"` не будет отсылаться
|
64
|
+
|
65
|
+
Больше примеров вы найдете [тут](https://github.com/vadv/kurchatov/tree/master/examples).
|
66
|
+
|
67
|
+
## OHAI
|
68
|
+
|
69
|
+
И в африке ohai. Минимальный пример:
|
70
|
+
```ruby
|
71
|
+
provides "postgres"
|
72
|
+
postgres Mash.new
|
73
|
+
cmd = "psql -U postgres -tqc 'select version()'"
|
74
|
+
status, stdout, stderr = run_command(:command => cmd)
|
75
|
+
postgres[:version] = stdout.strip
|
76
|
+
```
|
77
|
+
|
78
|
+
## Config
|
79
|
+
|
80
|
+
Это обычный yml-файл с настройками плагинов, его удобно генерить chef'ом :)
|
81
|
+
```yaml
|
82
|
+
plugin name:
|
83
|
+
settings name:
|
84
|
+
- 'bla-bla'
|
85
|
+
```
|
86
|
+
|
87
|
+
Есть небольшая магия, для того чтобы использовать плагин как провайдер (например следить за определенными портами):
|
88
|
+
```yaml
|
89
|
+
web watcher:
|
90
|
+
- url: http://localhost/ # создастся plugin с name == 'web watcher_0'
|
91
|
+
status: 302
|
92
|
+
- url: https://localhost/login # новый плагин name == 'web watcher_1'
|
93
|
+
status: 200
|
94
|
+
ua: Mozilla
|
95
|
+
robots txt watcher: # новый плагин name == 'robots txt watcher'
|
96
|
+
parent: web watcher
|
97
|
+
url: https://localhost/robots.txt
|
98
|
+
status: 404
|
99
|
+
ua: ^Yandex
|
100
|
+
```
|
101
|
+
|
102
|
+
## Почему велосипед
|
103
|
+
|
104
|
+
Удобно писать плагины, использовать 1 процесс, 1 коннект, и проч.
|
105
|
+
|
106
|
+
Мне не нравится официальная реализация [riemann-client](https://github.com/aphyr/riemann-ruby-client),
|
107
|
+
она течет и создает много ненужных *конкретно* для меня полей для протобуфа (но все равно спасибо [aphyr](http://aphyr.com) за
|
108
|
+
прекрасный сервер :) ), так что вы тут не найдете search и udp.
|
109
|
+
|
110
|
+
Упор сделан на потребление памяти (эх, ruby), поэтому все на тредах и на данный момент на 1.9.3 вы можете получить 8Mb RES.
|
111
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/Vagrantfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- mode: ruby -*-
# vi: set ft=ruby :

# Development VM for the kurchatov gem (Ubuntu precise64, VMware Fusion).
Vagrant.configure("2") do |config|

  config.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box"
  config.vm.box = "precise64_kurchatov_gem"
  config.ssh.forward_agent = true

  # Provider-specific settings live directly on the provider object
  # (`vmx`, `gui`); machine-level settings such as the box URL are changed
  # through the second block argument.
  config.vm.provider :vmware_fusion do |vmware, override|
    vmware.vmx["memsize"] = "2048"
    vmware.gui = false
    override.vm.box_url = "http://files.vagrantup.com/precise64_vmware.box"
  end

end
|
data/bin/kurchatov
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Example plugin: fetches `base_uri` and reports whether "the body contains
# `expression`" agrees with the configured `contains` flag.
interval 60

default[:base_uri] = 'http://localhost/check'
default[:expression] = 'ERROR'
default[:contains] = false # whether the expression is expected to be present
default[:service] = "check_file_contains"

collect do
  uri = plugin.base_uri
  needle = plugin.expression
  label = "#{plugin.service} #{uri} #{needle}"
  found = rest_get(uri).include?(needle)

  # NOTE(review): the metric is a boolean here, while other examples pass
  # booleans through :state -- confirm a boolean :metric is intended.
  event(
    :service => label,
    :description => "#{uri} contains #{needle}",
    :metric => found == plugin.contains
  )
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Example plugin: counts running processes whose command line matches the
# regular expression given in `plugin.proc`.
interval 60
default[:proc] = 'ruby'

collect :os => 'linux' do
  # Build the pattern once instead of once per process.
  pattern = /#{plugin.proc}/

  count = Dir['/proc/[0-9]*/cmdline'].count do |path|
    begin
      File.read(path) =~ pattern
    rescue Errno::ENOENT, Errno::ESRCH
      # The process exited between the directory listing and the read;
      # it simply does not count.
      false
    end
  end

  event(
    :service => "count proc #{plugin.proc}",
    :metric => count,
    :description => "count proc #{plugin.proc}, count: #{count}",
    :warning => 5,
    :critical => 20
  )
end
|
data/examples/cpu.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Example plugin: CPU usage per core and in total, computed from the
# difference between two consecutive readings of /proc/stat.
always_start true
interval 60

default[:per_process] = false

collect :os => 'linux' do
  # Counters seen on the previous run, keyed by cpu id ('_total', '0', '1', ...).
  @old_cpu ||= {}

  File.read('/proc/stat').each_line do |cpu_line|
    # One pass: "cpu" (aggregate) or "cpuN", followed by the first four counters.
    fields = cpu_line.match(/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
    next unless fields

    cpu_number = fields[1] == ' ' ? '_total' : fields[1]
    u2, n2, s2, i2 = fields.captures[1..4].map { |e| e.to_i }

    # Stays nil on the first run, when there is nothing to diff against.
    fraction = nil
    previous = @old_cpu[cpu_number]
    if previous
      u1, n1, s1, i1 = previous
      used = (u2 + n2 + s2) - (u1 + n1 + s1)
      total = used + i2 - i1
      # No ticks elapsed between samples: report 0.0 rather than NaN.
      fraction = total.zero? ? 0.0 : used.to_f / total
    end
    @old_cpu[cpu_number] = [u2, n2, s2, i2]

    service = "cpu usage cpu#{cpu_number}"
    description = "Cpu#{cpu_number} usage"
    if cpu_number == '_total'
      # NOTE(review): fraction is in 0..1 while the thresholds look like
      # percentages, and critical (58) is below warning (70) -- confirm the
      # intended values before changing them.
      event(:service => service, :metric => fraction, :desc => description, :warning => 70, :critical => 58)
    else
      event(:service => service, :metric => fraction, :desc => description, :state => 'ok')
    end
  end
end
|
data/examples/disk.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sys/filesystem'
|
2
|
+
|
3
|
+
# Example plugin: disk space / inode usage per mount point, plus a check
# that mounted filesystems have a matching /etc/fstab entry.
always_start true
interval 60

default[:not_monit_fs_4_size] = %w(sysfs nfs devpts squashfs proc devtmpfs)
default[:monit_fs_4_fstab] = %w(ext2 ext3 ext4 xfs tmpfs)
default[:not_monit_device_4_fstab] = %w(none)
default[:not_monit_point_4_fstab] = %w(/lib/init/rw /dev/shm /dev)
default[:check_fstab] = true

collect :os => 'linux' do

  # Mount points from /proc/mounts whose filesystem type is not listed in
  # plugin.not_monit_fs_4_size.
  def get_monit_points_for_size
    monit_points = []
    File.foreach('/proc/mounts') do |line|
      mtab = line.split(/\s+/)
      monit_points << mtab[1] unless plugin.not_monit_fs_4_size.include? mtab[2]
    end
    monit_points
  end

  # Mount points from /proc/mounts that must have an fstab entry: the
  # filesystem type is whitelisted and neither the mount point nor the
  # device is excluded.
  def get_monit_points_for_fstab
    monit_points = []
    File.foreach('/proc/mounts') do |line|
      mtab = line.split(/\s+/)
      next unless plugin.monit_fs_4_fstab.include?(mtab[2])
      next if plugin.not_monit_point_4_fstab.include?(mtab[1])
      next if plugin.not_monit_device_4_fstab.include?(mtab[0])
      monit_points << mtab[1]
    end
    monit_points
  end

  get_monit_points_for_size.each do |point|
    point_stat = Sys::Filesystem.stat(point)
    human_point = point == '/' ? '/root' : point
    human_point = human_point.gsub(/^\//, '').gsub(/\//, '_')

    unless point_stat.blocks == 0
      event(:warning => 70, :critical => 85,
            :service => "disk #{human_point} % block",
            :desc => "Disk usage #{point}, %",
            :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100)
    end
    unless point_stat.files == 0
      event(:warning => 70, :critical => 85,
            :service => "disk #{human_point} % inode",
            :desc => "Disk usage #{point}, inodes %",
            :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100)
    end
    event(:service => "disk #{human_point} abs free", :desc => "Disk free #{point}, B",
          :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
    event(:service => "disk #{human_point} abs total", :desc => "Disk space #{point}, B",
          :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
  end

  # /etc/fstab is only read when the check is enabled.
  if plugin.check_fstab
    fstab = File.read('/etc/fstab').split("\n").reject { |x| x.strip.match(/^#/) }.join("\n")
    get_monit_points_for_fstab.each do |point|
      # The mount point is escaped so that regex metacharacters in a path
      # (e.g. '.', '+') are matched literally.
      next if fstab.match(/#{Regexp.escape(point)}(\s|\/\s)/)
      event(:service => "disk #{point} fstab entry",
            :desc => "Mount point #{point} not matched in /etc/fstab",
            :state => 'critical')
    end
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Example plugin: per-device I/O rates taken from /proc/diskstats.
always_start true
interval 60

# Names for the counters that follow the device name in /proc/diskstats,
# in column order.
default[:words] = [ 'reads reqs', 'reads merged', 'reads sector', 'reads time',
                    'writes reqs', 'writes merged', 'writes sector', 'writes time',
                    'io reqs', 'io time', 'io weighted' ]

# Counters that are actually reported.
default[:filter] = [ 'reads reqs', 'writes reqs' ]

run_if do
  File.exist? '/proc/diskstats'
end

collect :os => "linux" do
  File.read('/proc/diskstats').each_line do |line|
    next if line =~ /(ram|loop)/
    next unless line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/
    dev = $1
    values = $2.split(/\s+/).map { |str| str.to_i }

    # Skip partitions (names ending in a digit) but keep xvd* devices.
    # The original expression parsed as `dev.match(/\d+$/ || ...)`, so the
    # xvd clause was dead code and would have raised if ever evaluated.
    # NOTE(review): xvd exemption is the presumed intent -- confirm.
    next if dev =~ /\d+$/ && dev !~ /^xvd/

    plugin.filter.each do |filter|
      position = plugin.words.index(filter)
      next if position.nil? # filter names a counter that is not in words
      event(:service => "diskstat #{dev} #{filter}", :metric => values[position].to_f/interval, :diff => true)
    end

    iops = values[plugin.words.index('reads reqs')].to_i + values[plugin.words.index('writes reqs')].to_i
    event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
  end
end
|
data/examples/exim.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Example plugin: reports the number printed by `exim -bpc`.
always_start true
interval 60
default[:exim] = '/usr/sbin/exim'

# Only run where the exim binary is present.
# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
run_if do
  File.exist? plugin.exim
end

collect do
  event(:service => 'exim', :metric => shell(plugin.exim, ' -bpc').to_i,
        :desc => 'Exim: count frozen mails', :warning => 5, :critical => 20)
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Example plugin: counts regular files under `plugin.dir` whose path matches
# `plugin.file_mask` and whose mtime is older than `plugin.age` seconds.
require 'find' # Find is used below and is not loaded by default

interval 60

warning 5

default[:file_mask] = '.*'
default[:dir] = '/tmp/dir'
default[:age] = 24 * 60 * 60

collect do
  if File.directory?(plugin.dir)
    file_mask = Regexp.new(plugin.file_mask)
    # Files modified before this instant count as old.
    cutoff = Time.now.to_i - plugin.age
    count_files = 0

    Find.find(plugin.dir) do |file|
      next unless File.file? file
      next unless file_mask.match file
      begin
        # File.mtime does not open the file, so no descriptor is leaked
        # (the previous File.new(file).mtime left one open per file).
        next unless cutoff > File.mtime(file).to_i
      rescue Errno::ENOENT
        next # file disappeared while the tree was being walked
      end
      count_files += 1
    end

    event(:service => "find files #{plugin.dir}", :metric => count_files, :description => "Count files in #{plugin.dir}")
  end
end
|
data/examples/http.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Example plugin: requests `plugin.url` with curl and compares the returned
# HTTP status code with `plugin.http_code`.
require 'shellwords'

interval 60

default[:http_code] = 200
default[:http_method] = 'GET'
default[:connect_timeout] = 5
default[:retry] = 0
default[:retry_delay] = 0
default[:max_time] = 10
default[:insecure] = false
default[:url] = 'http://127.0.0.1:80'
default[:service] = 'http check'

collect do

  # The command line is built once and reused on later runs.
  # The URL is shell-escaped: a query string containing '&', '?' or ';'
  # would otherwise be interpreted by the shell and break the command.
  @cmd ||= begin
    "curl -X#{plugin.http_method} -s --connect-timeout #{plugin.connect_timeout}" +
      " #{'--insecure' if plugin.insecure} " +
      " -w '%{http_code}\\n' --retry #{plugin.retry} --retry-delay #{plugin.retry_delay}" +
      " --max-time #{plugin.max_time} --fail #{Shellwords.escape(plugin.url.to_s)} -o /dev/null"
  end

  # curl prints only the status code (-w), the body goes to /dev/null.
  out = shell_out(@cmd).stdout.to_i
  event(:service => plugin.service, :metric => out, :description => "http code: #{out}", :state => out == plugin.http_code)

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Example plugin: compares the live ruleset (`iptables-save`) with the rules
# stored in `plugin.rule_file`, ignoring comments and packet counters.
interval 60
default[:rule_file] = '/etc/network/iptables'
always_start true

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
run_if do
  File.exist? plugin.rule_file
end

collect do

  # Removes "[packets:bytes]" counters and surrounding whitespace from a line.
  def delete_counters(str)
    str.gsub(/\[\d+\:\d+\]/, '').strip
  end

  # Drops comment lines and counters so that two dumps can be compared.
  normalize = lambda do |text|
    text.split("\n").
      reject { |x| x[0] == '#' }.
      map { |x| delete_counters(x) }.
      join("\n")
  end

  current_rules = normalize.call(shell_out!('iptables-save').stdout)
  # The configured key is :rule_file; the previous `plugin.rules_file`
  # referred to a setting that is never defined.
  saved_rules = normalize.call(File.read(plugin.rule_file))

  event(
    :service => "iptables #{plugin.rule_file}",
    :state => current_rules == saved_rules,
    :description => "iptables rules different between file: #{plugin.rule_file} and iptables-save"
  )
end
|
data/examples/la.rb
ADDED
data/examples/mdadm.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Example plugin: raises a critical event for every md array in
# /proc/mdstat whose member status is not all 'U'.
interval 60

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
run_if do
  File.exist? '/proc/mdstat'
end

collect do

  # Strips the square brackets from a status token such as "[UU_]".
  def rm_bracket(text)
    text.gsub('[', '').gsub(']', '')
  end

  # True when the status consists only of 'U' characters.
  def status_well?(text)
    text.gsub(/U/, '').empty?
  end

  # Returns a comma-separated list of array members whose sysfs state is not
  # 'in_sync', or nil when sysfs cannot be read (best effort: the result is
  # only used in the event description).
  def get_failed_parts(device)
    failed_parts = []
    Dir["/sys/block/#{device}/md/dev-*"].each do |p|
      state = File.read("#{p}/state").strip
      next unless state != 'in_sync'
      failed_parts << "#{p.gsub(/.+\/dev-/, '')} (#{state})"
    end
    failed_parts.join(', ')
  rescue
    nil
  end

  mdstat = File.read('/proc/mdstat').split("\n")
  # Optional per-device statuses from the config that should not alert;
  # there is no default, so fall back to an empty hash.
  accepted_states = plugin[:states] || {}

  mdstat.each_with_index do |line, index|
    next unless line.include?('blocks')
    # The device name is on the line preceding the "blocks" line.
    device = mdstat[index-1].split(':')[0].strip
    mdstatus = rm_bracket(line.split(' ').last) # UUU
    next if status_well?(mdstatus) # everything is fine, skip
    next if mdstatus == accepted_states[device].to_s # disabled in config
    event(:service => "mdadm #{device}", :state => 'critical', :desc => "mdadm failed device #{device}: #{get_failed_parts(device)}")
  end

end
|
data/examples/megacli.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Example plugin: runs megacli and reports whether the number printed on the
# line following "Virtual Drives" is greater than zero.
interval 180
always_start true

default[:cmd] = 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
run_if do
  File.exist? '/usr/bin/megacli'
end

collect do
  # Plugin settings are exposed as `plugin`; the previous `settings.cmd`
  # does not match the accessor used by every other plugin.
  # NOTE(review): the metric is a boolean here -- confirm whether :state
  # was intended instead.
  event(:metric => shell(plugin.cmd).to_i > 0, :description => 'MegaCli status')
end
|
data/examples/memory.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Example plugin: memory and swap usage from /proc/meminfo.
interval 60
always_start true

collect :os => 'linux' do
  # "Key:   value kB" lines -> { 'Key' => value }.
  m = File.read('/proc/meminfo').split(/\n/).each_with_object({}) do |line, info|
    x = line.split(/:?\s+/)
    info[x[0]] = x[1].to_i
  end

  # Values are multiplied by 1024, so everything below is in bytes.
  free = m['MemFree'].to_i * 1024
  cached = m['Cached'].to_i * 1024
  buffers = m['Buffers'].to_i * 1024
  total = m['MemTotal'].to_i * 1024
  used = total - free
  free_bc = free + buffers + cached
  fraction = 1 - (free_bc.to_f / total)

  # to_i guards against a missing SwapTotal entry (nil), which previously
  # slipped past the `== 0` test and raised on the division.
  swap_total = m['SwapTotal'].to_i
  swap_fraction = swap_total == 0 ? 0 : 1 - m['SwapFree'].to_f/swap_total

  event(:service => 'memory % free', :desc => 'Memory usage, %', :metric => fraction.round(2) * 100, :critical => 85, :warning => 75)
  event(:service => 'memory % swap', :desc => 'Swap usage, %', :metric => swap_fraction.round(2) * 100, :critical => 85, :warning => 75)
  # The absolute metrics are bytes; the descriptions used to say kB.
  event(:service => 'memory abs free', :desc => 'Memory free (B)', :metric => free, :state => 'ok')
  event(:service => 'memory abs total', :desc => 'Memory total (B)', :metric => total, :state => 'ok')
  event(:service => 'memory abs cached', :desc => 'Memory usage, cached (B)', :metric => cached, :state => 'ok')
  event(:service => 'memory abs buffers', :desc => 'Memory usage, buffers (B)', :metric => buffers, :state => 'ok')
  event(:service => 'memory abs used', :desc => 'Memory usage, used (B)', :metric => used, :state => 'ok')
  event(:service => 'memory abs free_bc', :desc => 'Memory usage with cache and buffers (B)', :metric => free_bc, :state => 'ok')
end
|
data/examples/net.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Example plugin: per-interface traffic, error and drop rates from
# /proc/net/dev.
interval 60
always_start true

default[:include_alias] = false
default[:filter] = [ 'rx bytes', 'rx errs', 'rx drop', 'tx bytes', 'tx errs', 'tx drop' ]
# One name per counter column of /proc/net/dev, in order (8 receive columns,
# then 8 transmit columns). 'tx errs' was missing and 'tx drop' was spelled
# 'tx drops', which shifted every transmit column after 'tx packets' and
# made two of the default filters match nothing.
default[:words] = [ 'rx bytes', 'rx packets', 'rx errs', 'rx drop', 'rx fifo', 'rx frame',
                    'rx compressed', 'rx multicast', 'tx bytes', 'tx packets', 'tx errs', 'tx drop',
                    'tx fifo', 'tx colls', 'tx carrier', 'tx compressed' ]

collect :os => "linux" do
  File.read('/proc/net/dev').each_line do |line|
    # Header lines contain no "name: counters" pair and are skipped here.
    next unless line =~ /(\w*)\:\s*([\s\d]+)\s*/
    counters = $2.split(/\s+/).map { |str| str.to_i }

    iface = line.split(':')[0].strip
    # Test for a dotted name BEFORE the dots are replaced; previously the
    # substitution ran first, so this check could never be true.
    next if iface.include?('.') && !plugin.include_alias
    iface = iface.gsub(/\./, '_')

    plugin.words.zip(counters).each do |service, value|
      next unless plugin.filter.include? service
      event(:service => "net #{iface} #{service}", :metric => value.to_f/interval, :diff => true)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Example plugin: counts established IPv4 TCP connections whose source port
# is one of `plugin.ports`, excluding those with destination 127.0.0.1.
interval 5
default[:ports] = [80, 3994]

collect do

  ports = Array(plugin.ports)
  # An empty list used to fail with NoMethodError on nil while the filter
  # was being concatenated; fail with an explicit message instead.
  raise ArgumentError, 'net_stat: plugin.ports must contain at least one port' if ports.empty?

  sources = ports.map { |port| "src *:#{port}" }.join(' or ')
  filter = "\\( #{sources} \\) and not dst 127.0.0.1:*"
  cmd = 'ss -t -4 -n state established ' + filter + ' | wc -l'

  # One line of the ss output is the column header.
  # NOTE(review): the README documents shell / shell_out / shell_out! --
  # confirm that `shell!` is actually provided.
  count = shell!(cmd).to_i - 1

  event(
    :service => "netstat tcp #{ports.join(', ')}",
    :metric => count,
    :description => "count established connects: #{count} to ports #{ports.join(', ')}"
  )

end
|
data/examples/nfs.rb
ADDED
data/examples/nginx.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Example plugin: parses the nginx status page at `plugin.url`.
always_start true
interval 60

default[:file] = '/etc/nginx/sites-enabled/status'
default[:url] = 'http://127.0.0.1:11311/status'
# Labels for the numbers on the third and fourth lines of the status page.
default[:nginx_status_1] = %W(accepts handled requests)
default[:nginx_status_2] = %W(reading writing waiting)

# File.exist? replaces File.exists?, which was removed in Ruby 3.2.
run_if do
  File.exist? plugin.file
end

collect :os => "linux" do
  # NOTE(review): other examples fetch pages with rest_get -- confirm that
  # http_get is provided as well.
  lines = http_get(plugin.url).split("\n")

  # Third line: cumulative counters, reported as per-second rates.
  lines[2].scan(/\d+/).each_with_index do |value, index|
    event(:service => "nginx #{plugin.nginx_status_1[index]}", :metric => value.to_f/interval, :diff => true)
  end

  # First line: "Active connections: N".
  event(:service => 'nginx active', :metric => lines[0].split(':')[1].strip.to_i)

  # Fourth line: current reading / writing / waiting gauges.
  lines[3].scan(/\d+/).each_with_index do |value, index|
    event(:service => "nginx #{plugin.nginx_status_2[index]}", :metric => value.to_i)
  end
end
|