kurchatov 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +18 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +111 -0
- data/Rakefile +1 -0
- data/Vagrantfile +16 -0
- data/bin/kurchatov +6 -0
- data/examples/check_file_contains.rb +14 -0
- data/examples/count_proc.rb +14 -0
- data/examples/cpu.rb +29 -0
- data/examples/disk.rb +56 -0
- data/examples/disk_stat.rb +28 -0
- data/examples/dns_check.rb +5 -0
- data/examples/exim.rb +12 -0
- data/examples/file_age.rb +11 -0
- data/examples/find_files.rb +21 -0
- data/examples/http.rb +25 -0
- data/examples/iptables.rb +27 -0
- data/examples/la.rb +10 -0
- data/examples/mdadm.rb +43 -0
- data/examples/megacli.rb +12 -0
- data/examples/memory.rb +28 -0
- data/examples/net.rb +25 -0
- data/examples/net_stat.rb +25 -0
- data/examples/nfs.rb +9 -0
- data/examples/nginx.rb +22 -0
- data/examples/nginx_500.rb +48 -0
- data/examples/ntp.rb +15 -0
- data/examples/openfiles.rb +6 -0
- data/examples/pgsql.rb +67 -0
- data/examples/ping_icmp.rb +12 -0
- data/examples/ping_tcp.rb +14 -0
- data/examples/proc_mem.rb +24 -0
- data/examples/process_usage.rb +15 -0
- data/examples/rabbitmq.rb +16 -0
- data/examples/runit.rb +47 -0
- data/examples/sidekiq.rb +21 -0
- data/examples/sidekiq_queue_state.rb +9 -0
- data/examples/status_file.rb +14 -0
- data/examples/tw_cli.rb +17 -0
- data/examples/uptime.rb +14 -0
- data/kurchatov.gemspec +28 -0
- data/lib/kurchatov/application.rb +154 -0
- data/lib/kurchatov/config.rb +14 -0
- data/lib/kurchatov/log.rb +9 -0
- data/lib/kurchatov/mashie.rb +152 -0
- data/lib/kurchatov/mixin/command.rb +31 -0
- data/lib/kurchatov/mixin/event.rb +63 -0
- data/lib/kurchatov/mixin/http.rb +21 -0
- data/lib/kurchatov/mixin/init.rb +6 -0
- data/lib/kurchatov/mixin/ohai.rb +22 -0
- data/lib/kurchatov/mixin/queue.rb +14 -0
- data/lib/kurchatov/monitor.rb +62 -0
- data/lib/kurchatov/plugin/config.rb +68 -0
- data/lib/kurchatov/plugin/dsl.rb +81 -0
- data/lib/kurchatov/plugin/riemann.rb +54 -0
- data/lib/kurchatov/plugin.rb +15 -0
- data/lib/kurchatov/queue.rb +28 -0
- data/lib/kurchatov/responders/http.rb +36 -0
- data/lib/kurchatov/responders/init.rb +3 -0
- data/lib/kurchatov/responders/riemann.rb +46 -0
- data/lib/kurchatov/responders/udp.rb +32 -0
- data/lib/kurchatov/riemann/client.rb +49 -0
- data/lib/kurchatov/riemann/event.rb +42 -0
- data/lib/kurchatov/riemann/message.rb +18 -0
- data/lib/kurchatov/version.rb +3 -0
- data/lib/kurchatov.rb +3 -0
- data/lib/ohai/plugins/darwin/hostname.rb +22 -0
- data/lib/ohai/plugins/darwin/platform.rb +38 -0
- data/lib/ohai/plugins/hostname.rb +27 -0
- data/lib/ohai/plugins/linux/hostname.rb +26 -0
- data/lib/ohai/plugins/linux/platform.rb +113 -0
- data/lib/ohai/plugins/linux/virtualization.rb +125 -0
- data/lib/ohai/plugins/os.rb +53 -0
- data/lib/ohai/plugins/platform.rb +28 -0
- data/lib/ohai/plugins/virtualization.rb +86 -0
- data/lib/ohai/plugins/windows/hostname.rb +33 -0
- data/lib/ohai/plugins/windows/platform.rb +27 -0
- data/tests/run.sh +55 -0
- metadata +209 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 37b3d24113a19c945cc3871050111cce3ebc8d96
|
4
|
+
data.tar.gz: 6a1ac341e371c41671f14081e2cee4f61ae2ec3b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e106b36d33622926e71b9bc9ec1c757cef4c71221c75582001a84faeacccd7c0acb80754935b3f20c8234fed66a39ef90a2d799524599d229941a84bdd0ea918
|
7
|
+
data.tar.gz: 04d7c0362e7e7836ff88b4da38a12780615c1ff1e99c932ede53d603dd1ac1d2310d88642d802a0b5fd67ce042ce0ad8f8cb2276de26abd2d76b38860f739c3c
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Vasiliev Dmitry
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
[Build Status](https://travis-ci.org/vadv/kurchatov)
|
2
|
+
|
3
|
+
# Kurchatov
|
4
|
+
|
5
|
+
Перед вами гем для мониторинга с помощью [riemann](http://riemann.io).
|
6
|
+
Я люблю [chef](http://www.getchef.com) и [ohai](http://docs.opscode.com/ohai.html),
|
7
|
+
поэтому здесь есть немного первого и немного второго.
|
8
|
+
|
9
|
+
Юзкейз таков:
|
10
|
+
* Kurchatov попадает в среду (окружение, приложения) которую не знает, и изучает ее с помощью ohai
|
11
|
+
* Решает какие плагины запускать
|
12
|
+
* Отсылает сообщения на riemann-хост с присвоенными статусами
|
13
|
+
|
14
|
+
|
15
|
+
## DSL
|
16
|
+
|
17
|
+
Решено использовать dsl для написания плагинов, плагин выглядит так:
|
18
|
+
```ruby
|
19
|
+
name "человеко читаемое имя" # по дефолту basename файла
|
20
|
+
interval 60 # с какой периодичностью будет запускаться плагин
|
21
|
+
always_start true # плагину не нужны дополнительные настройки
|
22
|
+
|
23
|
+
default[:nginx][:file] = "/etc/nginx/nginx.conf"
|
24
|
+
default[:nginx][:cmd] = "nginx -t" # дефолтные значение для Mashie: 'plugin'
|
25
|
+
default[:nginx][:url] = "http://127.0.0.1:133233/status" # данные значения смержатся со значениями
|
26
|
+
# полученными из конфига
|
27
|
+
|
28
|
+
run_if :os => 'linux' do # по умолчанию разрешено запускать все и везде
|
29
|
+
File.exists? plugin.file # plugin - не что иное, как проставленные значения из default
|
30
|
+
# доступно обращение plugin[:file], plugin["file"], plugin.file
|
31
|
+
end
|
32
|
+
|
33
|
+
collect :web_some_platform => true, :os => 'linux' do # значение полученные через ohai,
|
34
|
+
# collect включится при ohai[:web_some_platform] == true и
|
35
|
+
# для ohai[:os] == 'linux'
|
36
|
+
metric = rest_get(default[:nginx][:url]).split("\n").first.split("Active connections:").last.to_i
|
37
|
+
event(
|
38
|
+
:service => "nginx active connections", # по дефолту name, если редиректим в graphite
|
39
|
+
:metric => metric, # то service будет ключем для url
|
40
|
+
:warning => 10,
|
41
|
+
:critical => 20,
|
42
|
+
:diff => true, # говорим что запоминать предыдущие значения и если разница между новым и старым
|
43
|
+
# меньше warning - получим статус 'ok', больше critical - 'critical' и так далее
|
44
|
+
# без :diff мы будем считать честные значения
|
45
|
+
# для того чтобы посчитать RPS мы просто делим метрику на interval
|
46
|
+
:description => "Что-то для человека-монитора" # допустимо сокращения :desc
|
47
|
+
)
|
48
|
+
|
49
|
+
event(
|
50
|
+
:service => "nginx test config #{plugin.file}", # сервис должен быть человекочитаемым но уникальным!
|
51
|
+
:state => shell_out("#{ohai[:nginx][:cmd]}").exitstatus == 0 # если :state == true стейт "ok", иначе - "critical"
|
52
|
+
# shell_out! - сгенерит exception и riemann уйдет сообщение об ошибке
|
53
|
+
# в плагине, также доступен просто shell() - он вернет только stdout и
|
54
|
+
# действует как shell_out!
|
55
|
+
:desc => "Ой, конфиг не валидный, наверно nginx -t его испортил :("
|
56
|
+
)
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
```
|
61
|
+
Если плагин отправил event, это не означает что он попадает на riemann-server:
|
62
|
+
* Эвенты группируются и отсылаются асинхронно пачками (все что накопилось за `Kurchatov::Responders::Riemann::FLUSH_INTERVAL` по дефолту 0.5 секунд)
|
63
|
+
* При отсутствии метрики второй и последующий раз `:state == "ok"` не будет отсылаться
|
64
|
+
|
65
|
+
Больше примеров вы найдете [тут](https://github.com/vadv/kurchatov/tree/master/examples).
|
66
|
+
|
67
|
+
## OHAI
|
68
|
+
|
69
|
+
И в африке ohai. Минимальный пример:
|
70
|
+
```ruby
|
71
|
+
provides "postgres"
|
72
|
+
postgres Mash.new
|
73
|
+
cmd = "psql -U postgres -tqc 'select version()'"
|
74
|
+
status, stdout, stderr = run_command(:command => cmd)
|
75
|
+
postgres[:version] = stdout.strip
|
76
|
+
```
|
77
|
+
|
78
|
+
## Config
|
79
|
+
|
80
|
+
Это обычный yml-файл с настройками плагинов, его удобно генерить chef'ом :)
|
81
|
+
```yaml
|
82
|
+
plugin name:
|
83
|
+
settings name:
|
84
|
+
- 'bla-bla'
|
85
|
+
```
|
86
|
+
|
87
|
+
Есть небольшая магия, для того чтобы использовать плагин как провайдер (например следить за определенными портами):
|
88
|
+
```yaml
|
89
|
+
web watcher:
|
90
|
+
- url: http://localhost/ # создастся plugin с name == 'web watcher_0'
|
91
|
+
status: 302
|
92
|
+
- url: https://localhost/login # новый плагин name == 'web watcher_1'
|
93
|
+
status: 200
|
94
|
+
ua: Mozilla
|
95
|
+
robots txt watcher: # новый плагин name == 'robots txt watcher'
|
96
|
+
parent: web watcher
|
97
|
+
url: https://localhost/robots.txt
|
98
|
+
status: 404
|
99
|
+
ua: ^Yandex
|
100
|
+
```
|
101
|
+
|
102
|
+
## Почему велосипед
|
103
|
+
|
104
|
+
Удобно писать плагины, использовать 1 процесс, 1 коннект, и проч.
|
105
|
+
|
106
|
+
Мне не нравится официальная реализация [riemann-client](https://github.com/aphyr/riemann-ruby-client),
|
107
|
+
она течет и создает много ненужных *конкретно* для меня полей для протобуфа (но все равно спасибо [aphyr](http://aphyr.com) за
|
108
|
+
прекрасный сервер :) ), так что вы тут не найдете search и udp.
|
109
|
+
|
110
|
+
Упор сделан на потребление памяти (эх, ruby), поэтому все на тредах и на данный момент на 1.9.3 вы можете получить 8Mb RES.
|
111
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
data/Vagrantfile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# -*- mode: ruby -*-
|
2
|
+
# vi: set ft=ruby :
|
3
|
+
|
4
|
+
# Development VM definition: a precise64 box run through the VMware Fusion provider.
Vagrant.configure("2") do |config|
  box_url = "http://files.vagrantup.com/precise64_vmware.box"

  config.vm.box_url = box_url
  config.vm.box = "precise64_kurchatov_gem"
  config.ssh.forward_agent = true

  # NOTE(review): the provider block reaches through `.vm` and sets `box_url`
  # on the provider object; confirm against the VMware provider docs that
  # these settings are actually honoured.
  config.vm.provider :vmware_fusion do |provider|
    provider.vm.vmx["memsize"] = "2048"
    provider.vm.gui = false
    provider.box_url = box_url
  end
end
|
data/bin/kurchatov
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Plugin: fetch plugin.base_uri and compare "body contains plugin.expression"
# against the expected outcome configured in plugin.contains.
interval 60

default[:base_uri] = 'http://localhost/check'
default[:expression] = 'ERROR'
default[:contains] = false # expected result of the substring check
default[:service] = "check_file_contains"

collect do
  uri = plugin.base_uri
  expression = plugin.expression
  service_name = "#{plugin.service} #{uri} #{expression}"
  summary = "#{uri} contains #{expression}"
  found = rest_get(uri).include?(expression)

  event(
    :service => service_name,
    :description => summary,
    :metric => found == plugin.contains
  )
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Plugin: count processes whose /proc/<pid>/cmdline matches plugin.proc
# (interpreted as a regular expression).
interval 60
default[:proc] = 'ruby'

collect :os => 'linux' do
  # Build the pattern once instead of once per process.
  pattern = /#{plugin.proc}/

  count = Dir['/proc/[0-9]*/cmdline'].count do |path|
    begin
      File.read(path) =~ pattern
    rescue Errno::ENOENT, Errno::ESRCH, Errno::EACCES
      # The process went away (or became unreadable) between the glob and
      # the read; it simply does not count.
      false
    end
  end

  event(
    :service => "count proc #{plugin.proc}",
    :metric => count,
    :description => "count proc #{plugin.proc}, count: #{count}",
    :warning => 5,
    :critical => 20
  )
end
|
data/examples/cpu.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Plugin: CPU utilisation (total and per core) computed from the difference
# between two consecutive readings of /proc/stat.
always_start true
interval 60

default[:per_process] = false

collect :os => 'linux' do
  # Counters seen on the previous run, keyed by cpu id ('_total', '0', '1', ...).
  @old_cpu ||= {}

  File.read('/proc/stat').each_line do |cpu_line|
    match = cpu_line.match(/cpu(\d+|\s)\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/)
    next if match.nil?

    # The aggregate line is "cpu  ...", so the captured id is a blank.
    cpu_number = match[1] == ' ' ? '_total' : match[1]
    # First four counters of the line; per proc(5) these are user, nice,
    # system and idle time.
    u2, n2, s2, i2 = match.captures[1, 4].map { |e| e.to_i }

    # Stays nil on the first run (nothing to diff against) and when no time
    # has elapsed between samples (avoids a NaN metric).
    fraction = nil
    unless @old_cpu[cpu_number].nil?
      u1, n1, s1, i1 = @old_cpu[cpu_number]
      used = (u2 + n2 + s2) - (u1 + n1 + s1)
      total = used + i2 - i1
      fraction = used.to_f / total unless total == 0
    end
    @old_cpu[cpu_number] = [u2, n2, s2, i2]

    service = "cpu usage cpu#{cpu_number}"
    description = "Cpu#{cpu_number} usage"
    if cpu_number == '_total'
      # The metric is a 0..1 fraction, so the thresholds must be fractions
      # too, with critical above warning.
      event(:service => service, :metric => fraction, :desc => description, :warning => 0.70, :critical => 0.85)
    else
      event(:service => service, :metric => fraction, :desc => description, :state => 'ok')
    end
  end
end
|
data/examples/disk.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'sys/filesystem'
|
2
|
+
|
3
|
+
# Plugin: disk block/inode usage per mount point, plus a check that every
# monitored mount point has an entry in /etc/fstab.
always_start true
interval 60

default[:not_monit_fs_4_size] = %w(sysfs nfs devpts squashfs proc devtmpfs)
default[:monit_fs_4_fstab] = %w(ext2 ext3 ext4 xfs tmpfs)
default[:not_monit_device_4_fstab] = %w(none)
default[:not_monit_point_4_fstab] = %w(/lib/init/rw /dev/shm /dev)
default[:check_fstab] = true

collect :os => 'linux' do

  # Mount points from /proc/mounts whose filesystem type is not listed in
  # plugin.not_monit_fs_4_size.
  def get_monit_points_for_size
    monit_points = []
    File.foreach('/proc/mounts') do |line|
      mtab = line.split(/\s+/) # device, mount point, fs type, ...
      monit_points << mtab[1] unless plugin.not_monit_fs_4_size.include? mtab[2]
    end
    monit_points
  end

  # Mount points from /proc/mounts that should have an /etc/fstab entry:
  # fs type is whitelisted, and neither the mount point nor the device is
  # blacklisted.
  def get_monit_points_for_fstab
    monit_points = []
    File.foreach('/proc/mounts') do |line|
      mtab = line.split(/\s+/)
      next unless plugin.monit_fs_4_fstab.include?(mtab[2])
      next if plugin.not_monit_point_4_fstab.include?(mtab[1])
      next if plugin.not_monit_device_4_fstab.include?(mtab[0])
      monit_points << mtab[1]
    end
    monit_points
  end

  get_monit_points_for_size.each do |point|
    point_stat = Sys::Filesystem.stat(point)
    # "/" becomes "root"; other paths lose the leading slash and use "_" as separator.
    human_point = point == '/' ? '/root' : point
    human_point = human_point.gsub(/^\//, '').gsub(/\//, '_')
    event(:warning => 70, :critical => 85, :service => "disk #{human_point} % block", :desc => "Disk usage #{point}, %", :metric => (1- point_stat.blocks_available.to_f/point_stat.blocks).round(2) * 100) unless point_stat.blocks == 0
    event(:warning => 70, :critical => 85, :service => "disk #{human_point} % inode", :desc => "Disk usage #{point}, inodes %", :metric => (1 - point_stat.files_available.to_f/point_stat.files).round(2) * 100) unless point_stat.files == 0
    event(:service => "disk #{human_point} abs free", :desc => "Disk free #{point}, B", :metric => point_stat.blocks_free * point_stat.block_size, :state => 'ok')
    event(:service => "disk #{human_point} abs total", :desc => "Disk space #{point}, B", :metric => point_stat.blocks * point_stat.block_size, :state => 'ok')
  end

  # Only touch /etc/fstab when the check is enabled.
  if plugin.check_fstab
    fstab = File.readlines('/etc/fstab').reject { |x| x.strip.start_with?('#') }.join
    get_monit_points_for_fstab.each do |point|
      # The mount point is escaped (it may contain regexp metacharacters) and
      # must start a whitespace-separated field, so "/data" does not match
      # "/mnt/data".
      next if fstab =~ /\s#{Regexp.escape(point)}(\s|\/\s)/
      event(:service => "disk #{point} fstab entry", :desc => "Mount point #{point} not matched in /etc/fstab", :state => 'critical')
    end
  end

end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# Plugin: per-device I/O request rates read from /proc/diskstats.
always_start true
interval 60

# Names of the counters that follow the device name, in file order.
default[:words] = [ 'reads reqs', 'reads merged', 'reads sector', 'reads time',
                    'writes reqs', 'writes merged', 'writes sector', 'writes time',
                    'io reqs', 'io time', 'io weighted' ]

# Subset of default[:words] that is actually reported.
default[:filter] = [ 'reads reqs', 'writes reqs' ]

run_if do
  File.exist? '/proc/diskstats'
end

collect :os => "linux" do
  File.read('/proc/diskstats').each_line do |line|
    next if line =~ /(ram|loop)/
    next unless line =~ /^(?:\s+\d+){2}\s+([\w\d]+) (.*)$/
    dev = $1
    values = $2.split(/\s+/).map { |str| str.to_i }

    # Skip devices whose name ends in a digit (partitions such as sda1).
    # The original condition also carried an `|| !(dev.match =~ /^xvd/)`
    # clause, but it was parsed as part of the argument to `match` and never
    # evaluated; the effective behaviour is kept.
    # NOTE(review): if whole-disk Xen devices like xvda1 should be reported,
    # this needs an explicit exception.
    next if dev =~ /\d+$/

    plugin.filter.each do |filter|
      index = plugin.words.index(filter)
      next if index.nil? # counter name from config is not a known column
      event(:service => "diskstat #{dev} #{filter}", :metric => values[index].to_f/interval, :diff => true)
    end

    iops = values[plugin.words.index('reads reqs')].to_i + values[plugin.words.index('writes reqs')].to_i
    event(:service => "diskstat #{dev} iops", :metric => iops.to_f/interval, :diff => true)
  end
end
|
data/examples/exim.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Plugin: report the number printed by `exim -bpc`.
always_start true
interval 60
default[:exim] = '/usr/sbin/exim'

# Only run where the exim binary is present.
# File.exist? is used because File.exists? was removed in Ruby 3.2.
run_if do
  File.exist? plugin.exim
end

collect do
  event(:service => 'exim', :metric => shell(plugin.exim, ' -bpc').to_i,
        :desc => 'Exim: count frozen mails', :warning => 5, :critical => 20)
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Plugin: count regular files under plugin.dir whose path matches
# plugin.file_mask and whose mtime is older than plugin.age seconds.
require 'find' # Find is not loaded by default

interval 60

warning 5

default[:file_mask] = '.*'
default[:dir] = '/tmp/dir'
default[:age] = 24 * 60 * 60

collect do
  if File.directory?(plugin.dir)
    file_mask = Regexp.new(plugin.file_mask)
    # Files modified before this timestamp are considered old.
    oldest_allowed = Time.now.to_i - plugin.age

    count_files = Find.find(plugin.dir).count do |file|
      # File.mtime stats the path without opening it, so no descriptor is
      # leaked per file.
      File.file?(file) && file_mask.match(file) && oldest_allowed > File.mtime(file).to_i
    end

    event(:service => "find files #{plugin.dir}", :metric => count_files, :description => "Count files in #{plugin.dir}")
  end
end
|
data/examples/http.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Plugin: request plugin.url with curl and compare the returned HTTP status
# code with plugin.http_code.
require 'shellwords'

interval 60

default[:http_code] = 200
default[:http_method] = 'GET'
default[:connect_timeout] = 5
default[:retry] = 0
default[:retry_delay] = 0
default[:max_time] = 10
default[:insecure] = false
default[:url] = 'http://127.0.0.1:80'
default[:service] = 'http check'

collect do

  # The command line is built once and reused on later runs.
  # The method and URL are shell-escaped: a URL with a query string
  # ("?a=1&b=2") would otherwise be split by the shell at "&".
  @cmd ||= begin
    "curl -X#{Shellwords.escape(plugin.http_method.to_s)} -s --connect-timeout #{plugin.connect_timeout}" +
      " #{'--insecure' if plugin.insecure} " +
      " -w '%{http_code}\\n' --retry #{plugin.retry} --retry-delay #{plugin.retry_delay}" +
      " --max-time #{plugin.max_time} --fail #{Shellwords.escape(plugin.url.to_s)} -o /dev/null"
  end

  # With -o /dev/null the only stdout is the status code printed by -w.
  out = shell_out(@cmd).stdout.to_i
  event(:service => plugin.service, :metric => out, :description => "http code: #{out}", :state => out == plugin.http_code)

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Plugin: compare the live rule set (`iptables-save`) with the rules stored
# in plugin.rule_file.
interval 60
default[:rule_file] = '/etc/network/iptables'
always_start true

run_if do
  File.exist? plugin.rule_file
end

collect do

  # Removes "[packets:bytes]" counters from one line of iptables-save output.
  def delete_counters(str)
    str.gsub(/\[\d+\:\d+\]/, '').strip
  end

  # Drops comment lines and counters so two dumps can be compared as text.
  normalize = lambda do |text|
    text.split("\n").reject { |x| x.start_with?('#') }.map { |x| delete_counters(x) }.join("\n")
  end

  current_rules = normalize.call(shell_out!('iptables-save').stdout)
  # The configured key is :rule_file; the original read plugin.rules_file,
  # which has no default.
  saved_rules = normalize.call(File.read(plugin.rule_file))

  event(
    :service => "iptables #{plugin.rule_file}",
    :state => current_rules == saved_rules,
    :description => "iptables rules different between file: #{plugin.rule_file} and iptables-save"
  )
end
|
data/examples/la.rb
ADDED
data/examples/mdadm.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
# Plugin: raise a critical event for every md array in /proc/mdstat whose
# member status string contains anything other than "U".
interval 60

run_if do
  File.exist? '/proc/mdstat'
end

collect do

  # "[UU_]" -> "UU_"
  def rm_bracket(text)
    text.gsub('[', '').gsub(']', '')
  end

  # True when every character of the status string is "U".
  def status_well?(text)
    text.gsub(/U/, '').empty?
  end

  # Lists the members of the array whose sysfs state is not "in_sync",
  # e.g. "sdb1 (faulty)". Returns nil if sysfs cannot be read.
  def get_failed_parts(device)
    begin
      failed_parts = []
      Dir["/sys/block/#{device}/md/dev-*"].each do |p|
        state = File.read("#{p}/state").strip
        next if state == 'in_sync'
        failed_parts << "#{p.gsub(/.+\/dev-/, '')} (#{state})"
      end
      failed_parts.join(', ')
    rescue
      nil
    end
  end

  # Optional per-device status strings from the config that are treated as
  # acceptable.
  accepted_states = plugin[:states] || {}

  mdstat = File.read('/proc/mdstat').split("\n")
  mdstat.each_with_index do |line, index|
    next unless line.include?('blocks')
    # The device name is on the line just above the "blocks" line.
    device = mdstat[index-1].split(':')[0].strip
    mdstatus = rm_bracket(line.split(' ').last) # UUU
    next if status_well?(mdstatus) # array is fully healthy
    next if mdstatus == accepted_states[device].to_s # disabled in config
    event(:service => "mdadm #{device}", :state => 'critical', :desc => "mdadm failed device #{device}: #{get_failed_parts(device)}")
  end

end
|
data/examples/megacli.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# Plugin: run the configured megacli command and report whether the number
# it prints is greater than zero.
interval 180
always_start true

default[:cmd] = 'megacli -AdpAllInfo -aAll -NoLog | awk -F": " \'/Virtual Drives/ { getline; print $2; }\''

run_if do
  File.exist? '/usr/bin/megacli'
end

collect do
  # The command is configured under default[:cmd], which the other plugins
  # read through `plugin`; the original used `settings.cmd`.
  event(:metric => shell(plugin.cmd).to_i > 0, :description => 'MegaCli status')
end
|
data/examples/memory.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# Plugin: memory and swap usage from /proc/meminfo.
interval 60
always_start true

collect :os => 'linux' do
  # "MemTotal:   16384 kB" -> { 'MemTotal' => 16384 }
  m = {}
  File.read('/proc/meminfo').each_line do |line|
    key, value = line.split(/:?\s+/)
    m[key] = value.to_i
  end

  # The values read above are multiplied by 1024 here, so everything below
  # is reported in bytes.
  free = m['MemFree'].to_i * 1024
  cached = m['Cached'].to_i * 1024
  buffers = m['Buffers'].to_i * 1024
  total = m['MemTotal'].to_i * 1024
  used = total - free
  free_bc = free + buffers + cached
  fraction = 1 - (free_bc.to_f / total)

  # A missing or zero SwapTotal means no swap: report 0 instead of dividing by it.
  swap_total = m['SwapTotal'].to_i
  swap_fraction = swap_total == 0 ? 0 : 1 - m['SwapFree'].to_f / swap_total

  event(:service => 'memory % free', :desc => 'Memory usage, %', :metric => fraction.round(2) * 100, :critical => 85, :warning => 75)
  event(:service => 'memory % swap', :desc => 'Swap usage, %', :metric => swap_fraction.round(2) * 100, :critical => 85, :warning => 75)
  # Descriptions say B: the metrics are bytes, not kB.
  event(:service => 'memory abs free', :desc => 'Memory free (B)', :metric => free, :state => 'ok')
  event(:service => 'memory abs total', :desc => 'Memory total (B)', :metric => total, :state => 'ok')
  event(:service => 'memory abs cached', :desc => 'Memory usage, cached (B)', :metric => cached, :state => 'ok')
  event(:service => 'memory abs buffers', :desc => 'Memory usage, buffers (B)', :metric => buffers, :state => 'ok')
  event(:service => 'memory abs used', :desc => 'Memory usage, used (B)', :metric => used, :state => 'ok')
  event(:service => 'memory abs free_bc', :desc => 'Memory usage with cache and buffers (B)', :metric => free_bc, :state => 'ok')
end
|
data/examples/net.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# Plugin: per-interface traffic, error and drop rates from /proc/net/dev.
interval 60
always_start true

# When false, interfaces with a dot in their name (e.g. eth0.100) are skipped.
default[:include_alias] = false
# Subset of default[:words] that is actually reported.
default[:filter] = [ 'rx bytes', 'rx errs', 'rx drop', 'tx bytes', 'tx errs', 'tx drop' ]
# Counter names in /proc/net/dev column order (8 receive, then 8 transmit).
# 'tx errs' was missing and 'tx drop' was misspelled, which shifted every
# transmit column after 'tx packets'.
default[:words] = [ 'rx bytes', 'rx packets', 'rx errs', 'rx drop', 'rx fifo', 'rx frame',
                    'rx compressed', 'rx multicast', 'tx bytes', 'tx packets', 'tx errs', 'tx drop',
                    'tx fifo', 'tx colls', 'tx carrier', 'tx compressed' ]

collect :os => "linux" do
  File.read('/proc/net/dev').each_line do |line|
    # Header lines have no "name: counters" part and are skipped here.
    next unless line =~ /(\w*)\:\s*([\s\d]+)\s*/
    counters = $2.split(/\s+/).map { |str| str.to_i }

    raw_iface = line.split(':')[0].strip
    # Test for the dot before it is replaced; the original replaced first,
    # so this filter could never match.
    next if raw_iface.include?('.') && !plugin.include_alias
    iface = raw_iface.gsub(/\./, '_')

    plugin.words.zip(counters).each do |service, value|
      next unless plugin.filter.include? service
      event(:service => "net #{iface} #{service}", :metric => value.to_f/interval, :diff => true)
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Plugin: count established IPv4 TCP connections whose source port is one of
# plugin.ports, excluding those with destination 127.0.0.1 (via `ss`).
interval 5
default[:ports] = [80, 3994]

collect do

  ports = plugin.ports
  # Without ports there is no filter to build; the original failed with
  # NoMethodError on nil here.
  next if ports.nil? || ports.empty?

  # Produces: \( src *:80 or src *:3994 \) and not dst 127.0.0.1:*
  port_clauses = ports.map { |port| "src *:#{port}" }.join(' or ')
  filter = "\\( #{port_clauses} \\) and not dst 127.0.0.1:*"
  cmd = 'ss -t -4 -n state established ' + filter + ' | wc -l'

  # One line is subtracted from the `wc -l` total — presumably the header
  # row that ss prints.
  count = shell!(cmd).to_i - 1

  event(
    :service => "netstat tcp #{ports.join(', ')}",
    :metric => count,
    :description => "count established connects: #{count} to ports #{ports.join(', ')}"
  )

end
|
data/examples/nfs.rb
ADDED
data/examples/nginx.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Plugin: parse the nginx status page at plugin.url into events.
always_start true
interval 60

default[:file] = '/etc/nginx/sites-enabled/status'
default[:url] = 'http://127.0.0.1:11311/status'
# Names for the numbers on the third and fourth line of the status page.
default[:nginx_status_1] = %w(accepts handled requests)
default[:nginx_status_2] = %w(reading writing waiting)

run_if do
  File.exist? plugin.file
end

collect :os => "linux" do
  # rest_get is the HTTP helper used by the README and the other plugin.
  # NOTE(review): the original called http_get here; confirm which helper
  # the HTTP mixin actually defines.
  lines = rest_get(plugin.url).split("\n")

  # Third line: the three counters named in nginx_status_1, reported as rates.
  lines[2].scan(/\d+/).each_with_index do |value, index|
    event(:service => "nginx #{plugin.nginx_status_1[index]}", :metric => value.to_f/interval, :diff => true)
  end

  # First line: the number after the colon.
  event(:service => 'nginx active', :metric => lines[0].split(':')[1].strip.to_i)

  # Fourth line: the three values named in nginx_status_2.
  lines[3].scan(/\d+/).each_with_index do |value, index|
    event(:service => "nginx #{plugin.nginx_status_2[index]}", :metric => value.to_i)
  end
end
|