cloud-mu 3.5.0 → 3.6.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Berksfile +5 -2
- data/Berksfile.lock +135 -0
- data/ansible/roles/mu-base/README.md +33 -0
- data/ansible/roles/mu-base/defaults/main.yml +2 -0
- data/ansible/roles/mu-base/files/check_apm.cfg +1 -0
- data/ansible/roles/mu-base/files/check_apm.sh +18 -0
- data/ansible/roles/mu-base/files/check_disk.cfg +1 -0
- data/ansible/roles/mu-base/files/check_elastic_shards.cfg +1 -0
- data/ansible/roles/mu-base/files/check_elastic_shards.sh +12 -0
- data/ansible/roles/mu-base/files/check_logstash.cfg +1 -0
- data/ansible/roles/mu-base/files/check_logstash.sh +14 -0
- data/ansible/roles/mu-base/files/check_mem.cfg +1 -0
- data/ansible/roles/mu-base/files/check_updates.cfg +1 -0
- data/ansible/roles/mu-base/files/logrotate.conf +35 -0
- data/ansible/roles/mu-base/files/nrpe-apm-sudo +1 -0
- data/ansible/roles/mu-base/files/nrpe-elasticshards-sudo +2 -0
- data/ansible/roles/mu-base/handlers/main.yml +5 -0
- data/ansible/roles/mu-base/meta/main.yml +53 -0
- data/ansible/roles/mu-base/tasks/main.yml +113 -0
- data/ansible/roles/mu-base/templates/nrpe.cfg.j2 +231 -0
- data/ansible/roles/mu-base/tests/inventory +2 -0
- data/ansible/roles/mu-base/tests/test.yml +5 -0
- data/ansible/roles/mu-base/vars/main.yml +1 -0
- data/ansible/roles/mu-compliance/README.md +33 -0
- data/ansible/roles/mu-compliance/defaults/main.yml +2 -0
- data/ansible/roles/mu-compliance/files/U_MS_Windows_Server_2016_V2R1_STIG_SCAP_1-2_Benchmark.xml +15674 -0
- data/ansible/roles/mu-compliance/files/U_MS_Windows_Server_2019_V2R1_STIG_SCAP_1-2_Benchmark.xml +17553 -0
- data/ansible/roles/mu-compliance/handlers/main.yml +2 -0
- data/ansible/roles/mu-compliance/meta/main.yml +53 -0
- data/ansible/roles/mu-compliance/tasks/main.yml +45 -0
- data/ansible/roles/mu-compliance/tests/inventory +2 -0
- data/ansible/roles/mu-compliance/tests/test.yml +5 -0
- data/ansible/roles/mu-compliance/vars/main.yml +4 -0
- data/ansible/roles/mu-elastic/README.md +51 -0
- data/ansible/roles/mu-elastic/defaults/main.yml +2 -0
- data/ansible/roles/mu-elastic/files/jvm.options +93 -0
- data/ansible/roles/mu-elastic/handlers/main.yml +10 -0
- data/ansible/roles/mu-elastic/meta/main.yml +52 -0
- data/ansible/roles/mu-elastic/tasks/main.yml +186 -0
- data/ansible/roles/mu-elastic/templates/elasticsearch.yml.j2 +110 -0
- data/ansible/roles/mu-elastic/templates/kibana.yml.j2 +131 -0
- data/ansible/roles/mu-elastic/templates/password_set.expect.j2 +19 -0
- data/ansible/roles/mu-elastic/tests/inventory +2 -0
- data/ansible/roles/mu-elastic/tests/test.yml +5 -0
- data/ansible/roles/mu-elastic/vars/main.yml +2 -0
- data/ansible/roles/mu-logstash/README.md +51 -0
- data/ansible/roles/mu-logstash/defaults/main.yml +2 -0
- data/ansible/roles/mu-logstash/files/02-beats-input.conf +5 -0
- data/ansible/roles/mu-logstash/files/10-rails-filter.conf +16 -0
- data/ansible/roles/mu-logstash/files/jvm.options +84 -0
- data/ansible/roles/mu-logstash/files/logstash.yml +304 -0
- data/ansible/roles/mu-logstash/handlers/main.yml +20 -0
- data/ansible/roles/mu-logstash/meta/main.yml +52 -0
- data/ansible/roles/mu-logstash/tasks/main.yml +254 -0
- data/ansible/roles/mu-logstash/templates/20-cloudtrail.conf.j2 +28 -0
- data/ansible/roles/mu-logstash/templates/30-elasticsearch-output.conf.j2 +19 -0
- data/ansible/roles/mu-logstash/templates/apm-server.yml.j2 +33 -0
- data/ansible/roles/mu-logstash/templates/heartbeat.yml.j2 +29 -0
- data/ansible/roles/mu-logstash/templates/nginx/apm.conf.j2 +25 -0
- data/ansible/roles/mu-logstash/templates/nginx/default.conf.j2 +56 -0
- data/ansible/roles/mu-logstash/templates/nginx/elastic.conf.j2 +27 -0
- data/ansible/roles/mu-logstash/tests/inventory +2 -0
- data/ansible/roles/mu-logstash/tests/test.yml +5 -0
- data/ansible/roles/mu-logstash/vars/main.yml +2 -0
- data/ansible/roles/mu-rdp/README.md +33 -0
- data/ansible/roles/mu-rdp/meta/main.yml +53 -0
- data/ansible/roles/mu-rdp/tasks/main.yml +9 -0
- data/ansible/roles/mu-rdp/tests/inventory +2 -0
- data/ansible/roles/mu-rdp/tests/test.yml +5 -0
- data/ansible/roles/mu-windows/tasks/main.yml +3 -0
- data/bin/mu-ansible-secret +1 -1
- data/bin/mu-aws-setup +4 -3
- data/bin/mu-azure-setup +5 -5
- data/bin/mu-configure +25 -17
- data/bin/mu-firewall-allow-clients +1 -0
- data/bin/mu-gcp-setup +3 -3
- data/bin/mu-load-config.rb +1 -0
- data/bin/mu-node-manage +66 -33
- data/bin/mu-self-update +2 -2
- data/bin/mu-upload-chef-artifacts +6 -1
- data/bin/mu-user-manage +1 -1
- data/cloud-mu.gemspec +25 -23
- data/cookbooks/firewall/CHANGELOG.md +417 -224
- data/cookbooks/firewall/LICENSE +202 -0
- data/cookbooks/firewall/README.md +153 -126
- data/cookbooks/firewall/TODO.md +6 -0
- data/cookbooks/firewall/attributes/firewalld.rb +7 -0
- data/cookbooks/firewall/attributes/iptables.rb +3 -3
- data/cookbooks/firewall/chefignore +115 -0
- data/cookbooks/firewall/libraries/helpers.rb +5 -0
- data/cookbooks/firewall/libraries/helpers_firewalld.rb +1 -1
- data/cookbooks/firewall/libraries/helpers_firewalld_dbus.rb +72 -0
- data/cookbooks/firewall/libraries/helpers_iptables.rb +3 -3
- data/cookbooks/firewall/libraries/helpers_nftables.rb +170 -0
- data/cookbooks/firewall/libraries/helpers_ufw.rb +7 -0
- data/cookbooks/firewall/libraries/helpers_windows.rb +8 -9
- data/cookbooks/firewall/libraries/provider_firewall_firewalld.rb +9 -9
- data/cookbooks/firewall/libraries/provider_firewall_iptables.rb +7 -7
- data/cookbooks/firewall/libraries/provider_firewall_iptables_ubuntu.rb +12 -8
- data/cookbooks/firewall/libraries/provider_firewall_iptables_ubuntu1404.rb +13 -9
- data/cookbooks/firewall/libraries/provider_firewall_rule.rb +1 -1
- data/cookbooks/firewall/libraries/provider_firewall_ufw.rb +5 -5
- data/cookbooks/firewall/libraries/provider_firewall_windows.rb +4 -4
- data/cookbooks/firewall/libraries/resource_firewall_rule.rb +3 -3
- data/cookbooks/firewall/metadata.json +40 -1
- data/cookbooks/firewall/metadata.rb +15 -0
- data/cookbooks/firewall/recipes/default.rb +7 -7
- data/cookbooks/firewall/recipes/disable_firewall.rb +1 -1
- data/cookbooks/firewall/recipes/firewalld.rb +87 -0
- data/cookbooks/firewall/renovate.json +18 -0
- data/cookbooks/firewall/resources/firewalld.rb +28 -0
- data/cookbooks/firewall/resources/firewalld_config.rb +39 -0
- data/cookbooks/firewall/resources/firewalld_helpers.rb +106 -0
- data/cookbooks/firewall/resources/firewalld_icmptype.rb +88 -0
- data/cookbooks/firewall/resources/firewalld_ipset.rb +104 -0
- data/cookbooks/firewall/resources/firewalld_policy.rb +115 -0
- data/cookbooks/firewall/resources/firewalld_service.rb +98 -0
- data/cookbooks/firewall/resources/firewalld_zone.rb +118 -0
- data/cookbooks/firewall/resources/nftables.rb +71 -0
- data/cookbooks/firewall/resources/nftables_rule.rb +113 -0
- data/cookbooks/mu-activedirectory/Berksfile +1 -1
- data/cookbooks/mu-activedirectory/metadata.rb +1 -1
- data/cookbooks/mu-firewall/metadata.rb +2 -2
- data/cookbooks/mu-master/Berksfile +4 -3
- data/cookbooks/mu-master/attributes/default.rb +5 -2
- data/cookbooks/mu-master/files/default/check_elastic.sh +761 -0
- data/cookbooks/mu-master/files/default/check_kibana.rb +45 -0
- data/cookbooks/mu-master/libraries/mu.rb +24 -0
- data/cookbooks/mu-master/metadata.rb +5 -5
- data/cookbooks/mu-master/recipes/default.rb +31 -20
- data/cookbooks/mu-master/recipes/firewall-holes.rb +5 -0
- data/cookbooks/mu-master/recipes/init.rb +58 -19
- data/cookbooks/mu-master/recipes/update_nagios_only.rb +251 -178
- data/cookbooks/mu-master/templates/default/nagios.conf.erb +5 -11
- data/cookbooks/mu-master/templates/default/web_app.conf.erb +3 -0
- data/cookbooks/mu-php54/Berksfile +1 -1
- data/cookbooks/mu-php54/metadata.rb +2 -2
- data/cookbooks/mu-tools/Berksfile +2 -3
- data/cookbooks/mu-tools/attributes/default.rb +3 -4
- data/cookbooks/mu-tools/files/amazon/etc/bashrc +90 -0
- data/cookbooks/mu-tools/files/amazon/etc/login.defs +292 -0
- data/cookbooks/mu-tools/files/amazon/etc/profile +77 -0
- data/cookbooks/mu-tools/files/amazon/etc/security/limits.conf +63 -0
- data/cookbooks/mu-tools/files/amazon/etc/sysconfig/init +19 -0
- data/cookbooks/mu-tools/files/amazon/etc/sysctl.conf +82 -0
- data/cookbooks/mu-tools/files/amazon-2023/etc/login.defs +294 -0
- data/cookbooks/mu-tools/files/default/logrotate.conf +35 -0
- data/cookbooks/mu-tools/files/default/nrpe_conf_d.pp +0 -0
- data/cookbooks/mu-tools/libraries/helper.rb +21 -9
- data/cookbooks/mu-tools/metadata.rb +4 -4
- data/cookbooks/mu-tools/recipes/apply_security.rb +3 -2
- data/cookbooks/mu-tools/recipes/aws_api.rb +23 -5
- data/cookbooks/mu-tools/recipes/base_repositories.rb +4 -1
- data/cookbooks/mu-tools/recipes/gcloud.rb +56 -56
- data/cookbooks/mu-tools/recipes/nagios.rb +1 -1
- data/cookbooks/mu-tools/recipes/nrpe.rb +20 -2
- data/cookbooks/mu-tools/recipes/rsyslog.rb +12 -1
- data/cookbooks/mu-tools/recipes/set_local_fw.rb +1 -1
- data/data_bags/nagios_services/apm_backend_connect.json +5 -0
- data/data_bags/nagios_services/apm_listen.json +5 -0
- data/data_bags/nagios_services/elastic_shards.json +5 -0
- data/data_bags/nagios_services/logstash.json +5 -0
- data/data_bags/nagios_services/rhel7_updates.json +8 -0
- data/extras/image-generators/AWS/centos7.yaml +1 -0
- data/extras/image-generators/AWS/rhel7.yaml +21 -0
- data/extras/image-generators/AWS/win2k12r2.yaml +1 -0
- data/extras/image-generators/AWS/win2k16.yaml +1 -0
- data/extras/image-generators/AWS/win2k19.yaml +1 -0
- data/extras/list-stock-amis +0 -0
- data/extras/ruby_rpm/muby.spec +8 -5
- data/extras/vault_tools/export_vaults.sh +1 -1
- data/extras/vault_tools/recreate_vaults.sh +0 -0
- data/extras/vault_tools/test_vaults.sh +0 -0
- data/install/deprecated-bash-library.sh +1 -1
- data/install/installer +4 -2
- data/modules/mommacat.ru +3 -1
- data/modules/mu/adoption.rb +1 -1
- data/modules/mu/cloud/dnszone.rb +2 -2
- data/modules/mu/cloud/machine_images.rb +26 -25
- data/modules/mu/cloud/resource_base.rb +213 -182
- data/modules/mu/cloud/server_pool.rb +1 -1
- data/modules/mu/cloud/ssh_sessions.rb +7 -5
- data/modules/mu/cloud/wrappers.rb +2 -2
- data/modules/mu/cloud.rb +1 -1
- data/modules/mu/config/bucket.rb +1 -1
- data/modules/mu/config/function.rb +6 -1
- data/modules/mu/config/loadbalancer.rb +24 -2
- data/modules/mu/config/ref.rb +12 -0
- data/modules/mu/config/role.rb +1 -1
- data/modules/mu/config/schema_helpers.rb +42 -9
- data/modules/mu/config/server.rb +43 -27
- data/modules/mu/config/tail.rb +19 -10
- data/modules/mu/config.rb +6 -5
- data/modules/mu/defaults/AWS.yaml +78 -114
- data/modules/mu/deploy.rb +9 -2
- data/modules/mu/groomer.rb +12 -4
- data/modules/mu/groomers/ansible.rb +104 -20
- data/modules/mu/groomers/chef.rb +15 -6
- data/modules/mu/master.rb +9 -4
- data/modules/mu/mommacat/daemon.rb +4 -2
- data/modules/mu/mommacat/naming.rb +1 -2
- data/modules/mu/mommacat/storage.rb +7 -2
- data/modules/mu/mommacat.rb +33 -6
- data/modules/mu/providers/aws/database.rb +161 -8
- data/modules/mu/providers/aws/dnszone.rb +11 -6
- data/modules/mu/providers/aws/endpoint.rb +81 -6
- data/modules/mu/providers/aws/firewall_rule.rb +254 -172
- data/modules/mu/providers/aws/function.rb +65 -3
- data/modules/mu/providers/aws/loadbalancer.rb +39 -28
- data/modules/mu/providers/aws/log.rb +2 -1
- data/modules/mu/providers/aws/role.rb +25 -7
- data/modules/mu/providers/aws/server.rb +36 -12
- data/modules/mu/providers/aws/server_pool.rb +237 -127
- data/modules/mu/providers/aws/storage_pool.rb +7 -1
- data/modules/mu/providers/aws/user.rb +1 -1
- data/modules/mu/providers/aws/userdata/linux.erb +6 -2
- data/modules/mu/providers/aws/userdata/windows.erb +7 -5
- data/modules/mu/providers/aws/vpc.rb +49 -25
- data/modules/mu/providers/aws.rb +13 -8
- data/modules/mu/providers/azure/container_cluster.rb +1 -1
- data/modules/mu/providers/azure/loadbalancer.rb +2 -2
- data/modules/mu/providers/azure/server.rb +5 -2
- data/modules/mu/providers/azure/userdata/linux.erb +1 -1
- data/modules/mu/providers/azure.rb +11 -8
- data/modules/mu/providers/cloudformation/dnszone.rb +1 -1
- data/modules/mu/providers/google/container_cluster.rb +15 -2
- data/modules/mu/providers/google/folder.rb +2 -1
- data/modules/mu/providers/google/function.rb +130 -4
- data/modules/mu/providers/google/habitat.rb +2 -1
- data/modules/mu/providers/google/loadbalancer.rb +407 -160
- data/modules/mu/providers/google/role.rb +16 -3
- data/modules/mu/providers/google/server.rb +5 -1
- data/modules/mu/providers/google/user.rb +25 -18
- data/modules/mu/providers/google/userdata/linux.erb +1 -1
- data/modules/mu/providers/google/vpc.rb +53 -7
- data/modules/mu/providers/google.rb +39 -39
- data/modules/mu.rb +8 -8
- data/modules/tests/elk.yaml +46 -0
- data/test/mu-master-test/controls/all_in_one.rb +1 -1
- metadata +207 -112
- data/cookbooks/firewall/CONTRIBUTING.md +0 -2
- data/cookbooks/firewall/MAINTAINERS.md +0 -19
- data/cookbooks/firewall/libraries/matchers.rb +0 -30
- data/extras/image-generators/AWS/rhel71.yaml +0 -17
@@ -0,0 +1,761 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
################################################################################
|
3
|
+
# Script: check_es_system.sh #
|
4
|
+
# Author: Claudio Kuenzler www.claudiokuenzler.com #
|
5
|
+
# Purpose: Monitor ElasticSearch Store (Disk) Usage #
|
6
|
+
# Official doc: www.claudiokuenzler.com/monitoring-plugins/check_es_system.php #
|
7
|
+
# License: GPLv2 #
|
8
|
+
# GNU General Public Licence (GPL) http://www.gnu.org/ #
|
9
|
+
# This program is free software; you can redistribute it and/or #
|
10
|
+
# modify it under the terms of the GNU General Public License #
|
11
|
+
# as published by the Free Software Foundation; either version 2 #
|
12
|
+
# of the License, or (at your option) any later version. #
|
13
|
+
# #
|
14
|
+
# This program is distributed in the hope that it will be useful, #
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
|
17
|
+
# GNU General Public License for more details. #
|
18
|
+
# #
|
19
|
+
# You should have received a copy of the GNU General Public License #
|
20
|
+
# along with this program; if not, see <https://www.gnu.org/licenses/>. #
|
21
|
+
# #
|
22
|
+
# Copyright 2016,2018-2021 Claudio Kuenzler #
|
23
|
+
# Copyright 2018 Tomas Barton #
|
24
|
+
# Copyright 2020 NotAProfessionalDeveloper #
|
25
|
+
# Copyright 2020 tatref #
|
26
|
+
# Copyright 2020 fbomj #
|
27
|
+
# Copyright 2021 chicco27 #
|
28
|
+
# #
|
29
|
+
# History: #
|
30
|
+
# 20160429: Started programming plugin #
|
31
|
+
# 20160601: Continued programming. Working now as it should =) #
|
32
|
+
# 20160906: Added memory usage check, check types option (-t) #
|
33
|
+
# 20160906: Renamed plugin from check_es_store to check_es_system #
|
34
|
+
# 20160907: Change internal referenced variable name for available size #
|
35
|
+
# 20160907: Output now contains both used and available sizes #
|
36
|
+
# 20161017: Add missing -t in usage output #
|
37
|
+
# 20180105: Fix if statement for authentication (@deric) #
|
38
|
+
# 20180105: Fix authentication when wrong credentials were used #
|
39
|
+
# 20180313: Configure max_time for Elastic to respond (@deric) #
|
40
|
+
# 20190219: Fix alternative subject name in ssl (issue 4), direct to auth #
|
41
|
+
# 20190220: Added status check type #
|
42
|
+
# 20190403: Check for mandatory parameter checktype, adjust help #
|
43
|
+
# 20190403: Catch connection refused error #
|
44
|
+
# 20190426: Catch unauthorized (403) error #
|
45
|
+
# 20190626: Added readonly check type #
|
46
|
+
# 20190905: Catch empty cluster health status (issue #13) #
|
47
|
+
# 20190909: Added jthreads and tps (thread pool stats) check types #
|
48
|
+
# 20190909: Handle correct curl return codes #
|
49
|
+
# 20190924: Missing 'than' in tps output #
|
50
|
+
# 20191104: Added master check type #
|
51
|
+
# 20200401: Fix/handle 503 errors with curl exit code 0 (issue #20) #
|
52
|
+
# 20200409: Fix 503 error lookup (issue #22) #
|
53
|
+
# 20200430: Support both jshon and jq as json parsers (issue #18) #
|
54
|
+
# 20200609: Fix readonly check on ALL indices (issue #26) #
|
55
|
+
# 20200723: Add cluster name to status output #
|
56
|
+
# 20200824: Fix typo in readonly check output #
|
57
|
+
# 20200916: Internal renaming of -i parameter, use for tps check (issue #28) #
|
58
|
+
# 20201110: Fix thresholds in jthreads check #
|
59
|
+
# 20201125: Show names of read_only indexes with jq, set jq as default parser #
|
60
|
+
# 20210616: Fix authentication bug (#38) and non ES URL responding (#39) #
|
61
|
+
# 20211202: Added local node (-L), SSL settings (-K, -E), cpu check #
|
62
|
+
################################################################################
|
63
|
+
#Variables and defaults
|
64
|
+
# Exit codes handed back to the monitoring system.
STATE_OK=0        # status is OK
STATE_WARNING=1   # status is Warning
STATE_CRITICAL=2  # status is Critical
STATE_UNKNOWN=3   # status is Unknown

# Append the usual binary locations so curl/expr/jq/jshon can be found.
PATH="${PATH}:/usr/local/bin:/usr/bin:/bin"
export PATH

# Plugin version shown in the help output.
version=1.12.0

# Defaults for the command line options (-P, -S, -o, -i, -m).
port=9200
httpscheme=http
unit=G
include='_all'
max_time=30

# Supported JSON parsers, in order of preference.
parsers=(jq jshon)
|
76
|
+
################################################################################
|
77
|
+
#Functions
|
78
|
+
# Print version, usage synopsis and option reference to stdout, then
# terminate the script with $STATE_UNKNOWN.
# Globals read: version, parsers, STATE_UNKNOWN.
# The synopsis lists every option accepted by the getopts loop, including
# -L and -i.
help () {
echo -e "$0 $version (c) 2016-$(date +%Y) Claudio Kuenzler and contributors (open source rulez!)

Usage: ./check_es_system.sh -H ESNode [-L] [-P port] [-S] [-u user -p pass|-E cert -K key] -t checktype [-o unit] [-i string] [-w int] [-c int] [-m int] [-e string] [-X parser]

Options:

   *  -H Hostname or ip address of ElasticSearch Node
      -L Run check on local node instead of cluster
      -P Port (defaults to 9200)
      -S Use https
      -E Certs for Authentication
      -K Key for Authentication
      -u Username if authentication is required
      -p Password if authentication is required
   *  -t Type of check (disk, mem, cpu, status, readonly, jthreads, tps, master)
      -o Disk space unit (K|M|G) (defaults to G)
      -i Space separated list of included object names to be checked (index names on readonly check, pool names on tps check)
      -w Warning threshold (see usage notes below)
      -c Critical threshold (see usage notes below)
      -m Maximum time in seconds to wait for response (default: 30)
      -e Expect master node (used with 'master' check)
      -X The json parser to be used jshon or jq (default: jq)
      -h Help!

*mandatory options

Threshold format for 'disk', 'mem' and 'cpu': int (for percent), defaults to 80 (warn) and 95 (crit)
Threshold format for 'tps': int,int,int (active, queued, rejected), no defaults
Threshold format for all other check types: int, no defaults

Requirements: curl, expr and one of $(IFS=,; echo "${parsers[*]}")"
exit $STATE_UNKNOWN;
}
|
112
|
+
|
113
|
+
# Verify that a complete username/password pair was supplied.
# Reads the globals user and pass. When either is missing, prints an
# UNKNOWN message and exits with $STATE_UNKNOWN; otherwise does nothing.
authlogic () {
  # Encode which of the two values are present: "", "U", "P" or "UP".
  case "${user:+U}${pass:+P}" in
    "")
      echo "ES SYSTEM UNKNOWN - Authentication required but missing username and password"
      exit $STATE_UNKNOWN
      ;;
    U)
      echo "ES SYSTEM UNKNOWN - Authentication required but missing password"
      exit $STATE_UNKNOWN
      ;;
    P)
      echo "ES SYSTEM UNKNOWN - Missing username"
      exit $STATE_UNKNOWN
      ;;
  esac
}
|
119
|
+
|
120
|
+
# Verify that a complete certificate/key pair was supplied.
# Reads the globals cert and key. When either is missing, prints an
# UNKNOWN message and exits with $STATE_UNKNOWN; otherwise does nothing.
authlogic_cert () {
  # Encode which of the two values are present: "", "C", "K" or "CK".
  case "${cert:+C}${key:+K}" in
    "")
      echo "ES SYSTEM UNKNOWN - Authentication required but missing cert and key"
      exit $STATE_UNKNOWN
      ;;
    C)
      echo "ES SYSTEM UNKNOWN - Authentication required but missing key"
      exit $STATE_UNKNOWN
      ;;
    K)
      echo "ES SYSTEM UNKNOWN - Missing cert"
      exit $STATE_UNKNOWN
      ;;
  esac
}
|
126
|
+
|
127
|
+
# Convert the byte counts reported by Elasticsearch into the requested unit
# and derive the absolute threshold sizes and the used percentage.
# Globals read:    unit, size, available, warning, critical, STATE_UNKNOWN
# Globals written: availsize, outputsize (only for unit K, M or G),
#                  warningsize, criticalsize (only when the matching
#                  threshold is set), usedpercent
# Exits with $STATE_UNKNOWN when no unit is set, when size/available are not
# non-negative integers, when available is zero, or when a threshold is not
# an integer.
unitcalc() {
  if [[ -z ${unit} ]]; then
    echo "UNKNOWN - Shouldnt exit here. No units given"
    exit $STATE_UNKNOWN
  fi

  # Without this guard an empty or zero capacity ends in a syntax error or
  # a division by zero and the plugin would go on to print garbage.
  if ! [[ ${size} =~ ^[0-9]+$ && ${available} =~ ^[0-9]+$ ]] || (( 10#${available} == 0 )); then
    echo "ES SYSTEM UNKNOWN - Unable to determine used (${size}) and available (${available}) size from Elasticsearch response"
    exit $STATE_UNKNOWN
  fi

  local threshold
  for threshold in "${warning}" "${critical}"; do
    if [[ -n ${threshold} && ! ${threshold} =~ ^[0-9]+$ ]]; then
      echo "ES SYSTEM UNKNOWN - Thresholds for this check must be integers (percent)"
      exit $STATE_UNKNOWN
    fi
  done

  # Force base 10 so values with leading zeros are not read as octal.
  local used=$(( 10#${size} )) total=$(( 10#${available} )) divisor=0

  case ${unit} in
    K) divisor=1024 ;;
    M) divisor=$(( 1024 * 1024 )) ;;
    G) divisor=$(( 1024 * 1024 * 1024 )) ;;
  esac
  # Any other unit leaves availsize/outputsize untouched, as before.
  if (( divisor > 0 )); then
    availsize=$(( total / divisor ))
    outputsize=$(( used / divisor ))
  fi

  if [[ -n ${warning} ]]; then
    warningsize=$(( 10#${warning} * total / 100 ))
  fi
  if [[ -n ${critical} ]]; then
    criticalsize=$(( 10#${critical} * total / 100 ))
  fi
  usedpercent=$(( used * 100 / total ))
}
|
145
|
+
|
146
|
+
# Require that warning and critical thresholds are given together.
# Reads the globals warning and critical. Prints an UNKNOWN message and exits
# with $STATE_UNKNOWN when exactly one of them is set; returns silently when
# both or neither are set.
# [[ ]] is used because an unquoted empty variable inside [ -n ... ] always
# evaluates to true.
thresholdlogic () {
  if [[ -n ${warning} && -z ${critical} ]] || [[ -z ${warning} && -n ${critical} ]]; then
    echo "UNKNOWN - Define both warning and critical thresholds"
    exit $STATE_UNKNOWN
  fi
}
|
150
|
+
|
151
|
+
# Apply the default percentage thresholds used by the percent-based checks:
# warning=80 and critical=95 when the respective global is unset or empty.
# Values that are already set are left untouched.
default_percentage_thresholds() {
  if [[ -z ${warning} ]]; then warning=80; fi
  if [[ -z ${critical} ]]; then critical=95; fi
}
|
155
|
+
|
156
|
+
# Extract a value from the JSON document on stdin using the parser named in
# the global $parser (jq or jshon).
# Options:
#   -r      raw output (jq -r / jshon -u)
#   -q      quiet (jshon -Q; ignored for jq)
#   -c      continue on errors (jshon -C; ignored for jq)
#   -a      iterate across the top-level elements (jq ".[]" / jshon -a)
#   -x KEY  path element; repeat to descend (jq ".KEY" / jshon -e KEY)
# Outputs: whatever the parser prints; returns the parser's exit status.
# All option state is local, so flags and -x arguments from one call cannot
# leak into the next call made in the same shell.
json_parse() {
  json_parse_usage() { echo "$0: [-r] [-q] [-c] [-a] -x arg1 -x arg2 ..." 1>&2; exit "${STATE_UNKNOWN:-3}"; }

  local OPTIND=1 opt arg filter
  local raw= quiet= cont= across=
  local -a args=() cmd=()

  while getopts ":rqcax:" opt
  do
    case "${opt}" in
      r) raw=1;;
      q) quiet=1;; # only required for jshon
      c) cont=1;; # only required for jshon
      a) across=1;;
      x) args+=("$OPTARG");;
      *) json_parse_usage;;
    esac
  done

  case ${parser} in
    jshon)
      for arg in "${args[@]}"; do
        cmd+=(-e "${arg}")
      done
      jshon ${quiet:+-Q} ${cont:+-C} ${across:+-a} "${cmd[@]}" ${raw:+-u}
      ;;
    jq)
      # Build one filter string such as ".[].a.b" from the path elements.
      filter=${across:+.[]}
      for arg in "${args[@]}"; do
        filter+=".${arg}"
      done
      # An empty filter is dropped rather than passed as "" to jq.
      jq ${raw:+-r} ${filter:+"${filter}"}
      ;;
  esac
}
|
189
|
+
|
190
|
+
################################################################################
|
191
|
+
# Check for people who need help - aren't we all nice ;-)
|
192
|
+
# Show the help when called with --help or without any argument.
if [[ "${1}" == "--help" || "${#}" -eq 0 ]]; then help; exit $STATE_UNKNOWN; fi
################################################################################
# Get user-given variables
# -h is part of the option string so that the documented "-h Help!" switch
# shows the help without a getopts "illegal option" message.
while getopts "H:LP:SE:K:u:p:d:o:i:w:c:t:m:e:X:h" Input
do
  case ${Input} in
    H) host=${OPTARG};;
    L) local=true;;
    P) port=${OPTARG};;
    S) httpscheme=https;;
    E) cert=${OPTARG};;
    K) key=${OPTARG};;
    u) user=${OPTARG};;
    p) pass=${OPTARG};;
    d) oldavailable=${OPTARG};; # deprecated, rejected after option parsing
    o) unit=${OPTARG};;
    i) include=${OPTARG};;
    w) warning=${OPTARG};;
    c) critical=${OPTARG};;
    t) checktype=${OPTARG};;
    m) max_time=${OPTARG};;
    e) expect_master=${OPTARG};;
    X) parser=${OPTARG:=jq};;
    h) help;;
    *) help;;
  esac
done
|
218
|
+
|
219
|
+
# Check for mandatory opts
|
220
|
+
# Host (-H) and check type (-t) are both required.
if [[ -z ${host} || -z ${checktype} ]]; then
  help
  exit $STATE_UNKNOWN
fi

# Check for deprecated opts
if [[ -n ${oldavailable} ]]; then
  echo "ES SYSTEM UNKNOWN: -d parameter is now invalid. Capacities are now discovered directly from Elasticsearch."
  exit ${STATE_UNKNOWN}
fi

# Local checks are only useful for certain check types
if [[ -n ${local} ]]; then
  case ${checktype} in
    cpu|mem|disk|jthreads)
      ;;
    *)
      echo "ES SYSTEM UNKNOWN: Node local checks (-L) only work with the following check types: cpu, mem, disk, jthreads"
      exit ${STATE_UNKNOWN}
      ;;
  esac
fi
|
234
|
+
################################################################################
|
235
|
+
# Check requirements
|
236
|
+
# Verify the required commands exist. ${parser} is only set here when the
# user passed -X; otherwise the expansion contributes no word to the list.
# "command -v" is a shell builtin, so the check does not depend on the
# external "which" tool being installed.
for cmd in curl expr ${parser:+"${parser}"}; do
  if ! command -v "${cmd}" >/dev/null 2>&1; then
    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
    exit ${STATE_UNKNOWN}
  fi
done
# Find parser
# No -X given: use the first entry of ${parsers[@]} that is installed.
if [[ -z ${parser} ]]; then
  for cmd in "${parsers[@]}"; do
    if command -v "${cmd}" >/dev/null 2>&1; then
      parser=${cmd}
      break
    fi
  done
  if [[ -z ${parser} ]]; then
    echo "UNKNOWN: No JSON parser found. Either one of the following is required: $(IFS=,; echo "${parsers[*]}")"
    exit ${STATE_UNKNOWN}
  fi
fi
|
255
|
+
|
256
|
+
################################################################################
|
257
|
+
# Retrieve information from Elasticsearch cluster
|
258
|
+
# Exit CRITICAL when curl could not connect (exit status 7), timed out
# (exit status 28) or the response mentions "503 Service Unavailable".
# $1 - curl exit status, $2 - response body.
_es_fail_transport() {
  local rc=$1 body=$2
  if [[ ${rc} -eq 7 ]]; then
    echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
    exit $STATE_CRITICAL
  elif [[ ${rc} -eq 28 ]]; then
    echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
    exit $STATE_CRITICAL
  elif [[ "${body}" =~ "503 Service Unavailable" ]]; then
    echo "ES SYSTEM CRITICAL - Elasticsearch not available: ${host}:${port} return error 503"
    exit $STATE_CRITICAL
  fi
}

# Exit CRITICAL when the response body ($1) contains "Unknown resource".
_es_fail_unknown_resource() {
  if [[ "$1" =~ "Unknown resource" ]]; then
    echo "ES SYSTEM CRITICAL - Elasticsearch not available: $1"
    exit $STATE_CRITICAL
  fi
}

# Exit CRITICAL when the response body ($1) reports a failed authentication
# or an unauthorized user (case-insensitive match).
_es_fail_auth() {
  if grep -qi "unable to authenticate" <<<"$1"; then
    echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
    exit $STATE_CRITICAL
  elif grep -qi "unauthorized" <<<"$1"; then
    echo "ES SYSTEM CRITICAL - User $user is unauthorized"
    exit $STATE_CRITICAL
  fi
}

# Exit CRITICAL when the response body ($1) does not contain "cluster_name".
_es_fail_not_elasticsearch() {
  if ! [[ "$1" =~ "cluster_name" ]]; then
    echo "ES SYSTEM CRITICAL - Elasticsearch not available at this address ${host}:${port}"
    exit $STATE_CRITICAL
  fi
}

# For the 'status' check type, additionally fetch the cluster health into
# the global eshealth. Arguments are extra curl options (credentials).
_es_fetch_health() {
  if [[ ${checktype} = status ]]; then
    eshealth=$(curl -k -s --max-time "${max_time}" "$@" "${eshealthurl}")
    if [[ -z ${eshealth} ]]; then
      echo "ES SYSTEM CRITICAL - unable to get cluster health information"
      exit $STATE_CRITICAL
    fi
  fi
}

# Retrieve the stats document from Elasticsearch into the global esstatus
# (and the cluster health into eshealth for the 'status' check).
# Globals read:    local, httpscheme, host, port, user, pass, cert, key,
#                  max_time, checktype
# Globals written: esurl, eshealthurl, esstatus, esstatusrc, eshealth
# Queries /_nodes/_local/stats when -L was given, /_cluster/stats otherwise.
# Exits CRITICAL/UNKNOWN with a message on any detected failure.
# Credentials, file names and URLs are passed to curl as quoted words so
# values containing spaces or glob characters reach curl unchanged.
getstatus() {
  local -a auth_args

  if [[ ${local} ]]; then
    esurl="${httpscheme}://${host}:${port}/_nodes/_local/stats"
  else
    esurl="${httpscheme}://${host}:${port}/_cluster/stats"
  fi
  eshealthurl="${httpscheme}://${host}:${port}/_cluster/health"

  if [[ -z ${user} ]] && [[ -z ${cert} ]]; then
    # Without authentication
    esstatus=$(curl -k -s --max-time "${max_time}" "${esurl}")
    esstatusrc=$?
    _es_fail_transport "${esstatusrc}" "${esstatus}"
    _es_fail_unknown_resource "${esstatus}"
    _es_fail_not_elasticsearch "${esstatus}"
    _es_fetch_health
  fi

  # NOTE(review): the "authentication" match below is evaluated against
  # whatever esstatus currently holds, including a successful response from
  # a previous branch - confirm this is intended.
  if [[ -n ${user} ]] || grep -qi authentication <<<"${esstatus}"; then
    # Authentication required
    authlogic
    auth_args=(--basic -u "${user}:${pass}")
    esstatus=$(curl -k -s --max-time "${max_time}" "${auth_args[@]}" "${esurl}")
    esstatusrc=$?
    _es_fail_transport "${esstatusrc}" "${esstatus}"
    _es_fail_unknown_resource "${esstatus}"
    _es_fail_auth "${esstatus}"
    _es_fail_not_elasticsearch "${esstatus}"
    _es_fetch_health "${auth_args[@]}"
  fi

  if [[ -n ${cert} ]] || grep -qi authentication <<<"${esstatus}"; then
    # Authentication with certificate
    authlogic_cert
    auth_args=(-E "${cert}" --key "${key}")
    esstatus=$(curl -k -s --max-time "${max_time}" "${auth_args[@]}" "${esurl}")
    esstatusrc=$?
    _es_fail_transport "${esstatusrc}" "${esstatus}"
    _es_fail_auth "${esstatus}"
    _es_fetch_health "${auth_args[@]}"
  fi

  # Catch empty reply from server (typically happens when ssl port used with http connection)
  if [[ -z ${esstatus} ]]; then
    echo "ES SYSTEM UNKNOWN - Empty reply from server (verify ssl settings)"
    exit $STATE_UNKNOWN
  fi
}
|
370
|
+
################################################################################
|
371
|
+
# Do the checks
|
372
|
+
case $checktype in
|
373
|
+
disk) # Check disk usage
|
374
|
+
getstatus
|
375
|
+
default_percentage_thresholds
|
376
|
+
if [[ ${local} ]]; then
|
377
|
+
size=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x indices -x store -x size_in_bytes)
|
378
|
+
available=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x fs -x total -x total_in_bytes)
|
379
|
+
else
|
380
|
+
size=$(echo $esstatus | json_parse -x indices -x store -x size_in_bytes)
|
381
|
+
available=$(echo $esstatus | json_parse -x nodes -x fs -x total_in_bytes)
|
382
|
+
fi
|
383
|
+
|
384
|
+
unitcalc
|
385
|
+
if [ -n "${warning}" ] || [ -n "${critical}" ]; then
|
386
|
+
# Handle thresholds
|
387
|
+
thresholdlogic
|
388
|
+
if [ $size -ge $criticalsize ]; then
|
389
|
+
echo "ES SYSTEM CRITICAL - Disk usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)|es_disk=${size}B;${warningsize};${criticalsize};0;${available}"
|
390
|
+
exit $STATE_CRITICAL
|
391
|
+
elif [ $size -ge $warningsize ]; then
|
392
|
+
echo "ES SYSTEM WARNING - Disk usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)|es_disk=${size}B;${warningsize};${criticalsize};0;${available}"
|
393
|
+
exit $STATE_WARNING
|
394
|
+
else
|
395
|
+
echo "ES SYSTEM OK - Disk usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)|es_disk=${size}B;${warningsize};${criticalsize};0;${available}"
|
396
|
+
exit $STATE_OK
|
397
|
+
fi
|
398
|
+
else
|
399
|
+
# No thresholds
|
400
|
+
echo "ES SYSTEM OK - Disk usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)|es_disk=${size}B;;;0;${available}"
|
401
|
+
exit $STATE_OK
|
402
|
+
fi
|
403
|
+
;;
|
404
|
+
|
405
|
+
mem) # Check memory usage
  # Compares the JVM heap in use (heap_used_in_bytes) with the heap maximum
  # (heap_max_in_bytes) from the stats stored in $esstatus by getstatus.
  getstatus
  default_percentage_thresholds
  if [[ ${local} ]]; then
    size=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x jvm -x mem -x heap_used_in_bytes)
    available=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x jvm -x mem -x heap_max_in_bytes)
  else
    size=$(echo $esstatus | json_parse -x nodes -x jvm -x mem -x heap_used_in_bytes)
    available=$(echo $esstatus | json_parse -x nodes -x jvm -x mem -x heap_max_in_bytes)
  fi

  # NOTE(review): unitcalc and thresholdlogic are defined elsewhere in this
  # script; presumably they provide usedpercent/outputsize/availsize/unit and
  # warningsize/criticalsize -- confirm there.
  unitcalc
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle thresholds
    thresholdlogic
    # Build the text once so that CRITICAL, WARNING and OK use exactly the
    # same wording (the CRITICAL line used to have a misplaced parenthesis).
    mem_summary="Memory usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)"
    mem_perf="es_memory=${size}B;${warningsize};${criticalsize};0;${available}"
    if [ "$size" -ge "$criticalsize" ]; then
      echo "ES SYSTEM CRITICAL - ${mem_summary}|${mem_perf}"
      exit $STATE_CRITICAL
    elif [ "$size" -ge "$warningsize" ]; then
      echo "ES SYSTEM WARNING - ${mem_summary}|${mem_perf}"
      exit $STATE_WARNING
    else
      echo "ES SYSTEM OK - ${mem_summary}|${mem_perf}"
      exit $STATE_OK
    fi
  else
    # No thresholds: warn/crit perfdata fields stay empty
    echo "ES SYSTEM OK - Memory usage is at ${usedpercent}% ($outputsize $unit from $availsize $unit)|es_memory=${size}B;;;0;${available}"
    exit $STATE_OK
  fi
  ;;
|
436
|
+
|
437
|
+
cpu) # Check CPU usage
  getstatus
  default_percentage_thresholds

  # process.cpu.percent sits at a different JSON path when ${local} is set.
  if [[ -z ${local} ]]; then
    value=$(echo $esstatus | json_parse -x nodes -x process -x cpu -x percent)
  else
    value=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x process -x cpu -x percent)
  fi

  # Start from OK; only escalate when thresholds were supplied.
  cpu_label="OK"
  cpu_rc=$STATE_OK
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle thresholds: critical is tested first, then warning.
    thresholdlogic
    if [ $value -ge $critical ]; then
      cpu_label="CRITICAL"
      cpu_rc=$STATE_CRITICAL
    elif [ $value -ge $warning ]; then
      cpu_label="WARNING"
      cpu_rc=$STATE_WARNING
    fi
  fi

  # The same line is printed for every outcome; only the severity differs.
  echo "ES SYSTEM ${cpu_label} - CPU usage is at ${value}% |es_cpu=${value}%;${warning};${critical};0;100"
  exit $cpu_rc
  ;;
|
465
|
+
|
466
|
+
status) # Check Elasticsearch status
  # Maps the cluster status read from $esstatus to a plugin state:
  # green -> OK, yellow -> WARNING, red -> CRITICAL, anything else -> UNKNOWN.
  getstatus
  status=$(echo $esstatus | json_parse -r -x status)
  clustername=$(echo $esstatus | json_parse -r -x cluster_name)
  shards=$(echo $esstatus | json_parse -r -x indices -x shards -x total)
  docs=$(echo $esstatus | json_parse -r -x indices -x docs -x count)
  nodest=$(echo $esstatus | json_parse -r -x nodes -x count -x total)
  nodesd=$(echo $esstatus | json_parse -r -x nodes -x count -x data)
  # Shard movement counters are read from the separate $eshealth document.
  relocating=$(echo $eshealth | json_parse -r -x relocating_shards)
  init=$(echo $eshealth | json_parse -r -x initializing_shards)
  unass=$(echo $eshealth | json_parse -r -x unassigned_shards)

  # Perfdata is identical for every state, so build it once.
  status_perf="total_nodes=${nodest};;;; data_nodes=${nodesd};;;; total_shards=${shards};;;; relocating_shards=${relocating};;;; initializing_shards=${init};;;; unassigned_shards=${unass};;;; docs=${docs};;;;"
  # Yellow and red add the shard movement counters to the summary.
  status_detail="${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${relocating} relocating shards, ${init} initializing shards, ${unass} unassigned shards, ${docs} docs"

  case "$status" in
    green)
      echo "ES SYSTEM OK - Elasticsearch Cluster \"$clustername\" is green (${nodest} nodes, ${nodesd} data nodes, ${shards} shards, ${docs} docs)|${status_perf}"
      exit $STATE_OK
      ;;
    yellow)
      echo "ES SYSTEM WARNING - Elasticsearch Cluster \"$clustername\" is yellow (${status_detail})|${status_perf}"
      exit $STATE_WARNING
      ;;
    red)
      echo "ES SYSTEM CRITICAL - Elasticsearch Cluster \"$clustername\" is red (${status_detail})|${status_perf}"
      exit $STATE_CRITICAL
      ;;
    *)
      # Without this branch an empty or unexpected status printed nothing
      # and the script ended with exit code 0, which looks like OK.
      echo "ES SYSTEM UNKNOWN - Elasticsearch Cluster \"$clustername\" reported unexpected status \"$status\""
      exit $STATE_UNKNOWN
      ;;
  esac
  ;;
|
488
|
+
|
489
|
+
readonly) # Check Readonly status on given indexes
  # For every index listed in $include, fetch /<index>/_settings and count
  # index.blocks.read_only and index.blocks.read_only_allow_delete flags set
  # to true. Any hit makes the check CRITICAL.
  getstatus
  icount=0
  # $include is split on whitespace on purpose: one iteration per index.
  for index in $include; do
    if [[ -z $user ]]; then
      # Without authentication
      settings=$(curl -k -s --max-time ${max_time} "${httpscheme}://${host}:${port}/${index}/_settings")
      # Save curl's exit code immediately: $? is overwritten by every later
      # command, including the [[ ]] tests below (the timeout branch used to
      # test the result of the previous [[ ]] and could never match).
      settingsrc=$?
      if [[ $settingsrc -eq 7 ]]; then
        echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
        exit $STATE_CRITICAL
      elif [[ $settingsrc -eq 28 ]]; then
        echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
        exit $STATE_CRITICAL
      fi
      rocount=$(echo $settings | json_parse -r -q -c -a -x settings -x index -x blocks -x read_only | grep -c true)
      roadcount=$(echo $settings | json_parse -r -q -c -a -x settings -x index -x blocks -x read_only_allow_delete | grep -c true)
      if [[ $rocount -gt 0 ]]; then
        output[${icount}]=" $index is read-only -"
        roerror=true
      fi
      if [[ $roadcount -gt 0 ]]; then
        output[${icount}]+=" $index is read-only (allow delete) -"
        roerror=true
      fi
    fi

    if [[ -n $user ]] || [[ -n $(echo $esstatus | grep -i authentication) ]]; then
      # Authentication required
      authlogic
      settings=$(curl -k -s --max-time ${max_time} --basic -u "${user}:${pass}" "${httpscheme}://${host}:${port}/${index}/_settings")
      settingsrc=$?
      if [[ $settingsrc -eq 7 ]]; then
        echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
        exit $STATE_CRITICAL
      elif [[ $settingsrc -eq 28 ]]; then
        echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
        exit $STATE_CRITICAL
      elif [[ -n $(echo $esstatus | grep -i "unable to authenticate") ]]; then
        echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
        exit $STATE_CRITICAL
      elif [[ -n $(echo $esstatus | grep -i "unauthorized") ]]; then
        echo "ES SYSTEM CRITICAL - User $user is unauthorized"
        exit $STATE_CRITICAL
      fi
      rocount=$(echo $settings | json_parse -r -q -c -a -x settings -x index -x blocks -x read_only | grep -c true)
      roadcount=$(echo $settings | json_parse -r -q -c -a -x settings -x index -x blocks -x read_only_allow_delete | grep -c true)
      if [[ $rocount -gt 0 ]]; then
        if [[ "$index" = "_all" ]]; then
          # Index names can only be listed when jq is the JSON parser.
          if [[ $parser = "jq" ]]; then
            # Join the names on one line, as done for roadindexes below, so
            # the plugin output stays a single status line.
            roindexes=$(echo $settings | jq -r '.[].settings.index |select(.blocks.read_only == "true").provided_name' | tr '\n' ' ')
          fi
          output[${icount}]=" $rocount index(es) found read-only $roindexes -"
        else
          output[${icount}]=" $index is read-only -"
        fi
        roerror=true
      fi
      if [[ $roadcount -gt 0 ]]; then
        if [[ "$index" = "_all" ]]; then
          if [[ $parser = "jq" ]]; then
            roadindexes=$(echo $settings | jq -r '.[].settings.index |select(.blocks.read_only_allow_delete == "true").provided_name' | tr '\n' ' ')
          fi
          output[${icount}]+=" $roadcount index(es) found read-only (allow delete) $roadindexes"
        else
          output[${icount}]+=" $index is read-only (allow delete) -"
        fi
        roerror=true
      fi
    fi
    icount=$((icount + 1))
  done

  if [[ $roerror ]]; then
    echo "ES SYSTEM CRITICAL - ${output[*]}"
    exit $STATE_CRITICAL
  else
    echo "ES SYSTEM OK - Elasticsearch Indexes ($include) are writeable"
    exit $STATE_OK
  fi
  ;;
|
567
|
+
|
568
|
+
jthreads) # Check JVM threads
  getstatus

  # The thread count is read from a different JSON path when ${local} is set.
  if [[ -z ${local} ]]; then
    threads=$(echo $esstatus | json_parse -r -x nodes -x jvm -x "threads")
  else
    threads=$(echo $esstatus | json_parse -x 'nodes|' -x '[]' -x jvm -x threads -x count)
  fi

  # OK unless thresholds are given and one of them is reached.
  jt_state="OK"
  jt_exit=$STATE_OK
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle thresholds: critical is tested first, then warning.
    thresholdlogic
    if [[ $threads -ge $critical ]]; then
      jt_state="CRITICAL"
      jt_exit=$STATE_CRITICAL
    elif [[ $threads -ge $warning ]]; then
      jt_state="WARNING"
      jt_exit=$STATE_WARNING
    fi
  fi

  # Message and perfdata have the same shape for every severity.
  echo "ES SYSTEM ${jt_state} - Number of JVM threads is ${threads}|es_jvm_threads=${threads};${warning};${critical};;"
  exit $jt_exit
  ;;
|
595
|
+
|
596
|
+
tps) # Check Thread Pool Statistics
  # Reads /_cat/thread_pool and checks, per pool, the values in columns 3-5
  # (reported as active, queue, rejected) against thresholds given as n,n,n.
  getstatus
  if [[ -z $user ]]; then
    # Without authentication
    threadpools=$(curl -k -s --max-time ${max_time} "${httpscheme}://${host}:${port}/_cat/thread_pool")
    threadpoolrc=$?
    if [[ $threadpoolrc -eq 7 ]]; then
      echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
      exit $STATE_CRITICAL
    elif [[ $threadpoolrc -eq 28 ]]; then
      echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
      exit $STATE_CRITICAL
    fi
  fi

  if [[ -n $user ]] || [[ -n $(echo $esstatus | grep -i authentication) ]]; then
    # Authentication required
    authlogic
    threadpools=$(curl -k -s --max-time ${max_time} --basic -u "${user}:${pass}" "${httpscheme}://${host}:${port}/_cat/thread_pool")
    threadpoolrc=$?
    if [[ $threadpoolrc -eq 7 ]]; then
      echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
      exit $STATE_CRITICAL
    elif [[ $threadpoolrc -eq 28 ]]; then
      echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
      exit $STATE_CRITICAL
    elif [[ -n $(echo $esstatus | grep -i "unable to authenticate") ]]; then
      echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
      exit $STATE_CRITICAL
    elif [[ -n $(echo $esstatus | grep -i "unauthorized") ]]; then
      echo "ES SYSTEM CRITICAL - User $user is unauthorized"
      exit $STATE_CRITICAL
    fi
  fi

  if ! [[ $include = "_all" ]]; then
    # Turn the space separated names into a regex alternation: "a b" -> "a|b"
    tpsgrep=$(echo "$include" | sed "s/ /|/g")
    # grep -E replaces the obsolescent egrep, which warns on stderr with
    # current GNU grep.
    threadpools=$(echo "$threadpools" | grep -E -i "(${tpsgrep})")
    if [[ -z $(echo ${threadpools}) ]]; then
      echo "Thread Pool check is critical: No thread pools found with given name(s): ${include}."
      exit $STATE_CRITICAL
    fi
  fi

  # One array element per output line; word splitting of the awk output is
  # what builds the arrays, so the substitutions stay unquoted.
  tpname=($(echo "$threadpools" | awk '{print $1"-"$2}'))
  tpactive=($(echo "$threadpools" | awk '{print $3}'))
  tpqueue=($(echo "$threadpools" | awk '{print $4}'))
  tprejected=($(echo "$threadpools" | awk '{print $5}'))

  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    # Handle thresholds. They have to come in a special format: n,n,n (active, queue, rejected)
    thresholdlogic
    wactive=$(echo ${warning} | awk -F',' '{print $1}')
    wqueue=$(echo ${warning} | awk -F',' '{print $2}')
    wrejected=$(echo ${warning} | awk -F',' '{print $3}')
    cactive=$(echo ${critical} | awk -F',' '{print $1}')
    cqueue=$(echo ${critical} | awk -F',' '{print $2}')
    crejected=$(echo ${critical} | awk -F',' '{print $3}')

    # Perfdata is built for all pools first because every exit path prints it.
    for ((i = 0; i < ${#tpname[@]}; i++)); do
      tp=${tpname[$i]}
      perfdata[$i]="tp_${tp}_active=${tpactive[$i]};${wactive};${cactive};; tp_${tp}_queue=${tpqueue[$i]};${wqueue};${cqueue};; tp_${tp}_rejected=${tprejected[$i]};${wrejected};${crejected};; "
    done

    # The first violation found wins. Order: all active values, then all
    # queue values, then all rejected values.
    for ((i = 0; i < ${#tpactive[@]}; i++)); do
      tpa=${tpactive[$i]}
      if [[ $tpa -ge $cactive ]]; then
        echo "Thread Pool ${tpname[$i]} is critical: Active ($tpa) is equal or higher than threshold ($cactive)|${perfdata[*]}"
        exit $STATE_CRITICAL
      elif [[ $tpa -ge $wactive ]]; then
        echo "Thread Pool ${tpname[$i]} is warning: Active ($tpa) is equal or higher than threshold ($wactive)|${perfdata[*]}"
        exit $STATE_WARNING
      fi
    done

    for ((i = 0; i < ${#tpqueue[@]}; i++)); do
      tpq=${tpqueue[$i]}
      if [[ $tpq -ge $cqueue ]]; then
        echo "Thread Pool ${tpname[$i]} is critical: Queue ($tpq) is equal or higher than threshold ($cqueue)|${perfdata[*]}"
        exit $STATE_CRITICAL
      elif [[ $tpq -ge $wqueue ]]; then
        echo "Thread Pool ${tpname[$i]} is warning: Queue ($tpq) is equal or higher than threshold ($wqueue)|${perfdata[*]}"
        exit $STATE_WARNING
      fi
    done

    for ((i = 0; i < ${#tprejected[@]}; i++)); do
      tpr=${tprejected[$i]}
      if [[ $tpr -ge $crejected ]]; then
        echo "Thread Pool ${tpname[$i]} is critical: Rejected ($tpr) is equal or higher than threshold ($crejected)|${perfdata[*]}"
        exit $STATE_CRITICAL
      elif [[ $tpr -ge $wrejected ]]; then
        echo "Thread Pool ${tpname[$i]} is warning: Rejected ($tpr) is equal or higher than threshold ($wrejected)|${perfdata[*]}"
        exit $STATE_WARNING
      fi
    done

    echo "ES SYSTEM OK - Found ${#tpname[*]} thread pools in cluster|${perfdata[*]}"
    exit $STATE_OK
  fi

  # No Thresholds
  for ((i = 0; i < ${#tpname[@]}; i++)); do
    tp=${tpname[$i]}
    perfdata[$i]="tp_${tp}_active=${tpactive[$i]};;;; tp_${tp}_queue=${tpqueue[$i]};;;; tp_${tp}_rejected=${tprejected[$i]};;;; "
  done
  echo "ES SYSTEM OK - Found ${#tpname[*]} thread pools in cluster|${perfdata[*]}"
  exit $STATE_OK
  ;;
|
708
|
+
|
709
|
+
master) # Check Cluster Master
  # Reads /_cat/master and reports the last field of the answer as the
  # master node. With ${expect_master} set, a different master is a WARNING.
  getstatus
  if [[ -z $user ]]; then
    # Without authentication
    master=$(curl -k -s --max-time ${max_time} "${httpscheme}://${host}:${port}/_cat/master")
    masterrc=$?
    if [[ $masterrc -eq 7 ]]; then
      echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
      exit $STATE_CRITICAL
    elif [[ $masterrc -eq 28 ]]; then
      echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
      exit $STATE_CRITICAL
    fi
  fi

  if [[ -n $user ]] || [[ -n $(echo $esstatus | grep -i authentication) ]]; then
    # Authentication required
    authlogic
    master=$(curl -k -s --max-time ${max_time} --basic -u "${user}:${pass}" "${httpscheme}://${host}:${port}/_cat/master")
    masterrc=$?
    # Test this request's own exit code. The checks used to look at
    # $threadpoolrc, which belongs to the tps check and is unset here, so
    # connection and timeout failures were never reported.
    if [[ $masterrc -eq 7 ]]; then
      echo "ES SYSTEM CRITICAL - Failed to connect to ${host} port ${port}: Connection refused"
      exit $STATE_CRITICAL
    elif [[ $masterrc -eq 28 ]]; then
      echo "ES SYSTEM CRITICAL - server did not respond within ${max_time} seconds"
      exit $STATE_CRITICAL
    elif [[ -n $(echo $esstatus | grep -i "unable to authenticate") ]]; then
      echo "ES SYSTEM CRITICAL - Unable to authenticate user $user for REST request"
      exit $STATE_CRITICAL
    elif [[ -n $(echo $esstatus | grep -i "unauthorized") ]]; then
      echo "ES SYSTEM CRITICAL - User $user is unauthorized"
      exit $STATE_CRITICAL
    fi
  fi

  # The node name is taken from the last whitespace separated field.
  masternode=$(echo "$master" | awk '{print $NF}')

  if [[ -n ${expect_master} ]]; then
    if [[ "${expect_master}" = "${masternode}" ]]; then
      echo "ES SYSTEM OK - Master node is $masternode"
      exit $STATE_OK
    else
      echo "ES SYSTEM WARNING - Master node is $masternode but expected ${expect_master}"
      exit $STATE_WARNING
    fi
  else
    echo "ES SYSTEM OK - Master node is $masternode"
    exit $STATE_OK
  fi
  ;;
|
759
|
+
|
760
|
+
# Any check type not matched by an arm above lands here and runs help.
*) help
|
761
|
+
esac
|