riemann-tools 0.2.13 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/dependabot.yml +11 -0
  5. data/.github/workflows/ci.yml +42 -0
  6. data/.github/workflows/codeql-analysis.yml +72 -0
  7. data/.gitignore +6 -0
  8. data/.rspec +2 -0
  9. data/.rubocop.yml +32 -0
  10. data/.travis.yml +31 -0
  11. data/CHANGELOG.md +422 -0
  12. data/Gemfile +6 -0
  13. data/ISSUE_TEMPLATE.md +15 -0
  14. data/README.markdown +14 -15
  15. data/Rakefile +23 -0
  16. data/SECURITY.md +42 -0
  17. data/bin/riemann-apache-status +92 -77
  18. data/bin/riemann-bench +54 -48
  19. data/bin/riemann-cloudant +44 -39
  20. data/bin/riemann-consul +82 -75
  21. data/bin/riemann-dir-files-count +53 -46
  22. data/bin/riemann-dir-space +53 -46
  23. data/bin/riemann-diskstats +78 -74
  24. data/bin/riemann-fd +68 -47
  25. data/bin/riemann-freeswitch +108 -102
  26. data/bin/riemann-haproxy +46 -39
  27. data/bin/riemann-health +4 -335
  28. data/bin/riemann-kvminstance +18 -12
  29. data/bin/riemann-memcached +35 -28
  30. data/bin/riemann-net +4 -103
  31. data/bin/riemann-nginx-status +74 -66
  32. data/bin/riemann-ntp +4 -32
  33. data/bin/riemann-portcheck +40 -30
  34. data/bin/riemann-proc +96 -89
  35. data/bin/riemann-varnish +51 -44
  36. data/bin/riemann-zookeeper +38 -33
  37. data/lib/riemann/tools/health.rb +347 -0
  38. data/lib/riemann/tools/net.rb +104 -0
  39. data/lib/riemann/tools/ntp.rb +41 -0
  40. data/lib/riemann/tools/utils.rb +17 -0
  41. data/lib/riemann/tools/version.rb +7 -0
  42. data/lib/riemann/tools.rb +40 -33
  43. data/riemann-tools.gemspec +42 -0
  44. data/tools/riemann-aws/LICENSE +21 -0
  45. data/tools/riemann-aws/README.md +54 -0
  46. data/tools/riemann-aws/Rakefile +37 -0
  47. data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
  48. data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
  49. data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
  50. data/tools/riemann-aws/bin/riemann-aws-status +83 -0
  51. data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
  52. data/tools/riemann-aws/bin/riemann-s3-list +87 -0
  53. data/tools/riemann-aws/bin/riemann-s3-status +102 -0
  54. data/tools/riemann-chronos/LICENSE +21 -0
  55. data/tools/riemann-chronos/README.md +10 -0
  56. data/tools/riemann-chronos/Rakefile +37 -0
  57. data/tools/riemann-chronos/bin/riemann-chronos +161 -0
  58. data/tools/riemann-docker/LICENSE +21 -0
  59. data/tools/riemann-docker/README.md +10 -0
  60. data/tools/riemann-docker/Rakefile +36 -0
  61. data/tools/riemann-docker/bin/riemann-docker +206 -0
  62. data/tools/riemann-elasticsearch/LICENSE +21 -0
  63. data/tools/riemann-elasticsearch/README.md +10 -0
  64. data/tools/riemann-elasticsearch/Rakefile +37 -0
  65. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
  66. data/tools/riemann-marathon/LICENSE +21 -0
  67. data/tools/riemann-marathon/README.md +10 -0
  68. data/tools/riemann-marathon/Rakefile +37 -0
  69. data/tools/riemann-marathon/bin/riemann-marathon +163 -0
  70. data/tools/riemann-mesos/LICENSE +21 -0
  71. data/tools/riemann-mesos/README.md +10 -0
  72. data/tools/riemann-mesos/Rakefile +37 -0
  73. data/tools/riemann-mesos/bin/riemann-mesos +146 -0
  74. data/tools/riemann-munin/LICENSE +21 -0
  75. data/tools/riemann-munin/README.md +10 -0
  76. data/tools/riemann-munin/Rakefile +36 -0
  77. data/tools/riemann-munin/bin/riemann-munin +43 -0
  78. data/tools/riemann-rabbitmq/LICENSE +21 -0
  79. data/tools/riemann-rabbitmq/README.md +10 -0
  80. data/tools/riemann-rabbitmq/Rakefile +37 -0
  81. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
  82. data/tools/riemann-riak/LICENSE +21 -0
  83. data/tools/riemann-riak/README.md +10 -0
  84. data/tools/riemann-riak/Rakefile +36 -0
  85. data/tools/riemann-riak/bin/riemann-riak +323 -0
  86. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  87. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  88. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  89. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  90. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  91. metadata +195 -34
@@ -0,0 +1,206 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ # Reports per-container CPU, memory and status, plus host disk usage, to riemann.
7
+
8
+ require 'riemann/tools'
9
+
10
+ module Riemann
11
+ module Tools
12
+ class DockerHealth
13
+ require 'docker'
14
+ require 'socket'
15
+ include Riemann::Tools
16
+ include Docker
17
+
18
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
19
+ default: nil
20
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
21
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
22
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
23
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
24
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
25
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
26
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
27
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
28
+
29
# Returns whatever Docker::Container.all yields for the configured
# Docker endpoint; #tick iterates over this collection.
def containers
  Docker::Container.all
end
32
+
33
# Returns the container's 'Name' attribute without its first character.
# (The first character is dropped unconditionally; presumably it is the
# leading "/" Docker puts in front of container names.)
def get_container_name(container)
  full_name = container.json['Name']
  full_name[1..-1]
end
36
+
37
# Applies the command-line options: Docker endpoint, the hostname used in
# events, the alert thresholds and which checks are switched on.
def initialize
  docker_host = opts[:docker_host]
  Docker.url = docker_host unless docker_host.nil?

  # Use the configured host name only when it is a non-empty String;
  # otherwise fall back to the machine's own host name.
  configured = opts[:host_hostname]
  usable = configured.is_a?(String) && !configured.empty?
  @hostname = usable ? configured : Socket.gethostname

  # Divisor applied to the raw CPU usage delta in #cpu.
  @cpu_coefficient = 1_000_000_000

  @limits = %i[cpu disk memory].each_with_object({}) do |kind, limits|
    limits[kind] = { critical: opts[:"#{kind}_critical"], warning: opts[:"#{kind}_warning"] }
  end

  # Previous samples, keyed by container id.
  @last_cpu_reads = {}
  @last_uptime_reads = {}

  # A flag is only ever set to true; checks that were not requested leave it unset.
  requested = opts[:checks]
  @disk_enabled = true if requested.include?('disk')
  @cpu_enabled = true if requested.include?('cpu')
  @memory_enabled = true if requested.include?('memory')
  @basic_inspection_enabled = true if requested.include?('basic')
end
67
+
68
# Sends a single event through #report.
#
# container   - container name appended to the host name, or nil for an
#               event about the host itself
# service     - service name (converted with to_s)
# state       - event state (converted with to_s)
# metric      - numeric value (converted with to_f, so nil becomes 0.0)
# description - free-form text
def alert(container, service, state, metric, description)
  event_host = container.nil? ? @hostname : "#{@hostname}-#{container}"

  report(
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
    host: event_host,
  )
end
84
+
85
# Compares a fraction with the thresholds configured for +service+ and
# emits one alert whose state is :critical, :warning or :ok.
#
# container - container name, or nil for the host
# service   - key into @limits (:cpu, :disk or :memory)
# fraction  - value to report; nothing happens when it is nil or false
# report    - text appended after the percentage in the description
# name      - service name used in the event; defaults to +service+
def report_pct(container, service, fraction, report = '', name = nil)
  return unless fraction

  label = name.nil? ? service : name

  state =
    if fraction > @limits[service][:critical]
      :critical
    elsif fraction > @limits[service][:warning]
      :warning
    else
      :ok
    end

  alert container, label, state, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
end
98
+
99
# Reports the CPU usage of one container as a fraction, derived from the
# change of the cumulative usage counter between two consecutive calls.
#
# id    - container id; key under which the previous sample is remembered
# name  - container name handed to #alert / #report_pct
# stats - parsed stats payload (Hash) with 'precpu_stats' and 'read'
#
# Returns false (after sending an :unknown alert) when the payload has no
# usable CPU counters; otherwise the sample stored for the next call.
def cpu(id, name, stats)
  cpu_stats = stats['precpu_stats'] || {}
  usage = cpu_stats['cpu_usage'] || {}
  total = usage['total_usage']

  # 'percpu_usage' can be absent or empty (the Docker API documents
  # 'online_cpus' as the fallback), so do not assume it is an Array.
  cores = Array(usage['percpu_usage']).count
  cores = cpu_stats['online_cpus'].to_i if cores.zero?

  unless total && cores.positive?
    alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
    return false
  end

  current = total / cores
  current_time = Time.parse(stats['read'])

  last = @last_cpu_reads[id]
  unless last.nil?
    elapsed = current_time - last[:t]
    # Two samples carrying the same timestamp would divide by zero and
    # produce an Infinity/NaN metric; skip the report in that case.
    report_pct name, :cpu, (current - last[:v]) / elapsed / @cpu_coefficient if elapsed.positive?
  end

  @last_cpu_reads[id] = { v: current, t: current_time }
end
117
+
118
# Reports memory usage of one container as usage / limit, both read from
# the 'memory_stats' section of the stats payload and converted to floats.
#
# _id   - unused
# name  - container name handed to #report_pct
# stats - parsed stats payload (Hash)
def memory(_id, name, stats)
  usage, total = stats['memory_stats'].values_at('usage', 'limit').map(&:to_f)

  report_pct name, :memory, usage / total, "#{usage} / #{total}"
end
126
+
127
# Runs `df -P` and reports the used fraction of every listed filesystem
# whose first column contains a slash. Events are host-level (no
# container name) and use the service name "disk <mount point>".
def disk
  `df -P`.split("\n").each do |row|
    device, _blocks, _used, available, capacity, mount = row.split(/\s+/)
    next if device == 'Filesystem' # header row
    next unless device =~ %r{/} # Needs at least one slash in the mount path

    # The capacity column looks like "45%"; to_f reads the leading number.
    used_fraction = capacity.to_f / 100
    report_pct(nil, :disk, used_fraction, "#{available.to_i / 1024} mb left", "disk #{mount}")
  end
end
138
+
139
# Emits a 'status' event for the container and, while it is running, an
# 'uptime' event that turns critical when 'StartedAt' differs from the
# value seen on the previous call (i.e. the container was restarted).
#
# id         - container id; key for the remembered start time
# name       - container name handed to #alert
# inspection - parsed inspect payload (Hash) with a 'State' section
def basic_inspection(id, name, inspection)
  state = inspection['State']
  running = state['Running']

  # The whole State section is attached as JSON in the description.
  alert(name, 'status', running ? 'ok' : 'critical', running ? 1 : 0, JSON.generate(state))

  return unless running

  started_at = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
  uptime = DateTime.now.to_time.utc.to_i - started_at

  previous = @last_uptime_reads[id]
  unless previous.nil?
    alert(
      name, 'uptime',
      started_at == previous ? 'ok' : 'critical',
      uptime,
      "last 'StartedAt' measure was #{previous} (#{Time.at(previous).utc}), " \
      "now it's #{started_at} (#{Time.at(started_at).utc})",
    )
  end

  @last_uptime_reads[id] = started_at
end
172
+
173
# One polling round: the disk check runs once, then every container is
# processed in its own thread (stats fetch, optional inspection, CPU and
# memory checks). Errors raised inside a thread surface at join time and
# are written to stderr instead of aborting the round.
def tick
  # Disk is the same in every container
  disk if @disk_enabled

  workers = containers.map do |ctr|
    Thread.new(ctr) do |container|
      id = container.id
      name = get_container_name(container)

      stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))

      if @basic_inspection_enabled
        inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
        basic_inspection(id, name, inspection)
      end
      cpu(id, name, stats) if @cpu_enabled
      memory(id, name, stats) if @memory_enabled
    end
  end

  workers.each do |worker|
    worker.join
  rescue StandardError => e
    warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
  end
end
202
+ end
203
+ end
204
+ end
205
+
206
+ Riemann::Tools::DockerHealth.run
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Elasticsearch
2
+
3
+ Gathers Elasticsearch metrics and sends them to Riemann.
4
+
5
+ # Getting started
6
+
7
+ ```
8
+ gem install riemann-elasticsearch
9
+ riemann-elasticsearch --help
10
+ ```
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'find'
7
+
8
# Don't include resource forks in tarballs on Mac OS X.
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
ENV['COPYFILE_DISABLE'] = 'true'

# Gem specification for the riemann-elasticsearch tool.
# NOTE(review): rubyforge_project= and has_rdoc= look like legacy RubyGems
# setters, and the declared minimum Ruby version predates syntax used by
# the executable — confirm before touching either.
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-elasticsearch'

  s.name = 'riemann-elasticsearch'
  s.version = '0.2.4'
  s.author = 'Gavin Sandie'
  s.email = 'beach@vicecity.co.uk'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits elasticsearch stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also yields "." and "..", which are not executables and
  # must not end up in the gem's executable list.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# NOTE(review): the empty block is kept on purpose — Gem::PackageTask
# appears to define its tasks only when a block is given; confirm before
# removing it.
Gem::PackageTask.new(gemspec) do |_pkg|
end
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ require 'riemann/tools'
7
+
8
+ module Riemann
9
+ module Tools
10
+ class Elasticsearch
11
+ include Riemann::Tools
12
+ require 'faraday'
13
+ require 'json'
14
+ require 'uri'
15
+
16
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
17
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
18
+ opt :path_prefix,
19
+ 'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: '/'
20
+ opt :es_host, 'Elasticsearch host', default: 'localhost'
21
+ opt :es_port, 'Elasticsearch port', type: :int, default: 9200
22
+ opt :es_search_index, 'Elasticsearch index to fetch search statistics for', default: '_all'
23
+
24
# Performs a GET on +uri+ with the configured read/open timeouts.
#
# Returns the Faraday response, or nil when any StandardError is raised
# while connecting or fetching; in that case a critical
# 'elasticsearch health' event describing the error is reported.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
44
+
45
# Builds the URL of an Elasticsearch endpoint.
#
# path - endpoint path relative to the (optional) path prefix,
#        e.g. '_cluster/health'
#
# One leading and one trailing slash are stripped from the configured
# path prefix. The option value itself is never modified: editing it in
# place would change the shared option for every later call and raises
# when the string is frozen (the default '/' is a frozen literal here).
def make_es_url(path)
  prefix = options[:path_prefix].to_s.sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:es_host]}:#{options[:es_port]}#{separator}#{prefix}/#{path}"
end
51
+
52
# URL of the cluster health endpoint.
def health_url
  make_es_url('_cluster/health')
end

# URL of the store statistics endpoint.
def indices_url
  make_es_url('_stats/store')
end

# URL of the search statistics endpoint for the configured index.
def search_url
  make_es_url("#{options[:es_search_index]}/_stats/search")
end
64
+
65
# Tells whether a response is unusable and, if so, reports a critical
# 'elasticsearch health' event.
#
# response - object answering success?, status and body, or nil when the
#            request itself failed
# uri      - URI that was requested (its host is used for the event)
#
# Returns false for a successful response, true otherwise.
def bad?(response, uri)
  # Safe navigation: a nil response must reach the failure path below
  # instead of raising NoMethodError.
  return false if response&.success?

  description =
    if response.nil?
      'HTTP response is empty!'
    else
      "HTTP connection error: #{response.status} - #{response.body}"
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: description,
  )
  # Explicit result so callers do not depend on what #report returns.
  true
end
77
+
78
# Fetches the store statistics and reports, for every index, the size in
# bytes of its 'primaries' and of its 'total' section (in that order).
# Does nothing further when the response is rejected by #bad?.
def tick_indices
  uri = URI(indices_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  JSON.parse(response.body)['indices'].each_pair do |index, stats|
    %w[primaries total].each do |scope|
      report(
        host: uri.host,
        service: "elasticsearch index/#{index}/#{scope}/size_in_bytes",
        metric: stats[scope]['store']['size_in_bytes'],
      )
    end
  end
end
100
+
101
# Fetches the search statistics and reports the average query and fetch
# time (milliseconds per operation) for every entry under '_all'.
# Does nothing further when the response is rejected by #bad?.
#
# NOTE(review): every entry under '_all' is reported under the same two
# service names, so later entries overwrite earlier ones — confirm this
# is intended.
def tick_search
  uri = URI(search_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  es_search_index = options[:es_search_index]
  # Assuming that a 200 will give json
  json = JSON.parse(response.body)

  # Average time per operation. With a zero operation count the plain
  # division yields NaN, so 0.0 is reported instead.
  average = lambda do |millis, count|
    count.to_f.zero? ? 0.0 : millis.to_f / count
  end

  json['_all'].each_pair do |_type, data|
    search = data['search']
    query = average.call(search['query_time_in_millis'], search['query_total'])
    fetch = average.call(search['fetch_time_in_millis'], search['fetch_total'])

    report(
      host: uri.host,
      service: "elasticsearch search/#{es_search_index}/query",
      metric: query,
    )
    report(
      host: uri.host,
      service: "elasticsearch search/#{es_search_index}/fetch",
      metric: fetch,
    )
  end
end
127
+
128
# One polling round: index and search statistics first (any error there
# is turned into a critical 'elasticsearch error' event), then cluster
# health. The cluster colour is mapped to a Riemann state and every other
# field of the health document is reported as its own event.
def tick
  begin
    tick_indices
    tick_search
  rescue StandardError => e
    report(
      host: options[:es_host],
      service: 'elasticsearch error',
      state: 'critical',
      description: "Elasticsearch cluster error: #{e.message}",
    )
  end

  uri = URI(health_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  health = JSON.parse(response.body)
  cluster_name = health.delete('cluster_name')
  cluster_status = health.delete('status')

  # Any other colour leaves the state nil.
  state =
    case cluster_status
    when 'green' then 'ok'
    when 'yellow' then 'warning'
    when 'red' then 'critical'
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: state,
    description: "Elasticsearch cluster: #{cluster_name} - #{cluster_status}",
  )

  health.each_pair do |key, value|
    report(
      host: uri.host,
      service: "elasticsearch #{key}",
      metric: value,
      description: "Elasticsearch cluster #{key}",
    )
  end
end
171
+ end
172
+ end
173
+ end
174
+ Riemann::Tools::Elasticsearch.run
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Marathon
2
+
3
+ Gathers Marathon metrics and sends them to Riemann.
4
+
5
+ # Getting started
6
+
7
+ ```
8
+ gem install riemann-marathon
9
+ riemann-marathon --help
10
+ ```
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'find'
7
+
8
# Don't include resource forks in tarballs on Mac OS X.
ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
ENV['COPYFILE_DISABLE'] = 'true'

# Gem specification for the riemann-marathon tool.
# NOTE(review): rubyforge_project= and has_rdoc= look like legacy RubyGems
# setters, and the declared minimum Ruby version predates syntax used by
# the executable — confirm before touching either.
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-marathon'

  s.name = 'riemann-marathon'
  s.version = '0.1.3'
  s.author = 'Giulio Eulisse'
  s.email = 'giulio.eulisse@cern.ch'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits Marathon stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also yields "." and "..", which are not executables and
  # must not end up in the gem's executable list.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# NOTE(review): the empty block is kept on purpose — Gem::PackageTask
# appears to define its tasks only when a block is given; confirm before
# removing it.
Gem::PackageTask.new(gemspec) do |_pkg|
end
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ require 'riemann/tools'
7
+
8
+ module Riemann
9
+ module Tools
10
+ class Marathon
11
+ include Riemann::Tools
12
+
13
+ require 'faraday'
14
+ require 'json'
15
+ require 'uri'
16
+
17
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
18
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
19
+ opt :path_prefix,
20
+ 'Marathon path prefix for proxied installations e.g. "marathon" for target http://localhost/marathon/metrics', default: '/'
21
+ opt :marathon_host, 'Marathon host', default: 'localhost'
22
+ opt :marathon_port, 'Marathon port', type: :int, default: 8080
23
+
24
# Installs this tool's defaults: a 60 second polling interval and a
# 120 second event TTL.
#
# The defaults are applied only when the corresponding `<name>_given`
# flag is absent from the options, so an explicit --interval / --ttl is
# no longer overwritten.
# NOTE(review): relies on the option parser recording `:interval_given`
# / `:ttl_given` for options passed on the command line — confirm; when
# those keys are missing the previous behaviour (always 60 / 120) applies.
def initialize
  options[:interval] = 60 unless options[:interval_given]
  options[:ttl] = 120 unless options[:ttl_given]
end
28
+
29
# Performs a GET on +uri+ with the configured read/open timeouts.
#
# Returns the Faraday response, or nil when any StandardError is raised
# while connecting or fetching; in that case a critical 'marathon health'
# event describing the error is reported.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'marathon health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
49
+
50
# URL of Marathon's metrics endpoint.
#
# One leading and one trailing slash are stripped from the configured
# path prefix. The option value itself is never modified: editing it in
# place would change the shared option for every later call and raises
# when the string is frozen (the default '/' is a frozen literal here).
def health_url
  prefix = options[:path_prefix].to_s.sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:marathon_host]}:#{options[:marathon_port]}#{separator}#{prefix}/metrics"
end
56
+
57
# URL of Marathon's application list endpoint.
#
# One leading and one trailing slash are stripped from the configured
# path prefix. The option value itself is never modified: editing it in
# place would change the shared option for every later call and raises
# when the string is frozen (the default '/' is a frozen literal here).
def apps_url
  prefix = options[:path_prefix].to_s.sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:marathon_host]}:#{options[:marathon_port]}#{separator}#{prefix}/v2/apps"
end
63
+
64
# One polling round: the metrics endpoint first, then the application list.
def tick
  tick_health
  tick_apps
end
68
+
69
# Polls the metrics endpoint.
#
# A non-200 answer produces a critical 'marathon health' event. Otherwise
# an ok event is sent, followed by one marker event per metric name and
# one event per numeric counter found below it. Nothing is reported here
# when the request itself failed (#safe_get already did).
def tick_health
  uri = URI(health_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  metrics = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  metrics.each_pair do |type, group|
    # Top-level entries that are not hash-like are skipped.
    next unless group.respond_to? :each_pair

    group.each_pair do |service, counters|
      report(
        host: uri.host,
        service: "marathon_metric #{type} #{service}",
        metric: 1,
        tags: ['metric_name'],
        ttl: 600,
      )
      next unless counters.respond_to? :each_pair

      counters.each_pair do |key, value|
        next unless value.is_a? Numeric

        report(
          host: uri.host,
          service: "marathon #{service} #{key}",
          metric: value,
          tags: ['metric', type.to_s],
          ttl: 600,
        )
      end
    end
  end
end
121
+
122
# Polls the application list.
#
# A non-200 answer produces a critical 'marathon health' event. Otherwise
# an ok event is sent, followed by one event for every numeric attribute
# of every application. Nothing is reported here when the request itself
# failed (#safe_get already did).
def tick_apps
  uri = URI(apps_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  listing = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  listing['apps'].each do |app|
    app.each_pair do |attribute, value|
      next unless value.is_a? Numeric

      report(
        host: uri.host,
        service: "marathon apps#{app['id']}/#{attribute}",
        metric: value,
        ttl: 120,
      )
    end
  end
end
160
+ end
161
+ end
162
+ end
163
+ Riemann::Tools::Marathon.run