riemann-tools 0.2.14 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. checksums.yaml +5 -5
  2. data/.docker/Dockerfile +7 -0
  3. data/.docker/publish.sh +35 -0
  4. data/.github/dependabot.yml +11 -0
  5. data/.github/workflows/ci.yml +42 -0
  6. data/.github/workflows/codeql-analysis.yml +72 -0
  7. data/.gitignore +6 -0
  8. data/.rspec +2 -0
  9. data/.rubocop.yml +32 -0
  10. data/.travis.yml +31 -0
  11. data/CHANGELOG.md +430 -0
  12. data/Gemfile +6 -0
  13. data/ISSUE_TEMPLATE.md +15 -0
  14. data/README.markdown +13 -16
  15. data/Rakefile +23 -0
  16. data/SECURITY.md +42 -0
  17. data/bin/riemann-apache-status +92 -77
  18. data/bin/riemann-bench +54 -48
  19. data/bin/riemann-cloudant +44 -39
  20. data/bin/riemann-consul +82 -75
  21. data/bin/riemann-dir-files-count +53 -46
  22. data/bin/riemann-dir-space +53 -46
  23. data/bin/riemann-diskstats +78 -74
  24. data/bin/riemann-fd +68 -47
  25. data/bin/riemann-freeswitch +108 -102
  26. data/bin/riemann-haproxy +46 -39
  27. data/bin/riemann-health +4 -335
  28. data/bin/riemann-kvminstance +18 -12
  29. data/bin/riemann-memcached +35 -28
  30. data/bin/riemann-net +4 -103
  31. data/bin/riemann-nginx-status +74 -66
  32. data/bin/riemann-ntp +4 -32
  33. data/bin/riemann-portcheck +40 -30
  34. data/bin/riemann-proc +96 -89
  35. data/bin/riemann-varnish +51 -44
  36. data/bin/riemann-zookeeper +38 -33
  37. data/lib/riemann/tools/health.rb +347 -0
  38. data/lib/riemann/tools/net.rb +104 -0
  39. data/lib/riemann/tools/ntp.rb +41 -0
  40. data/lib/riemann/tools/utils.rb +17 -0
  41. data/lib/riemann/tools/version.rb +7 -0
  42. data/lib/riemann/tools.rb +38 -31
  43. data/riemann-tools.gemspec +42 -0
  44. data/tools/riemann-aws/LICENSE +21 -0
  45. data/tools/riemann-aws/README.md +54 -0
  46. data/tools/riemann-aws/Rakefile +37 -0
  47. data/tools/riemann-aws/bin/riemann-aws-billing +93 -0
  48. data/tools/riemann-aws/bin/riemann-aws-rds-status +68 -0
  49. data/tools/riemann-aws/bin/riemann-aws-sqs-status +50 -0
  50. data/tools/riemann-aws/bin/riemann-aws-status +83 -0
  51. data/tools/riemann-aws/bin/riemann-elb-metrics +168 -0
  52. data/tools/riemann-aws/bin/riemann-s3-list +87 -0
  53. data/tools/riemann-aws/bin/riemann-s3-status +102 -0
  54. data/tools/riemann-chronos/LICENSE +21 -0
  55. data/tools/riemann-chronos/README.md +10 -0
  56. data/tools/riemann-chronos/Rakefile +37 -0
  57. data/tools/riemann-chronos/bin/riemann-chronos +161 -0
  58. data/tools/riemann-docker/LICENSE +21 -0
  59. data/tools/riemann-docker/README.md +10 -0
  60. data/tools/riemann-docker/Rakefile +36 -0
  61. data/tools/riemann-docker/bin/riemann-docker +206 -0
  62. data/tools/riemann-elasticsearch/LICENSE +21 -0
  63. data/tools/riemann-elasticsearch/README.md +10 -0
  64. data/tools/riemann-elasticsearch/Rakefile +37 -0
  65. data/tools/riemann-elasticsearch/bin/riemann-elasticsearch +174 -0
  66. data/tools/riemann-marathon/LICENSE +21 -0
  67. data/tools/riemann-marathon/README.md +10 -0
  68. data/tools/riemann-marathon/Rakefile +37 -0
  69. data/tools/riemann-marathon/bin/riemann-marathon +163 -0
  70. data/tools/riemann-mesos/LICENSE +21 -0
  71. data/tools/riemann-mesos/README.md +10 -0
  72. data/tools/riemann-mesos/Rakefile +37 -0
  73. data/tools/riemann-mesos/bin/riemann-mesos +146 -0
  74. data/tools/riemann-munin/LICENSE +21 -0
  75. data/tools/riemann-munin/README.md +10 -0
  76. data/tools/riemann-munin/Rakefile +36 -0
  77. data/tools/riemann-munin/bin/riemann-munin +43 -0
  78. data/tools/riemann-rabbitmq/LICENSE +21 -0
  79. data/tools/riemann-rabbitmq/README.md +10 -0
  80. data/tools/riemann-rabbitmq/Rakefile +37 -0
  81. data/tools/riemann-rabbitmq/bin/riemann-rabbitmq +273 -0
  82. data/tools/riemann-riak/LICENSE +21 -0
  83. data/tools/riemann-riak/README.md +10 -0
  84. data/tools/riemann-riak/Rakefile +36 -0
  85. data/tools/riemann-riak/bin/riemann-riak +323 -0
  86. data/tools/riemann-riak/bin/riemann-riak-keys +13 -0
  87. data/tools/riemann-riak/bin/riemann-riak-ring +9 -0
  88. data/tools/riemann-riak/riak_status/key_count.erl +13 -0
  89. data/tools/riemann-riak/riak_status/riak_status.rb +152 -0
  90. data/tools/riemann-riak/riak_status/ringready.erl +9 -0
  91. metadata +186 -37
@@ -0,0 +1,206 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ # Reports Docker container CPU, memory, status and uptime, plus disk usage from `df`, to riemann.
7
+
8
+ require 'riemann/tools'
9
+
10
+ module Riemann
11
+ module Tools
12
+ class DockerHealth
13
+ require 'docker'
14
+ require 'socket'
15
+ include Riemann::Tools
16
+ include Docker
17
+
18
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
19
+ default: nil
20
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
21
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
22
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
23
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
24
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
25
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
26
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
27
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
28
+
29
# Returns whatever Docker::Container.all yields for the configured daemon.
def containers
  Docker::Container.all
end
32
+
33
# Returns the container's 'Name' attribute with its first character dropped.
def get_container_name(container)
  full_name = container.json['Name']
  full_name[1..]
end
36
+
37
# Prepares the collector: applies the optional docker host, resolves the
# hostname used in events, stores the alert thresholds and records which
# of the requested checks are enabled.
def initialize
  docker_host = opts[:docker_host]
  Docker.url = docker_host unless docker_host.nil?

  # Fall back to the local hostname unless a non-empty String was configured.
  configured = opts[:host_hostname]
  usable = configured.is_a?(String) && !configured.empty?
  @hostname = usable ? configured : Socket.gethostname

  @cpu_coefficient = 1000 * 1000 * 1000

  @limits = {}
  %i[cpu disk memory].each do |kind|
    @limits[kind] = { critical: opts[:"#{kind}_critical"], warning: opts[:"#{kind}_warning"] }
  end

  @last_cpu_reads = {}
  @last_uptime_reads = {}

  requested = opts[:checks]
  @disk_enabled = true if requested.include?('disk')
  @cpu_enabled = true if requested.include?('cpu')
  @memory_enabled = true if requested.include?('memory')
  @basic_inspection_enabled = true if requested.include?('basic')
end
67
+
68
# Builds an event and hands it to `report`.
#
# container   - container name appended to the hostname, or nil to use the
#               bare hostname
# service     - service label (converted with to_s)
# state       - state label (converted with to_s)
# metric      - numeric value (converted with to_f, so nil becomes 0.0)
# description - free-form text
def alert(container, service, state, metric, description)
  event_host = container.nil? ? @hostname : "#{@hostname}-#{container}"

  report(
    service: service.to_s,
    state: state.to_s,
    metric: metric.to_f,
    description: description,
    host: event_host,
  )
end
84
+
85
# Sends an event whose state is derived from `fraction` and the thresholds
# stored in @limits[service]. Does nothing when `fraction` is nil/false.
#
# container - container name forwarded to `alert` (may be nil)
# service   - key into @limits; also the event's service unless `name` is set
# fraction  - value compared against the critical and warning thresholds
# report    - text appended after the formatted percentage
# name      - optional service label overriding `service`
def report_pct(container, service, fraction, report = '', name = nil)
  return unless fraction

  label = name.nil? ? service : name
  thresholds = @limits[service]
  state =
    if fraction > thresholds[:critical]
      :critical
    elsif fraction > thresholds[:warning]
      :warning
    else
      :ok
    end

  alert container, label, state, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
end
98
+
99
# Reports the CPU usage of a container since the previous call for the same id.
#
# The cumulative 'total_usage' counter from 'precpu_stats' is divided by the
# number of CPUs, and the difference to the remembered reading is scaled by
# the elapsed time and @cpu_coefficient. Nothing is reported on the first
# call for an id because there is no earlier reading to compare with.
#
# id    - container id, used as key for the remembered reading
# name  - container name forwarded to the alert helpers
# stats - parsed response of the container stats endpoint
#
# Returns false when the stats carry no usable CPU figures (an :unknown event
# is sent instead); otherwise the reading stored for the next call.
def cpu(id, name, stats)
  precpu = stats['precpu_stats'] || {}
  usage = precpu['cpu_usage'] || {}
  total_usage = usage['total_usage']

  # 'percpu_usage' can be absent; fall back to 'online_cpus' in that case.
  cpu_count = usage['percpu_usage']&.count
  cpu_count = precpu['online_cpus'] if cpu_count.nil? || cpu_count.zero?

  # Validate before dividing so the :unknown alert is actually reachable.
  unless total_usage && cpu_count&.positive?
    alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
    return false
  end

  current = total_usage / cpu_count
  current_time = Time.parse(stats['read'])
  last = @last_cpu_reads[id]

  # A repeated timestamp would divide by zero and report Infinity/NaN.
  if last && current_time > last[:t]
    used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
    report_pct name, :cpu, used
  end

  @last_cpu_reads[id] = { v: current, t: current_time }
end
117
+
118
# Reports the fraction of the memory limit a container is using.
#
# _id   - container id (unused)
# name  - container name forwarded to the alert helpers
# stats - parsed response of the container stats endpoint
#
# Returns false after sending an :unknown event when no positive limit is
# available; otherwise whatever `report_pct` returns.
def memory(_id, name, stats)
  memory_stats = stats['memory_stats'] || {}
  usage = memory_stats['usage'].to_f
  total = memory_stats['limit'].to_f

  # Without a positive limit the ratio would be NaN or Infinity.
  unless total.positive?
    alert name, :memory, :unknown, nil, 'no memory limit found in docker remote api stats'
    return false
  end

  report_pct name, :memory, usage / total, "#{usage} / #{total}"
end
126
+
127
# Parses `df -P` output and reports the used fraction of every row whose
# first column contains a slash, under the service "disk <sixth column>".
def disk
  `df -P`.split(/\n/).each do |row|
    device, _blocks, _used, available, capacity, mount = row.split(/\s+/)
    next if device == 'Filesystem' # header row
    next unless device =~ %r{/} # first column needs at least one slash

    # The capacity column is a percentage such as "85%".
    used_fraction = capacity.to_f / 100
    report_pct(nil, :disk, used_fraction, "#{available.to_i / 1024} mb left", "disk #{mount}")
  end
end
138
+
139
# Reports a container's running status and, once a previous reading exists,
# whether its 'StartedAt' timestamp changed since the last call.
#
# id         - container id, key into @last_uptime_reads
# name       - container name forwarded to `alert`
# inspection - parsed inspect response; only its 'State' entry is read
def basic_inspection(id, name, inspection)
  state = inspection['State']
  running = state['Running']

  alert(name, 'status', running ? 'ok' : 'critical', running ? 1 : 0, JSON.generate(state))
  return unless running

  started_at = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
  uptime = DateTime.now.to_time.utc.to_i - started_at
  previous = @last_uptime_reads[id]

  unless previous.nil?
    # A different start time than last seen is reported as critical.
    description = "last 'StartedAt' measure was #{previous} (#{Time.at(previous).utc}), " \
                  "now it's #{started_at} (#{Time.at(started_at).utc})"
    alert(name, 'uptime', started_at == previous ? 'ok' : 'critical', uptime, description)
  end

  @last_uptime_reads[id] = started_at
end
172
+
173
# Runs one collection cycle: the disk check once, then one thread per
# container for the inspection, CPU and memory checks. Errors raised inside
# a thread surface on join and are written to stderr.
def tick
  # Disk is the same in every container
  disk if @disk_enabled

  workers = containers.map do |ctr|
    Thread.new(ctr) do |container|
      id = container.id
      name = get_container_name(container)

      raw_stats = container.connection.get("/containers/#{id}/stats", { stream: false })
      stats = Docker::Util.parse_json(raw_stats)

      if @basic_inspection_enabled
        raw_inspection = container.connection.get("/containers/#{id}/json")
        basic_inspection(id, name, Docker::Util.parse_json(raw_inspection))
      end
      cpu(id, name, stats) if @cpu_enabled
      memory(id, name, stats) if @memory_enabled
    end
  end

  workers.each do |worker|
    begin
      worker.join
    rescue StandardError => e
      warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
    end
  end
end
202
+ end
203
+ end
204
+ end
205
+
206
+ Riemann::Tools::DockerHealth.run
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Elasticsearch
2
+
3
+ Gathers Elasticsearch metrics and sends them to Riemann.
4
+
5
+ # Getting started
6
+
7
+ ```
8
+ gem install riemann-elasticsearch
9
+ riemann-elasticsearch --help
10
+ ```
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'find'
7
+
8
+ # Don't include resource forks in tarballs on Mac OS X.
9
+ ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
10
+ ENV['COPYFILE_DISABLE'] = 'true'
11
+
12
+ # Gemspec
13
# Gem specification for the standalone riemann-elasticsearch package.
# NOTE(review): rubyforge_project= and has_rdoc= are reported as deprecated by
# recent RubyGems releases - confirm they are still accepted before building.
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-elasticsearch'

  s.name = 'riemann-elasticsearch'
  s.version = '0.2.4'
  s.author = 'Gavin Sandie'
  s.email = 'beach@vicecity.co.uk'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits elasticsearch stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also returns '.' and '..', which are not executables.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# Defines the packaging tasks for the specification above.
Gem::PackageTask.new gemspec do |p|
end
@@ -0,0 +1,174 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ require 'riemann/tools'
7
+
8
+ module Riemann
9
+ module Tools
10
+ class Elasticsearch
11
+ include Riemann::Tools
12
+ require 'faraday'
13
+ require 'json'
14
+ require 'uri'
15
+
16
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
17
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
18
+ opt :path_prefix,
19
+ 'Elasticsearch path prefix for proxied installations e.g. "els" for target http://localhost/els/_cluster/health', default: '/'
20
+ opt :es_host, 'Elasticsearch host', default: 'localhost'
21
+ opt :es_port, 'Elasticsearch port', type: :int, default: 9200
22
+ opt :es_search_index, 'Elasticsearch index to fetch search statistics for', default: '_all'
23
+
24
# Performs a GET on `uri` using the configured read/open timeouts.
# Any StandardError is reported as a critical 'elasticsearch health' event
# and nil is returned instead of a response.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
44
+
45
# Builds the URL for an Elasticsearch API path from the host, port and
# optional path prefix options.
#
# path - API path without a leading slash, e.g. '_cluster/health'
#
# One leading and one trailing slash are stripped from the prefix. The option
# value is never modified: it is shared between calls, and under the file's
# frozen_string_literal setting the default may be a frozen literal that
# cannot be edited in place.
def make_es_url(path)
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:es_host]}:#{options[:es_port]}#{separator}#{prefix}/#{path}"
end
51
+
52
# URL of the cluster health endpoint.
def health_url
  make_es_url('_cluster/health')
end
55
+
56
# URL of the store statistics endpoint.
def indices_url
  make_es_url('_stats/store')
end
59
+
60
# URL of the search statistics endpoint for the configured index.
def search_url
  make_es_url("#{options[:es_search_index]}/_stats/search")
end
64
+
65
# Tells whether `response` is unusable and, if so, reports a critical
# 'elasticsearch health' event for the host of `uri`.
#
# response - object answering success?, status and body, or nil when the
#            request produced no response
# uri      - URI whose host is used in the reported event
#
# Returns false for a successful response and true otherwise, regardless of
# what `report` returns.
def bad?(response, uri)
  # Safe navigation: a nil response must reach the report below, not raise.
  return false if response&.success?

  description =
    if response.nil?
      'HTTP response is empty!'
    else
      "HTTP connection error: #{response.status} - #{response.body}"
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: 'critical',
    description: description,
  )
  true
end
77
+
78
# Fetches the store statistics and reports, for every index, the primaries
# and total store size in bytes. Returns early when the response is bad.
def tick_indices
  uri = URI(indices_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  JSON.parse(response.body)['indices'].each_pair do |index, stats|
    %w[primaries total].each do |scope|
      report(
        host: uri.host,
        service: "elasticsearch index/#{index}/#{scope}/size_in_bytes",
        metric: stats[scope]['store']['size_in_bytes'],
      )
    end
  end
end
100
+
101
# Fetches the search statistics of the configured index and reports the
# average query and fetch time (time in millis divided by operation count).
# Returns early when the response is bad.
#
# NOTE(review): every entry under '_all' is reported under the same service
# names, so later entries presumably supersede earlier ones - confirm this is
# intended.
def tick_search
  uri = URI(search_url)
  response = safe_get(uri)

  return if bad?(response, uri)

  es_search_index = options[:es_search_index]
  # Assuming that a 200 will give json
  json = JSON.parse(response.body)

  json['_all'].each_pair do |_type, data|
    search = data['search']

    %w[query fetch].each do |kind|
      total = search["#{kind}_total"]
      elapsed = search["#{kind}_time_in_millis"].to_f
      # With no operations recorded the quotient would be NaN or Infinity.
      average = total.to_i.zero? ? 0.0 : elapsed / total

      report(
        host: uri.host,
        service: "elasticsearch search/#{es_search_index}/#{kind}",
        metric: average,
      )
    end
  end
end
127
+
128
# Runs one collection cycle: index and search statistics first (any error
# there becomes a critical 'elasticsearch error' event), then the cluster
# health, reported as a state plus one event per remaining health field.
def tick
  begin
    tick_indices
    tick_search
  rescue StandardError => e
    report(
      host: options[:es_host],
      service: 'elasticsearch error',
      state: 'critical',
      description: "Elasticsearch cluster error: #{e.message}",
    )
  end

  uri = URI(health_url)
  response = safe_get(uri)
  return if bad?(response, uri)

  # Assuming that a 200 will give json
  health = JSON.parse(response.body)
  cluster_name = health.delete('cluster_name')
  cluster_status = health.delete('status')

  # Unrecognised statuses leave the state nil.
  state =
    case cluster_status
    when 'green' then 'ok'
    when 'yellow' then 'warning'
    when 'red' then 'critical'
    end

  report(
    host: uri.host,
    service: 'elasticsearch health',
    state: state,
    description: "Elasticsearch cluster: #{cluster_name} - #{cluster_status}",
  )

  health.each_pair do |field, value|
    report(
      host: uri.host,
      service: "elasticsearch #{field}",
      metric: value,
      description: "Elasticsearch cluster #{field}",
    )
  end
end
171
+ end
172
+ end
173
+ end
174
+ Riemann::Tools::Elasticsearch.run
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2011 Kyle Kingsbury
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,10 @@
1
+ # Riemann Marathon
2
+
3
+ Gathers Marathon metrics and sends them to Riemann.
4
+
5
+ # Getting started
6
+
7
+ ```
8
+ gem install riemann-marathon
9
+ riemann-marathon --help
10
+ ```
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rubygems'
4
+ require 'rubygems/package_task'
5
+ require 'rdoc/task'
6
+ require 'find'
7
+
8
+ # Don't include resource forks in tarballs on Mac OS X.
9
+ ENV['COPY_EXTENDED_ATTRIBUTES_DISABLE'] = 'true'
10
+ ENV['COPYFILE_DISABLE'] = 'true'
11
+
12
+ # Gemspec
13
# Gem specification for the standalone riemann-marathon package.
# NOTE(review): rubyforge_project= and has_rdoc= are reported as deprecated by
# recent RubyGems releases - confirm they are still accepted before building.
gemspec = Gem::Specification.new do |s|
  s.rubyforge_project = 'riemann-marathon'

  s.name = 'riemann-marathon'
  s.version = '0.1.3'
  s.author = 'Giulio Eulisse'
  s.email = 'giulio.eulisse@cern.ch'
  s.homepage = 'https://github.com/riemann/riemann-tools'
  s.platform = Gem::Platform::RUBY
  s.summary = 'Submits Marathon stats to riemann.'
  s.license = 'MIT'

  s.add_dependency 'riemann-tools', '>= 0.2.13'
  s.add_dependency 'faraday', '>= 0.8.5'
  s.add_dependency 'json'

  s.files = FileList['bin/*', 'LICENSE', 'README.md'].to_a
  # Dir.entries also returns '.' and '..', which are not executables.
  s.executables |= Dir.entries('bin/') - %w[. ..]
  s.has_rdoc = false

  s.required_ruby_version = '>= 1.8.7'
end

# Defines the packaging tasks for the specification above.
Gem::PackageTask.new gemspec do |p|
end
@@ -0,0 +1,163 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ Process.setproctitle($PROGRAM_NAME)
5
+
6
+ require 'riemann/tools'
7
+
8
+ module Riemann
9
+ module Tools
10
+ class Marathon
11
+ include Riemann::Tools
12
+
13
+ require 'faraday'
14
+ require 'json'
15
+ require 'uri'
16
+
17
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
18
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
19
+ opt :path_prefix,
20
+ 'Marathon path prefix for proxied installations e.g. "marathon" for target http://localhost/marathon/metrics', default: '/'
21
+ opt :marathon_host, 'Marathon host', default: 'localhost'
22
+ opt :marathon_port, 'Marathon port', type: :int, default: 8080
23
+
24
# Applies this tool's own defaults (interval 60, ttl 120) to the options.
#
# Values supplied on the command line are kept: the defaults are only written
# when the matching `<name>_given` entry is absent from the options hash.
# NOTE(review): relies on the option parser recording `:interval_given` /
# `:ttl_given`; if it does not, the defaults are applied unconditionally.
def initialize
  options[:interval] = 60 unless options[:interval_given]
  options[:ttl] = 120 unless options[:ttl_given]
end
28
+
29
# Performs a GET on `uri` using the configured read/open timeouts.
# Any StandardError is reported as a critical 'marathon health' event and
# nil is returned instead of a response.
def safe_get(uri)
  Faraday.new(uri).get do |request|
    request.options[:timeout] = options[:read_timeout]
    request.options[:open_timeout] = options[:open_timeout]
  end
rescue StandardError => e
  report(
    host: uri.host,
    service: 'marathon health',
    state: 'critical',
    description: "HTTP connection error: #{e.class} - #{e.message}",
  )
  nil
end
49
+
50
# URL of the Marathon metrics endpoint, built from the host, port and
# optional path prefix options.
#
# One leading and one trailing slash are stripped from the prefix. The option
# value is never modified: it is shared between calls, and under the file's
# frozen_string_literal setting the default may be a frozen literal that
# cannot be edited in place.
def health_url
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:marathon_host]}:#{options[:marathon_port]}#{separator}#{prefix}/metrics"
end
56
+
57
# URL of the Marathon v2 apps endpoint, built from the host, port and
# optional path prefix options.
#
# One leading and one trailing slash are stripped from the prefix. The option
# value is never modified: it is shared between calls, and under the file's
# frozen_string_literal setting the default may be a frozen literal that
# cannot be edited in place.
def apps_url
  prefix = options[:path_prefix].sub(%r{\A/}, '').sub(%r{/\z}, '')
  separator = prefix.empty? ? '' : '/'
  "http://#{options[:marathon_host]}:#{options[:marathon_port]}#{separator}#{prefix}/v2/apps"
end
63
+
64
# Runs one collection cycle: the metrics endpoint first, then the apps list.
def tick
  tick_health
  tick_apps
end
68
+
69
# Fetches the metrics endpoint and reports:
# - 'marathon health' as critical for a non-200 status, ok otherwise;
# - one marker event per metric name within each top-level group;
# - one event per numeric counter of each metric.
# Does nothing when no response was obtained.
def tick_health
  uri = URI(health_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  metrics = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  metrics.each_pair do |type, group|
    next unless group.respond_to? :each_pair

    group.each_pair do |service, counters|
      report(
        host: uri.host,
        service: "marathon_metric #{type} #{service}",
        metric: 1,
        tags: ['metric_name'],
        ttl: 600,
      )
      next unless counters.respond_to? :each_pair

      counters.each_pair do |counter, value|
        next unless value.is_a? Numeric

        report(
          host: uri.host,
          service: "marathon #{service} #{counter}",
          metric: value,
          tags: ['metric', type.to_s],
          ttl: 600,
        )
      end
    end
  end
end
121
+
122
# Fetches the apps endpoint and reports 'marathon health' (critical for a
# non-200 status, ok otherwise) followed by one event per numeric attribute
# of every listed app. Does nothing when no response was obtained.
def tick_apps
  uri = URI(apps_url)
  response = safe_get(uri)
  return if response.nil?

  unless response.status == 200
    return report(
      host: uri.host,
      service: 'marathon health',
      state: 'critical',
      description: "HTTP connection error: #{response.status} - #{response.body}",
    )
  end

  # Assuming that a 200 will give json
  payload = JSON.parse(response.body)

  report(
    host: uri.host,
    service: 'marathon health',
    state: 'ok',
  )

  payload['apps'].each do |app|
    app.each_pair do |attribute, value|
      next unless value.is_a? Numeric

      report(
        host: uri.host,
        service: "marathon apps#{app['id']}/#{attribute}",
        metric: value,
        ttl: 120,
      )
    end
  end
end
160
+ end
161
+ end
162
+ end
163
+ Riemann::Tools::Marathon.run