fluent-plugin-k8s-metrics-agg 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.circleci/build_and_push.sh +10 -0
- data/.circleci/build_and_push_to_dockerhub.sh +11 -0
- data/.circleci/build_and_push_to_github_release.sh +11 -0
- data/.circleci/config.yml +105 -0
- data/.circleci/install_dep.sh +5 -0
- data/.circleci/push_gem.sh +7 -0
- data/.gitignore +5 -0
- data/CONTRIBUTING.md +11 -0
- data/CONTRIBUTORS.md +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +101 -0
- data/LICENSE +269 -0
- data/README.md +94 -0
- data/Rakefile +29 -0
- data/VERSION +1 -0
- data/docker/CONTRIBUTING.md +20 -0
- data/docker/Dockerfile +52 -0
- data/docker/LICENSE +201 -0
- data/docker/README.md +1 -0
- data/docker/entrypoint.sh +27 -0
- data/fluent-plugin-k8s-metrics-agg.gemspec +33 -0
- data/lib/fluent/plugin/in_kubernetes_metrics_aggregator.rb +604 -0
- data/test/api.json +8 -0
- data/test/helper.rb +178 -0
- data/test/node1.json +667 -0
- data/test/node2.json +692 -0
- data/test/node3.json +629 -0
- data/test/nodes.json +814 -0
- data/test/plugin/test_in_kubernetes_metrics_aggregator.rb +218 -0
- data/test/pods.json +3502 -0
- data/test/v1.json +488 -0
- metadata +210 -0
@@ -0,0 +1,604 @@
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
2
|
+
# you may not use this file except in compliance with the License.
|
3
|
+
# You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
require 'time'
|
14
|
+
|
15
|
+
require 'fluent/plugin/input'
|
16
|
+
require 'kubeclient'
|
17
|
+
require 'multi_json'
|
18
|
+
module Fluent
|
19
|
+
module Plugin
|
20
|
+
class KubernetesMetricsAggregatorInput < Fluent::Plugin::Input
|
21
|
+
# Class-level accumulators shared by the scrapers defined in this class.
# Requests/limits totals, keyed by node name and by namespace
# (values are UsageMetricsUnit instances):
@@node_requests_limits_metrics_map = {}
@@namespace_usage_metrics_map = {}

# Measured usage totals, keyed by node name and by namespace
# (values are ResourceUsageMetricsUnit instances):
@@node_resource_usage_metrics_map = {}
@@namespace_resource_usage_metrics_map = {}
|
26
|
+
|
27
|
+
# Accumulates CPU and memory requests/limits parsed from Kubernetes quantity
# strings such as "500m", "0.5", "128Mi" or "1.5Gi".
#
# CPU totals are kept in millicores and memory totals in mebibytes; callers
# feed aggregated totals back in with 'm' and 'Mi' suffixes. The totals are
# read from outside through instance_variable_get, so the instance variable
# names (@cpu_limit, @cpu_request, @memory_limit, @memory_request) must stay.
class UsageMetricsUnit
  BYTES_PER_MEBIBYTE = 1024**2

  # Bytes represented by one unit of each memory suffix.
  # https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory
  MEMORY_SUFFIX_BYTES = {
    'm' => 0.001,
    'k' => 1000, 'K' => 1000,
    'M' => 1000**2, 'G' => 1000**3, 'T' => 1000**4, 'P' => 1000**5, 'E' => 1000**6,
    'Ki' => 1024, 'Mi' => 1024**2, 'Gi' => 1024**3,
    'Ti' => 1024**4, 'Pi' => 1024**5, 'Ei' => 1024**6
  }.freeze

  # Leading number, including an optional decimal part and exponent. The
  # exponent needs digits, so the exa suffix in "1E" is not taken for one.
  NUMBER_PATTERN = /\A\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/.freeze

  # Trailing unit letters, e.g. "Mi" in "128Mi".
  SUFFIX_PATTERN = /[A-Za-z]+\z/.freeze

  def initialize
    @cpu_limit = 0
    @cpu_request = 0
    @memory_limit = 0
    @memory_request = 0
  end

  # Adds one set of quantities to the running totals.
  # Each argument is a quantity string; nil or unparsable input counts as 0.
  def add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
    @cpu_limit += whole_or_float(get_cpu_or_memory_value(cpu_limit) * get_cpu_mult(cpu_limit))
    @cpu_request += whole_or_float(get_cpu_or_memory_value(cpu_request) * get_cpu_mult(cpu_request))
    @memory_limit += whole_or_float(get_cpu_or_memory_value(memory_limit) * get_memory_mult(memory_limit))
    @memory_request += whole_or_float(get_cpu_or_memory_value(memory_request) * get_memory_mult(memory_request))
  end

  # Older entry point: remembers the CPU multiplier in @cpu_mult and returns
  # the numeric part of the quantity.
  def get_cpu_value_and_multiplier(cpu)
    @cpu_mult = get_cpu_mult(cpu)
    get_cpu_or_memory_value(cpu)
  end

  # Returns the numeric part of a quantity string ("0.5" => 0.5, "128Mi" => 128).
  # Whole numbers come back as Integer, fractional ones as Float.
  def get_cpu_or_memory_value(resource)
    whole_or_float(resource.to_s[NUMBER_PATTERN].to_f)
  end

  # Factor that converts a CPU quantity to millicores:
  # 1 when it is already in millicores ("250m"), otherwise 1000 (whole cores).
  def get_cpu_mult(cpu)
    cpu.to_s.strip.end_with?('m') ? 1 : 1000
  end

  # Factor that converts a memory quantity to mebibytes.
  # A missing or unknown suffix is treated as plain bytes.
  def get_memory_mult(memory)
    suffix = memory.to_s.strip[SUFFIX_PATTERN].to_s
    bytes = MEMORY_SUFFIX_BYTES.fetch(suffix, 1)
    whole_or_float(bytes / BYTES_PER_MEBIBYTE.to_f)
  end

  private

  # Collapses floats without a fractional part to Integer so that whole
  # totals stay integers in the emitted records.
  def whole_or_float(value)
    return value unless value.is_a?(Float) && value.finite?

    value == value.floor ? value.to_i : value
  end
end
|
96
|
+
|
97
|
+
# Running totals of measured CPU and memory usage.
# The totals are read from outside through instance_variable_get, so the
# instance variable names (@cpu_usage, @memory_usage) must stay.
class ResourceUsageMetricsUnit
  def initialize
    @cpu_usage = 0
    @memory_usage = 0
  end

  # Adds one sample to the totals. A nil sample (a stat that is not
  # available yet) counts as 0 instead of raising TypeError.
  def add_resource_usage_metrics(cpu_usage, memory_usage)
    @cpu_usage += cpu_usage || 0
    @memory_usage += memory_usage || 0
  end
end
|
108
|
+
|
109
|
+
Fluent::Plugin.register_input('kubernetes_metrics_aggregator', self)

helpers :timer

desc 'URL of the kubernetes API server.'
config_param :kubernetes_url, :string, default: nil

desc 'The port that kubelet is listening to.'
config_param :kubelet_port, :integer, default: 10_250

desc 'The tag of the event.'
config_param :tag, :string, default: 'kubernetes.metrics.*'

desc 'How often it pulls metrics.'
# Numeric default (seconds): a numeric value is valid whether or not the
# default is passed through the :time converter, while the previous "15s"
# string would reach the timer unchanged if defaults are used as written.
config_param :interval, :time, default: 15

desc 'Path to a kubeconfig file points to a cluster the plugin should collect metrics from. Mostly useful when running fluentd outside of the cluster. When `kubeconfig` is set, `kubernetes_url`, `client_cert`, `client_key`, `ca_file`, `insecure_ssl`, `bearer_token_file`, and `secret_dir` will all be ignored.'
config_param :kubeconfig, :string, default: nil

desc 'Path to the certificate file for this client.'
config_param :client_cert, :string, default: nil

desc 'Path to the private key file for this client.'
config_param :client_key, :string, default: nil

desc 'Path to the CA file.'
config_param :ca_file, :string, default: nil

desc "If `insecure_ssl` is set to `true`, it won't verify apiserver's certificate."
config_param :insecure_ssl, :bool, default: false

desc 'Path to the file contains the API token. By default it reads from the file "token" in the `secret_dir`.'
config_param :bearer_token_file, :string, default: nil

desc "Path of the location where pod's service account's credentials are stored."
config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'

desc 'The name of the cluster, where the plugin is deployed.'
config_param :cluster_name, :string, default: 'cluster_name'
|
148
|
+
|
149
|
+
# Fluentd lifecycle hook. After the base class has applied the configuration,
# creates the two mutexes used around the node-level maps, splits the tag
# pattern and builds the Kubernetes client.
def configure(conf)
  super

  @mutex_node_res_usage = Mutex.new
  @mutex_node_req_lim = Mutex.new

  parse_tag
  initialize_client
end
|
156
|
+
|
157
|
+
# Fluentd lifecycle hook. Registers one timer per scraper, each firing
# every @interval seconds.
def start
  super

  {
    limits_request_scraper: :scrape_limits_requests_metrics,
    node_scraper: :scrape_node_metrics,
    resource_usage_scraper: :scrape_resource_usage_metrics
  }.each do |title, scraper|
    timer_execute title, @interval, &method(scraper)
  end
end
|
165
|
+
|
166
|
+
# Fluentd lifecycle hook. Finishes any watchers held in @watchers (nothing
# to do when it was never set), then lets the base class close.
def close
  watchers = @watchers
  watchers.each(&:finish) if watchers

  super
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
# Splits a tag pattern on '*' into @tag_prefix and @tag_suffix.
# A tag without '*' leaves both unset.
def parse_tag
  return unless @tag.include?('*')

  @tag_prefix, @tag_suffix = @tag.split('*')
end
|
177
|
+
|
178
|
+
# Builds the event tag for +item_name+ by placing it where the '*' of the
# configured tag was. Without a wildcard the configured tag is returned.
#
# Always returns a new String: callers append metric suffixes to the result,
# and handing out @tag itself would let them grow the configured tag on
# every emit.
def generate_tag(item_name)
  return @tag.dup unless @tag_prefix

  [@tag_prefix, item_name, @tag_suffix].join
end
|
183
|
+
|
184
|
+
# Builds @client from the current context of the kubeconfig file at
# @kubeconfig. The context's SSL and auth settings are merged into +options+.
def init_with_kubeconfig(options = {})
  context = Kubeclient::Config.read(@kubeconfig).context
  endpoint = context.api_endpoint
  version = context.api_version
  client_options = options.merge(
    ssl_options: context.ssl_options,
    auth_options: context.auth_options
  )

  @client = Kubeclient::Client.new(endpoint, version, client_options)
end
|
197
|
+
|
198
|
+
# Builds @client from the plugin parameters, falling back to the in-pod
# service account environment and credentials.
# (Mostly borrowed from the Fluentd Kubernetes Metadata Filter Plugin.)
#
# The +_options+ argument is accepted for symmetry with init_with_kubeconfig
# and is not used.
#
# Raises Fluent::ConfigError when no API URL can be determined or when the
# API check fails with KubeException.
def init_without_kubeconfig(_options = {})
  api_version = 'v1'

  kubernetes_url_final =
    if @kubernetes_url.nil?
      # Use Kubernetes default service account if we're in a pod.
      env_host = ENV['KUBERNETES_SERVICE_HOST']
      env_port = ENV['KUBERNETES_SERVICE_PORT']
      "https://#{env_host}:#{env_port}/api/" if env_host && env_port
    else
      "https://#{@kubernetes_url}:#{@kubelet_port}/api/"
    end

  raise Fluent::ConfigError, 'kubernetes url is not set in configuration, or environment variables' unless kubernetes_url_final

  # Use SSL certificate and bearer token from Kubernetes service account.
  if Dir.exist?(@secret_dir)
    secret_ca_file = File.join(@secret_dir, 'ca.crt')
    secret_token_file = File.join(@secret_dir, 'token')

    @ca_file = secret_ca_file if @ca_file.nil? && File.exist?(secret_ca_file)
    @bearer_token_file = secret_token_file if @bearer_token_file.nil? && File.exist?(secret_token_file)
  end

  ssl_options = {
    client_cert: @client_cert && OpenSSL::X509::Certificate.new(File.read(@client_cert)),
    # PKey.read detects the key type, so EC keys work as well as RSA ones.
    client_key: @client_key && OpenSSL::PKey.read(File.read(@client_key)),
    ca_file: @ca_file,
    verify_ssl: @insecure_ssl ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
  }

  auth_options = {}
  # Strip surrounding whitespace: a trailing newline in the token file would
  # otherwise become part of the bearer token.
  auth_options[:bearer_token] = File.read(@bearer_token_file).strip if @bearer_token_file

  @client = Kubeclient::Client.new(
    kubernetes_url_final, api_version,
    ssl_options: ssl_options,
    auth_options: auth_options
  )

  begin
    @client.api_valid?
  rescue KubeException => kube_error
    # The version is reported from the local value; @api_version is never
    # assigned in this class, so the message used to show a blank.
    raise Fluent::ConfigError, "Invalid Kubernetes API #{api_version} endpoint #{kubernetes_url_final}: #{kube_error.message}"
  end
end
|
250
|
+
|
251
|
+
# Picks the client setup path: plugin parameters and service account when
# no kubeconfig is configured, the kubeconfig file otherwise. Both receive
# the same timeout options (10 for open, nil for read).
def initialize_client
  timeouts = { open: 10, read: nil }
  options = { timeouts: timeouts }

  return init_without_kubeconfig(options) if @kubeconfig.nil?

  init_with_kubeconfig options
end
|
265
|
+
|
266
|
+
# Converts an ISO 8601 timestamp string into a Fluent::EventTime.
def parse_time(metric_time)
  parsed = Time.iso8601(metric_time)
  Fluent::EventTime.from_time(parsed)
end
|
269
|
+
|
270
|
+
# Puts an underscore in front of every ASCII capital letter and lowercases
# that letter ("usageNanoCores" becomes "usage_nano_cores").
def underscore(camlcase)
  camlcase.gsub(/([A-Z])/) { "_#{Regexp.last_match(1).downcase}" }
end
|
273
|
+
|
274
|
+
# Factor that converts a CPU quantity to millicores:
# 1 when it is already in millicores ("3920m"), otherwise 1000 (whole cores).
def get_cpu_mult(cpu)
  cpu.to_s.strip.end_with?('m') ? 1 : 1000
end

# Converts a Kubernetes CPU quantity ("4", "0.5", "3920m") to millicores.
#
# The numeric part is read as a decimal number (the previous digit stripping
# turned "0.5" into 5) and multiplied by the unit factor (the previous
# `value += value * mult` returned value * (1 + mult), e.g. 4004 for "4").
# Whole results are returned as Integer, fractional ones as Float; nil or
# unparsable input yields 0.
def get_cpu_value(resource)
  number = resource.to_s[/\A\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/].to_f
  millicores = number * get_cpu_mult(resource)
  millicores.finite? && millicores == millicores.floor ? millicores.to_i : millicores
end
|
286
|
+
|
287
|
+
# Bytes represented by one unit of each Kubernetes memory suffix.
# https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory
NODE_MEMORY_SUFFIX_BYTES = {
  'm' => 0.001,
  'k' => 1000, 'K' => 1000,
  'M' => 1000**2, 'G' => 1000**3, 'T' => 1000**4, 'P' => 1000**5, 'E' => 1000**6,
  'Ki' => 1024, 'Mi' => 1024**2, 'Gi' => 1024**3,
  'Ti' => 1024**4, 'Pi' => 1024**5, 'Ei' => 1024**6
}.freeze

# Factor that converts a memory quantity to mebibytes.
#
# The whole trailing suffix is looked up. The previous code compared the
# single character memory[-2] with two-character strings, so those branches
# never matched and plain "M"/"G" quantities were treated as bytes.
# A missing or unknown suffix means plain bytes.
def get_memory_mult(memory)
  suffix = memory.to_s.strip[/[A-Za-z]+\z/].to_s
  mult = NODE_MEMORY_SUFFIX_BYTES.fetch(suffix, 1) / (1024.0 * 1024)
  mult == mult.floor ? mult.to_i : mult
end

# Converts a Kubernetes memory quantity ("16Gi", "16424440Ki", "1.5Gi") to
# mebibytes.
#
# The result is number * factor; the previous `value += value * mult`
# returned value * (1 + mult). Whole results are returned as Integer,
# fractional ones as Float; nil or unparsable input yields 0.
def get_memory_value(resource)
  number = resource.to_s[/\A\s*[+-]?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?/].to_f
  mebibytes = number * get_memory_mult(resource)
  mebibytes.finite? && mebibytes == mebibytes.floor ? mebibytes.to_i : mebibytes
end
|
320
|
+
|
321
|
+
# Emits four events for one requests/limits accumulator: cpu limit,
# cpu request, memory limit and memory request, in that order. Each record is
# +labels+ plus a 'value' entry read from the accumulator's instance variable.
def emit_limits_requests_metrics(tag, scraped_at, labels, limits_requests_metric)
  {
    '.cpu.limit' => :@cpu_limit,
    '.cpu.request' => :@cpu_request,
    '.memory.limit' => :@memory_limit,
    '.memory.request' => :@memory_request
  }.each do |metric_suffix, ivar|
    record = labels.merge('value' => limits_requests_metric.instance_variable_get(ivar))
    router.emit tag + metric_suffix, Fluent::EventTime.from_time(scraped_at), record
  end
end
|
340
|
+
|
341
|
+
# Emits two events for one usage accumulator: cpu usage, then memory usage.
# Each record is +labels+ plus a 'value' entry read from the accumulator's
# instance variable.
def emit_resource_usage_metrics(tag, scraped_at, labels, resource_usage_metric)
  [
    ['.cpu.usage', :@cpu_usage],
    ['.memory.usage', :@memory_usage]
  ].each do |metric_suffix, ivar|
    record = labels.merge('value' => resource_usage_metric.instance_variable_get(ivar))
    router.emit tag + metric_suffix, Fluent::EventTime.from_time(scraped_at), record
  end
end
|
352
|
+
|
353
|
+
# REST endpoint for the pods list, built on first use and then reused.
# Previously the endpoint was rebuilt and the URL logged at info level on
# every call even though the result was stored in @limits_requests_api.
def limits_requests_api
  @limits_requests_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/pods'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping limits requests metrics")
      end
    end
end
|
362
|
+
|
363
|
+
# Timer callback: fetches the pods list and hands the response to
# handle_limits_requests_res.
#
# Request errors (connection failures, or an HTTP client that raises on
# non-2xx answers) are logged here instead of escaping from the timer
# callback, so one failed scrape cannot end the periodic scraping.
def scrape_limits_requests_metrics
  response = limits_requests_api.get(@client.headers)
  handle_limits_requests_res(response)
rescue StandardError => e
  log.error "Failed to scrape limits requests metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
367
|
+
|
368
|
+
# Handles the pods list response from the kube apiserver.
#
# A 2xx response is parsed and passed to process_limits_requests_res; any
# other status is logged. Errors raised while parsing or processing are
# logged and not re-raised.
def handle_limits_requests_res(response)
  status = response.code
  # Only successful GET responses (2xx) carry a pods list.
  if status.between?(200, 299)
    @scraped_at = Time.now
    process_limits_requests_res MultiJson.load(response.body)
  else
    log.error "Expected 2xx from pods API, but got #{status}. Response body = #{response.body}"
  end
rescue StandardError => e
  # $ERROR_INFO is only defined after `require 'English'`, which this file
  # does not do, so the exception itself is reported.
  log.error "Failed to scrape metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
381
|
+
|
382
|
+
# Walks a parsed pods list and emits requests/limits metrics per container,
# per pod, per namespace and for the whole cluster. Per-node totals are added
# to @@node_requests_limits_metrics_map for the node scraper to read.
#
# Changes from the earlier version:
# * the pod 'namespace' label now carries the namespace (it held the pod name);
# * containers without a resources / limits / requests section count as '0'
#   instead of raising NoMethodError.
def process_limits_requests_res(response)
  @scraped_at = Time.now
  @mutex_node_req_lim.synchronize do
    Array(response['items']).each do |pod_json|
      pod_name = pod_json['metadata']['name']
      pod_namespace = pod_json['metadata']['namespace']
      pod_node_name = pod_json['spec']['nodeName']
      pod_usage_metrics = UsageMetricsUnit.new

      Array(pod_json['spec']['containers']).each do |container_json|
        resources = container_json['resources'] || {}
        limits = resources['limits'] || {}
        requests = resources['requests'] || {}
        cpu_limit = limits['cpu'] || '0'
        memory_limit = limits['memory'] || '0'
        cpu_request = requests['cpu'] || '0'
        memory_request = requests['memory'] || '0'

        container_usage_metrics = UsageMetricsUnit.new
        container_usage_metrics.add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
        container_labels = { 'name' => container_json['name'], 'image' => container_json['image'], 'node' => pod_node_name }
        emit_limits_requests_metrics(generate_tag('container'), @scraped_at, container_labels, container_usage_metrics)
        pod_usage_metrics.add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
      end

      pod_labels = { 'name' => pod_name, 'namespace' => pod_namespace, 'node' => pod_node_name }
      emit_limits_requests_metrics(generate_tag('pod'), @scraped_at, pod_labels, pod_usage_metrics)

      # Roll the pod totals up into its namespace and its node.
      pod_totals = usage_totals_as_quantities(pod_usage_metrics)
      (@@namespace_usage_metrics_map[pod_namespace] ||= UsageMetricsUnit.new).add_usage_metrics(*pod_totals)
      (@@node_requests_limits_metrics_map[pod_node_name] ||= UsageMetricsUnit.new).add_usage_metrics(*pod_totals)
    end
  end

  cluster_usage_metrics = UsageMetricsUnit.new
  @@namespace_usage_metrics_map.each do |namespace, namespace_usage_metrics|
    cluster_usage_metrics.add_usage_metrics(*usage_totals_as_quantities(namespace_usage_metrics))
    emit_limits_requests_metrics(generate_tag('namespace'), @scraped_at, { 'name' => namespace }, namespace_usage_metrics)
  end

  emit_limits_requests_metrics(generate_tag('cluster'), @scraped_at, { 'name' => @cluster_name }, cluster_usage_metrics)

  # Namespace totals are rebuilt from scratch on every scrape.
  @@namespace_usage_metrics_map = {}
end

# Renders an accumulator's totals as quantity strings in the argument order
# of UsageMetricsUnit#add_usage_metrics ('m' for cpu, 'Mi' for memory).
def usage_totals_as_quantities(usage_metrics)
  [
    "#{usage_metrics.instance_variable_get(:@cpu_limit)}m",
    "#{usage_metrics.instance_variable_get(:@cpu_request)}m",
    "#{usage_metrics.instance_variable_get(:@memory_limit)}Mi",
    "#{usage_metrics.instance_variable_get(:@memory_request)}Mi"
  ]
end
|
451
|
+
|
452
|
+
# REST endpoint for the nodes list, built on first use and then reused.
# Previously the endpoint was rebuilt and the URL logged at info level on
# every call even though the result was stored in @node_api.
def node_api
  @node_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/nodes'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping node metrics")
      end
    end
end
|
461
|
+
|
462
|
+
# Timer callback: fetches the nodes list and hands the response to
# handle_node_response.
#
# Request errors (connection failures, or an HTTP client that raises on
# non-2xx answers) are logged here instead of escaping from the timer
# callback, so one failed scrape cannot end the periodic scraping.
def scrape_node_metrics
  response = node_api.get(@client.headers)
  handle_node_response(response)
rescue StandardError => e
  log.error "Failed to scrape node metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
466
|
+
|
467
|
+
# Handles the nodes list response from the kube apiserver.
#
# A 2xx response is parsed and passed to process_node_response; any other
# status is logged. Errors raised while parsing or processing are logged and
# not re-raised.
def handle_node_response(response)
  status = response.code
  # Only successful GET responses (2xx) carry a nodes list.
  if status.between?(200, 299)
    @scraped_node_at = Time.now
    process_node_response MultiJson.load(response.body)
  else
    log.error "Expected 2xx from nodes API, but got #{status}. Response body = #{response.body}"
  end
rescue StandardError => e
  # $ERROR_INFO is only defined after `require 'English'`, which this file
  # does not do, so the exception itself is reported.
  log.error "Failed to scrape metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
480
|
+
|
481
|
+
# Emits capacity, allocatable, utilization and reservation metrics for every
# node in a parsed nodes list, then clears the per-node accumulators filled
# by the pod and usage scrapers.
#
# Changes from the earlier version:
# * utilization is usage divided by allocatable (it was multiplied, because
#   `a / b * c` evaluates left to right);
# * the accumulators are cleared after all nodes were handled (they were
#   cleared inside the loop, leaving every node after the first with zeros);
# * tags are built with `+`, so the string from generate_tag is not mutated.
def process_node_response(response)
  Array(response['items']).each do |node_json|
    node_name = node_json['metadata']['name']
    event_time = Fluent::EventTime.from_time(@scraped_node_at)
    emit_node_metric = lambda do |metric_suffix, value|
      router.emit generate_tag('node') + metric_suffix, event_time, 'node' => node_name, 'value' => value
    end

    capacity = node_json['status']['capacity']
    allocatable = node_json['status']['allocatable']

    emit_node_metric.call('.cpu.capacity', get_cpu_value(capacity['cpu']))
    node_cpu_allocatable = get_cpu_value(allocatable['cpu'])
    emit_node_metric.call('.cpu.allocatable', node_cpu_allocatable)
    emit_node_metric.call('.memory.capacity', get_memory_value(capacity['memory']))
    node_memory_allocatable = get_memory_value(allocatable['memory'])
    emit_node_metric.call('.memory.allocatable', node_memory_allocatable)

    # Nodes nothing was recorded for fall back to empty accumulators.
    node_req_lim = @mutex_node_req_lim.synchronize { @@node_requests_limits_metrics_map[node_name] } || UsageMetricsUnit.new
    node_res_usage = @mutex_node_res_usage.synchronize { @@node_resource_usage_metrics_map[node_name] } || ResourceUsageMetricsUnit.new

    # https://github.com/kubernetes/heapster/blob/c78cc312ab3901acfe5c2f95f7a621909c8455ad/metrics/processors/node_autoscaling_enricher.go#L62
    cpu_usage_millicores = node_res_usage.instance_variable_get(:@cpu_usage).to_f / 1_000_000 # nano cores to milli cores
    emit_node_metric.call('.cpu.utilization', node_metric_ratio(cpu_usage_millicores, node_cpu_allocatable))
    emit_node_metric.call('.cpu.reservation', node_metric_ratio(node_req_lim.instance_variable_get(:@cpu_request), node_cpu_allocatable))

    # Allocatable memory is counted with 'Mi' as 1, so usage in bytes is
    # converted to mebibytes before dividing.
    memory_usage_mebibytes = node_res_usage.instance_variable_get(:@memory_usage).to_f / (1024 * 1024)
    emit_node_metric.call('.memory.utilization', node_metric_ratio(memory_usage_mebibytes, node_memory_allocatable))
    emit_node_metric.call('.memory.reservation', node_metric_ratio(node_req_lim.instance_variable_get(:@memory_request), node_memory_allocatable))
  end

  @mutex_node_req_lim.synchronize { @@node_requests_limits_metrics_map = {} }
  @mutex_node_res_usage.synchronize { @@node_resource_usage_metrics_map = {} }
end

# Fraction numerator / denominator as a Float; 0.0 when the denominator is
# zero so that no Infinity or NaN value is emitted.
def node_metric_ratio(numerator, denominator)
  return 0.0 if denominator.to_f.zero?

  numerator.to_f / denominator
end
|
524
|
+
|
525
|
+
# REST endpoint for the nodes list used by the resource usage scraper, built
# on first use and then reused. Previously the endpoint was rebuilt and the
# URL logged at info level on every call even though the result was stored
# in @resource_usage_api.
def resource_usage_api
  @resource_usage_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/nodes'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping node metrics")
      end
    end
end
|
534
|
+
|
535
|
+
# Timer callback: fetches the nodes list and hands the response to
# handle_resource_usage_response.
#
# Request errors (connection failures, or an HTTP client that raises on
# non-2xx answers) are logged here instead of escaping from the timer
# callback, so one failed scrape cannot end the periodic scraping.
def scrape_resource_usage_metrics
  response = resource_usage_api.get(@client.headers)
  handle_resource_usage_response(response)
rescue StandardError => e
  log.error "Failed to scrape resource usage metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
539
|
+
|
540
|
+
# Handles the nodes list response fetched for the resource usage scraper
# (the per-node summary requests are made in process_resource_usage_res).
#
# A 2xx response is parsed and passed to process_resource_usage_res; any
# other status is logged. Errors raised while parsing or processing are
# logged and not re-raised.
def handle_resource_usage_response(response)
  status = response.code
  # Only successful GET responses (2xx) carry a nodes list.
  if status.between?(200, 299)
    @scraped_at = Time.now
    process_resource_usage_res MultiJson.load(response.body)
  else
    log.error "Expected 2xx from nodes API, but got #{status}. Response body = #{response.body}"
  end
rescue StandardError => e
  # $ERROR_INFO is only defined after `require 'English'`, which this file
  # does not do, so the exception itself is reported.
  log.error "Failed to scrape metrics, error=#{e.message}, #{e.inspect}"
  log.error_backtrace
end
|
553
|
+
|
554
|
+
# For every node in a parsed nodes list, fetches the node's stats summary
# through the apiserver proxy and adds each pod's CPU (nano cores) and memory
# (bytes) usage to the namespace and node accumulators. Then emits usage
# metrics per namespace and for the whole cluster.
#
# Changes from the earlier version:
# * pods without cpu/memory stats count as 0 instead of raising;
# * a failing summary request is logged and the remaining nodes are still
#   processed, so the namespace totals are always emitted and cleared
#   (an abort used to leave partial totals behind for the next scrape).
def process_resource_usage_res(response)
  @scraped_node_at = Time.now
  @mutex_node_res_usage.synchronize do
    Array(response['items']).each do |node_json|
      node_name = node_json['metadata']['name']
      begin
        @client.discover unless @client.discovered
        node_rest_client = @client.rest_client["/nodes/#{node_name}:#{@kubelet_port}/proxy/stats/summary"]
        log.info("Use URL #{node_rest_client.url} for scraping resource usage metrics")

        node_response = JSON.parse(node_rest_client.get(@client.headers))
        Array(node_response['pods']).each do |pod_json|
          pod_cpu_usage = (pod_json['cpu'] || {})['usageNanoCores'] || 0
          pod_memory_usage = (pod_json['memory'] || {})['usageBytes'] || 0
          pod_namespace = pod_json['podRef']['namespace']

          (@@namespace_resource_usage_metrics_map[pod_namespace] ||= ResourceUsageMetricsUnit.new)
            .add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
          (@@node_resource_usage_metrics_map[node_name] ||= ResourceUsageMetricsUnit.new)
            .add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
        end
      rescue StandardError => e
        log.error "Failed to scrape resource usage metrics for node #{node_name}, error=#{e.message}, #{e.inspect}"
        log.error_backtrace
      end
    end
  end

  cluster_usage_metrics = ResourceUsageMetricsUnit.new
  @@namespace_resource_usage_metrics_map.each do |namespace, namespace_usage|
    cluster_usage_metrics.add_resource_usage_metrics(namespace_usage.instance_variable_get(:@cpu_usage),
                                                     namespace_usage.instance_variable_get(:@memory_usage))
    emit_resource_usage_metrics(generate_tag('namespace'), @scraped_at, { 'name' => namespace }, namespace_usage)
  end
  emit_resource_usage_metrics(generate_tag('cluster'), @scraped_at, { 'name' => @cluster_name }, cluster_usage_metrics)

  # Namespace totals are rebuilt from scratch on every scrape.
  @@namespace_resource_usage_metrics_map = {}
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
end
|