fluent-plugin-k8s-metrics-agg 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.circleci/build_and_push.sh +10 -0
- data/.circleci/build_and_push_to_dockerhub.sh +11 -0
- data/.circleci/build_and_push_to_github_release.sh +11 -0
- data/.circleci/config.yml +105 -0
- data/.circleci/install_dep.sh +5 -0
- data/.circleci/push_gem.sh +7 -0
- data/.gitignore +5 -0
- data/CONTRIBUTING.md +11 -0
- data/CONTRIBUTORS.md +13 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +101 -0
- data/LICENSE +269 -0
- data/README.md +94 -0
- data/Rakefile +29 -0
- data/VERSION +1 -0
- data/docker/CONTRIBUTING.md +20 -0
- data/docker/Dockerfile +52 -0
- data/docker/LICENSE +201 -0
- data/docker/README.md +1 -0
- data/docker/entrypoint.sh +27 -0
- data/fluent-plugin-k8s-metrics-agg.gemspec +33 -0
- data/lib/fluent/plugin/in_kubernetes_metrics_aggregator.rb +604 -0
- data/test/api.json +8 -0
- data/test/helper.rb +178 -0
- data/test/node1.json +667 -0
- data/test/node2.json +692 -0
- data/test/node3.json +629 -0
- data/test/nodes.json +814 -0
- data/test/plugin/test_in_kubernetes_metrics_aggregator.rb +218 -0
- data/test/pods.json +3502 -0
- data/test/v1.json +488 -0
- metadata +210 -0
@@ -0,0 +1,604 @@
|
|
1
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
2
|
+
# you may not use this file except in compliance with the License.
|
3
|
+
# You may obtain a copy of the License at
|
4
|
+
#
|
5
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
6
|
+
#
|
7
|
+
# Unless required by applicable law or agreed to in writing, software
|
8
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
9
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
10
|
+
# See the License for the specific language governing permissions and
|
11
|
+
# limitations under the License.
|
12
|
+
|
13
|
+
require 'time'
|
14
|
+
|
15
|
+
require 'fluent/plugin/input'
|
16
|
+
require 'kubeclient'
|
17
|
+
require 'multi_json'
|
18
|
+
module Fluent
|
19
|
+
module Plugin
|
20
|
+
class KubernetesMetricsAggregatorInput < Fluent::Plugin::Input
|
21
|
+
@@namespace_usage_metrics_map = {}
|
22
|
+
@@node_requests_limits_metrics_map = {}
|
23
|
+
|
24
|
+
@@namespace_resource_usage_metrics_map = {}
|
25
|
+
@@node_resource_usage_metrics_map = {}
|
26
|
+
|
27
|
+
# Accumulates CPU and memory limits/requests for one entity (container, pod,
# namespace, node or cluster). CPU totals are kept in millicores; memory
# totals are kept in (approximate) megabytes, treating "M" and "Mi" as the
# same base unit as the original multiplier table did.
class UsageMetricsUnit
  def initialize
    @cpu_limit = 0
    @cpu_request = 0
    @memory_limit = 0
    @memory_request = 0
  end

  # Adds four Kubernetes quantity strings (e.g. "500m", "0.5", "128Mi") to
  # the running totals.
  def add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
    @cpu_limit += get_cpu_or_memory_value(cpu_limit) * get_cpu_mult(cpu_limit)
    @cpu_request += get_cpu_or_memory_value(cpu_request) * get_cpu_mult(cpu_request)
    @memory_limit += get_cpu_or_memory_value(memory_limit) * get_memory_mult(memory_limit)
    @memory_request += get_cpu_or_memory_value(memory_request) * get_memory_mult(memory_request)
  end

  # Stores the millicore multiplier for +cpu+ in @cpu_mult and returns the
  # numeric part of the quantity.
  def get_cpu_value_and_multiplier(cpu)
    # m cpu is assumed standard
    @cpu_mult = get_cpu_mult(cpu)
    get_cpu_or_memory_value(cpu)
  end

  # Returns the numeric part of a quantity string. The leading number is
  # parsed as a whole (so "0.5" is 0.5, not 5, and "1.0e-05Mi" is 1.0e-05);
  # whole numbers are returned as Integer so integer totals stay integers.
  def get_cpu_or_memory_value(resource)
    value = resource.to_s.to_f
    (value % 1).zero? ? value.to_i : value
  end

  # Returns the factor that converts a CPU quantity to millicores:
  # 1 for an "m" (milli) suffix, 1000 for whole cores.
  def get_cpu_mult(cpu)
    cpu.to_s.end_with?('m') ? 1 : 1000
  end

  # https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory
  # Returns the factor that converts a memory quantity to megabytes, chosen
  # by the trailing unit suffix. A quantity without a recognised suffix is
  # treated as plain bytes.
  def get_memory_mult(memory)
    case memory.to_s[/[A-Za-z]+\z/]
    when 'Ki' then 1.0 / 1024
    when 'Mi', 'M' then 1
    when 'Gi' then 1024
    when 'Ti' then 1_048_576 # 1024**2
    when 'Pi' then 1_073_741_824 # 1024**3
    when 'Ei' then 1_099_511_627_776 # 1024**4
    when 'k', 'K' then 0.001
    when 'G' then 1000
    when 'T' then 1_000_000
    when 'P' then 1_000_000_000
    when 'E' then 1_000_000_000_000
    else 0.000001
    end
  end
end
|
96
|
+
|
97
|
+
# Accumulates observed CPU and memory usage for one entity. The values are
# summed exactly as passed in; callers in this file pass the kubelet summary
# fields usageNanoCores and usageBytes.
class ResourceUsageMetricsUnit
  def initialize
    @cpu_usage = 0
    @memory_usage = 0
  end

  # Adds one usage sample to the totals. A nil value (a stat that was not
  # reported) counts as zero instead of raising.
  def add_resource_usage_metrics(cpu_usage, memory_usage)
    @cpu_usage += cpu_usage || 0
    @memory_usage += memory_usage || 0
  end
end
|
108
|
+
|
109
|
+
Fluent::Plugin.register_input('kubernetes_metrics_aggregator', self)
|
110
|
+
|
111
|
+
helpers :timer
|
112
|
+
|
113
|
+
desc 'URL of the kubernetes API server.'
|
114
|
+
config_param :kubernetes_url, :string, default: nil
|
115
|
+
|
116
|
+
desc 'The port that kubelet is listening to.'
|
117
|
+
config_param :kubelet_port, :integer, default: 10_250
|
118
|
+
|
119
|
+
desc 'The tag of the event.'
|
120
|
+
config_param :tag, :string, default: 'kubernetes.metrics.*'
|
121
|
+
|
122
|
+
desc 'How often it pulls metrics.'
|
123
|
+
config_param :interval, :time, default: "15s"
|
124
|
+
|
125
|
+
desc 'Path to a kubeconfig file points to a cluster the plugin should collect metrics from. Mostly useful when running fluentd outside of the cluster. When `kubeconfig` is set, `kubernetes_url`, `client_cert`, `client_key`, `ca_file`, `insecure_ssl`, `bearer_token_file`, and `secret_dir` will all be ignored.'
|
126
|
+
config_param :kubeconfig, :string, default: nil
|
127
|
+
|
128
|
+
desc 'Path to the certificate file for this client.'
|
129
|
+
config_param :client_cert, :string, default: nil
|
130
|
+
|
131
|
+
desc 'Path to the private key file for this client.'
|
132
|
+
config_param :client_key, :string, default: nil
|
133
|
+
|
134
|
+
desc 'Path to the CA file.'
|
135
|
+
config_param :ca_file, :string, default: nil
|
136
|
+
|
137
|
+
desc "If `insecure_ssl` is set to `true`, it won't verify apiserver's certificate."
|
138
|
+
config_param :insecure_ssl, :bool, default: false
|
139
|
+
|
140
|
+
desc 'Path to the file contains the API token. By default it reads from the file "token" in the `secret_dir`.'
|
141
|
+
config_param :bearer_token_file, :string, default: nil
|
142
|
+
|
143
|
+
desc "Path of the location where pod's service account's credentials are stored."
|
144
|
+
config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
|
145
|
+
|
146
|
+
desc 'The name of the cluster, where the plugin is deployed.'
|
147
|
+
config_param :cluster_name, :string, default: 'cluster_name'
|
148
|
+
|
149
|
+
# Fluentd configuration hook: lets the base class process +conf+, creates the
# two mutexes guarding the shared per-node maps, splits the tag pattern and
# builds the Kubernetes client.
def configure(conf)
  super
  @mutex_node_req_lim, @mutex_node_res_usage = Mutex.new, Mutex.new
  parse_tag
  initialize_client
end
|
156
|
+
|
157
|
+
# Fluentd start hook: registers one repeating timer per scraper, all firing
# every @interval.
def start
  super

  timer_execute(:limits_request_scraper, @interval, &method(:scrape_limits_requests_metrics))
  timer_execute(:node_scraper, @interval, &method(:scrape_node_metrics))
  timer_execute(:resource_usage_scraper, @interval, &method(:scrape_resource_usage_metrics))
end
|
165
|
+
|
166
|
+
# Fluentd close hook: finishes every watcher when @watchers has been set,
# then delegates to the base class.
def close
  @watchers.each(&:finish) if @watchers

  super
end
|
171
|
+
|
172
|
+
private
|
173
|
+
|
174
|
+
# Splits the configured tag at its '*' wildcard into @tag_prefix and
# @tag_suffix. Tags without a wildcard leave both unset.
def parse_tag
  return unless @tag.include?('*')

  # A limit of -1 keeps trailing empty fields, so a bare '*' yields an empty
  # (but non-nil) prefix and is still treated as a wildcard tag.
  @tag_prefix, @tag_suffix = @tag.split('*', -1)
end
|
177
|
+
|
178
|
+
# Builds the event tag for +item_name+ by placing it where the '*' wildcard
# was in the configured tag. Without a wildcard the configured tag is used.
# Always returns a fresh String, because callers append suffixes to the
# result with << and must not modify @tag itself.
def generate_tag(item_name)
  return @tag.dup unless @tag_prefix

  [@tag_prefix, item_name, @tag_suffix].join
end
|
183
|
+
|
184
|
+
# Builds @client from the kubeconfig file at @kubeconfig, using the SSL and
# auth settings of that file's current context merged over +options+.
def init_with_kubeconfig(options = {})
  config = Kubeclient::Config.read @kubeconfig
  current_context = config.context

  client_options = options.merge(
    ssl_options: current_context.ssl_options,
    auth_options: current_context.auth_options
  )

  # Splat the options so they are passed as keyword arguments: a positional
  # Hash is no longer converted to keywords on Ruby 3.
  @client = Kubeclient::Client.new(
    current_context.api_endpoint,
    current_context.api_version,
    **client_options
  )
end
|
197
|
+
|
198
|
+
# Builds @client without a kubeconfig file.
#
# The endpoint comes from `kubernetes_url` (combined with `kubelet_port`) or,
# when that is unset, from the KUBERNETES_SERVICE_HOST / KUBERNETES_SERVICE_PORT
# environment variables. CA file and bearer token default to the files found
# in `secret_dir` when not configured explicitly.
#
# NOTE(review): +_options+ (the timeouts built by initialize_client) is
# accepted but not forwarded to the client here — confirm whether that is
# intended.
#
# Raises Fluent::ConfigError when no endpoint can be determined or when the
# API validity check fails.
def init_without_kubeconfig(_options = {})
  # mostly borrowed from Fluentd Kubernetes Metadata Filter Plugin
  kubernetes_url_final =
    if @kubernetes_url.nil?
      # Use Kubernetes default service account if we're in a pod.
      env_host = ENV['KUBERNETES_SERVICE_HOST']
      env_port = ENV['KUBERNETES_SERVICE_PORT']
      "https://#{env_host}:#{env_port}/api/" if env_host && env_port
    else
      "https://#{@kubernetes_url}:#{@kubelet_port}/api/"
    end

  raise Fluent::ConfigError, 'kubernetes url is not set in configuration, or environment variables' unless kubernetes_url_final

  # Use SSL certificate and bearer token from Kubernetes service account.
  if Dir.exist?(@secret_dir)
    secret_ca_file = File.join(@secret_dir, 'ca.crt')
    secret_token_file = File.join(@secret_dir, 'token')

    @ca_file = secret_ca_file if @ca_file.nil? && File.exist?(secret_ca_file)
    @bearer_token_file = secret_token_file if @bearer_token_file.nil? && File.exist?(secret_token_file)
  end

  ssl_options = {
    client_cert: @client_cert && OpenSSL::X509::Certificate.new(File.read(@client_cert)),
    client_key: @client_key && OpenSSL::PKey::RSA.new(File.read(@client_key)),
    ca_file: @ca_file,
    verify_ssl: @insecure_ssl ? OpenSSL::SSL::VERIFY_NONE : OpenSSL::SSL::VERIFY_PEER
  }

  auth_options = {}
  auth_options[:bearer_token] = File.read(@bearer_token_file) if @bearer_token_file

  api_version = 'v1'
  @client = Kubeclient::Client.new(
    kubernetes_url_final, api_version,
    ssl_options: ssl_options,
    auth_options: auth_options
  )

  begin
    @client.api_valid?
  rescue KubeException => kube_error
    # Report the version actually requested; @api_version is never assigned.
    raise Fluent::ConfigError, "Invalid Kubernetes API #{api_version} endpoint #{kubernetes_url_final}: #{kube_error.message}"
  end
end
|
250
|
+
|
251
|
+
# Creates the Kubernetes client, from the kubeconfig file when one is
# configured and from url/service-account settings otherwise. Both paths
# receive the same timeout options (10 for open, nil for read).
def initialize_client
  options = { timeouts: { open: 10, read: nil } }

  return init_without_kubeconfig(options) if @kubeconfig.nil?

  init_with_kubeconfig(options)
end
|
265
|
+
|
266
|
+
# Converts an ISO 8601 timestamp string into a Fluent::EventTime.
def parse_time(metric_time)
  parsed = Time.iso8601(metric_time)
  Fluent::EventTime.from_time(parsed)
end
|
269
|
+
|
270
|
+
# Converts a camelCase name to snake_case by replacing each ASCII capital
# letter with an underscore followed by its lower-case form.
def underscore(camlcase)
  camlcase.gsub(/[A-Z]/) do |capital|
    '_' + capital.downcase
  end
end
|
273
|
+
|
274
|
+
# Returns the factor that converts a CPU quantity string to millicores:
# 1 when the last character is 'm', 1000 otherwise.
def get_cpu_mult(cpu)
  cpu[-1] == 'm' ? 1 : 1000
end
|
279
|
+
|
280
|
+
# Converts a Kubernetes CPU quantity (e.g. "4", "0.5", "3920m") to an Integer
# number of millicores.
#
# The numeric part is parsed as a whole, so fractional cores are honoured,
# and it is multiplied by the unit factor exactly once. The result is rounded
# to a whole millicore.
def get_cpu_value(resource)
  quantity = resource.to_s
  (quantity.to_f * get_cpu_mult(quantity)).round
end
|
286
|
+
|
287
|
+
# Returns the factor that converts a Kubernetes memory quantity to megabytes,
# chosen by the trailing unit suffix. "M" and "Mi" are both treated as the
# base unit, as in the original table. A quantity without a recognised suffix
# is treated as plain bytes.
def get_memory_mult(memory)
  # Compare the whole trailing suffix; a single character such as memory[-2]
  # can never equal a two-character unit like 'Ki'.
  case memory.to_s[/[A-Za-z]+\z/]
  when 'Ki' then 1.0 / 1024
  when 'Mi', 'M' then 1
  when 'Gi' then 1024
  when 'Ti' then 1_048_576 # 1024*1024
  when 'Pi' then 1_073_741_824 # 1024*1024*1024
  when 'Ei' then 1_099_511_627_776 # 1024**4
  when 'k', 'K' then 0.001
  when 'G' then 1000
  when 'T' then 1_000_000
  when 'P' then 1_000_000_000
  when 'E' then 1_000_000_000_000
  else 0.000001
  end
end
|
313
|
+
|
314
|
+
# Converts a Kubernetes memory quantity (e.g. "7976Mi", "16000000Ki") to
# megabytes.
#
# The numeric part is multiplied by the unit factor exactly once. Whole
# results are returned as Integer, anything else as Float.
def get_memory_value(resource)
  quantity = resource.to_s
  megabytes = quantity.to_f * get_memory_mult(quantity)
  (megabytes % 1).zero? ? megabytes.to_i : megabytes
end
|
320
|
+
|
321
|
+
# Emits four events for one usage unit: "<tag>.cpu.limit", "<tag>.cpu.request",
# "<tag>.memory.limit" and "<tag>.memory.request". Each record is +labels+
# plus a 'value' key read from the matching instance variable of
# +limits_requests_metric+.
def emit_limits_requests_metrics(tag, scraped_at, labels, limits_requests_metric)
  emit_one = lambda do |suffix, ivar|
    record = labels.merge('value' => limits_requests_metric.instance_variable_get(ivar))
    router.emit(tag + suffix, Fluent::EventTime.from_time(scraped_at), record)
  end

  emit_one.call('.cpu.limit', :@cpu_limit)
  emit_one.call('.cpu.request', :@cpu_request)
  emit_one.call('.memory.limit', :@memory_limit)
  emit_one.call('.memory.request', :@memory_request)
end
|
340
|
+
|
341
|
+
# Emits two events for one usage unit: "<tag>.cpu.usage" and
# "<tag>.memory.usage". Each record is +labels+ plus a 'value' key read from
# the matching instance variable of +resource_usage_metric+.
def emit_resource_usage_metrics(tag, scraped_at, labels, resource_usage_metric)
  emit_one = lambda do |suffix, ivar|
    record = labels.merge('value' => resource_usage_metric.instance_variable_get(ivar))
    router.emit(tag + suffix, Fluent::EventTime.from_time(scraped_at), record)
  end

  emit_one.call('.cpu.usage', :@cpu_usage)
  emit_one.call('.memory.usage', :@memory_usage)
end
|
352
|
+
|
353
|
+
# Returns the REST endpoint used to list pods. The endpoint is built (and its
# URL logged) once and then reused, instead of on every scrape.
def limits_requests_api
  @limits_requests_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/pods'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping limits requests metrics")
      end
    end
end
|
362
|
+
|
363
|
+
# Timer callback: fetches the pod list and hands the response to
# handle_limits_requests_res.
#
# Request failures are rescued and logged here so that they do not propagate
# out of the timer callback.
def scrape_limits_requests_metrics
  response = limits_requests_api.get(@client.headers)
  handle_limits_requests_res(response)
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
367
|
+
|
368
|
+
# Handles the pod-list response from the kube apiserver: a 2xx response is
# parsed as JSON and passed to process_limits_requests_res; any other status
# is logged. Errors raised while parsing or processing are logged and
# swallowed.
def handle_limits_requests_res(response)
  # Checking response codes only for a successful GET request viz., 2XX codes
  if response.code.between?(200, 299)
    @scraped_at = Time.now
    process_limits_requests_res MultiJson.load(response.body)
  else
    log.error "Expected 2xx from pods API, but got #{response.code}. Response body = #{response.body}"
  end
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
381
|
+
|
382
|
+
# Walks the pod list in +response+ and emits CPU/memory limit and request
# metrics per container, per pod, per namespace and for the whole cluster.
# Per-node totals are accumulated into @@node_requests_limits_metrics_map for
# process_node_response to consume.
#
# Every container's quantities are added directly to each aggregate, so no
# total is converted to a string and parsed again.
def process_limits_requests_res(response)
  @scraped_at = Time.now
  cluster_usage_metrics = UsageMetricsUnit.new

  @mutex_node_req_lim.synchronize do
    Array(response['items']).each do |pod_json|
      pod_name = pod_json['metadata']['name']
      pod_namespace = pod_json['metadata']['namespace']
      pod_node_name = pod_json['spec']['nodeName']
      pod_usage_metrics = UsageMetricsUnit.new

      # Every unit that each container of this pod contributes to.
      aggregates = [
        pod_usage_metrics,
        (@@namespace_usage_metrics_map[pod_namespace] ||= UsageMetricsUnit.new),
        (@@node_requests_limits_metrics_map[pod_node_name] ||= UsageMetricsUnit.new),
        cluster_usage_metrics
      ]

      Array(pod_json['spec']['containers']).each do |container_json|
        resources = container_json['resources'] || {}
        limits = resources['limits'] || {}
        requests = resources['requests'] || {}
        # Order matches add_usage_metrics: cpu limit, cpu request,
        # memory limit, memory request. Unset quantities count as '0'.
        quantities = [
          limits['cpu'] || '0',
          requests['cpu'] || '0',
          limits['memory'] || '0',
          requests['memory'] || '0'
        ]

        container_usage_metrics = UsageMetricsUnit.new
        container_usage_metrics.add_usage_metrics(*quantities)
        container_labels = { 'name' => container_json['name'], 'image' => container_json['image'], 'node' => pod_node_name }
        emit_limits_requests_metrics(generate_tag('container'), @scraped_at, container_labels, container_usage_metrics)

        aggregates.each { |unit| unit.add_usage_metrics(*quantities) }
      end

      pod_labels = { 'name' => pod_name, 'namespace' => pod_namespace, 'node' => pod_node_name }
      emit_limits_requests_metrics(generate_tag('pod'), @scraped_at, pod_labels, pod_usage_metrics)
    end
  end

  @@namespace_usage_metrics_map.each do |namespace, namespace_usage_metrics|
    emit_limits_requests_metrics(generate_tag('namespace'), @scraped_at, { 'name' => namespace }, namespace_usage_metrics)
  end

  emit_limits_requests_metrics(generate_tag('cluster'), @scraped_at, { 'name' => @cluster_name }, cluster_usage_metrics)
ensure
  # Start the next scrape from a clean slate even if this one failed part-way.
  @@namespace_usage_metrics_map = {}
end
|
451
|
+
|
452
|
+
# Returns the REST endpoint used to list nodes. The endpoint is built (and
# its URL logged) once and then reused, instead of on every scrape.
def node_api
  @node_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/nodes'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping node metrics")
      end
    end
end
|
461
|
+
|
462
|
+
# Timer callback: fetches the node list and hands the response to
# handle_node_response.
#
# Request failures are rescued and logged here so that they do not propagate
# out of the timer callback.
def scrape_node_metrics
  response = node_api.get(@client.headers)
  handle_node_response(response)
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
466
|
+
|
467
|
+
# Handles the node-list response from the kube apiserver: a 2xx response is
# parsed as JSON and passed to process_node_response; any other status is
# logged. Errors raised while parsing or processing are logged and swallowed.
def handle_node_response(response)
  # Checking response codes only for a successful GET request viz., 2XX codes
  if response.code.between?(200, 299)
    @scraped_node_at = Time.now
    process_node_response MultiJson.load(response.body)
  else
    log.error "Expected 2xx from nodes API, but got #{response.code}. Response body = #{response.body}"
  end
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
480
|
+
|
481
|
+
# Emits capacity, allocatable, utilization and reservation metrics for every
# node in +response+.
#
# Utilization and reservation use the per-node totals collected by the other
# two scrapers in the shared maps. A node with no collected totals is
# reported against an empty (all-zero) unit. The shared maps are cleared only
# after every node has been processed.
def process_node_response(response)
  event_time = Fluent::EventTime.from_time(@scraped_node_at)
  emit_node_metric = lambda do |suffix, node_name, value|
    # Build a new tag string each time; never append to generate_tag's result.
    router.emit "#{generate_tag('node')}#{suffix}", event_time, 'node' => node_name, 'value' => value
  end

  Array(response['items']).each do |node_json|
    node_name = node_json['metadata']['name']
    capacity = node_json['status']['capacity']
    allocatable = node_json['status']['allocatable']

    node_cpu_capacity = get_cpu_value(capacity['cpu'])
    emit_node_metric.call('.cpu.capacity', node_name, node_cpu_capacity)
    node_cpu_allocatable = get_cpu_value(allocatable['cpu'])
    emit_node_metric.call('.cpu.allocatable', node_name, node_cpu_allocatable)
    node_memory_capacity = get_memory_value(capacity['memory'])
    emit_node_metric.call('.memory.capacity', node_name, node_memory_capacity)
    node_memory_allocatable = get_memory_value(allocatable['memory'])
    emit_node_metric.call('.memory.allocatable', node_name, node_memory_allocatable)

    node_req_lim = @mutex_node_req_lim.synchronize do
      @@node_requests_limits_metrics_map[node_name]
    end || UsageMetricsUnit.new
    node_res_usage = @mutex_node_res_usage.synchronize do
      @@node_resource_usage_metrics_map[node_name]
    end || ResourceUsageMetricsUnit.new

    # https://github.com/kubernetes/heapster/blob/c78cc312ab3901acfe5c2f95f7a621909c8455ad/metrics/processors/node_autoscaling_enricher.go#L62
    # Utilization is usage divided by allocatable. The 1_000_000 factor
    # converts nano cores to milli cores, and bytes to megabytes.
    node_cpu_utilization = node_res_usage.instance_variable_get(:@cpu_usage).to_f / (1_000_000 * node_cpu_allocatable)
    emit_node_metric.call('.cpu.utilization', node_name, node_cpu_utilization)
    node_cpu_reservation = node_req_lim.instance_variable_get(:@cpu_request).to_f / node_cpu_allocatable
    emit_node_metric.call('.cpu.reservation', node_name, node_cpu_reservation)
    node_memory_utilization = node_res_usage.instance_variable_get(:@memory_usage).to_f / (1_000_000 * node_memory_allocatable)
    emit_node_metric.call('.memory.utilization', node_name, node_memory_utilization)
    node_memory_reservation = node_req_lim.instance_variable_get(:@memory_request).to_f / node_memory_allocatable
    emit_node_metric.call('.memory.reservation', node_name, node_memory_reservation)
  end

  # Reset once, after all nodes have read their totals.
  @mutex_node_req_lim.synchronize { @@node_requests_limits_metrics_map = {} }
  @mutex_node_res_usage.synchronize { @@node_resource_usage_metrics_map = {} }
end
|
524
|
+
|
525
|
+
# Returns the REST endpoint used to list the nodes whose resource usage is
# scraped. The endpoint is built (and its URL logged) once and then reused,
# instead of on every scrape.
def resource_usage_api
  @resource_usage_api ||=
    begin
      @client.discover unless @client.discovered
      @client.rest_client['/nodes'].tap do |endpoint|
        log.info("Use URL #{endpoint.url} for scraping node metrics")
      end
    end
end
|
534
|
+
|
535
|
+
# Timer callback: fetches the node list and hands the response to
# handle_resource_usage_response.
#
# Request failures are rescued and logged here so that they do not propagate
# out of the timer callback.
def scrape_resource_usage_metrics
  response = resource_usage_api.get(@client.headers)
  handle_resource_usage_response(response)
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
539
|
+
|
540
|
+
# Handles the node-list response that drives resource usage scraping: a 2xx
# response is parsed as JSON and passed to process_resource_usage_res; any
# other status is logged. Errors raised while parsing or processing are
# logged and swallowed.
def handle_resource_usage_response(response)
  # Checking response codes only for a successful GET request viz., 2XX codes
  if response.code.between?(200, 299)
    @scraped_at = Time.now
    process_resource_usage_res MultiJson.load(response.body)
  else
    log.error "Expected 2xx from nodes API, but got #{response.code}. Response body = #{response.body}"
  end
rescue StandardError => e
  log.error "Failed to scrape metrics, error=#{e.inspect}"
  log.error_backtrace
end
|
553
|
+
|
554
|
+
# For every node in +response+, fetches that node's stats summary through the
# apiserver proxy and sums pod CPU (usageNanoCores) and memory (usageBytes)
# per namespace and per node. Then emits usage metrics per namespace and for
# the whole cluster. Per-node totals stay in @@node_resource_usage_metrics_map
# for process_node_response to consume.
#
# A node whose summary cannot be fetched or parsed is logged and skipped.
# Pods that report no cpu/memory stats count as zero.
def process_resource_usage_res(response)
  @scraped_node_at = Time.now
  @mutex_node_res_usage.synchronize do
    Array(response['items']).each do |node_json|
      node_name = node_json['metadata']['name']

      begin
        @client.discover unless @client.discovered
        node_rest_client = @client.rest_client["/nodes/#{node_name}:#{@kubelet_port}/proxy/stats/summary"]
        log.info("Use URL #{node_rest_client.url} for scraping resource usage metrics")
        node_response = JSON.parse(node_rest_client.get(@client.headers))
      rescue StandardError => e
        log.error "Failed to scrape resource usage metrics for node #{node_name}, error=#{e.inspect}"
        next
      end

      Array(node_response['pods']).each do |pod_json|
        pod_cpu_usage = (pod_json['cpu'] || {})['usageNanoCores'] || 0
        pod_memory_usage = (pod_json['memory'] || {})['usageBytes'] || 0
        pod_namespace = pod_json['podRef']['namespace']

        namespace_usage = (@@namespace_resource_usage_metrics_map[pod_namespace] ||= ResourceUsageMetricsUnit.new)
        namespace_usage.add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)

        node_usage = (@@node_resource_usage_metrics_map[node_name] ||= ResourceUsageMetricsUnit.new)
        node_usage.add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
      end
    end
  end

  cluster_usage_metrics = ResourceUsageMetricsUnit.new
  @@namespace_resource_usage_metrics_map.each do |namespace, namespace_usage|
    cluster_usage_metrics.add_resource_usage_metrics(namespace_usage.instance_variable_get(:@cpu_usage), namespace_usage.instance_variable_get(:@memory_usage))
    emit_resource_usage_metrics(generate_tag('namespace'), @scraped_at, { 'name' => namespace }, namespace_usage)
  end
  emit_resource_usage_metrics(generate_tag('cluster'), @scraped_at, { 'name' => @cluster_name }, cluster_usage_metrics)
ensure
  # Start the next scrape from a clean slate even if this one failed part-way.
  @@namespace_resource_usage_metrics_map = {}
end
|
602
|
+
end
|
603
|
+
end
|
604
|
+
end
|