fluent-plugin-k8s-metrics-agg 1.1.7 → 1.1.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11e9b540230a927f8cb2dd469d4b16d16941c8e7cde2d4019394509c8562874e
4
- data.tar.gz: 38e231e8cde3aa56f386c3cc835065fec257ecffe502866ae0db8e00a818f970
3
+ metadata.gz: 70bc112ad78a8b436be2fc81cbb19333c529ace6fecfd914a6fd0322b6f1d416
4
+ data.tar.gz: dcdb1ecb991835130f35fea247e5f3da4a1a004f3be3d92bef6057e8e47096e2
5
5
  SHA512:
6
- metadata.gz: 298e621affd96c9185a95e019af9bf947d4f1a81bdb850606fc9c9729ba0f7ec3072ad4df4f176e4280ae70bba1a012d59d183ecd858cc80f1c085c4b99ec44b
7
- data.tar.gz: 3ea6fe8cd14f6820a8c3126cc8fc226b28d06786e467fdba6cbdc8d8b1445724a11406a4d28850f87685959ac59170be0e45fab8cee35570476167fb96f3f01e
6
+ metadata.gz: 97dfe412aee7d7918ad44b3962bbf06afc34dad7fa1ab242c8c34cbe2ff282356e4270ba236f3fabfe1b58047cf63c12e23282c45734363265f599e3e15a1715
7
+ data.tar.gz: 88bd162e3dd54b0d5dcb2a3cf02ae1d596161dae5840c1b280f3a25054d2802cb25f068a2857490395f6d87d86332f89e7caaa82023dcfcf7b5bec336166620e
@@ -67,8 +67,9 @@ jobs:
67
67
  CI_INDEX_EVENTS: ci_events
68
68
  CI_INDEX_OBJECTS: ci_objects
69
69
  CI_INDEX_METRICS: ci_metrics
70
- KUBERNETES_VERSION: v1.15.2
71
- MINIKUBE_VERSION: v1.21.0
70
+ KUBERNETES_VERSION: v1.23.2
71
+ MINIKUBE_VERSION: v1.24.0
72
+ MINIKUBE_NODE_COUNTS: 2
72
73
  GITHUB_ACTIONS: true
73
74
 
74
75
  steps:
@@ -121,7 +122,7 @@ jobs:
121
122
  chmod +x minikube
122
123
  sudo mv minikube /usr/local/bin/
123
124
  # Start Minikube and Wait
124
- minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check
125
+ minikube start --driver=docker --container-runtime=docker --cpus 2 --memory 4096 --kubernetes-version=${KUBERNETES_VERSION} --no-vtx-check -n=${MINIKUBE_NODE_COUNTS}
125
126
  export JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
126
127
  until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do
127
128
  sleep 1;
@@ -130,13 +131,15 @@ jobs:
130
131
  - name: Install Splunk
131
132
  run: |
132
133
  # Wait until minikube is ready
133
- kubectl apply -f https://docs.projectcalico.org/v3.14/manifests/calico.yaml
134
134
  export JSONPATH='{range .items[*]}{@.metadata.name}:{range @.status.conditions[*]}{@.type}={@.status};{end}{end}'
135
135
  until kubectl get nodes -o jsonpath="$JSONPATH" 2>&1 | grep -q "Ready=True"; do
136
136
  echo "wait for minikube ready ..."
137
137
  sleep 1;
138
138
  done
139
139
  kubectl get nodes
140
+ until kubectl get sa | grep -q 'default'; do
141
+ sleep 1;
142
+ done
140
143
  # Install Splunk on minikube
141
144
  kubectl apply -f ci_scripts/k8s-splunk.yml
142
145
  # Wait until splunk is ready
@@ -189,4 +192,5 @@ jobs:
189
192
  --splunkd-url https://$CI_SPLUNK_HOST:8089 \
190
193
  --splunk-user admin \
191
194
  --splunk-password $CI_SPLUNK_PASSWORD \
192
- -p no:warnings -s
195
+ --nodes-count $MINIKUBE_NODE_COUNTS\
196
+ -p no:warnings -s -n auto
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-k8s-metrics-agg (1.1.7)
4
+ fluent-plugin-k8s-metrics-agg (1.1.10)
5
5
  fluentd (>= 1.9.1)
6
6
  kubeclient (~> 4.6.0)
7
7
  multi_json (~> 1.14.1)
@@ -19,14 +19,14 @@ GEM
19
19
  docile (1.4.0)
20
20
  domain_name (0.5.20190701)
21
21
  unf (>= 0.0.5, < 1.0.0)
22
- ffi (1.15.4)
22
+ ffi (1.15.5)
23
23
  ffi-compiler (1.0.1)
24
24
  ffi (>= 1.0.0)
25
25
  rake
26
- fluentd (1.14.2)
26
+ fluentd (1.14.4)
27
27
  bundler
28
28
  cool.io (>= 1.4.5, < 2.0.0)
29
- http_parser.rb (>= 0.5.1, < 0.8.0)
29
+ http_parser.rb (>= 0.5.1, < 0.9.0)
30
30
  msgpack (>= 1.3.1, < 2.0.0)
31
31
  serverengine (>= 2.2.2, < 3.0.0)
32
32
  sigdump (~> 0.2.2)
@@ -47,16 +47,16 @@ GEM
47
47
  http-form_data (2.3.0)
48
48
  http-parser (1.2.3)
49
49
  ffi-compiler (>= 1.0, < 2.0)
50
- http_parser.rb (0.7.0)
50
+ http_parser.rb (0.8.0)
51
51
  json (2.6.1)
52
52
  kubeclient (4.6.0)
53
53
  http (>= 3.0, < 5.0)
54
54
  recursive-open-struct (~> 1.0, >= 1.0.4)
55
55
  rest-client (~> 2.0)
56
- mime-types (3.3.1)
56
+ mime-types (3.4.1)
57
57
  mime-types-data (~> 3.2015)
58
- mime-types-data (3.2021.0901)
59
- msgpack (1.4.2)
58
+ mime-types-data (3.2022.0105)
59
+ msgpack (1.4.4)
60
60
  multi_json (1.14.1)
61
61
  netrc (0.11.0)
62
62
  oj (3.10.18)
@@ -70,7 +70,7 @@ GEM
70
70
  mime-types (>= 1.16, < 4.0)
71
71
  netrc (~> 0.8)
72
72
  rexml (3.2.5)
73
- serverengine (2.2.4)
73
+ serverengine (2.2.5)
74
74
  sigdump (~> 0.2.2)
75
75
  sigdump (0.2.4)
76
76
  simplecov (0.16.1)
@@ -107,4 +107,4 @@ DEPENDENCIES
107
107
  webmock (~> 3.5.1)
108
108
 
109
109
  BUNDLED WITH
110
- 2.2.30
110
+ 2.3.9
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.7
1
+ 1.1.10
@@ -23,7 +23,11 @@ helm install ci-sck --set global.splunk.hec.token=$CI_SPLUNK_HEC_TOKEN \
23
23
  --set splunk-kubernetes-metrics.imageAgg.tag=recent \
24
24
  --set splunk-kubernetes-metrics.imageAgg.pullPolicy=IfNotPresent \
25
25
  -f ci_scripts/sck_values.yml helm-chart/splunk-connect-for-kubernetes
26
- #wait for deployment to finish
27
- until kubectl get pod | grep Running | [[ $(wc -l) == 4 ]]; do
26
+
27
+ kubectl get pod
28
+ # wait for deployment to finish
29
+ # metric and logging daemon set for each node + aggr + object + splunk
30
+ PODS=$((MINIKUBE_NODE_COUNTS*2+2+1))
31
+ until kubectl get pod | grep Running | [[ $(wc -l) == $PODS ]]; do
28
32
  sleep 1;
29
- done
33
+ done
data/docker/Dockerfile CHANGED
@@ -31,13 +31,17 @@ RUN mkdir /licenses
31
31
  COPY --from=builder /app/LICENSE /licenses/LICENSE
32
32
 
33
33
  COPY --from=builder /app/docker/Gemfile* ./
34
-
34
+ RUN gem update date cgi
35
+ RUN rm -f /usr/share/gems/specifications/default/cgi-0.1.0.gemspec /usr/share/gems/specifications/default/date-3.0.0.gemspec
35
36
  RUN yum update -y \
36
37
  && yum remove -y nodejs npm \
37
38
  && gem install bundler \
39
+ && gem uninstall -i /usr/share/gems bundler \
38
40
  && gem unpack /tmp/*.gem --target gem \
39
41
  && bundle install \
40
- && rpm -e --nodeps python3-pip-wheel python3-urllib3-* python3-requests-* python3-libxml2-* python3-dmidecode-* subscription-manager-* libwebp-* libwebp-devel-* glib2-* libjpeg-turbo-devel-* libjpeg-turbo-* mariadb-connector-c-config-* mariadb-connector-c-* mariadb-connector-c-devel-* rsync-* sqlite-libs-* sqlite-devel-* sqlite-* libxml2-* libxml2-devel-* libX11-* libX11-common-* libX11-devel-* libX11-xcb-* nettle-* libsolv-* file-libs-* dbus-daemon-* tar-* qt5-srpm-macros-* perl-parent-* git-* bsdtar-* openssh-clients-* json-c-* binutils-* libtiff-devel-* libtiff-*
42
+ && bundle update i18n \
43
+ && gem uninstall -i /usr/share/gems i18n --version 1.8.11 \
44
+ && rpm -e --nodeps python3-pip-wheel python3-urllib3-* python3-requests-* python3-libxml2-* python3-dmidecode-* subscription-manager-* libwebp-* libwebp-devel-* glib2-* libjpeg-turbo-devel-* libjpeg-turbo-* mariadb-connector-c-config-* mariadb-connector-c-* mariadb-connector-c-devel-* rsync-* sqlite-libs-* sqlite-devel-* sqlite-* libxml2-* libxml2-devel-* libX11-* libX11-common-* libX11-devel-* libX11-xcb-* nettle-* libsolv-* file-libs-* dbus-daemon-* tar-* qt5-srpm-macros-* perl-parent-* git-* bsdtar-* openssh-clients-* json-c-* binutils-* libtiff-devel-* libtiff-* || true
41
45
 
42
46
  RUN groupadd -r $FLUENT_USER && \
43
47
  useradd -r -g $FLUENT_USER $FLUENT_USER && \
data/docker/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: gem
3
3
  specs:
4
- fluent-plugin-k8s-metrics-agg (1.1.7)
4
+ fluent-plugin-k8s-metrics-agg (1.1.10)
5
5
  fluentd (>= 1.9.1)
6
6
  kubeclient (~> 4.6.0)
7
7
  multi_json (~> 1.14.1)
@@ -10,9 +10,9 @@ PATH
10
10
  GEM
11
11
  remote: https://rubygems.org/
12
12
  specs:
13
- activemodel (6.1.4.1)
14
- activesupport (= 6.1.4.1)
15
- activesupport (6.1.4.1)
13
+ activemodel (6.1.4.3)
14
+ activesupport (= 6.1.4.3)
15
+ activesupport (6.1.4.3)
16
16
  concurrent-ruby (~> 1.0, >= 1.0.2)
17
17
  i18n (>= 1.6, < 2)
18
18
  minitest (>= 5.1)
@@ -33,7 +33,7 @@ GEM
33
33
  ffi-compiler (1.0.1)
34
34
  ffi (>= 1.0.0)
35
35
  rake
36
- fluent-plugin-kubernetes_metadata_filter (2.9.2)
36
+ fluent-plugin-kubernetes_metadata_filter (2.9.3)
37
37
  fluentd (>= 0.14.0, < 1.15)
38
38
  kubeclient (>= 4.0.0, < 5.0.0)
39
39
  lru_redux
@@ -42,16 +42,16 @@ GEM
42
42
  prometheus-client (>= 2.1.0)
43
43
  fluent-plugin-record-modifier (2.1.0)
44
44
  fluentd (>= 1.0, < 2)
45
- fluent-plugin-splunk-hec (1.2.7)
45
+ fluent-plugin-splunk-hec (1.2.11)
46
46
  fluentd (>= 1.4)
47
47
  multi_json (~> 1.13)
48
48
  net-http-persistent (~> 3.1)
49
49
  openid_connect (~> 1.1.8)
50
50
  prometheus-client (>= 2.1.0)
51
- fluentd (1.14.2)
51
+ fluentd (1.14.3)
52
52
  bundler
53
53
  cool.io (>= 1.4.5, < 2.0.0)
54
- http_parser.rb (>= 0.5.1, < 0.8.0)
54
+ http_parser.rb (>= 0.5.1, < 0.9.0)
55
55
  msgpack (>= 1.3.1, < 2.0.0)
56
56
  serverengine (>= 2.2.2, < 3.0.0)
57
57
  sigdump (~> 0.2.2)
@@ -86,11 +86,11 @@ GEM
86
86
  lru_redux (1.1.0)
87
87
  mail (2.7.1)
88
88
  mini_mime (>= 0.1.1)
89
- mime-types (3.3.1)
89
+ mime-types (3.4.1)
90
90
  mime-types-data (~> 3.2015)
91
- mime-types-data (3.2021.0901)
91
+ mime-types-data (3.2021.1115)
92
92
  mini_mime (1.1.2)
93
- minitest (5.14.4)
93
+ minitest (5.15.0)
94
94
  msgpack (1.4.2)
95
95
  multi_json (1.14.1)
96
96
  net-http-persistent (3.1.0)
@@ -51,11 +51,11 @@ module Fluent
51
51
  # m cpu is assumed standard
52
52
  @cpu_mult = 1
53
53
  @cpu_mult = 1000 if cpu[-1] != 'm'
54
- cpu.delete('^0-9').to_i
54
+ cpu.delete('^0-9.').to_i
55
55
  end
56
56
 
57
57
  def get_cpu_or_memory_value(resource)
58
- resource = resource.tr('^0-9', '').to_i
58
+ resource = resource.tr('^0-9.', '').to_i
59
59
  resource
60
60
  end
61
61
 
@@ -66,29 +66,35 @@ module Fluent
66
66
  end
67
67
 
68
68
  # https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#meaning-of-memory
69
+ # 1 Ki = 1024 bytes
70
+ # 1 K = 1000 bytes = 1000/1024 Ki = 1000/(1024*1024) Mi
69
71
  def get_memory_mult(memory)
70
- memory_mult = if memory[-2] == 'Ki'
71
- 0.001
72
- elsif memory[-2] == 'K'
72
+ memory_mult = if memory[-2..] == 'Ki'
73
73
  1.0 / 1024
74
- elsif memory[-2] == 'Mi'
74
+ elsif memory[-1] == 'K'
75
+ 1e3 / 1024 ** 2
76
+ elsif memory[-2..] == 'Mi'
75
77
  1
76
- elsif memory[-2] == 'M'
77
- 1
78
- elsif memory[-2] == 'Gi'
79
- 1000
80
- elsif memory[-2] == 'G'
78
+ elsif memory[-1] == 'M'
79
+ 1e6 / 1024 ** 2
80
+ elsif memory[-2..] == 'Gi'
81
81
  1024
82
- elsif memory[-2] == 'Ti'
83
- 1_000_000
84
- elsif memory[-2] == 'T'
85
- 1_048_576
86
- elsif memory[-2] == 'Ei'
87
- 1_000_000_000
82
+ elsif memory[-1] == 'G'
83
+ 1e9 / 1024 ** 2
84
+ elsif memory[-2..] == 'Ti'
85
+ 1024 ** 2
86
+ elsif memory[-1] == 'T'
87
+ 1e12 / 1024 ** 2
88
+ elsif memory[-2..] == 'Pi'
89
+ 1024**3
90
+ elsif memory[-2] == 'P'
91
+ 1e15 / 1024 ** 2
92
+ elsif memory[-2..] == 'Ei'
93
+ 1024**4
88
94
  elsif memory[-2] == 'E'
89
- 1_073_741_824
95
+ 1e18 / 1024 ** 2
90
96
  else
91
- 0.000001
97
+ 1.0 / 1024 ** 2
92
98
  end
93
99
  memory_mult
94
100
  end
@@ -278,43 +284,47 @@ module Fluent
278
284
  end
279
285
 
280
286
  def get_cpu_value(resource)
281
- cpu_val = resource.tr('^0-9', '').to_i
287
+ cpu_val = resource.tr('^0-9.', '').to_i
282
288
  mult = get_cpu_mult(resource)
283
- cpu_val += cpu_val * mult
289
+ cpu_val = cpu_val * mult
284
290
  cpu_val
285
291
  end
286
292
 
287
293
  def get_memory_mult(memory)
288
- memory_mult = if memory[-2] == 'Ki'
289
- 0.001
290
- elsif memory[-2] == 'K'
294
+ memory_mult = if memory[-2..] == 'Ki'
291
295
  1.0 / 1024
292
- elsif memory[-2] == 'Mi'
293
- 1
294
- elsif memory[-2] == 'M'
296
+ elsif memory[-1] == 'K'
297
+ 1e3 / 1024 ** 2
298
+ elsif memory[-2..] == 'Mi'
295
299
  1
296
- elsif memory[-2] == 'Gi'
297
- 1000
298
- elsif memory[-2] == 'G'
300
+ elsif memory[-1] == 'M'
301
+ 1e6 / 1024 ** 2
302
+ elsif memory[-2..] == 'Gi'
299
303
  1024
300
- elsif memory[-2] == 'Ti'
301
- 1_000_000
302
- elsif memory[-2] == 'T'
303
- 1_048_576 # 1024*1024
304
- elsif memory[-2] == 'Ei'
305
- 1_000_000_000
304
+ elsif memory[-1] == 'G'
305
+ 1e9 / 1024 ** 2
306
+ elsif memory[-2..] == 'Ti'
307
+ 1024 ** 2
308
+ elsif memory[-1] == 'T'
309
+ 1e12 / 1024 ** 2
310
+ elsif memory[-2..] == 'Pi'
311
+ 1024**3
312
+ elsif memory[-2] == 'P'
313
+ 1e15 / 1024 ** 2
314
+ elsif memory[-2..] == 'Ei'
315
+ 1024**4
306
316
  elsif memory[-2] == 'E'
307
- 1_073_741_824 # 1024*1024*1024
317
+ 1e18 / 1024 ** 2
308
318
  else
309
- 0.000001
319
+ 1.0 / 1024 ** 2
310
320
  end
311
321
  memory_mult
312
322
  end
313
323
 
314
324
  def get_memory_value(resource)
315
- mem_val = resource.tr('^0-9', '').to_i
325
+ mem_val = resource.tr('^0-9.', '').to_i
316
326
  mult = get_memory_mult(resource)
317
- mem_val += mem_val * mult
327
+ mem_val = mem_val * mult
318
328
  mem_val
319
329
  end
320
330
 
@@ -418,7 +428,7 @@ module Fluent
418
428
  end
419
429
  end
420
430
  container_usage_metrics.add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
421
- container_labels = { 'name' => container_json['name'], 'image' => container_json['image'], 'node' => pod_json['spec']['nodeName'] }
431
+ container_labels = { 'pod-name' => pod_json['metadata']['name'], 'namespace' => pod_json['metadata']['namespace'], 'name' => container_json['name'], 'image' => container_json['image'], 'node' => pod_json['spec']['nodeName'] }
422
432
  emit_limits_requests_metrics(generate_tag('container'), @scraped_at, container_labels, container_usage_metrics)
423
433
  pod_usage_metrics.add_usage_metrics(cpu_limit, cpu_request, memory_limit, memory_request)
424
434
  end
@@ -485,46 +495,53 @@ module Fluent
485
495
  end
486
496
 
487
497
  def process_node_response(response)
488
- Array(response['items']).each do |node_json|
489
- node_name = node_json['metadata']['name']
490
- node_cpu_capacity = get_cpu_value(node_json['status']['capacity']['cpu'])
491
- router.emit generate_tag('node') << ('.cpu.capacity'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_capacity
492
- node_cpu_allocatable = get_cpu_value(node_json['status']['allocatable']['cpu'])
493
- router.emit generate_tag('node') << ('.cpu.allocatable'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_allocatable
494
- node_memory_capacity = get_memory_value(node_json['status']['capacity']['memory'])
495
- router.emit generate_tag('node') << ('.memory.capacity'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_capacity
496
- node_memory_allocatable = get_memory_value(node_json['status']['allocatable']['memory'])
497
- router.emit generate_tag('node') << ('.memory.allocatable'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_allocatable
498
-
499
- node_req_lim = UsageMetricsUnit.new
500
- node_res_usage = ResourceUsageMetricsUnit.new
501
- @mutex_node_req_lim.synchronize do
502
- next if @@node_requests_limits_metrics_map[node_name].nil?
503
-
498
+ @mutex_node_req_lim.synchronize do
499
+ Array(response['items']).each do |node_json|
500
+ node_name = node_json['metadata']['name']
501
+ node_cpu_capacity = get_cpu_value(node_json['status']['capacity']['cpu'])
502
+ router.emit generate_tag('node') << ('.cpu.capacity'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_capacity
503
+ node_cpu_allocatable = get_cpu_value(node_json['status']['allocatable']['cpu'])
504
+ router.emit generate_tag('node') << ('.cpu.allocatable'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_allocatable
505
+ node_memory_capacity = get_memory_value(node_json['status']['capacity']['memory'])
506
+ router.emit generate_tag('node') << ('.memory.capacity'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_capacity
507
+ node_memory_allocatable = get_memory_value(node_json['status']['allocatable']['memory'])
508
+ router.emit generate_tag('node') << ('.memory.allocatable'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_allocatable
509
+
510
+ node_req_lim = UsageMetricsUnit.new
511
+ if @@node_requests_limits_metrics_map[node_name].nil?
512
+ next
513
+ end
504
514
  node_req_lim = @@node_requests_limits_metrics_map[node_name]
515
+
516
+ node_cpu_reservation = node_req_lim.instance_variable_get(:@cpu_request).to_f / node_cpu_allocatable
517
+ router.emit generate_tag('node') << ('.cpu.reservation'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_reservation
518
+ node_memory_reservation = node_req_lim.instance_variable_get(:@memory_request).to_f / node_memory_allocatable
519
+ router.emit generate_tag('node') << ('.memory.reservation'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_reservation
505
520
  end
506
- @mutex_node_res_usage.synchronize do
507
- next if @@node_resource_usage_metrics_map[node_name].nil?
521
+ @@node_requests_limits_metrics_map = nil
522
+ @@node_requests_limits_metrics_map = {}
523
+ end
524
+
525
+ @mutex_node_res_usage.synchronize do
526
+ Array(response['items']).each do |node_json|
527
+ node_name = node_json['metadata']['name']
528
+ node_cpu_allocatable = get_cpu_value(node_json['status']['allocatable']['cpu'])
529
+ node_memory_allocatable = get_memory_value(node_json['status']['allocatable']['memory'])
508
530
 
531
+ node_res_usage = ResourceUsageMetricsUnit.new
532
+ if @@node_resource_usage_metrics_map[node_name].nil?
533
+ next
534
+ end
509
535
  node_res_usage = @@node_resource_usage_metrics_map[node_name]
536
+
537
+ # https://github.com/kubernetes/heapster/blob/c78cc312ab3901acfe5c2f95f7a621909c8455ad/metrics/processors/node_autoscaling_enricher.go#L62
538
+ node_cpu_utilization = node_res_usage.instance_variable_get(:@cpu_usage).to_f / 1_000_000 * node_cpu_allocatable # converting from nano cores to milli core
539
+ router.emit generate_tag('node') << ('.cpu.utilization'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_utilization
540
+ node_memory_utilization = node_res_usage.instance_variable_get(:@memory_usage).to_f / node_memory_allocatable # converting from bytes to megabytes
541
+ router.emit generate_tag('node') << ('.memory.utilization'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_utilization
510
542
  end
511
- # https://github.com/kubernetes/heapster/blob/c78cc312ab3901acfe5c2f95f7a621909c8455ad/metrics/processors/node_autoscaling_enricher.go#L62
512
- node_cpu_utilization = node_res_usage.instance_variable_get(:@cpu_usage).to_f / 1_000_000 * node_cpu_allocatable # converting from nano cores to milli core
513
- router.emit generate_tag('node') << ('.cpu.utilization'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_utilization
514
- node_cpu_reservation = node_req_lim.instance_variable_get(:@cpu_request).to_f / node_cpu_allocatable
515
- router.emit generate_tag('node') << ('.cpu.reservation'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_cpu_reservation
516
- node_memory_utilization = node_res_usage.instance_variable_get(:@memory_usage).to_f / 1_000_000 * node_memory_allocatable # converting from bytes to megabytes
517
- router.emit generate_tag('node') << ('.memory.utilization'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_utilization
518
- node_memory_reservation = node_req_lim.instance_variable_get(:@memory_request).to_f / node_memory_allocatable
519
- router.emit generate_tag('node') << ('.memory.reservation'), Fluent::EventTime.from_time(@scraped_node_at), 'node' => node_name, 'value' => node_memory_reservation
520
- @mutex_node_req_lim.synchronize do
521
- @@node_requests_limits_metrics_map = nil
522
- @@node_requests_limits_metrics_map = {}
523
- end
524
- @mutex_node_res_usage.synchronize do
525
- @@node_resource_usage_metrics_map = nil
526
- @@node_resource_usage_metrics_map = {}
527
- end
543
+ @@node_resource_usage_metrics_map = nil
544
+ @@node_resource_usage_metrics_map = {}
528
545
  end
529
546
  end
530
547
 
@@ -541,7 +558,7 @@ module Fluent
541
558
  def scrape_resource_usage_metrics
542
559
  response = resource_usage_api.get(@client.headers)
543
560
  handle_resource_usage_response(response)
544
- rescue StandardError => e
561
+ rescue StandardError => e
545
562
  log.error "Failed to get resource usage metrics, error=#{$ERROR_INFO}, #{e.inspect}"
546
563
  log.error_backtrace
547
564
  end
@@ -573,16 +590,21 @@ module Fluent
573
590
  end
574
591
  end
575
592
 
576
- node_response = JSON.parse(node_rest_client.get(@client.headers))
593
+ begin
594
+ node_response = JSON.parse(node_rest_client.get(@client.headers))
595
+ rescue RestClient::ServiceUnavailable
596
+ log.warn("Couldn't scrap metric for node '#{node_name} as it is unavailable. Ignoring it.'")
597
+ next
598
+ end
599
+
577
600
  Array(node_response['pods']).each do |pod_json|
578
601
  unless pod_json['cpu'].nil? || pod_json['memory'].nil?
579
- pod_cpu_usage = pod_json['cpu'].fetch('usageNanoCores', 0)/ 1_000_000
580
- pod_memory_usage = pod_json['memory'].fetch('usageBytes', 0)
602
+ pod_cpu_usage = pod_json['cpu'].fetch('usageNanoCores', 0)/ 1_000_000.to_f
603
+ pod_memory_usage = pod_json['memory'].fetch('usageBytes', 0) / 1024 ** 2.to_f # Converting to Mi
581
604
  pod_namespace = pod_json['podRef']['namespace']
582
605
  pod_usage = ResourceUsageMetricsUnit.new
583
606
  pod_usage.add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
584
607
  if @@namespace_resource_usage_metrics_map[pod_namespace].nil?
585
- namespace_usage_metrics = ResourceUsageMetricsUnit.new
586
608
  @@namespace_resource_usage_metrics_map[pod_namespace] = pod_usage
587
609
  else
588
610
  @@namespace_resource_usage_metrics_map[pod_namespace].add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
@@ -594,6 +616,33 @@ module Fluent
594
616
  @@node_resource_usage_metrics_map[node_name].add_resource_usage_metrics(pod_cpu_usage, pod_memory_usage)
595
617
  pod_usage = nil
596
618
  end
619
+
620
+ if pod_json['cpu'].nil? || pod_json['memory'].nil?
621
+ unless pod_json['containers'].nil?
622
+ pod_namespace = pod_json['podRef']['namespace']
623
+ Array(pod_json['containers']).each do |container_json|
624
+ unless container_json['cpu'].nil? || container_json['memory'].nil?
625
+ container_cpu_usage = container_json['cpu'].fetch('usageNanoCores', 0)/ 1_000_000.to_f
626
+ container_memory_usage = container_json['memory'].fetch('usageBytes', 0) / 1024 ** 2.to_f # Converting to Mi
627
+ container_usage = ResourceUsageMetricsUnit.new
628
+ container_usage.add_resource_usage_metrics(container_cpu_usage, container_memory_usage)
629
+ if @@namespace_resource_usage_metrics_map[pod_namespace].nil?
630
+ namespace_usage_metrics = ResourceUsageMetricsUnit.new
631
+ @@namespace_resource_usage_metrics_map[pod_namespace] = container_usage
632
+ else
633
+ @@namespace_resource_usage_metrics_map[pod_namespace].add_resource_usage_metrics(container_cpu_usage, container_memory_usage)
634
+ end
635
+ if @@node_resource_usage_metrics_map[node_name].nil?
636
+ node_name_usage_metrics = ResourceUsageMetricsUnit.new
637
+ @@node_resource_usage_metrics_map[node_name] = node_name_usage_metrics
638
+ else
639
+ @@node_resource_usage_metrics_map[node_name].add_resource_usage_metrics(container_cpu_usage, container_memory_usage)
640
+ end
641
+ container_usage = nil
642
+ end
643
+ end
644
+ end
645
+ end
597
646
  end
598
647
  end
599
648
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-k8s-metrics-agg
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.1.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Splunk Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-03 00:00:00.000000000 Z
11
+ date: 2022-03-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -201,7 +201,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
201
201
  - !ruby/object:Gem::Version
202
202
  version: '0'
203
203
  requirements: []
204
- rubygems_version: 3.0.1
204
+ rubygems_version: 3.1.4
205
205
  signing_key:
206
206
  specification_version: 4
207
207
  summary: A fluentd input plugin that collects kubernetes cluster metrics.