fluent-plugin-kubernetes_metadata_filter 2.1.4 → 2.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +57 -0
  3. data/.gitignore +0 -1
  4. data/.rubocop.yml +57 -0
  5. data/Gemfile +4 -2
  6. data/Gemfile.lock +158 -0
  7. data/README.md +48 -28
  8. data/Rakefile +15 -11
  9. data/fluent-plugin-kubernetes_metadata_filter.gemspec +25 -28
  10. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +185 -131
  11. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +27 -20
  12. data/lib/fluent/plugin/kubernetes_metadata_common.rb +59 -33
  13. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +6 -6
  14. data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb +68 -0
  15. data/lib/fluent/plugin/kubernetes_metadata_util.rb +53 -0
  16. data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +121 -27
  17. data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +138 -29
  18. data/release_notes.md +42 -0
  19. data/test/cassettes/kubernetes_docker_metadata_annotations.yml +0 -34
  20. data/test/cassettes/{kubernetes_docker_metadata_dotted_labels.yml → kubernetes_docker_metadata_dotted_slashed_labels.yml} +0 -34
  21. data/test/cassettes/kubernetes_get_api_v1.yml +193 -0
  22. data/test/cassettes/kubernetes_get_api_v1_using_token.yml +195 -0
  23. data/test/cassettes/kubernetes_get_namespace_default.yml +69 -0
  24. data/test/cassettes/kubernetes_get_namespace_default_using_token.yml +71 -0
  25. data/test/cassettes/{kubernetes_docker_metadata.yml → kubernetes_get_pod.yml} +0 -82
  26. data/test/cassettes/{metadata_with_namespace_id.yml → kubernetes_get_pod_container_init.yml} +3 -134
  27. data/test/cassettes/{kubernetes_docker_metadata_using_bearer_token.yml → kubernetes_get_pod_using_token.yml} +5 -105
  28. data/test/cassettes/metadata_from_tag_and_journald_fields.yml +0 -255
  29. data/test/cassettes/metadata_from_tag_journald_and_kubernetes_fields.yml +0 -255
  30. data/test/cassettes/{non_kubernetes_docker_metadata.yml → valid_kubernetes_api_server_using_token.yml} +4 -44
  31. data/test/helper.rb +20 -2
  32. data/test/plugin/test_cache_stats.rb +10 -13
  33. data/test/plugin/test_cache_strategy.rb +158 -160
  34. data/test/plugin/test_filter_kubernetes_metadata.rb +480 -320
  35. data/test/plugin/test_utils.rb +56 -0
  36. data/test/plugin/test_watch_namespaces.rb +209 -55
  37. data/test/plugin/test_watch_pods.rb +302 -103
  38. data/test/plugin/watch_test.rb +52 -33
  39. metadata +69 -72
  40. data/circle.yml +0 -17
data/lib/fluent/plugin/kubernetes_metadata_common.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -18,6 +20,11 @@
 #
 module KubernetesMetadata
   module Common
+    class GoneError < StandardError
+      def initialize(msg = '410 Gone')
+        super
+      end
+    end
 
     def match_annotations(annotations)
       result = {}
@@ -32,61 +39,81 @@ module KubernetesMetadata
     end
 
     def parse_namespace_metadata(namespace_object)
-      labels = syms_to_strs(namespace_object['metadata']['labels'].to_h)
-      annotations = match_annotations(syms_to_strs(namespace_object['metadata']['annotations'].to_h))
+      labels = ''
+      labels = syms_to_strs(namespace_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(namespace_object[:metadata][:annotations].to_h))
       if @de_dot
-        self.de_dot!(labels)
-        self.de_dot!(annotations)
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end
       kubernetes_metadata = {
-        'namespace_id' => namespace_object['metadata']['uid'],
-        'creation_timestamp' => namespace_object['metadata']['creationTimestamp']
+        'namespace_id' => namespace_object[:metadata][:uid],
+        'creation_timestamp' => namespace_object[:metadata][:creationTimestamp]
       }
       kubernetes_metadata['namespace_labels'] = labels unless labels.empty?
       kubernetes_metadata['namespace_annotations'] = annotations unless annotations.empty?
-      return kubernetes_metadata
+      kubernetes_metadata
     end
 
     def parse_pod_metadata(pod_object)
-      labels = syms_to_strs(pod_object['metadata']['labels'].to_h)
-      annotations = match_annotations(syms_to_strs(pod_object['metadata']['annotations'].to_h))
+      labels = ''
+      labels = syms_to_strs(pod_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(pod_object[:metadata][:annotations].to_h))
       if @de_dot
-        self.de_dot!(labels)
-        self.de_dot!(annotations)
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end
 
-      # collect container informations
+      # collect container information
      container_meta = {}
      begin
-        pod_object['status']['containerStatuses'].each do|container_status|
-          # get plain container id (eg. docker://hash -> hash)
-          container_id = container_status['containerID'].sub /^[-_a-zA-Z0-9]+:\/\//, ''
-          container_meta[container_id] = {
-            'name' => container_status['name'],
-            'image' => container_status['image'],
-            'image_id' => container_status['imageID']
-          }
-        end
-      rescue
-        log.debug("parsing container meta information failed for: #{pod_object['metadata']['namespace']}/#{pod_object['metadata']['name']} ")
+        pod_object[:status][:containerStatuses].each do |container_status|
+          container_id = (container_status[:containerID]||"").sub(%r{^[-_a-zA-Z0-9]+://}, '')
+          key = container_status[:name]
+          container_meta[key] = if @skip_container_metadata
+                                  {
+                                    'name' => container_status[:name]
+                                  }
+                                else
+                                  {
+                                    'name' => container_status[:name],
+                                    'image' => container_status[:image],
+                                    'image_id' => container_status[:imageID],
+                                    :containerID => container_id
+                                  }
+                                end
+        end if pod_object[:status] && pod_object[:status][:containerStatuses]
+      rescue StandardError=>e
+        log.warn("parsing container meta information failed for: #{pod_object[:metadata][:namespace]}/#{pod_object[:metadata][:name]}: #{e}")
      end
 
      kubernetes_metadata = {
-        'namespace_name' => pod_object['metadata']['namespace'],
-        'pod_id' => pod_object['metadata']['uid'],
-        'pod_name' => pod_object['metadata']['name'],
-        'containers' => syms_to_strs(container_meta),
-        'labels' => labels,
-        'host' => pod_object['spec']['nodeName'],
-        'master_url' => @kubernetes_url
+        'namespace_name' => pod_object[:metadata][:namespace],
+        'pod_id' => pod_object[:metadata][:uid],
+        'pod_name' => pod_object[:metadata][:name],
+        'pod_ip' => pod_object[:status][:podIP],
+        'containers' => syms_to_strs(container_meta),
+        'host' => pod_object[:spec][:nodeName]
      }
      kubernetes_metadata['annotations'] = annotations unless annotations.empty?
-      return kubernetes_metadata
+      kubernetes_metadata['labels'] = labels unless labels.empty?
+      kubernetes_metadata['master_url'] = @kubernetes_url unless @skip_master_url
+      kubernetes_metadata
    end
 
    def syms_to_strs(hsh)
      newhsh = {}
-      hsh.each_pair do |kk,vv|
+      hsh.each_pair do |kk, vv|
        if vv.is_a?(Hash)
          vv = syms_to_strs(vv)
        end
@@ -98,6 +125,5 @@ module KubernetesMetadata
       end
       newhsh
     end
-
   end
 end
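Note on the hunk above: label handling is now opt-out (`@skip_labels`), a `@de_slash` pass was added alongside `@de_dot`, and `labels`/`master_url` keys are only emitted when present. The `de_dot!`/`de_slash!` helpers live elsewhere in the plugin; as a rough sketch of the transform they are assumed to perform (separator hard-coded to '_' here, while the plugin makes it configurable):

    # Hypothetical stand-in for the plugin's de_dot!/de_slash! helpers.
    def de_dot!(hash, separator = '_')
      hash.keys.each do |key|
        next unless key.include?('.')
        hash[key.gsub('.', separator)] = hash.delete(key)  # rename key in place
      end
    end

    labels = { 'app.kubernetes.io/name' => 'nginx' }
    de_dot!(labels)
    labels  # => { 'app_kubernetes_io/name' => 'nginx' }; de_slash! would then rewrite the '/'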
data/lib/fluent/plugin/kubernetes_metadata_stats.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -19,17 +21,16 @@
 require 'lru_redux'
 module KubernetesMetadata
   class Stats
-
     def initialize
       @stats = ::LruRedux::TTL::ThreadSafeCache.new(1000, 3600)
     end
 
     def bump(key)
-        @stats[key] = @stats.getset(key) { 0 } + 1
+      @stats[key] = @stats.getset(key) { 0 } + 1
     end
 
     def set(key, value)
-        @stats[key] = value
+      @stats[key] = value
     end
 
     def [](key)
@@ -37,10 +38,9 @@ module KubernetesMetadata
     end
 
     def to_s
-      "stats - " + [].tap do |a|
-        @stats.each {|k,v| a << "#{k.to_s}: #{v}"}
+      'stats - ' + [].tap do |a|
+        @stats.each { |k, v| a << "#{k}: #{v}" }
       end.join(', ')
     end
-
   end
 end
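The Stats changes above are cosmetic (indentation and quoting per the new RuboCop config); behavior is unchanged. For orientation, a small usage sketch of the class as released, assuming the gem and its lru_redux dependency are installed:

    require 'fluent/plugin/kubernetes_metadata_stats'

    stats = KubernetesMetadata::Stats.new   # LRU cache: max 1000 keys, 3600s TTL
    stats.bump(:pod_cache_watch_updates)    # getset seeds the key with 0, then + 1
    stats.bump(:pod_cache_watch_updates)
    stats.set(:pod_watch_failures, 0)
    stats[:pod_cache_watch_updates]         # => 2
    puts stats                              # e.g. "stats - pod_cache_watch_updates: 2, pod_watch_failures: 0"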
data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb

@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+require 'kubeclient'
+
+module KubernetesMetadata
+  module TestApiAdapter
+
+    def api_valid?
+      true
+    end
+    def get_namespace(namespace_name)
+      return {
+        metadata: {
+          name: namespace_name,
+          uid: namespace_name + 'uuid',
+          labels: {
+            foo_ns: 'bar_ns'
+          }
+        }
+      }
+    end
+
+    def get_pod(pod_name, namespace_name)
+      return {
+        metadata: {
+          name: pod_name,
+          namespace: namespace_name,
+          uid: namespace_name + namespace_name + "uuid",
+          labels: {
+            foo: 'bar'
+          }
+        },
+        spec: {
+          nodeName: 'aNodeName',
+          containers: [{
+            name: 'foo',
+            image: 'bar'
+          }, {
+            name: 'bar',
+            image: 'foo'
+          }]
+        },
+        status: {
+          podIP: '172.17.0.8'
+        }
+      }
+    end
+
+  end
+end
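This new TestApiAdapter module (used by the reworked test suite) cans the two lookups the filter performs. One plausible way to exercise it, mixing the module into a bare object in place of a real kubeclient connection (the harness object is illustrative, not from the gem):

    require 'fluent/plugin/kubernetes_metadata_test_api_adapter'

    client = Object.new.extend(KubernetesMetadata::TestApiAdapter)
    client.api_valid?                                  # => true
    pod = client.get_pod('mypod', 'default')
    pod[:spec][:nodeName]                              # => 'aNodeName'
    pod[:metadata][:uid]                               # => 'defaultdefaultuuid' (namespace doubled + 'uuid')
    client.get_namespace('default')[:metadata][:uid]   # => 'defaultuuid'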
data/lib/fluent/plugin/kubernetes_metadata_util.rb

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+module KubernetesMetadata
+  module Util
+    def create_time_from_record(record, internal_time)
+      time_key = @time_fields.detect { |ii| record.key?(ii) }
+      time = record[time_key]
+      if time.nil? || time.is_a?(String) && time.chop.empty?
+        # `internal_time` is a Fluent::EventTime, it can't compare with Time.
+        return Time.at(internal_time.to_f)
+      end
+
+      if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
+        timei = time.to_i
+        return Time.at(timei / 1_000_000, timei % 1_000_000)
+      end
+      return Time.at(time) if time.is_a?(Numeric)
+
+      Time.parse(time)
+    end
+  end
+end
+
+#https://stackoverflow.com/questions/5622435/how-do-i-convert-a-ruby-class-name-to-a-underscore-delimited-symbol
+class String
+  def underscore
+    word = self.dup
+    word.gsub!(/::/, '_')
+    word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
+    word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
+    word.tr!("-", "_")
+    word.downcase!
+    word
+  end
+end
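`create_time_from_record` picks the first configured time field present on the record, treats journald's `_SOURCE_REALTIME_TIMESTAMP`/`__REALTIME_TIMESTAMP` as epoch microseconds, and falls back to the event's internal time. A worked example; the harness object and the `@time_fields` value here are illustrative (the filter derives them from its configuration):

    require 'time'
    require 'fluent/plugin/kubernetes_metadata_util'

    reader = Object.new.extend(KubernetesMetadata::Util)
    reader.instance_variable_set(:@time_fields, ['_SOURCE_REALTIME_TIMESTAMP', 'time'])

    # journald: microseconds since the epoch, split into seconds + usec
    reader.create_time_from_record({ '_SOURCE_REALTIME_TIMESTAMP' => '1624536000123456' }, 0)
    # container runtime: an RFC 3339 string handed to Time.parse
    reader.create_time_from_record({ 'time' => '2021-06-24T12:00:00Z' }, 0)
    # neither field present: fall back to the event's internal timestamp
    reader.create_time_from_record({}, Time.now.to_f)

    'KubernetesMetadata::WatchPods'.underscore  # => "kubernetes_metadata_watch_pods"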
data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,45 +18,137 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_pods.rb unify them
 require_relative 'kubernetes_metadata_common'
 
 module KubernetesMetadata
   module WatchNamespaces
-
     include ::KubernetesMetadata::Common
 
+    def set_up_namespace_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      namespace_watcher = start_namespace_watch
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:namespace_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        namespace_watcher ||= get_namespaces_and_start_watcher
+        process_namespace_watcher_notices(namespace_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:namespace_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting namespace watch to reset resource versions.', e)
+        namespace_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:namespace_watch_failures)
+        if Thread.current[:namespace_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing namespace watch event. ' \
+            'The connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:namespace_watch_retry_backoff_interval]} " \
+            'seconds and resetting the namespace watcher.', e
+          )
+          sleep(Thread.current[:namespace_watch_retry_backoff_interval])
+          Thread.current[:namespace_watch_retry_count] += 1
+          Thread.current[:namespace_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          namespace_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing namespace watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_namespace_watch
-      begin
-        resource_version = @client.get_namespaces.resourceVersion
-        watcher = @client.watch_namespaces(resource_version)
-      rescue Exception=>e
-        message = "start_namespace_watch: Exception encountered setting up namespace watch from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{e.message}"
-        message += " (#{e.response})" if e.respond_to?(:response)
-        log.debug(message)
-        raise Fluent::ConfigError, message
+      get_namespaces_and_start_watcher
+    rescue StandardError => e
+      message = 'start_namespace_watch: Exception encountered setting up ' \
+                "namespace watch from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all namespaces, record the resourceVersion and return a watcher
+    # starting from that resourceVersion.
+    def get_namespaces_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      namespaces = @client.get_namespaces(options)
+      namespaces[:items].each do |namespace|
+        cache_key = namespace[:metadata][:uid]
+        @namespace_cache[cache_key] = parse_namespace_metadata(namespace)
+        @stats.bump(:namespace_cache_host_updates)
       end
+
+      # continue watching from most recent resourceVersion
+      options[:resource_version] = namespaces[:metadata][:resourceVersion]
+
+      watcher = @client.watch_namespaces(options)
+      reset_namespace_watch_retry_stats
+      watcher
+    end
+
+    # Reset namespace watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_namespace_watch_retry_stats
+      Thread.current[:namespace_watch_retry_count] = 0
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+    end
+
+    # Process a watcher notice and potentially raise an exception.
+    def process_namespace_watcher_notices(watcher)
       watcher.each do |notice|
-        case notice.type
-        when 'MODIFIED'
-          cache_key = notice.object['metadata']['uid']
-          cached = @namespace_cache[cache_key]
-          if cached
-            @namespace_cache[cache_key] = parse_namespace_metadata(notice.object)
-            @stats.bump(:namespace_cache_watch_updates)
-          else
-            @stats.bump(:namespace_cache_watch_misses)
-          end
-        when 'DELETED'
-          # ignore and let age out for cases where
-          # deleted but still processing logs
-          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        case notice[:type]
+        when 'MODIFIED'
+          reset_namespace_watch_retry_stats
+          cache_key = notice[:object][:metadata][:uid]
+          cached = @namespace_cache[cache_key]
+          if cached
+            @namespace_cache[cache_key] = parse_namespace_metadata(notice[:object])
+            @stats.bump(:namespace_cache_watch_updates)
          else
-          # Don't pay attention to creations, since the created namespace may not
-          # be used by any pod on this node.
-          @stats.bump(:namespace_cache_watch_ignored)
+            @stats.bump(:namespace_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_namespace_watch_retry_stats
+          # ignore and let age out for cases where
+          # deleted but still processing logs
+          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @stats.bump(:namespace_watch_gone_notices)
+            raise GoneError
+          else
+            @stats.bump(:namespace_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching namespaces: #{message}"
+          end
+        else
+          reset_namespace_watch_retry_stats
+          # Don't pay attention to creations, since the created namespace may not
+          # be used by any namespace on this node.
+          @stats.bump(:namespace_cache_watch_ignored)
        end
      end
    end
-
  end
end
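The restructured watch loop above separates setup errors (raise Fluent::ConfigError immediately) from runtime errors (retry with exponential backoff, then raise Fluent::UnrecoverableError). The sleep interval is multiplied by the backoff base after each failure; assuming the plugin's documented defaults of watch_retry_interval = 1, watch_retry_exponential_backoff_base = 2 and watch_retry_max_times = 10 (check your installed version), the arithmetic works out to:

    # Reproduces the backoff arithmetic from set_up_namespace_thread.
    interval = 1          # @watch_retry_interval
    base     = 2          # @watch_retry_exponential_backoff_base
    sleeps = 10.times.map { s = interval; interval *= base; s }
    p sleeps  # => [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] seconds; the 11th consecutive failure raises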
data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,46 +18,153 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_namespaces.rb unify them
 require_relative 'kubernetes_metadata_common'
 
 module KubernetesMetadata
   module WatchPods
-
     include ::KubernetesMetadata::Common
 
+    def set_up_pod_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      pod_watcher = start_pod_watch
+
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:pod_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        pod_watcher ||= get_pods_and_start_watcher
+        process_pod_watcher_notices(pod_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:pod_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting pod watch to reset resource versions.', e)
+        pod_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:pod_watch_failures)
+        if Thread.current[:pod_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:pod_watch_retry_backoff_interval]} " \
+            'seconds and resetting the pod watcher.', e
+          )
+          sleep(Thread.current[:pod_watch_retry_backoff_interval])
+          Thread.current[:pod_watch_retry_count] += 1
+          Thread.current[:pod_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          pod_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_pod_watch
-      begin
-        resource_version = @client.get_pods.resourceVersion
-        watcher = @client.watch_pods(resource_version)
-      rescue Exception => e
-        message = "Exception encountered fetching metadata from Kubernetes API endpoint: #{e.message}"
-        message += " (#{e.response})" if e.respond_to?(:response)
-
-        raise Fluent::ConfigError, message
+      get_pods_and_start_watcher
+    rescue StandardError => e
+      message = 'start_pod_watch: Exception encountered setting up pod watch ' \
+                "from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all pods, record the resourceVersion and return a watcher starting
+    # from that resourceVersion.
+    def get_pods_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      if ENV['K8S_NODE_NAME']
+        options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME']
       end
+      if @last_seen_resource_version
+        options[:resource_version] = @last_seen_resource_version
+      else
+        pods = @client.get_pods(options)
+        pods[:items].each do |pod|
+          cache_key = pod[:metadata][:uid]
+          @cache[cache_key] = parse_pod_metadata(pod)
+          @stats.bump(:pod_cache_host_updates)
+        end
+
+        # continue watching from most recent resourceVersion
+        options[:resource_version] = pods[:metadata][:resourceVersion]
+      end
+
+      watcher = @client.watch_pods(options)
+      reset_pod_watch_retry_stats
+      watcher
+    end
+
+    # Reset pod watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_pod_watch_retry_stats
+      Thread.current[:pod_watch_retry_count] = 0
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+    end
 
+    # Process a watcher notice and potentially raise an exception.
+    def process_pod_watcher_notices(watcher)
       watcher.each do |notice|
-        case notice.type
-        when 'MODIFIED'
-          cache_key = notice.object['metadata']['uid']
-          cached = @cache[cache_key]
-          if cached
-            @cache[cache_key] = parse_pod_metadata(notice.object)
-            @stats.bump(:pod_cache_watch_updates)
-          elsif ENV['K8S_NODE_NAME'] == notice.object['spec']['nodeName'] then
-            @cache[cache_key] = parse_pod_metadata(notice.object)
-            @stats.bump(:pod_cache_host_updates)
-          else
-            @stats.bump(:pod_cache_watch_misses)
-          end
-        when 'DELETED'
-          # ignore and let age out for cases where pods
-          # deleted but still processing logs
-          @stats.bump(:pod_cache_watch_delete_ignored)
+        # store version we processed to not reprocess it ... do not unset when there is no version in response
+        version = ( # TODO: replace with &.dig once we are on ruby 2.5+
+          notice[:object] && notice[:object][:metadata] && notice[:object][:metadata][:resourceVersion]
+        )
+        @last_seen_resource_version = version if version
+
+        case notice[:type]
+        when 'MODIFIED'
+          reset_pod_watch_retry_stats
+          cache_key = notice.dig(:object, :metadata, :uid)
+          cached = @cache[cache_key]
+          if cached
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_watch_updates)
+          elsif ENV['K8S_NODE_NAME'] == notice[:object][:spec][:nodeName]
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_host_updates)
+          else
+            @stats.bump(:pod_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_pod_watch_retry_stats
+          # ignore and let age out for cases where pods
+          # deleted but still processing logs
+          @stats.bump(:pod_cache_watch_delete_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @last_seen_resource_version = nil # requested resourceVersion was too old, need to reset
+            @stats.bump(:pod_watch_gone_notices)
+            raise GoneError
          else
-          # Don't pay attention to creations, since the created pod may not
-          # end up on this node.
-          @stats.bump(:pod_cache_watch_ignored)
+            @stats.bump(:pod_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching pods: #{message}"
+          end
+        else
+          reset_pod_watch_retry_stats
+          # Don't pay attention to creations, since the created pod may not
+          # end up on this node.
+          @stats.bump(:pod_cache_watch_ignored)
        end
      end
    end
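Compared with the namespace watcher, the pod watcher adds one refinement: it tracks `@last_seen_resource_version` so a reconnect resumes where the last notice left off instead of relisting, and clears it when the API answers 410 Gone so the next pass relists from scratch. A condensed sketch of that contract (not the gem's code; `client` is assumed to be a configured Kubeclient::Client):

    last_seen = nil
    loop do
      options = { resource_version: last_seen || '0' }   # '0' is served from the apiserver cache
      unless last_seen
        pods = client.get_pods(options)                  # full relist only when no version is known
        options[:resource_version] = pods[:metadata][:resourceVersion]
      end
      client.watch_pods(options).each do |notice|
        v = notice.dig(:object, :metadata, :resourceVersion)
        last_seen = v if v                               # remember progress; never unset on a missing version
        if notice[:type] == 'ERROR' && notice.dig(:object, :code) == 410
          raise KubernetesMetadata::Common::GoneError
        end
      end
    rescue KubernetesMetadata::Common::GoneError
      last_seen = nil                                    # requested version too old: relist next iteration
    end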