fluent-plugin-kubernetes_metadata_filter 2.1.4 → 2.9.4
- checksums.yaml +4 -4
- data/.circleci/config.yml +57 -0
- data/.gitignore +0 -1
- data/.rubocop.yml +57 -0
- data/Gemfile +4 -2
- data/Gemfile.lock +158 -0
- data/README.md +48 -28
- data/Rakefile +15 -11
- data/fluent-plugin-kubernetes_metadata_filter.gemspec +25 -28
- data/lib/fluent/plugin/filter_kubernetes_metadata.rb +185 -131
- data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +27 -20
- data/lib/fluent/plugin/kubernetes_metadata_common.rb +59 -33
- data/lib/fluent/plugin/kubernetes_metadata_stats.rb +6 -6
- data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb +68 -0
- data/lib/fluent/plugin/kubernetes_metadata_util.rb +53 -0
- data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +121 -27
- data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +138 -29
- data/release_notes.md +42 -0
- data/test/cassettes/kubernetes_docker_metadata_annotations.yml +0 -34
- data/test/cassettes/{kubernetes_docker_metadata_dotted_labels.yml → kubernetes_docker_metadata_dotted_slashed_labels.yml} +0 -34
- data/test/cassettes/kubernetes_get_api_v1.yml +193 -0
- data/test/cassettes/kubernetes_get_api_v1_using_token.yml +195 -0
- data/test/cassettes/kubernetes_get_namespace_default.yml +69 -0
- data/test/cassettes/kubernetes_get_namespace_default_using_token.yml +71 -0
- data/test/cassettes/{kubernetes_docker_metadata.yml → kubernetes_get_pod.yml} +0 -82
- data/test/cassettes/{metadata_with_namespace_id.yml → kubernetes_get_pod_container_init.yml} +3 -134
- data/test/cassettes/{kubernetes_docker_metadata_using_bearer_token.yml → kubernetes_get_pod_using_token.yml} +5 -105
- data/test/cassettes/metadata_from_tag_and_journald_fields.yml +0 -255
- data/test/cassettes/metadata_from_tag_journald_and_kubernetes_fields.yml +0 -255
- data/test/cassettes/{non_kubernetes_docker_metadata.yml → valid_kubernetes_api_server_using_token.yml} +4 -44
- data/test/helper.rb +20 -2
- data/test/plugin/test_cache_stats.rb +10 -13
- data/test/plugin/test_cache_strategy.rb +158 -160
- data/test/plugin/test_filter_kubernetes_metadata.rb +480 -320
- data/test/plugin/test_utils.rb +56 -0
- data/test/plugin/test_watch_namespaces.rb +209 -55
- data/test/plugin/test_watch_pods.rb +302 -103
- data/test/plugin/watch_test.rb +52 -33
- metadata +69 -72
- data/circle.yml +0 -17
data/lib/fluent/plugin/kubernetes_metadata_common.rb

```diff
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -18,6 +20,11 @@
 #
 module KubernetesMetadata
   module Common
+    class GoneError < StandardError
+      def initialize(msg = '410 Gone')
+        super
+      end
+    end
 
     def match_annotations(annotations)
       result = {}
@@ -32,61 +39,81 @@ module KubernetesMetadata
     end
 
     def parse_namespace_metadata(namespace_object)
-      labels =
-
+      labels = ''
+      labels = syms_to_strs(namespace_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(namespace_object[:metadata][:annotations].to_h))
       if @de_dot
-
-
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end
       kubernetes_metadata = {
-        'namespace_id' => namespace_object[
-        'creation_timestamp' => namespace_object[
+        'namespace_id' => namespace_object[:metadata][:uid],
+        'creation_timestamp' => namespace_object[:metadata][:creationTimestamp]
       }
       kubernetes_metadata['namespace_labels'] = labels unless labels.empty?
       kubernetes_metadata['namespace_annotations'] = annotations unless annotations.empty?
-
+      kubernetes_metadata
     end
 
     def parse_pod_metadata(pod_object)
-      labels =
-
+      labels = ''
+      labels = syms_to_strs(pod_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(pod_object[:metadata][:annotations].to_h))
       if @de_dot
-
-
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end
 
-      # collect container
+      # collect container information
       container_meta = {}
       begin
-        pod_object[
-
-
-        container_meta[
-
-
-
-
-
-
-
+        pod_object[:status][:containerStatuses].each do |container_status|
+          container_id = (container_status[:containerID]||"").sub(%r{^[-_a-zA-Z0-9]+://}, '')
+          key = container_status[:name]
+          container_meta[key] = if @skip_container_metadata
+                                  {
+                                    'name' => container_status[:name]
+                                  }
+                                else
+                                  {
+                                    'name' => container_status[:name],
+                                    'image' => container_status[:image],
+                                    'image_id' => container_status[:imageID],
+                                    :containerID => container_id
+                                  }
+                                end
+        end if pod_object[:status] && pod_object[:status][:containerStatuses]
+      rescue StandardError=>e
+        log.warn("parsing container meta information failed for: #{pod_object[:metadata][:namespace]}/#{pod_object[:metadata][:name]}: #{e}")
       end
 
       kubernetes_metadata = {
-
-
-
-
-
-
-        'master_url' => @kubernetes_url
+        'namespace_name' => pod_object[:metadata][:namespace],
+        'pod_id' => pod_object[:metadata][:uid],
+        'pod_name' => pod_object[:metadata][:name],
+        'pod_ip' => pod_object[:status][:podIP],
+        'containers' => syms_to_strs(container_meta),
+        'host' => pod_object[:spec][:nodeName]
       }
       kubernetes_metadata['annotations'] = annotations unless annotations.empty?
-
+      kubernetes_metadata['labels'] = labels unless labels.empty?
+      kubernetes_metadata['master_url'] = @kubernetes_url unless @skip_master_url
+      kubernetes_metadata
     end
 
     def syms_to_strs(hsh)
       newhsh = {}
-      hsh.each_pair do |kk,vv|
+      hsh.each_pair do |kk, vv|
        if vv.is_a?(Hash)
          vv = syms_to_strs(vv)
        end
@@ -98,6 +125,5 @@ module KubernetesMetadata
      end
      newhsh
    end
-
   end
 end
```
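To make the new record shape concrete, here is a hypothetical kubeclient-style pod object and the hash `parse_pod_metadata` would derive from it (field values are illustrative; `@skip_labels`, `@skip_container_metadata`, `@skip_master_url`, `@de_dot` and `@de_slash` are assumed to be off):

```ruby
# Hypothetical symbol-keyed pod object, trimmed to the fields the method reads.
pod_object = {
  metadata: { namespace: 'default', uid: 'abc-123', name: 'web-1',
              labels: { app: 'web' }, annotations: {} },
  spec: { nodeName: 'node-a' },
  status: {
    podIP: '10.0.0.5',
    containerStatuses: [
      { name: 'web', image: 'nginx:1.21', imageID: 'sha256:0d17b5',
        containerID: 'docker://4c8b0d' } # runtime prefix is stripped by the sub above
    ]
  }
}

# parse_pod_metadata(pod_object) then returns roughly:
# {
#   'namespace_name' => 'default',
#   'pod_id'         => 'abc-123',
#   'pod_name'       => 'web-1',
#   'pod_ip'         => '10.0.0.5',
#   'containers'     => { 'web' => { 'name' => 'web', 'image' => 'nginx:1.21',
#                                    'image_id' => 'sha256:0d17b5',
#                                    'containerID' => '4c8b0d' } },
#   'host'           => 'node-a',
#   'labels'         => { 'app' => 'web' },
#   'master_url'     => @kubernetes_url
# }
```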
data/lib/fluent/plugin/kubernetes_metadata_stats.rb

```diff
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -19,17 +21,16 @@
 require 'lru_redux'
 module KubernetesMetadata
   class Stats
-
     def initialize
       @stats = ::LruRedux::TTL::ThreadSafeCache.new(1000, 3600)
     end
 
     def bump(key)
-
+      @stats[key] = @stats.getset(key) { 0 } + 1
     end
 
     def set(key, value)
-
+      @stats[key] = value
     end
 
     def [](key)
@@ -37,10 +38,9 @@ module KubernetesMetadata
     end
 
     def to_s
-
-
+      'stats - ' + [].tap do |a|
+        @stats.each { |k, v| a << "#{k}: #{v}" }
       end.join(', ')
     end
-
   end
 end
```
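A quick session with the reworked class (assuming the gem and its lru_redux dependency are installed; key order in the output may vary):

```ruby
require 'fluent/plugin/kubernetes_metadata_stats'

stats = KubernetesMetadata::Stats.new
stats.bump(:pod_cache_watch_updates) # getset seeds absent keys with 0
stats.bump(:pod_cache_watch_updates)
stats.set(:namespace_cache_size, 12)
puts stats.to_s
# => stats - pod_cache_watch_updates: 2, namespace_cache_size: 12
```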
data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb (new file)

```diff
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+require 'kubeclient'
+
+module KubernetesMetadata
+  module TestApiAdapter
+
+    def api_valid?
+      true
+    end
+    def get_namespace(namespace_name)
+      return {
+        metadata: {
+          name: namespace_name,
+          uid: namespace_name + 'uuid',
+          labels: {
+            foo_ns: 'bar_ns'
+          }
+        }
+      }
+    end
+
+    def get_pod(pod_name, namespace_name)
+      return {
+        metadata: {
+          name: pod_name,
+          namespace: namespace_name,
+          uid: namespace_name + namespace_name + "uuid",
+          labels: {
+            foo: 'bar'
+          }
+        },
+        spec: {
+          nodeName: 'aNodeName',
+          containers: [{
+            name: 'foo',
+            image: 'bar'
+          }, {
+            name: 'bar',
+            image: 'foo'
+          }]
+        },
+        status: {
+          podIP: '172.17.0.8'
+        }
+      }
+    end
+
+  end
+end
```
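One plausible way to use such an adapter is to extend a kubeclient instance so the canned responses replace network calls; this is an illustrative sketch, not the gem's actual test harness:

```ruby
require 'kubeclient'
require 'fluent/plugin/kubernetes_metadata_test_api_adapter'

client = Kubeclient::Client.new('https://localhost:8443/api', 'v1')
client.extend(KubernetesMetadata::TestApiAdapter) # singleton methods win over the class's

client.api_valid?                                    # => true, no API call made
client.get_namespace('default')[:metadata][:uid]     # => "defaultuuid"
client.get_pod('mypod', 'default')[:spec][:nodeName] # => "aNodeName"
```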
data/lib/fluent/plugin/kubernetes_metadata_util.rb (new file)

```diff
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+module KubernetesMetadata
+  module Util
+    def create_time_from_record(record, internal_time)
+      time_key = @time_fields.detect { |ii| record.key?(ii) }
+      time = record[time_key]
+      if time.nil? || time.is_a?(String) && time.chop.empty?
+        # `internal_time` is a Fluent::EventTime, it can't compare with Time.
+        return Time.at(internal_time.to_f)
+      end
+
+      if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
+        timei = time.to_i
+        return Time.at(timei / 1_000_000, timei % 1_000_000)
+      end
+      return Time.at(time) if time.is_a?(Numeric)
+
+      Time.parse(time)
+    end
+  end
+end
+
+#https://stackoverflow.com/questions/5622435/how-do-i-convert-a-ruby-class-name-to-a-underscore-delimited-symbol
+class String
+  def underscore
+    word = self.dup
+    word.gsub!(/::/, '_')
+    word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
+    word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
+    word.tr!("-", "_")
+    word.downcase!
+    word
+  end
+end
```
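A small hypothetical harness shows how the mixin resolves event time; `@time_fields` must be supplied by the including class (the filter plugin builds it from its configuration):

```ruby
require 'time'
require 'fluent/plugin/kubernetes_metadata_util'

class TimeProbe
  include KubernetesMetadata::Util

  def initialize
    @time_fields = ['time'] # illustrative; the plugin derives this from config
  end
end

probe = TimeProbe.new
probe.create_time_from_record({ 'time' => '2021-06-01T12:00:00Z' }, 0)
# => 2021-06-01 12:00:00 UTC, parsed from the record

probe.create_time_from_record({}, 1_622_548_800)
# => Time.at(1622548800.0), falling back to the internal event time

'KubernetesMetadata::WatchPods'.underscore
# => "kubernetes_metadata_watch_pods"
```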
data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb

```diff
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,45 +18,137 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_pods.rb unify them
 require_relative 'kubernetes_metadata_common'
 
 module KubernetesMetadata
   module WatchNamespaces
-
     include ::KubernetesMetadata::Common
 
+    def set_up_namespace_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      namespace_watcher = start_namespace_watch
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:namespace_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        namespace_watcher ||= get_namespaces_and_start_watcher
+        process_namespace_watcher_notices(namespace_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:namespace_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting namespace watch to reset resource versions.', e)
+        namespace_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:namespace_watch_failures)
+        if Thread.current[:namespace_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing namespace watch event. ' \
+            'The connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:namespace_watch_retry_backoff_interval]} " \
+            'seconds and resetting the namespace watcher.', e
+          )
+          sleep(Thread.current[:namespace_watch_retry_backoff_interval])
+          Thread.current[:namespace_watch_retry_count] += 1
+          Thread.current[:namespace_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          namespace_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing namespace watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_namespace_watch
-
-
-
-
-
-
-
-
+      get_namespaces_and_start_watcher
+    rescue StandardError => e
+      message = 'start_namespace_watch: Exception encountered setting up ' \
+                "namespace watch from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all namespaces, record the resourceVersion and return a watcher
+    # starting from that resourceVersion.
+    def get_namespaces_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      namespaces = @client.get_namespaces(options)
+      namespaces[:items].each do |namespace|
+        cache_key = namespace[:metadata][:uid]
+        @namespace_cache[cache_key] = parse_namespace_metadata(namespace)
+        @stats.bump(:namespace_cache_host_updates)
       end
+
+      # continue watching from most recent resourceVersion
+      options[:resource_version] = namespaces[:metadata][:resourceVersion]
+
+      watcher = @client.watch_namespaces(options)
+      reset_namespace_watch_retry_stats
+      watcher
+    end
+
+    # Reset namespace watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_namespace_watch_retry_stats
+      Thread.current[:namespace_watch_retry_count] = 0
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+    end
+
+    # Process a watcher notice and potentially raise an exception.
+    def process_namespace_watcher_notices(watcher)
       watcher.each do |notice|
-        case notice
-
-
-
-
-
-
-
-          @stats.bump(:namespace_cache_watch_misses)
-        end
-        when 'DELETED'
-          # ignore and let age out for cases where
-          # deleted but still processing logs
-          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        case notice[:type]
+        when 'MODIFIED'
+          reset_namespace_watch_retry_stats
+          cache_key = notice[:object][:metadata][:uid]
+          cached = @namespace_cache[cache_key]
+          if cached
+            @namespace_cache[cache_key] = parse_namespace_metadata(notice[:object])
+            @stats.bump(:namespace_cache_watch_updates)
          else
-
-
-
+            @stats.bump(:namespace_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_namespace_watch_retry_stats
+          # ignore and let age out for cases where
+          # deleted but still processing logs
+          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @stats.bump(:namespace_watch_gone_notices)
+            raise GoneError
+          else
+            @stats.bump(:namespace_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching namespaces: #{message}"
+          end
+        else
+          reset_namespace_watch_retry_stats
+          # Don't pay attention to creations, since the created namespace may not
+          # be used by any namespace on this node.
+          @stats.bump(:namespace_cache_watch_ignored)
        end
      end
    end
-
  end
 end
```
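For reference, the ERROR branch above keys off Kubernetes' standard "410 Gone" watch event, whose payload looks roughly like this symbol-keyed hash (values illustrative):

```ruby
notice = {
  type: 'ERROR',
  object: {
    kind: 'Status',
    code: 410,
    reason: 'Expired',
    message: 'too old resource version: 12345 (67890)'
  }
}
# notice[:object][:code] == 410, so process_namespace_watcher_notices raises
# GoneError and the loop in set_up_namespace_thread restarts the watch.
```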
data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb

```diff
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,46 +18,153 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_namespaces.rb unify them
 require_relative 'kubernetes_metadata_common'
 
 module KubernetesMetadata
   module WatchPods
-
     include ::KubernetesMetadata::Common
 
+    def set_up_pod_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      pod_watcher = start_pod_watch
+
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:pod_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        pod_watcher ||= get_pods_and_start_watcher
+        process_pod_watcher_notices(pod_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:pod_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting pod watch to reset resource versions.', e)
+        pod_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:pod_watch_failures)
+        if Thread.current[:pod_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:pod_watch_retry_backoff_interval]} " \
+            'seconds and resetting the pod watcher.', e
+          )
+          sleep(Thread.current[:pod_watch_retry_backoff_interval])
+          Thread.current[:pod_watch_retry_count] += 1
+          Thread.current[:pod_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          pod_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_pod_watch
-
-
-
-
-
-
-
-
+      get_pods_and_start_watcher
+    rescue StandardError => e
+      message = 'start_pod_watch: Exception encountered setting up pod watch ' \
+                "from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all pods, record the resourceVersion and return a watcher starting
+    # from that resourceVersion.
+    def get_pods_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      if ENV['K8S_NODE_NAME']
+        options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME']
       end
+      if @last_seen_resource_version
+        options[:resource_version] = @last_seen_resource_version
+      else
+        pods = @client.get_pods(options)
+        pods[:items].each do |pod|
+          cache_key = pod[:metadata][:uid]
+          @cache[cache_key] = parse_pod_metadata(pod)
+          @stats.bump(:pod_cache_host_updates)
+        end
+
+        # continue watching from most recent resourceVersion
+        options[:resource_version] = pods[:metadata][:resourceVersion]
+      end
+
+      watcher = @client.watch_pods(options)
+      reset_pod_watch_retry_stats
+      watcher
+    end
+
+    # Reset pod watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_pod_watch_retry_stats
+      Thread.current[:pod_watch_retry_count] = 0
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+    end
 
+    # Process a watcher notice and potentially raise an exception.
+    def process_pod_watcher_notices(watcher)
       watcher.each do |notice|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-          @stats.bump(:
+        # store version we processed to not reprocess it ... do not unset when there is no version in response
+        version = ( # TODO: replace with &.dig once we are on ruby 2.5+
+          notice[:object] && notice[:object][:metadata] && notice[:object][:metadata][:resourceVersion]
+        )
+        @last_seen_resource_version = version if version
+
+        case notice[:type]
+        when 'MODIFIED'
+          reset_pod_watch_retry_stats
+          cache_key = notice.dig(:object, :metadata, :uid)
+          cached = @cache[cache_key]
+          if cached
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_watch_updates)
+          elsif ENV['K8S_NODE_NAME'] == notice[:object][:spec][:nodeName]
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_host_updates)
+          else
+            @stats.bump(:pod_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_pod_watch_retry_stats
+          # ignore and let age out for cases where pods
+          # deleted but still processing logs
+          @stats.bump(:pod_cache_watch_delete_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @last_seen_resource_version = nil # requested resourceVersion was too old, need to reset
+            @stats.bump(:pod_watch_gone_notices)
+            raise GoneError
          else
-
-
-
+            @stats.bump(:pod_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching pods: #{message}"
+          end
+        else
+          reset_pod_watch_retry_stats
+          # Don't pay attention to creations, since the created pod may not
+          # end up on this node.
+          @stats.bump(:pod_cache_watch_ignored)
        end
      end
    end
```
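The retry path multiplies the sleep interval by the exponential backoff base after each failure. Assuming the plugin's defaults of `watch_retry_interval = 1`, `watch_retry_exponential_backoff_base = 2` and `watch_retry_max_times = 10` (check the installed version's config params), the sleeps grow like this:

```ruby
interval = 1 # @watch_retry_interval
base     = 2 # @watch_retry_exponential_backoff_base

sleeps = 10.times.map { |_| s = interval; interval *= base; s }
p sleeps # => [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] seconds before each retry
```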