fluent-plugin-kubernetes_metadata_filter 2.1.4 → 2.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +57 -0
- data/.gitignore +0 -1
- data/.rubocop.yml +57 -0
- data/Gemfile +4 -2
- data/Gemfile.lock +158 -0
- data/README.md +48 -28
- data/Rakefile +15 -11
- data/fluent-plugin-kubernetes_metadata_filter.gemspec +25 -28
- data/lib/fluent/plugin/filter_kubernetes_metadata.rb +185 -131
- data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +27 -20
- data/lib/fluent/plugin/kubernetes_metadata_common.rb +59 -33
- data/lib/fluent/plugin/kubernetes_metadata_stats.rb +6 -6
- data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb +68 -0
- data/lib/fluent/plugin/kubernetes_metadata_util.rb +53 -0
- data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +121 -27
- data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +138 -29
- data/release_notes.md +42 -0
- data/test/cassettes/kubernetes_docker_metadata_annotations.yml +0 -34
- data/test/cassettes/{kubernetes_docker_metadata_dotted_labels.yml → kubernetes_docker_metadata_dotted_slashed_labels.yml} +0 -34
- data/test/cassettes/kubernetes_get_api_v1.yml +193 -0
- data/test/cassettes/kubernetes_get_api_v1_using_token.yml +195 -0
- data/test/cassettes/kubernetes_get_namespace_default.yml +69 -0
- data/test/cassettes/kubernetes_get_namespace_default_using_token.yml +71 -0
- data/test/cassettes/{kubernetes_docker_metadata.yml → kubernetes_get_pod.yml} +0 -82
- data/test/cassettes/{metadata_with_namespace_id.yml → kubernetes_get_pod_container_init.yml} +3 -134
- data/test/cassettes/{kubernetes_docker_metadata_using_bearer_token.yml → kubernetes_get_pod_using_token.yml} +5 -105
- data/test/cassettes/metadata_from_tag_and_journald_fields.yml +0 -255
- data/test/cassettes/metadata_from_tag_journald_and_kubernetes_fields.yml +0 -255
- data/test/cassettes/{non_kubernetes_docker_metadata.yml → valid_kubernetes_api_server_using_token.yml} +4 -44
- data/test/helper.rb +20 -2
- data/test/plugin/test_cache_stats.rb +10 -13
- data/test/plugin/test_cache_strategy.rb +158 -160
- data/test/plugin/test_filter_kubernetes_metadata.rb +480 -320
- data/test/plugin/test_utils.rb +56 -0
- data/test/plugin/test_watch_namespaces.rb +209 -55
- data/test/plugin/test_watch_pods.rb +302 -103
- data/test/plugin/watch_test.rb +52 -33
- metadata +69 -72
- data/circle.yml +0 -17
data/lib/fluent/plugin/kubernetes_metadata_common.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -18,6 +20,11 @@
 #
 module KubernetesMetadata
   module Common
+    class GoneError < StandardError
+      def initialize(msg = '410 Gone')
+        super
+      end
+    end

     def match_annotations(annotations)
       result = {}
@@ -32,61 +39,81 @@ module KubernetesMetadata
     end

     def parse_namespace_metadata(namespace_object)
-      labels =
-
+      labels = ''
+      labels = syms_to_strs(namespace_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(namespace_object[:metadata][:annotations].to_h))
       if @de_dot
-
-
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end
       kubernetes_metadata = {
-        'namespace_id' => namespace_object[
-        'creation_timestamp' => namespace_object[
+        'namespace_id' => namespace_object[:metadata][:uid],
+        'creation_timestamp' => namespace_object[:metadata][:creationTimestamp]
       }
       kubernetes_metadata['namespace_labels'] = labels unless labels.empty?
       kubernetes_metadata['namespace_annotations'] = annotations unless annotations.empty?
-
+      kubernetes_metadata
     end

     def parse_pod_metadata(pod_object)
-      labels =
-
+      labels = ''
+      labels = syms_to_strs(pod_object[:metadata][:labels].to_h) unless @skip_labels
+
+      annotations = match_annotations(syms_to_strs(pod_object[:metadata][:annotations].to_h))
       if @de_dot
-
-
+        de_dot!(labels) unless @skip_labels
+        de_dot!(annotations)
+      end
+      if @de_slash
+        de_slash!(labels) unless @skip_labels
+        de_slash!(annotations)
       end

-      # collect container
+      # collect container information
       container_meta = {}
       begin
-        pod_object[
-
-
-          container_meta[
-
-
-
-
-
-
-
+        pod_object[:status][:containerStatuses].each do |container_status|
+          container_id = (container_status[:containerID]||"").sub(%r{^[-_a-zA-Z0-9]+://}, '')
+          key = container_status[:name]
+          container_meta[key] = if @skip_container_metadata
+                                  {
+                                    'name' => container_status[:name]
+                                  }
+                                else
+                                  {
+                                    'name' => container_status[:name],
+                                    'image' => container_status[:image],
+                                    'image_id' => container_status[:imageID],
+                                    :containerID => container_id
+                                  }
+                                end
+        end if pod_object[:status] && pod_object[:status][:containerStatuses]
+      rescue StandardError=>e
+        log.warn("parsing container meta information failed for: #{pod_object[:metadata][:namespace]}/#{pod_object[:metadata][:name]}: #{e}")
       end

       kubernetes_metadata = {
-
-
-
-
-
-
-        'master_url' => @kubernetes_url
+        'namespace_name' => pod_object[:metadata][:namespace],
+        'pod_id' => pod_object[:metadata][:uid],
+        'pod_name' => pod_object[:metadata][:name],
+        'pod_ip' => pod_object[:status][:podIP],
+        'containers' => syms_to_strs(container_meta),
+        'host' => pod_object[:spec][:nodeName]
       }
       kubernetes_metadata['annotations'] = annotations unless annotations.empty?
-
+      kubernetes_metadata['labels'] = labels unless labels.empty?
+      kubernetes_metadata['master_url'] = @kubernetes_url unless @skip_master_url
+      kubernetes_metadata
     end

     def syms_to_strs(hsh)
       newhsh = {}
-      hsh.each_pair do |kk,vv|
+      hsh.each_pair do |kk, vv|
         if vv.is_a?(Hash)
           vv = syms_to_strs(vv)
         end
@@ -98,6 +125,5 @@ module KubernetesMetadata
       end
       newhsh
     end
-
   end
 end
data/lib/fluent/plugin/kubernetes_metadata_stats.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -19,17 +21,16 @@
 require 'lru_redux'
 module KubernetesMetadata
   class Stats
-
     def initialize
       @stats = ::LruRedux::TTL::ThreadSafeCache.new(1000, 3600)
     end

     def bump(key)
-
+      @stats[key] = @stats.getset(key) { 0 } + 1
     end

     def set(key, value)
-
+      @stats[key] = value
     end

     def [](key)
@@ -37,10 +38,9 @@ module KubernetesMetadata
     end

     def to_s
-
-
+      'stats - ' + [].tap do |a|
+        @stats.each { |k, v| a << "#{k}: #{v}" }
       end.join(', ')
     end
-
   end
 end
data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb (new file)

@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+require 'kubeclient'
+
+module KubernetesMetadata
+  module TestApiAdapter
+
+    def api_valid?
+      true
+    end
+    def get_namespace(namespace_name)
+      return {
+        metadata: {
+          name: namespace_name,
+          uid: namespace_name + 'uuid',
+          labels: {
+            foo_ns: 'bar_ns'
+          }
+        }
+      }
+    end
+
+    def get_pod(pod_name, namespace_name)
+      return {
+        metadata: {
+          name: pod_name,
+          namespace: namespace_name,
+          uid: namespace_name + namespace_name + "uuid",
+          labels: {
+            foo: 'bar'
+          }
+        },
+        spec: {
+          nodeName: 'aNodeName',
+          containers: [{
+            name: 'foo',
+            image: 'bar'
+          }, {
+            name: 'bar',
+            image: 'foo'
+          }]
+        },
+        status: {
+          podIP: '172.17.0.8'
+        }
+      }
+    end
+
+  end
+end
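The new TestApiAdapter module simply returns canned namespace and pod payloads so tests can run without a live API server. A hypothetical wiring sketch; the client object and the extend call here are illustrative only and not taken from the diff:

    require 'fluent/plugin/kubernetes_metadata_test_api_adapter'

    client = Object.new
    client.extend(KubernetesMetadata::TestApiAdapter)

    client.api_valid?                    # => true
    client.get_pod('mypod', 'default')   # => canned hash with :metadata, :spec and :status keys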
data/lib/fluent/plugin/kubernetes_metadata_util.rb (new file)

@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+#
+# Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+# Kubernetes metadata
+#
+# Copyright 2021 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+module KubernetesMetadata
+  module Util
+    def create_time_from_record(record, internal_time)
+      time_key = @time_fields.detect { |ii| record.key?(ii) }
+      time = record[time_key]
+      if time.nil? || time.is_a?(String) && time.chop.empty?
+        # `internal_time` is a Fluent::EventTime, it can't compare with Time.
+        return Time.at(internal_time.to_f)
+      end
+
+      if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
+        timei = time.to_i
+        return Time.at(timei / 1_000_000, timei % 1_000_000)
+      end
+      return Time.at(time) if time.is_a?(Numeric)
+
+      Time.parse(time)
+    end
+  end
+end
+
+#https://stackoverflow.com/questions/5622435/how-do-i-convert-a-ruby-class-name-to-a-underscore-delimited-symbol
+class String
+  def underscore
+    word = self.dup
+    word.gsub!(/::/, '_')
+    word.gsub!(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
+    word.gsub!(/([a-z\d])([A-Z])/,'\1_\2')
+    word.tr!("-", "_")
+    word.downcase!
+    word
+  end
+end
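Two behaviors in the new util file are easy to miss: journald time fields (_SOURCE_REALTIME_TIMESTAMP, __REALTIME_TIMESTAMP) are treated as microsecond epoch values, and the String#underscore monkey patch snake_cases class names. A small illustration; the Harness class and its @time_fields value are made up for the example, since the real list comes from the filter's configuration:

    require 'fluent/plugin/kubernetes_metadata_util'

    class Harness
      include KubernetesMetadata::Util
      def initialize
        @time_fields = ['_SOURCE_REALTIME_TIMESTAMP', 'time']
      end
    end

    Harness.new.create_time_from_record({ '_SOURCE_REALTIME_TIMESTAMP' => '1650000000123456' }, nil)
    # => Time.at(1_650_000_000, 123_456), i.e. seconds plus microseconds

    'Fluent::KubernetesMetadataFilter'.underscore
    # => "fluent_kubernetes_metadata_filter"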
data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,45 +18,137 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_pods.rb unify them
 require_relative 'kubernetes_metadata_common'

 module KubernetesMetadata
   module WatchNamespaces
-
     include ::KubernetesMetadata::Common

+    def set_up_namespace_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      namespace_watcher = start_namespace_watch
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:namespace_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        namespace_watcher ||= get_namespaces_and_start_watcher
+        process_namespace_watcher_notices(namespace_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:namespace_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting namespace watch to reset resource versions.', e)
+        namespace_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:namespace_watch_failures)
+        if Thread.current[:namespace_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing namespace watch event. ' \
+            'The connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:namespace_watch_retry_backoff_interval]} " \
+            'seconds and resetting the namespace watcher.', e
+          )
+          sleep(Thread.current[:namespace_watch_retry_backoff_interval])
+          Thread.current[:namespace_watch_retry_count] += 1
+          Thread.current[:namespace_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          namespace_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing namespace watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_namespace_watch
-
-
-
-
-
-
-
-
+      get_namespaces_and_start_watcher
+    rescue StandardError => e
+      message = 'start_namespace_watch: Exception encountered setting up ' \
+                "namespace watch from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all namespaces, record the resourceVersion and return a watcher
+    # starting from that resourceVersion.
+    def get_namespaces_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      namespaces = @client.get_namespaces(options)
+      namespaces[:items].each do |namespace|
+        cache_key = namespace[:metadata][:uid]
+        @namespace_cache[cache_key] = parse_namespace_metadata(namespace)
+        @stats.bump(:namespace_cache_host_updates)
       end
+
+      # continue watching from most recent resourceVersion
+      options[:resource_version] = namespaces[:metadata][:resourceVersion]
+
+      watcher = @client.watch_namespaces(options)
+      reset_namespace_watch_retry_stats
+      watcher
+    end
+
+    # Reset namespace watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_namespace_watch_retry_stats
+      Thread.current[:namespace_watch_retry_count] = 0
+      Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
+    end
+
+    # Process a watcher notice and potentially raise an exception.
+    def process_namespace_watcher_notices(watcher)
       watcher.each do |notice|
-        case notice
-
-
-
-
-
-
-
-            @stats.bump(:namespace_cache_watch_misses)
-          end
-        when 'DELETED'
-          # ignore and let age out for cases where
-          # deleted but still processing logs
-          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        case notice[:type]
+        when 'MODIFIED'
+          reset_namespace_watch_retry_stats
+          cache_key = notice[:object][:metadata][:uid]
+          cached = @namespace_cache[cache_key]
+          if cached
+            @namespace_cache[cache_key] = parse_namespace_metadata(notice[:object])
+            @stats.bump(:namespace_cache_watch_updates)
          else
-
-
-
+            @stats.bump(:namespace_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_namespace_watch_retry_stats
+          # ignore and let age out for cases where
+          # deleted but still processing logs
+          @stats.bump(:namespace_cache_watch_deletes_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @stats.bump(:namespace_watch_gone_notices)
+            raise GoneError
+          else
+            @stats.bump(:namespace_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching namespaces: #{message}"
+          end
+        else
+          reset_namespace_watch_retry_stats
+          # Don't pay attention to creations, since the created namespace may not
+          # be used by any namespace on this node.
+          @stats.bump(:namespace_cache_watch_ignored)
         end
       end
     end
-
   end
 end
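The retry handling added to the namespace watcher backs off exponentially: each swallowed exception multiplies the per-thread sleep interval by @watch_retry_exponential_backoff_base until @watch_retry_max_times is exceeded, at which point Fluent::UnrecoverableError is raised so Fluentd restarts. A toy view of the resulting sleep schedule, assuming for illustration an initial interval of 1 second, a base of 2 and 10 allowed retries (the real numbers come from the plugin's watch_retry_* configuration parameters):

    interval = 1
    schedule = 10.times.map { s = interval; interval *= 2; s }
    # schedule => [1, 2, 4, 8, 16, 32, 64, 128, 256, 512] seconds slept before successive retries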
data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb

@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
 #
 # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
 # Kubernetes metadata
@@ -16,46 +18,153 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+# TODO: this is mostly copy-paste from kubernetes_metadata_watch_namespaces.rb unify them
 require_relative 'kubernetes_metadata_common'

 module KubernetesMetadata
   module WatchPods
-
     include ::KubernetesMetadata::Common

+    def set_up_pod_thread
+      # Any failures / exceptions in the initial setup should raise
+      # Fluent:ConfigError, so that users can inspect potential errors in
+      # the configuration.
+      pod_watcher = start_pod_watch
+
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+      Thread.current[:pod_watch_retry_count] = 0
+
+      # Any failures / exceptions in the followup watcher notice
+      # processing will be swallowed and retried. These failures /
+      # exceptions could be caused by Kubernetes API being temporarily
+      # down. We assume the configuration is correct at this point.
+      loop do
+        pod_watcher ||= get_pods_and_start_watcher
+        process_pod_watcher_notices(pod_watcher)
+      rescue GoneError => e
+        # Expected error. Quietly go back through the loop in order to
+        # start watching from the latest resource versions
+        @stats.bump(:pod_watch_gone_errors)
+        log.info('410 Gone encountered. Restarting pod watch to reset resource versions.', e)
+        pod_watcher = nil
+      rescue StandardError => e
+        @stats.bump(:pod_watch_failures)
+        if Thread.current[:pod_watch_retry_count] < @watch_retry_max_times
+          # Instead of raising exceptions and crashing Fluentd, swallow
+          # the exception and reset the watcher.
+          log.info(
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Sleeping for ' \
+            "#{Thread.current[:pod_watch_retry_backoff_interval]} " \
+            'seconds and resetting the pod watcher.', e
+          )
+          sleep(Thread.current[:pod_watch_retry_backoff_interval])
+          Thread.current[:pod_watch_retry_count] += 1
+          Thread.current[:pod_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
+          pod_watcher = nil
+        else
+          # Since retries failed for many times, log as errors instead
+          # of info and raise exceptions and trigger Fluentd to restart.
+          message =
+            'Exception encountered parsing pod watch event. The ' \
+            'connection might have been closed. Retried ' \
+            "#{@watch_retry_max_times} times yet still failing. Restarting."
+          log.error(message, e)
+          raise Fluent::UnrecoverableError, message
+        end
+      end
+    end
+
     def start_pod_watch
-
-
-
-
-
-
-
-
+      get_pods_and_start_watcher
+    rescue StandardError => e
+      message = 'start_pod_watch: Exception encountered setting up pod watch ' \
+                "from Kubernetes API #{@apiVersion} endpoint " \
+                "#{@kubernetes_url}: #{e.message}"
+      message += " (#{e.response})" if e.respond_to?(:response)
+      log.debug(message)
+
+      raise Fluent::ConfigError, message
+    end
+
+    # List all pods, record the resourceVersion and return a watcher starting
+    # from that resourceVersion.
+    def get_pods_and_start_watcher
+      options = {
+        resource_version: '0' # Fetch from API server cache instead of etcd quorum read
+      }
+      if ENV['K8S_NODE_NAME']
+        options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME']
       end
+      if @last_seen_resource_version
+        options[:resource_version] = @last_seen_resource_version
+      else
+        pods = @client.get_pods(options)
+        pods[:items].each do |pod|
+          cache_key = pod[:metadata][:uid]
+          @cache[cache_key] = parse_pod_metadata(pod)
+          @stats.bump(:pod_cache_host_updates)
+        end
+
+        # continue watching from most recent resourceVersion
+        options[:resource_version] = pods[:metadata][:resourceVersion]
+      end
+
+      watcher = @client.watch_pods(options)
+      reset_pod_watch_retry_stats
+      watcher
+    end
+
+    # Reset pod watch retry count and backoff interval as there is a
+    # successful watch notice.
+    def reset_pod_watch_retry_stats
+      Thread.current[:pod_watch_retry_count] = 0
+      Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
+    end

+    # Process a watcher notice and potentially raise an exception.
+    def process_pod_watcher_notices(watcher)
       watcher.each do |notice|
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            @stats.bump(:
+        # store version we processed to not reprocess it ... do not unset when there is no version in response
+        version = ( # TODO: replace with &.dig once we are on ruby 2.5+
+          notice[:object] && notice[:object][:metadata] && notice[:object][:metadata][:resourceVersion]
+        )
+        @last_seen_resource_version = version if version
+
+        case notice[:type]
+        when 'MODIFIED'
+          reset_pod_watch_retry_stats
+          cache_key = notice.dig(:object, :metadata, :uid)
+          cached = @cache[cache_key]
+          if cached
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_watch_updates)
+          elsif ENV['K8S_NODE_NAME'] == notice[:object][:spec][:nodeName]
+            @cache[cache_key] = parse_pod_metadata(notice[:object])
+            @stats.bump(:pod_cache_host_updates)
+          else
+            @stats.bump(:pod_cache_watch_misses)
+          end
+        when 'DELETED'
+          reset_pod_watch_retry_stats
+          # ignore and let age out for cases where pods
+          # deleted but still processing logs
+          @stats.bump(:pod_cache_watch_delete_ignored)
+        when 'ERROR'
+          if notice[:object] && notice[:object][:code] == 410
+            @last_seen_resource_version = nil # requested resourceVersion was too old, need to reset
+            @stats.bump(:pod_watch_gone_notices)
+            raise GoneError
          else
-
-
-
+            @stats.bump(:pod_watch_error_type_notices)
+            message = notice[:object][:message] if notice[:object] && notice[:object][:message]
+            raise "Error while watching pods: #{message}"
+          end
+        else
+          reset_pod_watch_retry_stats
+          # Don't pay attention to creations, since the created pod may not
+          # end up on this node.
+          @stats.bump(:pod_cache_watch_ignored)
         end
       end
     end
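Two details of the pod watcher worth noting: when the K8S_NODE_NAME environment variable is set, both the initial pod list and the watch are scoped to that node through a field selector, and @last_seen_resource_version lets a restarted watch resume where it left off instead of re-listing. A sketch of the options hash this builds, with an invented node name:

    ENV['K8S_NODE_NAME'] = 'worker-1'   # normally injected via the Kubernetes Downward API
    options = { resource_version: '0' } # '0' reads from the API server cache, not an etcd quorum read
    options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME'] if ENV['K8S_NODE_NAME']
    options
    # => {:resource_version=>"0", :field_selector=>"spec.nodeName=worker-1"}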