fluent-plugin-kubernetes_metadata_filter 2.4.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 636af222c31e7e4db80d96594f76d3f4918f981120e9a4006e414ddd6b94df15
4
- data.tar.gz: a4edbb9c153da167210ee0fc879d38e37428839ed640264ce81ad824e7991004
3
+ metadata.gz: b1991a6b8cd09f65728588401e697dda36954b94cf75b625789c6e7773d50ccf
4
+ data.tar.gz: df46f28a113c17aa3e2174d3ad23edb0b5b93fb6d65d5f02787d6ac83ac129a2
5
5
  SHA512:
6
- metadata.gz: a7b667937578e95e259752e9ad64391774103d1ad2ea7f569259ee6f0d6addfbb0e0960fe95379300052d8863342bc6cb0235f95af8f0346a03f6dba3dffffb2
7
- data.tar.gz: b8e0a412669d5fe57ce3c9cb8ed641e75c0c67d42305cdf88b91838621e781d172a19e079a4e312117e3e1dfd411f5c91c92a0e1626ed6a9c152e0633f64bcb2
6
+ metadata.gz: 1a41d498af8a2e92723c05679608294d07acf2cf7a87dfdf5921fe3a385df6369003eef939dd5aeabf970c67f31eb319ac1485d566ac110bcbe6b79f5acc392a
7
+ data.tar.gz: e62c355dbdb0ac1e89e43534c5929c80e82b166f5c614b1967d6ff79658835881a9c6803d5072e85a41c83fbefc8db326ba95c6d2f1c5a20ca1b8a0f18ce3cf1
@@ -2,13 +2,15 @@ version: 2.1
2
2
 
3
3
  install: &install
4
4
  name: Install bundle
5
- command: bundle install --path vendor/bundle
5
+ command: |
6
+ gem install bundler
7
+ bundle install --path vendor/bundle
6
8
 
7
9
  missingdeps: &missingdeps
8
10
  name: Install missing dependencies
9
11
  command: |
10
12
  cat /etc/os-release
11
- printf "deb http://archive.debian.org/debian/ jessie main\ndeb-src http://archive.debian.org/debian/ jessie main\ndeb http://security.debian.org jessie/updates main\ndeb-src http://security.debian.org jessie/updates main" > /tmp/sources.list
13
+ printf "deb http://deb.debian.org/debian buster main\ndeb http://security.debian.org buster/updates main\ndeb-src http://security.debian.org buster/updates main" > /tmp/sources.list
12
14
  sudo cp /tmp/sources.list /etc/apt/sources.list
13
15
  sudo apt-get update
14
16
  sudo apt-get install cmake libicu-dev libssl-dev
@@ -53,4 +55,3 @@ workflows:
53
55
  ruby-version: ruby-2-5
54
56
  - ruby-test:
55
57
  ruby-version: ruby-2-6
56
-
data/.gitignore CHANGED
@@ -4,7 +4,6 @@
4
4
  .config
5
5
  .yardoc
6
6
  vendor/
7
- Gemfile.lock
8
7
  InstalledFiles
9
8
  _yardoc
10
9
  coverage
@@ -0,0 +1,150 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fluent-plugin-kubernetes_metadata_filter (2.5.0)
5
+ fluentd (>= 0.14.0, < 1.12)
6
+ kubeclient (< 5)
7
+ lru_redux
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ addressable (2.7.0)
13
+ public_suffix (>= 2.0.2, < 5.0)
14
+ ast (2.4.1)
15
+ bump (0.9.0)
16
+ charlock_holmes (0.7.7)
17
+ codeclimate-test-reporter (0.6.0)
18
+ simplecov (>= 0.7.1, < 1.0.0)
19
+ concurrent-ruby (1.1.6)
20
+ cool.io (1.6.0)
21
+ copyright-header (1.0.22)
22
+ github-linguist
23
+ crack (0.4.3)
24
+ safe_yaml (~> 1.0.0)
25
+ docile (1.3.2)
26
+ domain_name (0.5.20190701)
27
+ unf (>= 0.0.5, < 1.0.0)
28
+ escape_utils (1.2.1)
29
+ ffi (1.13.1)
30
+ ffi-compiler (1.0.1)
31
+ ffi (>= 1.0.0)
32
+ rake
33
+ fluentd (1.11.1)
34
+ cool.io (>= 1.4.5, < 2.0.0)
35
+ http_parser.rb (>= 0.5.1, < 0.7.0)
36
+ msgpack (>= 1.3.1, < 2.0.0)
37
+ serverengine (>= 2.0.4, < 3.0.0)
38
+ sigdump (~> 0.2.2)
39
+ strptime (>= 0.2.2, < 1.0.0)
40
+ tzinfo (>= 1.0, < 3.0)
41
+ tzinfo-data (~> 1.0)
42
+ yajl-ruby (~> 1.0)
43
+ github-linguist (7.9.0)
44
+ charlock_holmes (~> 0.7.6)
45
+ escape_utils (~> 1.2.0)
46
+ mini_mime (~> 1.0)
47
+ rugged (>= 0.25.1)
48
+ hashdiff (1.0.1)
49
+ http (4.4.1)
50
+ addressable (~> 2.3)
51
+ http-cookie (~> 1.0)
52
+ http-form_data (~> 2.2)
53
+ http-parser (~> 1.2.0)
54
+ http-accept (1.7.0)
55
+ http-cookie (1.0.3)
56
+ domain_name (~> 0.5)
57
+ http-form_data (2.3.0)
58
+ http-parser (1.2.1)
59
+ ffi-compiler (>= 1.0, < 2.0)
60
+ http_parser.rb (0.6.0)
61
+ kubeclient (4.7.0)
62
+ http (>= 3.0, < 5.0)
63
+ recursive-open-struct (~> 1.1, >= 1.1.1)
64
+ rest-client (~> 2.0)
65
+ lru_redux (1.1.0)
66
+ mime-types (3.3.1)
67
+ mime-types-data (~> 3.2015)
68
+ mime-types-data (3.2020.0512)
69
+ mini_mime (1.0.2)
70
+ minitest (4.7.5)
71
+ msgpack (1.3.3)
72
+ netrc (0.11.0)
73
+ parallel (1.19.2)
74
+ parser (2.7.1.4)
75
+ ast (~> 2.4.1)
76
+ power_assert (1.2.0)
77
+ public_suffix (4.0.5)
78
+ rainbow (3.0.0)
79
+ rake (13.0.1)
80
+ recursive-open-struct (1.1.2)
81
+ regexp_parser (1.7.1)
82
+ rest-client (2.1.0)
83
+ http-accept (>= 1.7.0, < 2.0)
84
+ http-cookie (>= 1.0.2, < 2.0)
85
+ mime-types (>= 1.16, < 4.0)
86
+ netrc (~> 0.8)
87
+ rexml (3.2.4)
88
+ rr (1.2.1)
89
+ rubocop (0.86.0)
90
+ parallel (~> 1.10)
91
+ parser (>= 2.7.0.1)
92
+ rainbow (>= 2.2.2, < 4.0)
93
+ regexp_parser (>= 1.7)
94
+ rexml
95
+ rubocop-ast (>= 0.0.3, < 1.0)
96
+ ruby-progressbar (~> 1.7)
97
+ unicode-display_width (>= 1.4.0, < 2.0)
98
+ rubocop-ast (0.1.0)
99
+ parser (>= 2.7.0.1)
100
+ ruby-progressbar (1.10.1)
101
+ rugged (1.0.1)
102
+ safe_yaml (1.0.5)
103
+ serverengine (2.2.1)
104
+ sigdump (~> 0.2.2)
105
+ sigdump (0.2.4)
106
+ simplecov (0.18.5)
107
+ docile (~> 1.1)
108
+ simplecov-html (~> 0.11)
109
+ simplecov-html (0.12.2)
110
+ strptime (0.2.4)
111
+ test-unit (3.0.9)
112
+ power_assert
113
+ test-unit-rr (1.0.5)
114
+ rr (>= 1.1.1)
115
+ test-unit (>= 2.5.2)
116
+ tzinfo (2.0.2)
117
+ concurrent-ruby (~> 1.0)
118
+ tzinfo-data (1.2020.1)
119
+ tzinfo (>= 1.0.0)
120
+ unf (0.1.4)
121
+ unf_ext
122
+ unf_ext (0.0.7.7)
123
+ unicode-display_width (1.7.0)
124
+ vcr (6.0.0)
125
+ webmock (3.8.3)
126
+ addressable (>= 2.3.6)
127
+ crack (>= 0.3.2)
128
+ hashdiff (>= 0.4.0, < 2.0.0)
129
+ yajl-ruby (1.4.1)
130
+
131
+ PLATFORMS
132
+ ruby
133
+
134
+ DEPENDENCIES
135
+ bump
136
+ bundler (~> 2.0)
137
+ codeclimate-test-reporter (< 1.0.0)
138
+ copyright-header
139
+ fluent-plugin-kubernetes_metadata_filter!
140
+ minitest (~> 4.0)
141
+ rake
142
+ rubocop
143
+ test-unit (~> 3.0.2)
144
+ test-unit-rr (~> 1.0.3)
145
+ vcr
146
+ webmock
147
+ yajl-ruby
148
+
149
+ BUNDLED WITH
150
+ 2.1.4
data/README.md CHANGED
@@ -15,6 +15,7 @@ that rely on the authenticity of the namespace for proper log isolation.
15
15
 
16
16
  | fluent-plugin-kubernetes_metadata_filter | fluentd | ruby |
17
17
  |-------------------|---------|------|
18
+ | >= 2.5.0 | >= v1.10.0 | >= 2.5 |
18
19
  | >= 2.0.0 | >= v0.14.20 | >= 2.1 |
19
20
  | < 2.0.0 | >= v0.12.0 | >= 1.9 |
20
21
 
@@ -47,7 +48,7 @@ This must use named capture groups for `container_name`, `pod_name` & `namespac
47
48
  * *DEPRECATED* `use_journal` - If false, messages are expected to be formatted and tagged as if read by the fluentd in\_tail plugin with wildcard filename. If true, messages are expected to be formatted as if read from the systemd journal. The `MESSAGE` field has the full message. The `CONTAINER_NAME` field has the encoded k8s metadata (see below). The `CONTAINER_ID_FULL` field has the full container uuid. This requires docker to use the `--log-driver=journald` log driver. If unset (the default), the plugin will use the `CONTAINER_NAME` and `CONTAINER_ID_FULL` fields
48
49
  if available, otherwise, will use the tag in the `tag_to_kubernetes_name_regexp` format.
49
50
  * `container_name_to_kubernetes_regexp` - The regular expression used to extract the k8s metadata encoded in the journal `CONTAINER_NAME` field (default: `'^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'`)
50
- * This corresponds to the definition [in the source](https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockertools/docker.go#L317)
51
+ * This corresponds to the definition [in the source](https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker.go#L317)
51
52
  * `annotation_match` - Array of regular expressions matching annotation field names. Matched annotations are added to a log record.
52
53
  * `allow_orphans` - Modify the namespace and namespace id to the values of `orphaned_namespace_name` and `orphaned_namespace_id`
53
54
  when true (default: `true`)
@@ -61,6 +62,7 @@ when true (default: `true`)
61
62
  * `skip_container_metadata` - Skip some of the container data of the metadata. The metadata will not contain the container_image and container_image_id fields.
62
63
  * `skip_master_url` - Skip the master_url field from the metadata.
63
64
  * `skip_namespace_metadata` - Skip the namespace_id field from the metadata. The fetch_namespace_metadata function will be skipped. The plugin will be faster and cpu consumption will be less.
65
+ * `watch_retry_interval` - The time interval in seconds for retry backoffs when watch connections fail. (default: `1`)
64
66
 
65
67
  **NOTE:** As of the release 2.1.x of this plugin, it no longer supports parsing the source message into JSON and attaching it to the
66
68
  payload. The following configuration options are removed:
@@ -80,16 +82,36 @@ then the plugin will parse those values using `container_name_to_kubernetes_rege
80
82
  - Otherwise, if the tag matches `tag_to_kubernetes_name_regexp`, the plugin will parse the tag and use those values to
81
83
  look up the metadata
82
84
 
83
- Reading from the JSON formatted log files with `in_tail` and wildcard filenames:
85
+ To read from the JSON-formatted log files with `in_tail` and wildcard filenames while also supporting the CRI-O log format in the same config, you need the fluent plugin "multi-format-parser":
86
+
87
+ ```
88
+ fluent-gem install fluent-plugin-multi-format-parser
89
+ ```
90
+
91
+ The config block could look like this:
84
92
  ```
85
93
  <source>
86
94
  @type tail
87
95
  path /var/log/containers/*.log
88
96
  pos_file fluentd-docker.pos
89
- time_format %Y-%m-%dT%H:%M:%S
90
- tag kubernetes.*
91
- format json
92
97
  read_from_head true
98
+ tag kubernetes.*
99
+ <parse>
100
+ @type multi_format
101
+ <pattern>
102
+ format json
103
+ time_key time
104
+ time_type string
105
+ time_format "%Y-%m-%dT%H:%M:%S.%NZ"
106
+ keep_time_key false
107
+ </pattern>
108
+ <pattern>
109
+ format regexp
110
+ expression /^(?<time>.+) (?<stream>stdout|stderr)( (?<logtag>.))? (?<log>.*)$/
111
+ time_format '%Y-%m-%dT%H:%M:%S.%N%:z'
112
+ keep_time_key false
113
+ </pattern>
114
+ </parse>
93
115
  </source>
94
116
 
95
117
  <filter kubernetes.var.log.containers.**.log>
@@ -128,6 +150,22 @@ Reading from the systemd journal (requires the fluentd `fluent-plugin-systemd` a
128
150
  @type stdout
129
151
  </match>
130
152
  ```
153
+ ## Log content as JSON
154
+ In former versions this plugin parsed the value of the `log` key as JSON. In the current version this feature was removed to avoid duplicating features in the fluentd plugin ecosystem. The value can be parsed with the parser plugin like this:
155
+ ```
156
+ <filter kubernetes.**>
157
+ @type parser
158
+ key_name log
159
+ <parse>
160
+ @type json
161
+ json_parser json
162
+ </parse>
163
+ replace_invalid_sequence true
164
+ reserve_data true # this preserves unparsable log lines
165
+ emit_invalid_record_to_error false # In case of unparsable log lines keep the error log clean
166
+ reserve_time # the time was already parsed in the source, we don't want to overwrite it with current time.
167
+ </filter>
168
+ ```
131
169
 
132
170
  ## Environment variables for Kubernetes
133
171
 
@@ -171,6 +209,7 @@ Then output becomes as belows
171
209
  "host": "jimmi-redhat.localnet",
172
210
  "pod_name":"fabric8-console-controller-98rqc",
173
211
  "pod_id": "c76927af-f563-11e4-b32d-54ee7527188d",
212
+ "pod_ip": "172.17.0.8",
174
213
  "container_name": "fabric8-console-container",
175
214
  "namespace_name": "default",
176
215
  "namespace_id": "23437884-8e08-4d95-850b-e94378c9b2fd",
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |gem|
6
6
  gem.name = "fluent-plugin-kubernetes_metadata_filter"
7
- gem.version = "2.4.1"
7
+ gem.version = "2.5.0"
8
8
  gem.authors = ["Jimmi Dyson"]
9
9
  gem.email = ["jimmidyson@gmail.com"]
10
10
  gem.description = %q{Filter plugin to add Kubernetes metadata}
@@ -19,11 +19,11 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.required_ruby_version = '>= 2.1.0'
21
21
 
22
- gem.add_runtime_dependency 'fluentd', ['>= 0.14.0', '< 2']
22
+ gem.add_runtime_dependency 'fluentd', ['>= 0.14.0', '< 1.12']
23
23
  gem.add_runtime_dependency "lru_redux"
24
24
  gem.add_runtime_dependency "kubeclient", '< 5'
25
25
 
26
- gem.add_development_dependency "bundler", "~> 2.0.2"
26
+ gem.add_development_dependency "bundler", "~> 2.0"
27
27
  gem.add_development_dependency "rake"
28
28
  gem.add_development_dependency "minitest", "~> 4.0"
29
29
  gem.add_development_dependency "test-unit", "~> 3.0.2"
@@ -22,6 +22,7 @@ require_relative 'kubernetes_metadata_common'
22
22
  require_relative 'kubernetes_metadata_stats'
23
23
  require_relative 'kubernetes_metadata_watch_namespaces'
24
24
  require_relative 'kubernetes_metadata_watch_pods'
25
+
25
26
  require 'fluent/plugin/filter'
26
27
  require 'resolv'
27
28
 
@@ -61,7 +62,7 @@ module Fluent::Plugin
61
62
  # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
62
63
  # I would have included them as named groups, but you can't have named groups that are
63
64
  # non-capturing :P
64
- # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockertools/docker.go#L317
65
+ # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker.go#L317
65
66
  config_param :container_name_to_kubernetes_regexp,
66
67
  :string,
67
68
  :default => '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
@@ -80,6 +81,12 @@ module Fluent::Plugin
80
81
  config_param :skip_container_metadata, :bool, default: false
81
82
  config_param :skip_master_url, :bool, default: false
82
83
  config_param :skip_namespace_metadata, :bool, default: false
84
+ # The time interval in seconds for retry backoffs when watch connections fail.
85
+ config_param :watch_retry_interval, :integer, default: 1
86
+ # The base number of exponential backoff for retries.
87
+ config_param :watch_retry_exponential_backoff_base, :integer, default: 2
88
+ # The maximum number of times to retry pod and namespace watches.
89
+ config_param :watch_retry_max_times, :integer, default: 10
83
90
 
84
91
  def fetch_pod_metadata(namespace_name, pod_name)
85
92
  log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}") if log.trace?
@@ -201,6 +208,8 @@ module Fluent::Plugin
201
208
  end
202
209
  @kubernetes_url = "https://#{env_host}:#{env_port}/api"
203
210
  log.debug "Kubernetes URL is now '#{@kubernetes_url}'"
211
+ else
212
+ log.debug "No Kubernetes URL could be found in config or environ"
204
213
  end
205
214
  end
206
215
 
@@ -264,9 +273,10 @@ module Fluent::Plugin
264
273
  end
265
274
 
266
275
  if @watch
267
- thread = Thread.new(self) { |this| this.start_pod_watch }
268
- thread.abort_on_exception = true
269
- namespace_thread = Thread.new(self) { |this| this.start_namespace_watch }
276
+ pod_thread = Thread.new(self) { |this| this.set_up_pod_thread }
277
+ pod_thread.abort_on_exception = true
278
+
279
+ namespace_thread = Thread.new(self) { |this| this.set_up_namespace_thread }
270
280
  namespace_thread.abort_on_exception = true
271
281
  end
272
282
  end
@@ -23,19 +23,92 @@ module KubernetesMetadata
23
23
 
24
24
  include ::KubernetesMetadata::Common
25
25
 
26
+ def set_up_namespace_thread
27
+ # Any failures / exceptions in the initial setup should raise
28
+ # Fluent::ConfigError, so that users can inspect potential errors in
29
+ # the configuration.
30
+ namespace_watcher = start_namespace_watch
31
+ Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
32
+ Thread.current[:namespace_watch_retry_count] = 0
33
+
34
+ # Any failures / exceptions in the followup watcher notice
35
+ # processing will be swallowed and retried. These failures /
36
+ # exceptions could be caused by Kubernetes API being temporarily
37
+ # down. We assume the configuration is correct at this point.
38
+ while true
39
+ begin
40
+ namespace_watcher ||= get_namespaces_and_start_watcher
41
+ process_namespace_watcher_notices(namespace_watcher)
42
+ rescue Exception => e
43
+ @stats.bump(:namespace_watch_failures)
44
+ if Thread.current[:namespace_watch_retry_count] < @watch_retry_max_times
45
+ # Instead of raising exceptions and crashing Fluentd, swallow
46
+ # the exception and reset the watcher.
47
+ log.info(
48
+ "Exception encountered parsing namespace watch event. " \
49
+ "The connection might have been closed. Sleeping for " \
50
+ "#{Thread.current[:namespace_watch_retry_backoff_interval]} " \
51
+ "seconds and resetting the namespace watcher.", e)
52
+ sleep(Thread.current[:namespace_watch_retry_backoff_interval])
53
+ Thread.current[:namespace_watch_retry_count] += 1
54
+ Thread.current[:namespace_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
55
+ namespace_watcher = nil
56
+ else
57
+ # Since the retries have failed too many times, log an error instead
58
+ # of info and raise an exception to trigger a Fluentd restart.
59
+ message =
60
+ "Exception encountered parsing namespace watch event. The " \
61
+ "connection might have been closed. Retried " \
62
+ "#{@watch_retry_max_times} times yet still failing. Restarting."
63
+ log.error(message, e)
64
+ raise Fluent::UnrecoverableError.new(message)
65
+ end
66
+ end
67
+ end
68
+ end
69
+
26
70
  def start_namespace_watch
27
- begin
28
- resource_version = @client.get_namespaces.resourceVersion
29
- watcher = @client.watch_namespaces(resource_version)
30
- rescue Exception=>e
31
- message = "start_namespace_watch: Exception encountered setting up namespace watch from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{e.message}"
32
- message += " (#{e.response})" if e.respond_to?(:response)
33
- log.debug(message)
34
- raise Fluent::ConfigError, message
71
+ return get_namespaces_and_start_watcher
72
+ rescue Exception => e
73
+ message = "start_namespace_watch: Exception encountered setting up " \
74
+ "namespace watch from Kubernetes API #{@apiVersion} endpoint " \
75
+ "#{@kubernetes_url}: #{e.message}"
76
+ message += " (#{e.response})" if e.respond_to?(:response)
77
+ log.debug(message)
78
+
79
+ raise Fluent::ConfigError, message
80
+ end
81
+
82
+ # List all namespaces, record the resourceVersion and return a watcher
83
+ # starting from that resourceVersion.
84
+ def get_namespaces_and_start_watcher
85
+ options = {
86
+ resource_version: '0' # Fetch from API server.
87
+ }
88
+ namespaces = @client.get_namespaces(options)
89
+ namespaces.each do |namespace|
90
+ cache_key = namespace.metadata['uid']
91
+ @namespace_cache[cache_key] = parse_namespace_metadata(namespace)
92
+ @stats.bump(:namespace_cache_host_updates)
35
93
  end
94
+ options[:resource_version] = namespaces.resourceVersion
95
+ watcher = @client.watch_namespaces(options)
96
+ watcher
97
+ end
98
+
99
+ # Reset namespace watch retry count and backoff interval as there is a
100
+ # successful watch notice.
101
+ def reset_namespace_watch_retry_stats
102
+ Thread.current[:namespace_watch_retry_count] = 0
103
+ Thread.current[:namespace_watch_retry_backoff_interval] = @watch_retry_interval
104
+ end
105
+
106
+ # Process a watcher notice and potentially raise an exception.
107
+ def process_namespace_watcher_notices(watcher)
36
108
  watcher.each do |notice|
37
109
  case notice.type
38
110
  when 'MODIFIED'
111
+ reset_namespace_watch_retry_stats
39
112
  cache_key = notice.object['metadata']['uid']
40
113
  cached = @namespace_cache[cache_key]
41
114
  if cached
@@ -45,16 +118,21 @@ module KubernetesMetadata
45
118
  @stats.bump(:namespace_cache_watch_misses)
46
119
  end
47
120
  when 'DELETED'
48
- # ignore and let age out for cases where
121
+ reset_namespace_watch_retry_stats
122
+ # ignore and let age out for cases where
49
123
  # deleted but still processing logs
50
124
  @stats.bump(:namespace_cache_watch_deletes_ignored)
125
+ when 'ERROR'
126
+ @stats.bump(:namespace_watch_error_type_notices)
127
+ message = notice['object']['message'] if notice['object'] && notice['object']['message']
128
+ raise "Error while watching namespaces: #{message}"
51
129
  else
130
+ reset_namespace_watch_retry_stats
52
131
  # Don't pay attention to creations, since the created namespace may not
53
- # be used by any pod on this node.
132
+ # be used by any pod on this node.
54
133
  @stats.bump(:namespace_cache_watch_ignored)
55
134
  end
56
135
  end
57
136
  end
58
-
59
137
  end
60
138
  end
@@ -23,32 +23,95 @@ module KubernetesMetadata
23
23
 
24
24
  include ::KubernetesMetadata::Common
25
25
 
26
- def start_pod_watch
27
- begin
28
- options = {
29
- resource_version: '0' # Fetch from API server.
30
- }
31
- if ENV['K8S_NODE_NAME']
32
- options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME']
33
- end
34
- pods = @client.get_pods(options)
35
- pods.each do |pod|
36
- cache_key = pod.metadata['uid']
37
- @cache[cache_key] = parse_pod_metadata(pod)
38
- @stats.bump(:pod_cache_host_updates)
26
+ def set_up_pod_thread
27
+ # Any failures / exceptions in the initial setup should raise
28
+ # Fluent::ConfigError, so that users can inspect potential errors in
29
+ # the configuration.
30
+ pod_watcher = start_pod_watch
31
+ Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
32
+ Thread.current[:pod_watch_retry_count] = 0
33
+
34
+ # Any failures / exceptions in the followup watcher notice
35
+ # processing will be swallowed and retried. These failures /
36
+ # exceptions could be caused by Kubernetes API being temporarily
37
+ # down. We assume the configuration is correct at this point.
38
+ while true
39
+ begin
40
+ pod_watcher ||= get_pods_and_start_watcher
41
+ process_pod_watcher_notices(pod_watcher)
42
+ rescue Exception => e
43
+ @stats.bump(:pod_watch_failures)
44
+ if Thread.current[:pod_watch_retry_count] < @watch_retry_max_times
45
+ # Instead of raising exceptions and crashing Fluentd, swallow
46
+ # the exception and reset the watcher.
47
+ log.info(
48
+ "Exception encountered parsing pod watch event. The " \
49
+ "connection might have been closed. Sleeping for " \
50
+ "#{Thread.current[:pod_watch_retry_backoff_interval]} " \
51
+ "seconds and resetting the pod watcher.", e)
52
+ sleep(Thread.current[:pod_watch_retry_backoff_interval])
53
+ Thread.current[:pod_watch_retry_count] += 1
54
+ Thread.current[:pod_watch_retry_backoff_interval] *= @watch_retry_exponential_backoff_base
55
+ pod_watcher = nil
56
+ else
57
+ # Since the retries have failed too many times, log an error instead
58
+ # of info and raise an exception to trigger a Fluentd restart.
59
+ message =
60
+ "Exception encountered parsing pod watch event. The " \
61
+ "connection might have been closed. Retried " \
62
+ "#{@watch_retry_max_times} times yet still failing. Restarting."
63
+ log.error(message, e)
64
+ raise Fluent::UnrecoverableError.new(message)
65
+ end
39
66
  end
40
- options[:resource_version] = pods.resourceVersion
41
- watcher = @client.watch_pods(options)
42
- rescue Exception => e
43
- message = "Exception encountered fetching metadata from Kubernetes API endpoint: #{e.message}"
44
- message += " (#{e.response})" if e.respond_to?(:response)
67
+ end
68
+ end
69
+
70
+ def start_pod_watch
71
+ get_pods_and_start_watcher
72
+ rescue Exception => e
73
+ message = "start_pod_watch: Exception encountered setting up pod watch " \
74
+ "from Kubernetes API #{@apiVersion} endpoint " \
75
+ "#{@kubernetes_url}: #{e.message}"
76
+ message += " (#{e.response})" if e.respond_to?(:response)
77
+ log.debug(message)
78
+
79
+ raise Fluent::ConfigError, message
80
+ end
45
81
 
46
- raise Fluent::ConfigError, message
82
+ # List all pods, record the resourceVersion and return a watcher starting
83
+ # from that resourceVersion.
84
+ def get_pods_and_start_watcher
85
+ options = {
86
+ resource_version: '0' # Fetch from API server.
87
+ }
88
+ if ENV['K8S_NODE_NAME']
89
+ options[:field_selector] = 'spec.nodeName=' + ENV['K8S_NODE_NAME']
90
+ end
91
+ pods = @client.get_pods(options)
92
+ pods.each do |pod|
93
+ cache_key = pod.metadata['uid']
94
+ @cache[cache_key] = parse_pod_metadata(pod)
95
+ @stats.bump(:pod_cache_host_updates)
47
96
  end
97
+ options[:resource_version] = pods.resourceVersion
98
+ watcher = @client.watch_pods(options)
99
+ watcher
100
+ end
101
+
102
+ # Reset pod watch retry count and backoff interval as there is a
103
+ # successful watch notice.
104
+ def reset_pod_watch_retry_stats
105
+ Thread.current[:pod_watch_retry_count] = 0
106
+ Thread.current[:pod_watch_retry_backoff_interval] = @watch_retry_interval
107
+ end
48
108
 
109
+ # Process a watcher notice and potentially raise an exception.
110
+ def process_pod_watcher_notices(watcher)
49
111
  watcher.each do |notice|
50
112
  case notice.type
51
113
  when 'MODIFIED'
114
+ reset_pod_watch_retry_stats
52
115
  cache_key = notice.object['metadata']['uid']
53
116
  cached = @cache[cache_key]
54
117
  if cached
@@ -61,10 +124,16 @@ module KubernetesMetadata
61
124
  @stats.bump(:pod_cache_watch_misses)
62
125
  end
63
126
  when 'DELETED'
127
+ reset_pod_watch_retry_stats
64
128
  # ignore and let age out for cases where pods
65
129
  # deleted but still processing logs
66
130
  @stats.bump(:pod_cache_watch_delete_ignored)
131
+ when 'ERROR'
132
+ @stats.bump(:pod_watch_error_type_notices)
133
+ message = notice['object']['message'] if notice['object'] && notice['object']['message']
134
+ raise "Error while watching pods: #{message}"
67
135
  else
136
+ reset_pod_watch_retry_stats
68
137
  # Don't pay attention to creations, since the created pod may not
69
138
  # end up on this node.
70
139
  @stats.bump(:pod_cache_watch_ignored)
@@ -25,6 +25,24 @@ class WatchNamespacesTestTest < WatchTest
25
25
  include KubernetesMetadata::WatchNamespaces
26
26
 
27
27
  setup do
28
+ @initial = Kubeclient::Common::EntityList.new(
29
+ 'NamespaceList',
30
+ '123',
31
+ [
32
+ Kubeclient::Resource.new({
33
+ 'metadata' => {
34
+ 'name' => 'initial',
35
+ 'uid' => 'initial_uid'
36
+ }
37
+ }),
38
+ Kubeclient::Resource.new({
39
+ 'metadata' => {
40
+ 'name' => 'modified',
41
+ 'uid' => 'modified_uid'
42
+ }
43
+ })
44
+ ])
45
+
28
46
  @created = OpenStruct.new(
29
47
  type: 'CREATED',
30
48
  object: {
@@ -52,11 +70,39 @@ class WatchNamespacesTestTest < WatchTest
52
70
  }
53
71
  }
54
72
  )
73
+ @error = OpenStruct.new(
74
+ type: 'ERROR',
75
+ object: {
76
+ 'message' => 'some error message'
77
+ }
78
+ )
55
79
  end
56
80
 
81
+ test 'namespace list caches namespaces' do
82
+ @client.stub :get_namespaces, @initial do
83
+ process_namespace_watcher_notices(start_namespace_watch)
84
+ assert_equal(true, @namespace_cache.key?('initial_uid'))
85
+ assert_equal(true, @namespace_cache.key?('modified_uid'))
86
+ assert_equal(2, @stats[:namespace_cache_host_updates])
87
+ end
88
+ end
89
+
90
+ test 'namespace list caches namespaces and watch updates' do
91
+ orig_env_val = ENV['K8S_NODE_NAME']
92
+ ENV['K8S_NODE_NAME'] = 'aNodeName'
93
+ @client.stub :get_namespaces, @initial do
94
+ @client.stub :watch_namespaces, [@modified] do
95
+ process_namespace_watcher_notices(start_namespace_watch)
96
+ assert_equal(2, @stats[:namespace_cache_host_updates])
97
+ assert_equal(1, @stats[:namespace_cache_watch_updates])
98
+ end
99
+ end
100
+ ENV['K8S_NODE_NAME'] = orig_env_val
101
+ end
102
+
57
103
  test 'namespace watch ignores CREATED' do
58
104
  @client.stub :watch_namespaces, [@created] do
59
- start_namespace_watch
105
+ process_namespace_watcher_notices(start_namespace_watch)
60
106
  assert_equal(false, @namespace_cache.key?('created_uid'))
61
107
  assert_equal(1, @stats[:namespace_cache_watch_ignored])
62
108
  end
@@ -64,7 +110,7 @@ class WatchNamespacesTestTest < WatchTest
64
110
 
65
111
  test 'namespace watch ignores MODIFIED when info not in cache' do
66
112
  @client.stub :watch_namespaces, [@modified] do
67
- start_namespace_watch
113
+ process_namespace_watcher_notices(start_namespace_watch)
68
114
  assert_equal(false, @namespace_cache.key?('modified_uid'))
69
115
  assert_equal(1, @stats[:namespace_cache_watch_misses])
70
116
  end
@@ -73,7 +119,7 @@ class WatchNamespacesTestTest < WatchTest
73
119
  test 'namespace watch updates cache when MODIFIED is received and info is cached' do
74
120
  @namespace_cache['modified_uid'] = {}
75
121
  @client.stub :watch_namespaces, [@modified] do
76
- start_namespace_watch
122
+ process_namespace_watcher_notices(start_namespace_watch)
77
123
  assert_equal(true, @namespace_cache.key?('modified_uid'))
78
124
  assert_equal(1, @stats[:namespace_cache_watch_updates])
79
125
  end
@@ -82,10 +128,55 @@ class WatchNamespacesTestTest < WatchTest
82
128
  test 'namespace watch ignores DELETED' do
83
129
  @namespace_cache['deleted_uid'] = {}
84
130
  @client.stub :watch_namespaces, [@deleted] do
85
- start_namespace_watch
131
+ process_namespace_watcher_notices(start_namespace_watch)
86
132
  assert_equal(true, @namespace_cache.key?('deleted_uid'))
87
133
  assert_equal(1, @stats[:namespace_cache_watch_deletes_ignored])
88
134
  end
89
135
  end
90
136
 
137
+ test 'namespace watch retries when exceptions are encountered' do
138
+ @client.stub :get_namespaces, @initial do
139
+ @client.stub :watch_namespaces, [[@created, @exception_raised]] do
140
+ assert_raise Fluent::UnrecoverableError do
141
+ set_up_namespace_thread
142
+ end
143
+ assert_equal(3, @stats[:namespace_watch_failures])
144
+ assert_equal(2, Thread.current[:namespace_watch_retry_count])
145
+ assert_equal(4, Thread.current[:namespace_watch_retry_backoff_interval])
146
+ assert_nil(@stats[:namespace_watch_error_type_notices])
147
+ end
148
+ end
149
+ end
150
+
151
+ test 'namespace watch retries when error is received' do
152
+ @client.stub :get_namespaces, @initial do
153
+ @client.stub :watch_namespaces, [@error] do
154
+ assert_raise Fluent::UnrecoverableError do
155
+ set_up_namespace_thread
156
+ end
157
+ assert_equal(3, @stats[:namespace_watch_failures])
158
+ assert_equal(2, Thread.current[:namespace_watch_retry_count])
159
+ assert_equal(4, Thread.current[:namespace_watch_retry_backoff_interval])
160
+ assert_equal(3, @stats[:namespace_watch_error_type_notices])
161
+ end
162
+ end
163
+ end
164
+
165
+ test 'namespace watch continues after retries succeed' do
166
+ @client.stub :get_namespaces, @initial do
167
+ @client.stub :watch_namespaces, [@modified, @error, @modified] do
168
+ # Force the infinite watch loop to exit after 3 seconds. Verifies that
169
+ # no unrecoverable error was thrown during this period of time.
170
+ assert_raise Timeout::Error.new('execution expired') do
171
+ Timeout.timeout(3) do
172
+ set_up_namespace_thread
173
+ end
174
+ end
175
+ assert_operator(@stats[:namespace_watch_failures], :>=, 3)
176
+ assert_operator(Thread.current[:namespace_watch_retry_count], :<=, 1)
177
+ assert_operator(Thread.current[:namespace_watch_retry_backoff_interval], :<=, 1)
178
+ assert_operator(@stats[:namespace_watch_error_type_notices], :>=, 3)
179
+ end
180
+ end
181
+ end
91
182
  end
@@ -136,13 +136,19 @@ class DefaultPodWatchStrategyTest < WatchTest
136
136
  }
137
137
  }
138
138
  )
139
+ @error = OpenStruct.new(
140
+ type: 'ERROR',
141
+ object: {
142
+ 'message' => 'some error message'
143
+ }
144
+ )
139
145
  end
140
146
 
141
147
  test 'pod list caches pods' do
142
148
  orig_env_val = ENV['K8S_NODE_NAME']
143
149
  ENV['K8S_NODE_NAME'] = 'aNodeName'
144
150
  @client.stub :get_pods, @initial do
145
- start_pod_watch
151
+ process_pod_watcher_notices(start_pod_watch)
146
152
  assert_equal(true, @cache.key?('initial_uid'))
147
153
  assert_equal(true, @cache.key?('modified_uid'))
148
154
  assert_equal(2, @stats[:pod_cache_host_updates])
@@ -155,7 +161,7 @@ class DefaultPodWatchStrategyTest < WatchTest
155
161
  ENV['K8S_NODE_NAME'] = 'aNodeName'
156
162
  @client.stub :get_pods, @initial do
157
163
  @client.stub :watch_pods, [@modified] do
158
- start_pod_watch
164
+ process_pod_watcher_notices(start_pod_watch)
159
165
  assert_equal(2, @stats[:pod_cache_host_updates])
160
166
  assert_equal(1, @stats[:pod_cache_watch_updates])
161
167
  end
@@ -166,7 +172,7 @@ class DefaultPodWatchStrategyTest < WatchTest
166
172
  test 'pod watch notice ignores CREATED' do
167
173
  @client.stub :get_pods, @initial do
168
174
  @client.stub :watch_pods, [@created] do
169
- start_pod_watch
175
+ process_pod_watcher_notices(start_pod_watch)
170
176
  assert_equal(false, @cache.key?('created_uid'))
171
177
  assert_equal(1, @stats[:pod_cache_watch_ignored])
172
178
  end
@@ -175,7 +181,7 @@ class DefaultPodWatchStrategyTest < WatchTest
175
181
 
176
182
  test 'pod watch notice is ignored when info not cached and MODIFIED is received' do
177
183
  @client.stub :watch_pods, [@modified] do
178
- start_pod_watch
184
+ process_pod_watcher_notices(start_pod_watch)
179
185
  assert_equal(false, @cache.key?('modified_uid'))
180
186
  assert_equal(1, @stats[:pod_cache_watch_misses])
181
187
  end
@@ -185,7 +191,7 @@ class DefaultPodWatchStrategyTest < WatchTest
185
191
  orig_env_val = ENV['K8S_NODE_NAME']
186
192
  ENV['K8S_NODE_NAME'] = 'aNodeName'
187
193
  @client.stub :watch_pods, [@modified] do
188
- start_pod_watch
194
+ process_pod_watcher_notices(start_pod_watch)
189
195
  assert_equal(true, @cache.key?('modified_uid'))
190
196
  assert_equal(1, @stats[:pod_cache_host_updates])
191
197
  end
@@ -195,7 +201,7 @@ class DefaultPodWatchStrategyTest < WatchTest
195
201
  test 'pod watch notice is updated when MODIFIED is received' do
196
202
  @cache['modified_uid'] = {}
197
203
  @client.stub :watch_pods, [@modified] do
198
- start_pod_watch
204
+ process_pod_watcher_notices(start_pod_watch)
199
205
  assert_equal(true, @cache.key?('modified_uid'))
200
206
  assert_equal(1, @stats[:pod_cache_watch_updates])
201
207
  end
@@ -204,10 +210,55 @@ class DefaultPodWatchStrategyTest < WatchTest
204
210
  test 'pod watch notice is ignored when delete is received' do
205
211
  @cache['deleted_uid'] = {}
206
212
  @client.stub :watch_pods, [@deleted] do
207
- start_pod_watch
213
+ process_pod_watcher_notices(start_pod_watch)
208
214
  assert_equal(true, @cache.key?('deleted_uid'))
209
215
  assert_equal(1, @stats[:pod_cache_watch_delete_ignored])
210
216
  end
211
217
  end
212
218
 
219
+ test 'pod watch retries when exceptions are encountered' do
220
+ @client.stub :get_pods, @initial do
221
+ @client.stub :watch_pods, [[@created, @exception_raised]] do
222
+ assert_raise Fluent::UnrecoverableError do
223
+ set_up_pod_thread
224
+ end
225
+ assert_equal(3, @stats[:pod_watch_failures])
226
+ assert_equal(2, Thread.current[:pod_watch_retry_count])
227
+ assert_equal(4, Thread.current[:pod_watch_retry_backoff_interval])
228
+ assert_nil(@stats[:pod_watch_error_type_notices])
229
+ end
230
+ end
231
+ end
232
+
233
+ test 'pod watch retries when error is received' do
234
+ @client.stub :get_pods, @initial do
235
+ @client.stub :watch_pods, [@error] do
236
+ assert_raise Fluent::UnrecoverableError do
237
+ set_up_pod_thread
238
+ end
239
+ assert_equal(3, @stats[:pod_watch_failures])
240
+ assert_equal(2, Thread.current[:pod_watch_retry_count])
241
+ assert_equal(4, Thread.current[:pod_watch_retry_backoff_interval])
242
+ assert_equal(3, @stats[:pod_watch_error_type_notices])
243
+ end
244
+ end
245
+ end
246
+
247
+ test 'pod watch continues after retries succeed' do
248
+ @client.stub :get_pods, @initial do
249
+ @client.stub :watch_pods, [@modified, @error, @modified] do
250
+ # Force the infinite watch loop to exit after 3 seconds. Verifies that
251
+ # no unrecoverable error was thrown during this period of time.
252
+ assert_raise Timeout::Error.new('execution expired') do
253
+ Timeout.timeout(3) do
254
+ set_up_pod_thread
255
+ end
256
+ end
257
+ assert_operator(@stats[:pod_watch_failures], :>=, 3)
258
+ assert_operator(Thread.current[:pod_watch_retry_count], :<=, 1)
259
+ assert_operator(Thread.current[:pod_watch_retry_backoff_interval], :<=, 1)
260
+ assert_operator(@stats[:pod_watch_error_type_notices], :>=, 3)
261
+ end
262
+ end
263
+ end
213
264
  end
@@ -20,38 +20,56 @@ require_relative '../helper'
20
20
  require 'ostruct'
21
21
 
22
22
  class WatchTest < Test::Unit::TestCase
23
-
24
- setup do
25
- @annotations_regexps = []
26
- @namespace_cache = {}
27
- @cache = {}
28
- @stats = KubernetesMetadata::Stats.new
29
- @client = OpenStruct.new
30
- def @client.resourceVersion
31
- '12345'
32
- end
33
- def @client.watch_pods(options = {})
34
- []
35
- end
36
- def @client.watch_namespaces(options = {})
37
- []
38
- end
39
- def @client.get_namespaces(options = {})
40
- self
41
- end
42
- def @client.get_pods(options = {})
43
- self
44
- end
45
- end
46
23
 
47
- def watcher=(value)
24
+ def thread_current_running?
25
+ true
26
+ end
27
+
28
+ setup do
29
+ @annotations_regexps = []
30
+ @namespace_cache = {}
31
+ @watch_retry_max_times = 2
32
+ @watch_retry_interval = 1
33
+ @watch_retry_exponential_backoff_base = 2
34
+ @cache = {}
35
+ @stats = KubernetesMetadata::Stats.new
36
+ Thread.current[:pod_watch_retry_count] = 0
37
+ Thread.current[:namespace_watch_retry_count] = 0
38
+
39
+ @client = OpenStruct.new
40
+ def @client.resourceVersion
41
+ '12345'
42
+ end
43
+ def @client.watch_pods(options = {})
44
+ []
45
+ end
46
+ def @client.watch_namespaces(options = {})
47
+ []
48
+ end
49
+ def @client.get_namespaces(options = {})
50
+ self
51
+ end
52
+ def @client.get_pods(options = {})
53
+ self
48
54
  end
49
55
 
50
- def log
51
- logger = {}
52
- def logger.debug(message)
53
- end
54
- logger
56
+ @exception_raised = OpenStruct.new
57
+ def @exception_raised.each
58
+ raise Exception
55
59
  end
60
+ end
61
+
62
+ def watcher=(value)
63
+ end
56
64
 
65
+ def log
66
+ logger = {}
67
+ def logger.debug(message)
68
+ end
69
+ def logger.info(message, error)
70
+ end
71
+ def logger.error(message, error)
72
+ end
73
+ logger
74
+ end
57
75
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-kubernetes_metadata_filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.1
4
+ version: 2.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jimmi Dyson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-12-09 00:00:00.000000000 Z
11
+ date: 2020-07-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -19,7 +19,7 @@ dependencies:
19
19
  version: 0.14.0
20
20
  - - "<"
21
21
  - !ruby/object:Gem::Version
22
- version: '2'
22
+ version: '1.12'
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
@@ -29,7 +29,7 @@ dependencies:
29
29
  version: 0.14.0
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
- version: '2'
32
+ version: '1.12'
33
33
  - !ruby/object:Gem::Dependency
34
34
  name: lru_redux
35
35
  requirement: !ruby/object:Gem::Requirement
@@ -64,14 +64,14 @@ dependencies:
64
64
  requirements:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
- version: 2.0.2
67
+ version: '2.0'
68
68
  type: :development
69
69
  prerelease: false
70
70
  version_requirements: !ruby/object:Gem::Requirement
71
71
  requirements:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
- version: 2.0.2
74
+ version: '2.0'
75
75
  - !ruby/object:Gem::Dependency
76
76
  name: rake
77
77
  requirement: !ruby/object:Gem::Requirement
@@ -208,6 +208,7 @@ files:
208
208
  - ".circleci/config.yml"
209
209
  - ".gitignore"
210
210
  - Gemfile
211
+ - Gemfile.lock
211
212
  - LICENSE.txt
212
213
  - README.md
213
214
  - Rakefile
@@ -258,7 +259,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
258
259
  - !ruby/object:Gem::Version
259
260
  version: '0'
260
261
  requirements: []
261
- rubygems_version: 3.0.3
262
+ rubygems_version: 3.0.8
262
263
  signing_key:
263
264
  specification_version: 4
264
265
  summary: Fluentd filter plugin to add Kubernetes metadata