fluent-plugin-kubernetes_metadata_filter 2.5.0 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +10 -14
  3. data/.gitignore +0 -1
  4. data/.rubocop.yml +57 -0
  5. data/Gemfile +4 -2
  6. data/Gemfile.lock +76 -67
  7. data/README.md +9 -83
  8. data/Rakefile +15 -11
  9. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/cpu.png +0 -0
  10. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/latency.png +0 -0
  11. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/loss.png +0 -0
  12. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/mem.png +0 -0
  13. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/readme.md +88 -0
  14. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/results.html +127 -0
  15. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/cpu.png +0 -0
  16. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/latency.png +0 -0
  17. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/loss.png +0 -0
  18. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/mem.png +0 -0
  19. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/readme.md +97 -0
  20. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/results.html +136 -0
  21. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/cpu.png +0 -0
  22. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/latency.png +0 -0
  23. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/loss.png +0 -0
  24. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/mem.png +0 -0
  25. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/readme.md +97 -0
  26. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/results.html +136 -0
  27. data/fluent-plugin-kubernetes_metadata_filter.gemspec +25 -27
  28. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +171 -192
  29. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +25 -23
  30. data/lib/fluent/plugin/kubernetes_metadata_common.rb +44 -69
  31. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +21 -5
  32. data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb +68 -0
  33. data/lib/fluent/plugin/kubernetes_metadata_util.rb +33 -0
  34. data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +91 -42
  35. data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +108 -47
  36. data/release_notes.md +42 -0
  37. data/test/cassettes/kubernetes_get_pod_container_init.yml +145 -0
  38. data/test/helper.rb +20 -2
  39. data/test/plugin/test_cache_stats.rb +10 -13
  40. data/test/plugin/test_cache_strategy.rb +158 -160
  41. data/test/plugin/test_filter_kubernetes_metadata.rb +340 -616
  42. data/test/plugin/test_watch_namespaces.rb +188 -125
  43. data/test/plugin/test_watch_pods.rb +282 -202
  44. data/test/plugin/watch_test.rb +16 -15
  45. metadata +77 -67
  46. /data/test/cassettes/{kubernetes_docker_metadata_dotted_labels.yml → kubernetes_docker_metadata_dotted_slashed_labels.yml} +0 -0
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #
2
4
  # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
3
5
  # Kubernetes metadata
@@ -20,6 +22,7 @@
20
22
  require_relative 'kubernetes_metadata_cache_strategy'
21
23
  require_relative 'kubernetes_metadata_common'
22
24
  require_relative 'kubernetes_metadata_stats'
25
+ require_relative 'kubernetes_metadata_util'
23
26
  require_relative 'kubernetes_metadata_watch_namespaces'
24
27
  require_relative 'kubernetes_metadata_watch_pods'
25
28
 
@@ -47,25 +50,21 @@ module Fluent::Plugin
47
50
  config_param :client_key, :string, default: nil
48
51
  config_param :ca_file, :string, default: nil
49
52
  config_param :verify_ssl, :bool, default: true
50
- config_param :tag_to_kubernetes_name_regexp,
51
- :string,
52
- :default => 'var\.log\.containers\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
53
+ config_param :open_timeout, :integer, default: 3
54
+ config_param :read_timeout, :integer, default: 10
55
+
56
+ REGEX_VAR_LOG_PODS = '(var\.log\.pods)\.(?<namespace>[^_]+)_(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<pod_uuid>[a-z0-9-]*)\.(?<container_name>.+)\..*\.log$'
57
+ REGEX_VAR_LOG_CONTAINERS = '(var\.log\.containers)\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
58
+
59
+ # tag_to_kubernetes_name_regexp must include the following named capture groups:
60
+ # namespace - The namespace in which the pod is deployed
61
+ # pod_name - The pod name
62
+ # container_name - The name of the container
63
+ # pod_uuid (/var/log/pods) | docker_id (/var/log/containers) - Unique identifier used in caching of either pod_uuid or the container hash
64
+ config_param :tag_to_kubernetes_name_regexp, :string, default: "(#{REGEX_VAR_LOG_PODS}|#{REGEX_VAR_LOG_CONTAINERS})"
65
+
53
66
  config_param :bearer_token_file, :string, default: nil
54
67
  config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
55
- config_param :de_dot, :bool, default: true
56
- config_param :de_dot_separator, :string, default: '_'
57
- # if reading from the journal, the record will contain the following fields in the following
58
- # format:
59
- # CONTAINER_NAME=k8s_$containername.$containerhash_$podname_$namespacename_$poduuid_$rand32bitashex
60
- # CONTAINER_FULL_ID=dockeridassha256hexvalue
61
- config_param :use_journal, :bool, default: nil
62
- # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
63
- # I would have included them as named groups, but you can't have named groups that are
64
- # non-capturing :P
65
- # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker.go#L317
66
- config_param :container_name_to_kubernetes_regexp,
67
- :string,
68
- :default => '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
69
68
 
70
69
  config_param :annotation_match, :array, default: []
71
70
  config_param :stats_interval, :integer, default: 30
@@ -81,6 +80,11 @@ module Fluent::Plugin
81
80
  config_param :skip_container_metadata, :bool, default: false
82
81
  config_param :skip_master_url, :bool, default: false
83
82
  config_param :skip_namespace_metadata, :bool, default: false
83
+
84
+ # A class name of the form Test::APIAdapter that the plugin tries
85
+ # to resolve from a relative file named 'test_api_adapter'
86
+ config_param :test_api_adapter, :string, default: nil
87
+
84
88
  # The time interval in seconds for retry backoffs when watch connections fail.
85
89
  config_param :watch_retry_interval, :integer, default: 1
86
90
  # The base number of exponential backoff for retries.
@@ -89,36 +93,39 @@ module Fluent::Plugin
89
93
  config_param :watch_retry_max_times, :integer, default: 10
90
94
 
91
95
  def fetch_pod_metadata(namespace_name, pod_name)
92
- log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}") if log.trace?
93
- begin
94
- metadata = @client.get_pod(pod_name, namespace_name)
95
- unless metadata
96
- log.trace("no metadata returned for: #{namespace_name}/#{pod_name}") if log.trace?
97
- @stats.bump(:pod_cache_api_nil_not_found)
96
+ log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}")
97
+ options = {
98
+ resource_version: '0' # Fetch from API server cache instead of etcd quorum read
99
+ }
100
+ pod_object = @client.get_pod(pod_name, namespace_name, options)
101
+ log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{pod_object}")
102
+ metadata = parse_pod_metadata(pod_object)
103
+ @stats.bump(:pod_cache_api_updates)
104
+ log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}")
105
+ @cache[metadata['pod_id']] = metadata
106
+ rescue KubeException => e
107
+ if e.error_code == 401
108
+ # recreate client to refresh token
109
+ log.info("Encountered '401 Unauthorized' exception, recreating client to refresh token")
110
+ create_client()
111
+ elsif e.error_code == 404
112
+ log.debug "Encountered '404 Not Found' exception, pod not found"
113
+ @stats.bump(:pod_cache_api_nil_error)
98
114
  else
99
- begin
100
- log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
101
- metadata = parse_pod_metadata(metadata)
102
- @stats.bump(:pod_cache_api_updates)
103
- log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
104
- @cache[metadata['pod_id']] = metadata
105
- return metadata
106
- rescue Exception=>e
107
- log.debug(e)
108
- @stats.bump(:pod_cache_api_nil_bad_resp_payload)
109
- log.trace("returning empty metadata for #{namespace_name}/#{pod_name} due to error '#{e}'") if log.trace?
110
- end
115
+ log.error "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
116
+ @stats.bump(:pod_cache_api_nil_error)
111
117
  end
112
- rescue Exception=>e
118
+ {}
119
+ rescue StandardError => e
113
120
  @stats.bump(:pod_cache_api_nil_error)
114
- log.debug "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
121
+ log.error "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
122
+ {}
115
123
  end
116
- {}
117
- end
118
124
 
119
125
  def dump_stats
120
126
  @curr_time = Time.now
121
127
  return if @curr_time.to_i - @prev_time.to_i < @stats_interval
128
+
122
129
  @prev_time = @curr_time
123
130
  @stats.set(:pod_cache_size, @cache.count)
124
131
  @stats.set(:namespace_cache_size, @namespace_cache.count) if @namespace_cache
@@ -131,55 +138,53 @@ module Fluent::Plugin
131
138
  end
132
139
 
133
140
  def fetch_namespace_metadata(namespace_name)
134
- log.trace("fetching namespace metadata: #{namespace_name}") if log.trace?
135
- begin
136
- metadata = @client.get_namespace(namespace_name)
137
- unless metadata
138
- log.trace("no metadata returned for: #{namespace_name}") if log.trace?
139
- @stats.bump(:namespace_cache_api_nil_not_found)
141
+ log.trace("fetching namespace metadata: #{namespace_name}")
142
+ options = {
143
+ resource_version: '0' # Fetch from API server cache instead of etcd quorum read
144
+ }
145
+ namespace_object = @client.get_namespace(namespace_name, nil, options)
146
+ log.trace("raw metadata for #{namespace_name}: #{namespace_object}")
147
+ metadata = parse_namespace_metadata(namespace_object)
148
+ @stats.bump(:namespace_cache_api_updates)
149
+ log.trace("parsed metadata for #{namespace_name}: #{metadata}")
150
+ @namespace_cache[metadata['namespace_id']] = metadata
151
+ rescue KubeException => e
152
+ if e.error_code == 401
153
+ # recreate client to refresh token
154
+ log.info("Encountered '401 Unauthorized' exception, recreating client to refresh token")
155
+ create_client()
140
156
  else
141
- begin
142
- log.trace("raw metadata for #{namespace_name}: #{metadata}") if log.trace?
143
- metadata = parse_namespace_metadata(metadata)
144
- @stats.bump(:namespace_cache_api_updates)
145
- log.trace("parsed metadata for #{namespace_name}: #{metadata}") if log.trace?
146
- @namespace_cache[metadata['namespace_id']] = metadata
147
- return metadata
148
- rescue Exception => e
149
- log.debug(e)
150
- @stats.bump(:namespace_cache_api_nil_bad_resp_payload)
151
- log.trace("returning empty metadata for #{namespace_name} due to error '#{e}'") if log.trace?
152
- end
157
+ log.error "Exception '#{e}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
158
+ @stats.bump(:namespace_cache_api_nil_error)
153
159
  end
154
- rescue Exception => kube_error
160
+ {}
161
+ rescue StandardError => e
155
162
  @stats.bump(:namespace_cache_api_nil_error)
156
- log.debug "Exception '#{kube_error}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
157
- end
158
- {}
163
+ log.error "Exception '#{e}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
164
+ {}
159
165
  end
160
166
 
161
167
  def initialize
162
168
  super
163
169
  @prev_time = Time.now
170
+ @ssl_options = {}
171
+ @auth_options = {}
164
172
  end
165
173
 
166
174
  def configure(conf)
167
175
  super
168
176
 
169
- def log.trace?
170
- level == Fluent::Log::LEVEL_TRACE
171
- end
172
-
173
177
  require 'kubeclient'
174
178
  require 'lru_redux'
175
- @stats = KubernetesMetadata::Stats.new
176
179
 
177
- if @de_dot && (@de_dot_separator =~ /\./).present?
178
- raise Fluent::ConfigError, "Invalid de_dot_separator: cannot be or contain '.'"
180
+ @stats = KubernetesMetadata::Stats.new
181
+ if @stats_interval <= 0
182
+ @stats = KubernetesMetadata::NoOpStats.new
183
+ self.define_singleton_method(:dump_stats) {}
179
184
  end
180
185
 
181
186
  if @cache_ttl < 0
182
- log.info "Setting the cache TTL to :none because it was <= 0"
187
+ log.info 'Setting the cache TTL to :none because it was <= 0'
183
188
  @cache_ttl = :none
184
189
  end
185
190
 
@@ -193,15 +198,14 @@ module Fluent::Plugin
193
198
  @namespace_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)
194
199
 
195
200
  @tag_to_kubernetes_name_regexp_compiled = Regexp.compile(@tag_to_kubernetes_name_regexp)
196
- @container_name_to_kubernetes_regexp_compiled = Regexp.compile(@container_name_to_kubernetes_regexp)
197
-
201
+
198
202
  # Use Kubernetes default service account if we're in a pod.
199
203
  if @kubernetes_url.nil?
200
- log.debug "Kubernetes URL is not set - inspecting environ"
204
+ log.debug 'Kubernetes URL is not set - inspecting environ'
201
205
 
202
206
  env_host = ENV['KUBERNETES_SERVICE_HOST']
203
207
  env_port = ENV['KUBERNETES_SERVICE_PORT']
204
- if env_host.present? && env_port.present?
208
+ if present?(env_host) && present?(env_port)
205
209
  if env_host =~ Resolv::IPv6::Regex
206
210
  # Brackets are needed around IPv6 addresses
207
211
  env_host = "[#{env_host}]"
@@ -209,7 +213,7 @@ module Fluent::Plugin
209
213
  @kubernetes_url = "https://#{env_host}:#{env_port}/api"
210
214
  log.debug "Kubernetes URL is now '#{@kubernetes_url}'"
211
215
  else
212
- log.debug "No Kubernetes URL could be found in config or environ"
216
+ log.debug 'No Kubernetes URL could be found in config or environ'
213
217
  end
214
218
  end
215
219
 
@@ -219,24 +223,23 @@ module Fluent::Plugin
219
223
  ca_cert = File.join(@secret_dir, K8_POD_CA_CERT)
220
224
  pod_token = File.join(@secret_dir, K8_POD_TOKEN)
221
225
 
222
- if !@ca_file.present? and File.exist?(ca_cert)
226
+ if !present?(@ca_file) && File.exist?(ca_cert)
223
227
  log.debug "Found CA certificate: #{ca_cert}"
224
228
  @ca_file = ca_cert
225
229
  end
226
230
 
227
- if !@bearer_token_file.present? and File.exist?(pod_token)
231
+ if !present?(@bearer_token_file) && File.exist?(pod_token)
228
232
  log.debug "Found pod token: #{pod_token}"
229
233
  @bearer_token_file = pod_token
230
234
  end
231
235
  end
232
236
 
233
- if @kubernetes_url.present?
234
-
235
- ssl_options = {
236
- client_cert: @client_cert.present? ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
237
- client_key: @client_key.present? ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
238
- ca_file: @ca_file,
239
- verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
237
+ if present?(@kubernetes_url)
238
+ @ssl_options = {
239
+ client_cert: present?(@client_cert) ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
240
+ client_key: present?(@client_key) ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
241
+ ca_file: @ca_file,
242
+ verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
240
243
  }
241
244
 
242
245
  if @ssl_partial_chain
@@ -244,157 +247,133 @@ module Fluent::Plugin
244
247
  require 'openssl'
245
248
  ssl_store = OpenSSL::X509::Store.new
246
249
  ssl_store.set_default_paths
247
- if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
248
- flagval = OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
249
- else
250
- # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
251
- flagval = 0x80000
252
- end
250
+ flagval = if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
251
+ OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
252
+ else
253
+ # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
254
+ 0x80000
255
+ end
253
256
  ssl_store.flags = OpenSSL::X509::V_FLAG_CRL_CHECK_ALL | flagval
254
- ssl_options[:cert_store] = ssl_store
257
+ @ssl_options[:cert_store] = ssl_store
255
258
  end
256
259
 
257
- auth_options = {}
258
-
259
- if @bearer_token_file.present?
260
- bearer_token = File.read(@bearer_token_file)
261
- auth_options[:bearer_token] = bearer_token
260
+ if present?(@bearer_token_file)
261
+ @auth_options[:bearer_token_file] = @bearer_token_file
262
262
  end
263
263
 
264
- log.debug "Creating K8S client"
265
- @client = Kubeclient::Client.new @kubernetes_url, @apiVersion,
266
- ssl_options: ssl_options,
267
- auth_options: auth_options
264
+ create_client()
265
+
266
+ if @test_api_adapter
267
+ log.info "Extending client with test api adaper #{@test_api_adapter}"
268
+ require_relative @test_api_adapter.underscore
269
+ @client.extend(eval(@test_api_adapter))
270
+ end
268
271
 
269
272
  begin
270
273
  @client.api_valid?
271
- rescue KubeException => kube_error
272
- raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{kube_error.message}"
274
+ rescue KubeException => e
275
+ raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{e.message}"
273
276
  end
274
277
 
275
278
  if @watch
276
- pod_thread = Thread.new(self) { |this| this.set_up_pod_thread }
279
+ if ENV['K8S_NODE_NAME'].nil? || ENV['K8S_NODE_NAME'].strip.empty?
280
+ log.warn("!! The environment variable 'K8S_NODE_NAME' is not set to the node name which can affect the API server and watch efficiency !!")
281
+ end
282
+
283
+ pod_thread = Thread.new(self, &:set_up_pod_thread)
277
284
  pod_thread.abort_on_exception = true
278
285
 
279
- namespace_thread = Thread.new(self) { |this| this.set_up_namespace_thread }
286
+ namespace_thread = Thread.new(self, &:set_up_namespace_thread)
280
287
  namespace_thread.abort_on_exception = true
281
288
  end
282
289
  end
283
- @time_fields = []
284
- @time_fields.push('_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP') if @use_journal || @use_journal.nil?
285
- @time_fields.push('time') unless @use_journal
286
- @time_fields.push('@timestamp') if @lookup_from_k8s_field
287
290
 
288
291
  @annotations_regexps = []
289
292
  @annotation_match.each do |regexp|
290
- begin
291
- @annotations_regexps << Regexp.compile(regexp)
292
- rescue RegexpError => e
293
- log.error "Error: invalid regular expression in annotation_match: #{e}"
294
- end
293
+ @annotations_regexps << Regexp.compile(regexp)
294
+ rescue RegexpError => e
295
+ log.error "Error: invalid regular expression in annotation_match: #{e}"
295
296
  end
297
+ end
296
298
 
299
+ def create_client()
300
+ log.debug 'Creating K8S client'
301
+ @client = nil
302
+ @client = Kubeclient::Client.new(
303
+ @kubernetes_url,
304
+ @apiVersion,
305
+ ssl_options: @ssl_options,
306
+ auth_options: @auth_options,
307
+ timeouts: {
308
+ open: @open_timeout,
309
+ read: @read_timeout
310
+ },
311
+ as: :parsed_symbolized
312
+ )
297
313
  end
298
314
 
299
- def get_metadata_for_record(namespace_name, pod_name, container_name, container_id, create_time, batch_miss_cache)
315
+ def get_metadata_for_record(namespace_name, pod_name, container_name, cache_key, create_time, batch_miss_cache, docker_id)
300
316
  metadata = {
301
- 'docker' => {'container_id' => container_id},
317
+ 'docker' => { 'container_id' => "" },
302
318
  'kubernetes' => {
303
- 'container_name' => container_name,
304
- 'namespace_name' => namespace_name,
305
- 'pod_name' => pod_name
319
+ 'container_name' => container_name,
320
+ 'namespace_name' => namespace_name,
321
+ 'pod_name' => pod_name
306
322
  }
307
323
  }
308
- if @kubernetes_url.present?
309
- pod_metadata = get_pod_metadata(container_id, namespace_name, pod_name, create_time, batch_miss_cache)
310
-
311
- if (pod_metadata.include? 'containers') && (pod_metadata['containers'].include? container_id) && !@skip_container_metadata
312
- metadata['kubernetes']['container_image'] = pod_metadata['containers'][container_id]['image']
313
- metadata['kubernetes']['container_image_id'] = pod_metadata['containers'][container_id]['image_id']
324
+ metadata['docker']['container_id'] = docker_id unless docker_id.nil?
325
+ container_cache_key = container_name
326
+ if present?(@kubernetes_url)
327
+ pod_metadata = get_pod_metadata(cache_key, namespace_name, pod_name, create_time, batch_miss_cache)
328
+ if (pod_metadata.include? 'containers') && (pod_metadata['containers'].include? container_cache_key) && !@skip_container_metadata
329
+ metadata['kubernetes']['container_image'] = pod_metadata['containers'][container_cache_key]['image']
330
+ metadata['kubernetes']['container_image_id'] = pod_metadata['containers'][container_cache_key]['image_id'] unless pod_metadata['containers'][container_cache_key]['image_id'].empty?
331
+ metadata['docker']['container_id'] = pod_metadata['containers'][container_cache_key]['containerID'] unless pod_metadata['containers'][container_cache_key]['containerID'].empty?
314
332
  end
315
333
 
316
334
  metadata['kubernetes'].merge!(pod_metadata) if pod_metadata
317
335
  metadata['kubernetes'].delete('containers')
318
336
  end
319
- metadata
320
- end
321
-
322
- def create_time_from_record(record, internal_time)
323
- time_key = @time_fields.detect{ |ii| record.has_key?(ii) }
324
- time = record[time_key]
325
- if time.nil? || time.chop.empty?
326
- # `internal_time` is a Fluent::EventTime, it can't compare with Time.
327
- return Time.at(internal_time.to_f)
328
- end
329
- if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
330
- timei= time.to_i
331
- return Time.at(timei / 1000000, timei % 1000000)
337
+ metadata['kubernetes'].tap do |kube|
338
+ kube.each_pair do |k,v|
339
+ kube[k.dup] = v.dup
340
+ end
332
341
  end
333
- return Time.parse(time)
342
+ metadata.delete('docker') if metadata['docker'] && (metadata['docker']['container_id'].nil? || metadata['docker']['container_id'].empty?)
343
+ metadata
334
344
  end
335
345
 
336
- def filter_stream(tag, es)
337
- return es if (es.respond_to?(:empty?) && es.empty?) || !es.is_a?(Fluent::EventStream)
338
- new_es = Fluent::MultiEventStream.new
339
- tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled) unless @use_journal
340
- tag_metadata = nil
346
+ def filter(tag, time, record)
347
+ tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled)
341
348
  batch_miss_cache = {}
342
- es.each do |time, record|
343
- if tag_match_data && tag_metadata.nil?
344
- tag_metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
345
- tag_match_data['docker_id'], create_time_from_record(record, time), batch_miss_cache)
346
- end
347
- metadata = Marshal.load(Marshal.dump(tag_metadata)) if tag_metadata
348
- if (@use_journal || @use_journal.nil?) &&
349
- (j_metadata = get_metadata_for_journal_record(record, time, batch_miss_cache))
350
- metadata = j_metadata
351
- end
352
- if @lookup_from_k8s_field && record.has_key?('kubernetes') && record.has_key?('docker') &&
349
+ if tag_match_data
350
+ cache_key = if tag_match_data.names.include?('pod_uuid') && !tag_match_data['pod_uuid'].nil?
351
+ tag_match_data['pod_uuid']
352
+ else
353
+ tag_match_data['docker_id']
354
+ end
355
+ docker_id = tag_match_data.names.include?('docker_id') ? tag_match_data['docker_id'] : nil
356
+ metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
357
+ cache_key, time, batch_miss_cache, docker_id)
358
+ end
359
+ if @lookup_from_k8s_field && record.key?('kubernetes') && record.key?('docker') &&
353
360
  record['kubernetes'].respond_to?(:has_key?) && record['docker'].respond_to?(:has_key?) &&
354
- record['kubernetes'].has_key?('namespace_name') &&
355
- record['kubernetes'].has_key?('pod_name') &&
356
- record['kubernetes'].has_key?('container_name') &&
357
- record['docker'].has_key?('container_id') &&
361
+ record['kubernetes'].key?('namespace_name') &&
362
+ record['kubernetes'].key?('pod_name') &&
363
+ record['kubernetes'].key?('container_name') &&
364
+ record['docker'].key?('container_id') &&
358
365
  (k_metadata = get_metadata_for_record(record['kubernetes']['namespace_name'], record['kubernetes']['pod_name'],
359
- record['kubernetes']['container_name'], record['docker']['container_id'],
360
- create_time_from_record(record, time), batch_miss_cache))
361
- metadata = k_metadata
362
- end
363
-
364
- record = record.merge(metadata) if metadata
365
- new_es.add(time, record)
366
+ record['kubernetes']['container_name'], record['docker']['container_id'],
367
+ time, batch_miss_cache, record['docker']['container_id']))
368
+ metadata = k_metadata
366
369
  end
367
370
  dump_stats
368
- new_es
369
- end
370
-
371
- def get_metadata_for_journal_record(record, time, batch_miss_cache)
372
- metadata = nil
373
- if record.has_key?('CONTAINER_NAME') && record.has_key?('CONTAINER_ID_FULL')
374
- metadata = record['CONTAINER_NAME'].match(@container_name_to_kubernetes_regexp_compiled) do |match_data|
375
- get_metadata_for_record(match_data['namespace'], match_data['pod_name'], match_data['container_name'],
376
- record['CONTAINER_ID_FULL'], create_time_from_record(record, time), batch_miss_cache)
377
- end
378
- unless metadata
379
- log.debug "Error: could not match CONTAINER_NAME from record #{record}"
380
- @stats.bump(:container_name_match_failed)
381
- end
382
- elsif record.has_key?('CONTAINER_NAME') && record['CONTAINER_NAME'].start_with?('k8s_')
383
- log.debug "Error: no container name and id in record #{record}"
384
- @stats.bump(:container_name_id_missing)
385
- end
386
- metadata
371
+ metadata ? record.merge(metadata) : record
387
372
  end
388
373
 
389
- def de_dot!(h)
390
- h.keys.each do |ref|
391
- if h[ref] && ref =~ /\./
392
- v = h.delete(ref)
393
- newref = ref.to_s.gsub('.', @de_dot_separator)
394
- h[newref] = v
395
- end
396
- end
374
+ # copied from activesupport
375
+ def present?(object)
376
+ object.respond_to?(:empty?) ? !object.empty? : !!object
397
377
  end
398
-
399
378
  end
400
379
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #
2
4
  # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
3
5
  # Kubernetes metadata
@@ -18,34 +20,21 @@
18
20
  #
19
21
  module KubernetesMetadata
20
22
  module CacheStrategy
21
- def get_pod_metadata(key, namespace_name, pod_name, record_create_time, batch_miss_cache)
23
+ def get_pod_metadata(key, namespace_name, pod_name, time, batch_miss_cache)
22
24
  metadata = {}
23
25
  ids = @id_cache[key]
24
- if !ids.nil?
25
- # FAST PATH
26
- # Cache hit, fetch metadata from the cache
27
- metadata = @cache.fetch(ids[:pod_id]) do
28
- @stats.bump(:pod_cache_miss)
29
- m = fetch_pod_metadata(namespace_name, pod_name)
30
- (m.nil? || m.empty?) ? {'pod_id'=>ids[:pod_id]} : m
31
- end
32
- metadata.merge!(@namespace_cache.fetch(ids[:namespace_id]) do
33
- @stats.bump(:namespace_cache_miss)
34
- m = fetch_namespace_metadata(namespace_name) unless @skip_namespace_metadata
35
- (m.nil? || m.empty?) ? {'namespace_id'=>ids[:namespace_id]} : m
36
- end)
37
- else
38
- # SLOW PATH
26
+ if ids.nil?
39
27
  @stats.bump(:id_cache_miss)
40
28
  return batch_miss_cache["#{namespace_name}_#{pod_name}"] if batch_miss_cache.key?("#{namespace_name}_#{pod_name}")
29
+
41
30
  pod_metadata = fetch_pod_metadata(namespace_name, pod_name)
42
31
  if @skip_namespace_metadata
43
- ids = { :pod_id=> pod_metadata['pod_id'] }
32
+ ids = { pod_id: pod_metadata['pod_id'] }
44
33
  @id_cache[key] = ids
45
34
  return pod_metadata
46
35
  end
47
36
  namespace_metadata = fetch_namespace_metadata(namespace_name)
48
- ids = { :pod_id=> pod_metadata['pod_id'], :namespace_id => namespace_metadata['namespace_id'] }
37
+ ids = { pod_id: pod_metadata['pod_id'], namespace_id: namespace_metadata['namespace_id'] }
49
38
  if !ids[:pod_id].nil? && !ids[:namespace_id].nil?
50
39
  # pod found and namespace found
51
40
  metadata = pod_metadata
@@ -55,11 +44,11 @@ module KubernetesMetadata
55
44
  # pod not found, but namespace found
56
45
  @stats.bump(:id_cache_pod_not_found_namespace)
57
46
  ns_time = Time.parse(namespace_metadata['creation_timestamp'])
58
- if ns_time <= record_create_time
47
+ if ns_time <= Time.at(time.to_f)
59
48
  # namespace is older then record for pod
60
49
  ids[:pod_id] = key
61
50
  metadata = @cache.fetch(ids[:pod_id]) do
62
- m = { 'pod_id' => ids[:pod_id] }
51
+ { 'pod_id' => ids[:pod_id] }
63
52
  end
64
53
  end
65
54
  metadata.merge!(namespace_metadata)
@@ -74,7 +63,7 @@ module KubernetesMetadata
74
63
  @stats.bump(:id_cache_orphaned_record)
75
64
  end
76
65
  if @allow_orphans
77
- log.trace("orphaning message for: #{namespace_name}/#{pod_name} ") if log.trace?
66
+ log.trace("orphaning message for: #{namespace_name}/#{pod_name} ")
78
67
  metadata = {
79
68
  'orphaned_namespace' => namespace_name,
80
69
  'namespace_name' => @orphaned_namespace_name,
@@ -87,12 +76,25 @@ module KubernetesMetadata
87
76
  end
88
77
  end
89
78
  @id_cache[key] = ids unless batch_miss_cache.key?("#{namespace_name}_#{pod_name}")
79
+ else
80
+ # FAST PATH: id cache hit, fetch metadata from the cache
81
+ metadata = @cache.fetch(ids[:pod_id]) do
82
+ @stats.bump(:pod_cache_miss)
83
+ m = fetch_pod_metadata(namespace_name, pod_name)
84
+ m.nil? || m.empty? ? { 'pod_id' => ids[:pod_id] } : m
85
+ end
86
+ metadata.merge!(@namespace_cache.fetch(ids[:namespace_id]) do
87
+ m = unless @skip_namespace_metadata
88
+ @stats.bump(:namespace_cache_miss)
89
+ fetch_namespace_metadata(namespace_name)
90
+ end
91
+ m.nil? || m.empty? ? { 'namespace_id' => ids[:namespace_id] } : m
92
+ end)
90
93
  end
91
94
 
92
95
  # remove namespace info that is only used for comparison
93
96
  metadata.delete('creation_timestamp')
94
- metadata.delete_if{|k,v| v.nil?}
97
+ metadata.delete_if { |_k, v| v.nil? }
95
98
  end
96
-
97
99
  end
98
100
  end