fluent-plugin-kubernetes_metadata_filter 2.5.0 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +10 -14
  3. data/.gitignore +0 -1
  4. data/.rubocop.yml +57 -0
  5. data/Gemfile +4 -2
  6. data/Gemfile.lock +76 -67
  7. data/README.md +9 -83
  8. data/Rakefile +15 -11
  9. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/cpu.png +0 -0
  10. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/latency.png +0 -0
  11. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/loss.png +0 -0
  12. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/mem.png +0 -0
  13. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/readme.md +88 -0
  14. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/results.html +127 -0
  15. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/cpu.png +0 -0
  16. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/latency.png +0 -0
  17. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/loss.png +0 -0
  18. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/mem.png +0 -0
  19. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/readme.md +97 -0
  20. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/results.html +136 -0
  21. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/cpu.png +0 -0
  22. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/latency.png +0 -0
  23. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/loss.png +0 -0
  24. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/mem.png +0 -0
  25. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/readme.md +97 -0
  26. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/results.html +136 -0
  27. data/fluent-plugin-kubernetes_metadata_filter.gemspec +25 -27
  28. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +171 -192
  29. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +25 -23
  30. data/lib/fluent/plugin/kubernetes_metadata_common.rb +44 -69
  31. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +21 -5
  32. data/lib/fluent/plugin/kubernetes_metadata_test_api_adapter.rb +68 -0
  33. data/lib/fluent/plugin/kubernetes_metadata_util.rb +33 -0
  34. data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +91 -42
  35. data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +108 -47
  36. data/release_notes.md +42 -0
  37. data/test/cassettes/kubernetes_get_pod_container_init.yml +145 -0
  38. data/test/helper.rb +20 -2
  39. data/test/plugin/test_cache_stats.rb +10 -13
  40. data/test/plugin/test_cache_strategy.rb +158 -160
  41. data/test/plugin/test_filter_kubernetes_metadata.rb +340 -616
  42. data/test/plugin/test_watch_namespaces.rb +188 -125
  43. data/test/plugin/test_watch_pods.rb +282 -202
  44. data/test/plugin/watch_test.rb +16 -15
  45. metadata +77 -67
  46. /data/test/cassettes/{kubernetes_docker_metadata_dotted_labels.yml → kubernetes_docker_metadata_dotted_slashed_labels.yml} +0 -0
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #
2
4
  # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
3
5
  # Kubernetes metadata
@@ -20,6 +22,7 @@
20
22
  require_relative 'kubernetes_metadata_cache_strategy'
21
23
  require_relative 'kubernetes_metadata_common'
22
24
  require_relative 'kubernetes_metadata_stats'
25
+ require_relative 'kubernetes_metadata_util'
23
26
  require_relative 'kubernetes_metadata_watch_namespaces'
24
27
  require_relative 'kubernetes_metadata_watch_pods'
25
28
 
@@ -47,25 +50,21 @@ module Fluent::Plugin
47
50
  config_param :client_key, :string, default: nil
48
51
  config_param :ca_file, :string, default: nil
49
52
  config_param :verify_ssl, :bool, default: true
50
- config_param :tag_to_kubernetes_name_regexp,
51
- :string,
52
- :default => 'var\.log\.containers\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
53
+ config_param :open_timeout, :integer, default: 3
54
+ config_param :read_timeout, :integer, default: 10
55
+
56
+ REGEX_VAR_LOG_PODS = '(var\.log\.pods)\.(?<namespace>[^_]+)_(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<pod_uuid>[a-z0-9-]*)\.(?<container_name>.+)\..*\.log$'
57
+ REGEX_VAR_LOG_CONTAINERS = '(var\.log\.containers)\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
58
+
59
+ # tag_to_kubernetes_name_regexp must include the following named capture groups:
60
+ # namespace - The namespace in which the pod is deployed
61
+ # pod_name - The pod name
62
+ # container_name - The name of the container
63
+ # pod_uuid (/var/log/pods) | docker_id (/var/log/containers) - Unique identifier used in caching of either pod_uuid or the container hash
64
+ config_param :tag_to_kubernetes_name_regexp, :string, default: "(#{REGEX_VAR_LOG_PODS}|#{REGEX_VAR_LOG_CONTAINERS})"
65
+
53
66
  config_param :bearer_token_file, :string, default: nil
54
67
  config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
55
- config_param :de_dot, :bool, default: true
56
- config_param :de_dot_separator, :string, default: '_'
57
- # if reading from the journal, the record will contain the following fields in the following
58
- # format:
59
- # CONTAINER_NAME=k8s_$containername.$containerhash_$podname_$namespacename_$poduuid_$rand32bitashex
60
- # CONTAINER_FULL_ID=dockeridassha256hexvalue
61
- config_param :use_journal, :bool, default: nil
62
- # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
63
- # I would have included them as named groups, but you can't have named groups that are
64
- # non-capturing :P
65
- # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker.go#L317
66
- config_param :container_name_to_kubernetes_regexp,
67
- :string,
68
- :default => '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
69
68
 
70
69
  config_param :annotation_match, :array, default: []
71
70
  config_param :stats_interval, :integer, default: 30
@@ -81,6 +80,11 @@ module Fluent::Plugin
81
80
  config_param :skip_container_metadata, :bool, default: false
82
81
  config_param :skip_master_url, :bool, default: false
83
82
  config_param :skip_namespace_metadata, :bool, default: false
83
+
84
+ # A class name in the form of Test::APIAdapter, which will be
85
+ # resolved from a relative file named 'test_api_adapter'
86
+ config_param :test_api_adapter, :string, default: nil
87
+
84
88
  # The time interval in seconds for retry backoffs when watch connections fail.
85
89
  config_param :watch_retry_interval, :integer, default: 1
86
90
  # The base number of exponential backoff for retries.
@@ -89,36 +93,39 @@ module Fluent::Plugin
89
93
  config_param :watch_retry_max_times, :integer, default: 10
90
94
 
91
95
  def fetch_pod_metadata(namespace_name, pod_name)
92
- log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}") if log.trace?
93
- begin
94
- metadata = @client.get_pod(pod_name, namespace_name)
95
- unless metadata
96
- log.trace("no metadata returned for: #{namespace_name}/#{pod_name}") if log.trace?
97
- @stats.bump(:pod_cache_api_nil_not_found)
96
+ log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}")
97
+ options = {
98
+ resource_version: '0' # Fetch from API server cache instead of etcd quorum read
99
+ }
100
+ pod_object = @client.get_pod(pod_name, namespace_name, options)
101
+ log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{pod_object}")
102
+ metadata = parse_pod_metadata(pod_object)
103
+ @stats.bump(:pod_cache_api_updates)
104
+ log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}")
105
+ @cache[metadata['pod_id']] = metadata
106
+ rescue KubeException => e
107
+ if e.error_code == 401
108
+ # recreate client to refresh token
109
+ log.info("Encountered '401 Unauthorized' exception, recreating client to refresh token")
110
+ create_client()
111
+ elsif e.error_code == 404
112
+ log.debug "Encountered '404 Not Found' exception, pod not found"
113
+ @stats.bump(:pod_cache_api_nil_error)
98
114
  else
99
- begin
100
- log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
101
- metadata = parse_pod_metadata(metadata)
102
- @stats.bump(:pod_cache_api_updates)
103
- log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
104
- @cache[metadata['pod_id']] = metadata
105
- return metadata
106
- rescue Exception=>e
107
- log.debug(e)
108
- @stats.bump(:pod_cache_api_nil_bad_resp_payload)
109
- log.trace("returning empty metadata for #{namespace_name}/#{pod_name} due to error '#{e}'") if log.trace?
110
- end
115
+ log.error "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
116
+ @stats.bump(:pod_cache_api_nil_error)
111
117
  end
112
- rescue Exception=>e
118
+ {}
119
+ rescue StandardError => e
113
120
  @stats.bump(:pod_cache_api_nil_error)
114
- log.debug "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
121
+ log.error "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
122
+ {}
115
123
  end
116
- {}
117
- end
118
124
 
119
125
  def dump_stats
120
126
  @curr_time = Time.now
121
127
  return if @curr_time.to_i - @prev_time.to_i < @stats_interval
128
+
122
129
  @prev_time = @curr_time
123
130
  @stats.set(:pod_cache_size, @cache.count)
124
131
  @stats.set(:namespace_cache_size, @namespace_cache.count) if @namespace_cache
@@ -131,55 +138,53 @@ module Fluent::Plugin
131
138
  end
132
139
 
133
140
  def fetch_namespace_metadata(namespace_name)
134
- log.trace("fetching namespace metadata: #{namespace_name}") if log.trace?
135
- begin
136
- metadata = @client.get_namespace(namespace_name)
137
- unless metadata
138
- log.trace("no metadata returned for: #{namespace_name}") if log.trace?
139
- @stats.bump(:namespace_cache_api_nil_not_found)
141
+ log.trace("fetching namespace metadata: #{namespace_name}")
142
+ options = {
143
+ resource_version: '0' # Fetch from API server cache instead of etcd quorum read
144
+ }
145
+ namespace_object = @client.get_namespace(namespace_name, nil, options)
146
+ log.trace("raw metadata for #{namespace_name}: #{namespace_object}")
147
+ metadata = parse_namespace_metadata(namespace_object)
148
+ @stats.bump(:namespace_cache_api_updates)
149
+ log.trace("parsed metadata for #{namespace_name}: #{metadata}")
150
+ @namespace_cache[metadata['namespace_id']] = metadata
151
+ rescue KubeException => e
152
+ if e.error_code == 401
153
+ # recreate client to refresh token
154
+ log.info("Encountered '401 Unauthorized' exception, recreating client to refresh token")
155
+ create_client()
140
156
  else
141
- begin
142
- log.trace("raw metadata for #{namespace_name}: #{metadata}") if log.trace?
143
- metadata = parse_namespace_metadata(metadata)
144
- @stats.bump(:namespace_cache_api_updates)
145
- log.trace("parsed metadata for #{namespace_name}: #{metadata}") if log.trace?
146
- @namespace_cache[metadata['namespace_id']] = metadata
147
- return metadata
148
- rescue Exception => e
149
- log.debug(e)
150
- @stats.bump(:namespace_cache_api_nil_bad_resp_payload)
151
- log.trace("returning empty metadata for #{namespace_name} due to error '#{e}'") if log.trace?
152
- end
157
+ log.error "Exception '#{e}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
158
+ @stats.bump(:namespace_cache_api_nil_error)
153
159
  end
154
- rescue Exception => kube_error
160
+ {}
161
+ rescue StandardError => e
155
162
  @stats.bump(:namespace_cache_api_nil_error)
156
- log.debug "Exception '#{kube_error}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
157
- end
158
- {}
163
+ log.error "Exception '#{e}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
164
+ {}
159
165
  end
160
166
 
161
167
  def initialize
162
168
  super
163
169
  @prev_time = Time.now
170
+ @ssl_options = {}
171
+ @auth_options = {}
164
172
  end
165
173
 
166
174
  def configure(conf)
167
175
  super
168
176
 
169
- def log.trace?
170
- level == Fluent::Log::LEVEL_TRACE
171
- end
172
-
173
177
  require 'kubeclient'
174
178
  require 'lru_redux'
175
- @stats = KubernetesMetadata::Stats.new
176
179
 
177
- if @de_dot && (@de_dot_separator =~ /\./).present?
178
- raise Fluent::ConfigError, "Invalid de_dot_separator: cannot be or contain '.'"
180
+ @stats = KubernetesMetadata::Stats.new
181
+ if @stats_interval <= 0
182
+ @stats = KubernetesMetadata::NoOpStats.new
183
+ self.define_singleton_method(:dump_stats) {}
179
184
  end
180
185
 
181
186
  if @cache_ttl < 0
182
- log.info "Setting the cache TTL to :none because it was <= 0"
187
+ log.info 'Setting the cache TTL to :none because it was <= 0'
183
188
  @cache_ttl = :none
184
189
  end
185
190
 
@@ -193,15 +198,14 @@ module Fluent::Plugin
193
198
  @namespace_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)
194
199
 
195
200
  @tag_to_kubernetes_name_regexp_compiled = Regexp.compile(@tag_to_kubernetes_name_regexp)
196
- @container_name_to_kubernetes_regexp_compiled = Regexp.compile(@container_name_to_kubernetes_regexp)
197
-
201
+
198
202
  # Use Kubernetes default service account if we're in a pod.
199
203
  if @kubernetes_url.nil?
200
- log.debug "Kubernetes URL is not set - inspecting environ"
204
+ log.debug 'Kubernetes URL is not set - inspecting environ'
201
205
 
202
206
  env_host = ENV['KUBERNETES_SERVICE_HOST']
203
207
  env_port = ENV['KUBERNETES_SERVICE_PORT']
204
- if env_host.present? && env_port.present?
208
+ if present?(env_host) && present?(env_port)
205
209
  if env_host =~ Resolv::IPv6::Regex
206
210
  # Brackets are needed around IPv6 addresses
207
211
  env_host = "[#{env_host}]"
@@ -209,7 +213,7 @@ module Fluent::Plugin
209
213
  @kubernetes_url = "https://#{env_host}:#{env_port}/api"
210
214
  log.debug "Kubernetes URL is now '#{@kubernetes_url}'"
211
215
  else
212
- log.debug "No Kubernetes URL could be found in config or environ"
216
+ log.debug 'No Kubernetes URL could be found in config or environ'
213
217
  end
214
218
  end
215
219
 
@@ -219,24 +223,23 @@ module Fluent::Plugin
219
223
  ca_cert = File.join(@secret_dir, K8_POD_CA_CERT)
220
224
  pod_token = File.join(@secret_dir, K8_POD_TOKEN)
221
225
 
222
- if !@ca_file.present? and File.exist?(ca_cert)
226
+ if !present?(@ca_file) && File.exist?(ca_cert)
223
227
  log.debug "Found CA certificate: #{ca_cert}"
224
228
  @ca_file = ca_cert
225
229
  end
226
230
 
227
- if !@bearer_token_file.present? and File.exist?(pod_token)
231
+ if !present?(@bearer_token_file) && File.exist?(pod_token)
228
232
  log.debug "Found pod token: #{pod_token}"
229
233
  @bearer_token_file = pod_token
230
234
  end
231
235
  end
232
236
 
233
- if @kubernetes_url.present?
234
-
235
- ssl_options = {
236
- client_cert: @client_cert.present? ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
237
- client_key: @client_key.present? ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
238
- ca_file: @ca_file,
239
- verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
237
+ if present?(@kubernetes_url)
238
+ @ssl_options = {
239
+ client_cert: present?(@client_cert) ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
240
+ client_key: present?(@client_key) ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
241
+ ca_file: @ca_file,
242
+ verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
240
243
  }
241
244
 
242
245
  if @ssl_partial_chain
@@ -244,157 +247,133 @@ module Fluent::Plugin
244
247
  require 'openssl'
245
248
  ssl_store = OpenSSL::X509::Store.new
246
249
  ssl_store.set_default_paths
247
- if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
248
- flagval = OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
249
- else
250
- # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
251
- flagval = 0x80000
252
- end
250
+ flagval = if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
251
+ OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
252
+ else
253
+ # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
254
+ 0x80000
255
+ end
253
256
  ssl_store.flags = OpenSSL::X509::V_FLAG_CRL_CHECK_ALL | flagval
254
- ssl_options[:cert_store] = ssl_store
257
+ @ssl_options[:cert_store] = ssl_store
255
258
  end
256
259
 
257
- auth_options = {}
258
-
259
- if @bearer_token_file.present?
260
- bearer_token = File.read(@bearer_token_file)
261
- auth_options[:bearer_token] = bearer_token
260
+ if present?(@bearer_token_file)
261
+ @auth_options[:bearer_token_file] = @bearer_token_file
262
262
  end
263
263
 
264
- log.debug "Creating K8S client"
265
- @client = Kubeclient::Client.new @kubernetes_url, @apiVersion,
266
- ssl_options: ssl_options,
267
- auth_options: auth_options
264
+ create_client()
265
+
266
+ if @test_api_adapter
267
+ log.info "Extending client with test api adaper #{@test_api_adapter}"
268
+ require_relative @test_api_adapter.underscore
269
+ @client.extend(eval(@test_api_adapter))
270
+ end
268
271
 
269
272
  begin
270
273
  @client.api_valid?
271
- rescue KubeException => kube_error
272
- raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{kube_error.message}"
274
+ rescue KubeException => e
275
+ raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{e.message}"
273
276
  end
274
277
 
275
278
  if @watch
276
- pod_thread = Thread.new(self) { |this| this.set_up_pod_thread }
279
+ if ENV['K8S_NODE_NAME'].nil? || ENV['K8S_NODE_NAME'].strip.empty?
280
+ log.warn("!! The environment variable 'K8S_NODE_NAME' is not set to the node name which can affect the API server and watch efficiency !!")
281
+ end
282
+
283
+ pod_thread = Thread.new(self, &:set_up_pod_thread)
277
284
  pod_thread.abort_on_exception = true
278
285
 
279
- namespace_thread = Thread.new(self) { |this| this.set_up_namespace_thread }
286
+ namespace_thread = Thread.new(self, &:set_up_namespace_thread)
280
287
  namespace_thread.abort_on_exception = true
281
288
  end
282
289
  end
283
- @time_fields = []
284
- @time_fields.push('_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP') if @use_journal || @use_journal.nil?
285
- @time_fields.push('time') unless @use_journal
286
- @time_fields.push('@timestamp') if @lookup_from_k8s_field
287
290
 
288
291
  @annotations_regexps = []
289
292
  @annotation_match.each do |regexp|
290
- begin
291
- @annotations_regexps << Regexp.compile(regexp)
292
- rescue RegexpError => e
293
- log.error "Error: invalid regular expression in annotation_match: #{e}"
294
- end
293
+ @annotations_regexps << Regexp.compile(regexp)
294
+ rescue RegexpError => e
295
+ log.error "Error: invalid regular expression in annotation_match: #{e}"
295
296
  end
297
+ end
296
298
 
299
+ def create_client()
300
+ log.debug 'Creating K8S client'
301
+ @client = nil
302
+ @client = Kubeclient::Client.new(
303
+ @kubernetes_url,
304
+ @apiVersion,
305
+ ssl_options: @ssl_options,
306
+ auth_options: @auth_options,
307
+ timeouts: {
308
+ open: @open_timeout,
309
+ read: @read_timeout
310
+ },
311
+ as: :parsed_symbolized
312
+ )
297
313
  end
298
314
 
299
- def get_metadata_for_record(namespace_name, pod_name, container_name, container_id, create_time, batch_miss_cache)
315
+ def get_metadata_for_record(namespace_name, pod_name, container_name, cache_key, create_time, batch_miss_cache, docker_id)
300
316
  metadata = {
301
- 'docker' => {'container_id' => container_id},
317
+ 'docker' => { 'container_id' => "" },
302
318
  'kubernetes' => {
303
- 'container_name' => container_name,
304
- 'namespace_name' => namespace_name,
305
- 'pod_name' => pod_name
319
+ 'container_name' => container_name,
320
+ 'namespace_name' => namespace_name,
321
+ 'pod_name' => pod_name
306
322
  }
307
323
  }
308
- if @kubernetes_url.present?
309
- pod_metadata = get_pod_metadata(container_id, namespace_name, pod_name, create_time, batch_miss_cache)
310
-
311
- if (pod_metadata.include? 'containers') && (pod_metadata['containers'].include? container_id) && !@skip_container_metadata
312
- metadata['kubernetes']['container_image'] = pod_metadata['containers'][container_id]['image']
313
- metadata['kubernetes']['container_image_id'] = pod_metadata['containers'][container_id]['image_id']
324
+ metadata['docker']['container_id'] = docker_id unless docker_id.nil?
325
+ container_cache_key = container_name
326
+ if present?(@kubernetes_url)
327
+ pod_metadata = get_pod_metadata(cache_key, namespace_name, pod_name, create_time, batch_miss_cache)
328
+ if (pod_metadata.include? 'containers') && (pod_metadata['containers'].include? container_cache_key) && !@skip_container_metadata
329
+ metadata['kubernetes']['container_image'] = pod_metadata['containers'][container_cache_key]['image']
330
+ metadata['kubernetes']['container_image_id'] = pod_metadata['containers'][container_cache_key]['image_id'] unless pod_metadata['containers'][container_cache_key]['image_id'].empty?
331
+ metadata['docker']['container_id'] = pod_metadata['containers'][container_cache_key]['containerID'] unless pod_metadata['containers'][container_cache_key]['containerID'].empty?
314
332
  end
315
333
 
316
334
  metadata['kubernetes'].merge!(pod_metadata) if pod_metadata
317
335
  metadata['kubernetes'].delete('containers')
318
336
  end
319
- metadata
320
- end
321
-
322
- def create_time_from_record(record, internal_time)
323
- time_key = @time_fields.detect{ |ii| record.has_key?(ii) }
324
- time = record[time_key]
325
- if time.nil? || time.chop.empty?
326
- # `internal_time` is a Fluent::EventTime, it can't compare with Time.
327
- return Time.at(internal_time.to_f)
328
- end
329
- if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
330
- timei= time.to_i
331
- return Time.at(timei / 1000000, timei % 1000000)
337
+ metadata['kubernetes'].tap do |kube|
338
+ kube.each_pair do |k,v|
339
+ kube[k.dup] = v.dup
340
+ end
332
341
  end
333
- return Time.parse(time)
342
+ metadata.delete('docker') if metadata['docker'] && (metadata['docker']['container_id'].nil? || metadata['docker']['container_id'].empty?)
343
+ metadata
334
344
  end
335
345
 
336
- def filter_stream(tag, es)
337
- return es if (es.respond_to?(:empty?) && es.empty?) || !es.is_a?(Fluent::EventStream)
338
- new_es = Fluent::MultiEventStream.new
339
- tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled) unless @use_journal
340
- tag_metadata = nil
346
+ def filter(tag, time, record)
347
+ tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled)
341
348
  batch_miss_cache = {}
342
- es.each do |time, record|
343
- if tag_match_data && tag_metadata.nil?
344
- tag_metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
345
- tag_match_data['docker_id'], create_time_from_record(record, time), batch_miss_cache)
346
- end
347
- metadata = Marshal.load(Marshal.dump(tag_metadata)) if tag_metadata
348
- if (@use_journal || @use_journal.nil?) &&
349
- (j_metadata = get_metadata_for_journal_record(record, time, batch_miss_cache))
350
- metadata = j_metadata
351
- end
352
- if @lookup_from_k8s_field && record.has_key?('kubernetes') && record.has_key?('docker') &&
349
+ if tag_match_data
350
+ cache_key = if tag_match_data.names.include?('pod_uuid') && !tag_match_data['pod_uuid'].nil?
351
+ tag_match_data['pod_uuid']
352
+ else
353
+ tag_match_data['docker_id']
354
+ end
355
+ docker_id = tag_match_data.names.include?('docker_id') ? tag_match_data['docker_id'] : nil
356
+ metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
357
+ cache_key, time, batch_miss_cache, docker_id)
358
+ end
359
+ if @lookup_from_k8s_field && record.key?('kubernetes') && record.key?('docker') &&
353
360
  record['kubernetes'].respond_to?(:has_key?) && record['docker'].respond_to?(:has_key?) &&
354
- record['kubernetes'].has_key?('namespace_name') &&
355
- record['kubernetes'].has_key?('pod_name') &&
356
- record['kubernetes'].has_key?('container_name') &&
357
- record['docker'].has_key?('container_id') &&
361
+ record['kubernetes'].key?('namespace_name') &&
362
+ record['kubernetes'].key?('pod_name') &&
363
+ record['kubernetes'].key?('container_name') &&
364
+ record['docker'].key?('container_id') &&
358
365
  (k_metadata = get_metadata_for_record(record['kubernetes']['namespace_name'], record['kubernetes']['pod_name'],
359
- record['kubernetes']['container_name'], record['docker']['container_id'],
360
- create_time_from_record(record, time), batch_miss_cache))
361
- metadata = k_metadata
362
- end
363
-
364
- record = record.merge(metadata) if metadata
365
- new_es.add(time, record)
366
+ record['kubernetes']['container_name'], record['docker']['container_id'],
367
+ time, batch_miss_cache, record['docker']['container_id']))
368
+ metadata = k_metadata
366
369
  end
367
370
  dump_stats
368
- new_es
369
- end
370
-
371
- def get_metadata_for_journal_record(record, time, batch_miss_cache)
372
- metadata = nil
373
- if record.has_key?('CONTAINER_NAME') && record.has_key?('CONTAINER_ID_FULL')
374
- metadata = record['CONTAINER_NAME'].match(@container_name_to_kubernetes_regexp_compiled) do |match_data|
375
- get_metadata_for_record(match_data['namespace'], match_data['pod_name'], match_data['container_name'],
376
- record['CONTAINER_ID_FULL'], create_time_from_record(record, time), batch_miss_cache)
377
- end
378
- unless metadata
379
- log.debug "Error: could not match CONTAINER_NAME from record #{record}"
380
- @stats.bump(:container_name_match_failed)
381
- end
382
- elsif record.has_key?('CONTAINER_NAME') && record['CONTAINER_NAME'].start_with?('k8s_')
383
- log.debug "Error: no container name and id in record #{record}"
384
- @stats.bump(:container_name_id_missing)
385
- end
386
- metadata
371
+ metadata ? record.merge(metadata) : record
387
372
  end
388
373
 
389
- def de_dot!(h)
390
- h.keys.each do |ref|
391
- if h[ref] && ref =~ /\./
392
- v = h.delete(ref)
393
- newref = ref.to_s.gsub('.', @de_dot_separator)
394
- h[newref] = v
395
- end
396
- end
374
+ # copied from activesupport
375
+ def present?(object)
376
+ object.respond_to?(:empty?) ? !object.empty? : !!object
397
377
  end
398
-
399
378
  end
400
379
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  #
2
4
  # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
3
5
  # Kubernetes metadata
@@ -18,34 +20,21 @@
18
20
  #
19
21
  module KubernetesMetadata
20
22
  module CacheStrategy
21
- def get_pod_metadata(key, namespace_name, pod_name, record_create_time, batch_miss_cache)
23
+ def get_pod_metadata(key, namespace_name, pod_name, time, batch_miss_cache)
22
24
  metadata = {}
23
25
  ids = @id_cache[key]
24
- if !ids.nil?
25
- # FAST PATH
26
- # Cache hit, fetch metadata from the cache
27
- metadata = @cache.fetch(ids[:pod_id]) do
28
- @stats.bump(:pod_cache_miss)
29
- m = fetch_pod_metadata(namespace_name, pod_name)
30
- (m.nil? || m.empty?) ? {'pod_id'=>ids[:pod_id]} : m
31
- end
32
- metadata.merge!(@namespace_cache.fetch(ids[:namespace_id]) do
33
- @stats.bump(:namespace_cache_miss)
34
- m = fetch_namespace_metadata(namespace_name) unless @skip_namespace_metadata
35
- (m.nil? || m.empty?) ? {'namespace_id'=>ids[:namespace_id]} : m
36
- end)
37
- else
38
- # SLOW PATH
26
+ if ids.nil?
39
27
  @stats.bump(:id_cache_miss)
40
28
  return batch_miss_cache["#{namespace_name}_#{pod_name}"] if batch_miss_cache.key?("#{namespace_name}_#{pod_name}")
29
+
41
30
  pod_metadata = fetch_pod_metadata(namespace_name, pod_name)
42
31
  if @skip_namespace_metadata
43
- ids = { :pod_id=> pod_metadata['pod_id'] }
32
+ ids = { pod_id: pod_metadata['pod_id'] }
44
33
  @id_cache[key] = ids
45
34
  return pod_metadata
46
35
  end
47
36
  namespace_metadata = fetch_namespace_metadata(namespace_name)
48
- ids = { :pod_id=> pod_metadata['pod_id'], :namespace_id => namespace_metadata['namespace_id'] }
37
+ ids = { pod_id: pod_metadata['pod_id'], namespace_id: namespace_metadata['namespace_id'] }
49
38
  if !ids[:pod_id].nil? && !ids[:namespace_id].nil?
50
39
  # pod found and namespace found
51
40
  metadata = pod_metadata
@@ -55,11 +44,11 @@ module KubernetesMetadata
55
44
  # pod not found, but namespace found
56
45
  @stats.bump(:id_cache_pod_not_found_namespace)
57
46
  ns_time = Time.parse(namespace_metadata['creation_timestamp'])
58
- if ns_time <= record_create_time
47
+ if ns_time <= Time.at(time.to_f)
59
48
  # namespace is older than record for pod
60
49
  ids[:pod_id] = key
61
50
  metadata = @cache.fetch(ids[:pod_id]) do
62
- m = { 'pod_id' => ids[:pod_id] }
51
+ { 'pod_id' => ids[:pod_id] }
63
52
  end
64
53
  end
65
54
  metadata.merge!(namespace_metadata)
@@ -74,7 +63,7 @@ module KubernetesMetadata
74
63
  @stats.bump(:id_cache_orphaned_record)
75
64
  end
76
65
  if @allow_orphans
77
- log.trace("orphaning message for: #{namespace_name}/#{pod_name} ") if log.trace?
66
+ log.trace("orphaning message for: #{namespace_name}/#{pod_name} ")
78
67
  metadata = {
79
68
  'orphaned_namespace' => namespace_name,
80
69
  'namespace_name' => @orphaned_namespace_name,
@@ -87,12 +76,25 @@ module KubernetesMetadata
87
76
  end
88
77
  end
89
78
  @id_cache[key] = ids unless batch_miss_cache.key?("#{namespace_name}_#{pod_name}")
79
+ else
80
+ # FAST PATH (id cache hit): fetch metadata from the caches
81
+ metadata = @cache.fetch(ids[:pod_id]) do
82
+ @stats.bump(:pod_cache_miss)
83
+ m = fetch_pod_metadata(namespace_name, pod_name)
84
+ m.nil? || m.empty? ? { 'pod_id' => ids[:pod_id] } : m
85
+ end
86
+ metadata.merge!(@namespace_cache.fetch(ids[:namespace_id]) do
87
+ m = unless @skip_namespace_metadata
88
+ @stats.bump(:namespace_cache_miss)
89
+ fetch_namespace_metadata(namespace_name)
90
+ end
91
+ m.nil? || m.empty? ? { 'namespace_id' => ids[:namespace_id] } : m
92
+ end)
90
93
  end
91
94
 
92
95
  # remove namespace info that is only used for comparison
93
96
  metadata.delete('creation_timestamp')
94
- metadata.delete_if{|k,v| v.nil?}
97
+ metadata.delete_if { |_k, v| v.nil? }
95
98
  end
96
-
97
99
  end
98
100
  end