fluent-plugin-kubernetes_metadata_filter_splunk 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +56 -0
  3. data/.gitignore +20 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE.txt +201 -0
  6. data/README.md +209 -0
  7. data/Rakefile +37 -0
  8. data/fluent-plugin-kubernetes_metadata_filter.gemspec +36 -0
  9. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +446 -0
  10. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +98 -0
  11. data/lib/fluent/plugin/kubernetes_metadata_common.rb +113 -0
  12. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +46 -0
  13. data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +65 -0
  14. data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +68 -0
  15. data/test/cassettes/invalid_api_server_config.yml +53 -0
  16. data/test/cassettes/kubernetes_docker_metadata.yml +228 -0
  17. data/test/cassettes/kubernetes_docker_metadata_annotations.yml +239 -0
  18. data/test/cassettes/kubernetes_docker_metadata_dotted_labels.yml +231 -0
  19. data/test/cassettes/kubernetes_docker_metadata_using_bearer_token.yml +248 -0
  20. data/test/cassettes/kubernetes_get_api_v1.yml +193 -0
  21. data/test/cassettes/kubernetes_get_api_v1_using_token.yml +195 -0
  22. data/test/cassettes/kubernetes_get_namespace_default.yml +69 -0
  23. data/test/cassettes/kubernetes_get_namespace_default_using_token.yml +71 -0
  24. data/test/cassettes/kubernetes_get_pod.yml +146 -0
  25. data/test/cassettes/kubernetes_get_pod_using_token.yml +148 -0
  26. data/test/cassettes/metadata_from_tag_and_journald_fields.yml +408 -0
  27. data/test/cassettes/metadata_from_tag_journald_and_kubernetes_fields.yml +540 -0
  28. data/test/cassettes/metadata_with_namespace_id.yml +276 -0
  29. data/test/cassettes/non_kubernetes_docker_metadata.yml +97 -0
  30. data/test/cassettes/valid_kubernetes_api_server.yml +55 -0
  31. data/test/cassettes/valid_kubernetes_api_server_using_token.yml +57 -0
  32. data/test/helper.rb +64 -0
  33. data/test/plugin/test.token +1 -0
  34. data/test/plugin/test_cache_stats.rb +36 -0
  35. data/test/plugin/test_cache_strategy.rb +196 -0
  36. data/test/plugin/test_filter_kubernetes_metadata.rb +970 -0
  37. data/test/plugin/test_watch_namespaces.rb +91 -0
  38. data/test/plugin/test_watch_pods.rb +145 -0
  39. data/test/plugin/watch_test.rb +57 -0
  40. metadata +295 -0
data/Rakefile
@@ -0,0 +1,37 @@
+ require 'bundler/gem_tasks'
+ require 'rake/testtask'
+ require 'bump/tasks'
+
+ task :test => [:base_test]
+
+ task :default => [:test, :build]
+
+ desc 'Run test_unit based test'
+ Rake::TestTask.new(:base_test) do |t|
+   # To run test for only one file (or file path pattern)
+   # $ bundle exec rake base_test TEST=test/test_specified_path.rb
+   # $ bundle exec rake base_test TEST=test/test_*.rb
+   t.libs << 'test'
+   t.test_files = Dir['test/**/test_*.rb'].sort
+   #t.verbose = true
+   t.warning = false
+ end
+
+ desc 'Add copyright headers'
+ task :headers do
+   require 'rubygems'
+   require 'copyright_header'
+
+   args = {
+     :license => 'Apache-2.0',
+     :copyright_software => 'Fluentd Kubernetes Metadata Filter Plugin',
+     :copyright_software_description => 'Enrich Fluentd events with Kubernetes metadata',
+     :copyright_holders => ['Red Hat, Inc.'],
+     :copyright_years => ['2015-2017'],
+     :add_path => 'lib:test',
+     :output_dir => '.'
+   }
+
+   command_line = CopyrightHeader::CommandLine.new( args )
+   command_line.execute
+ end
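
For orientation, here is a small hypothetical snippet (not part of the gem) that mirrors how the :base_test task above selects files: Rake::TestTask runs the test/**/test_*.rb glob by default, and the TEST environment variable shown in the Rakefile comments overrides that list.

# Hypothetical sketch reproducing the Rakefile's test selection outside Rake.
pattern = ENV.fetch('TEST', 'test/**/test_*.rb')
Dir[pattern].sort.each { |file| puts file }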
data/fluent-plugin-kubernetes_metadata_filter.gemspec
@@ -0,0 +1,36 @@
+ # coding: utf-8
+ lib = File.expand_path('../lib', __FILE__)
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+
+ Gem::Specification.new do |gem|
+   gem.name = "fluent-plugin-kubernetes_metadata_filter_splunk"
+   gem.version = "2.2.0"
+   gem.authors = ["Rock Baek"]
+   gem.email = ["rockb1017@gmail.com"]
+   gem.description = %q{Filter plugin to add Kubernetes metadata with custom caching algorithm by Cisco}
+   gem.summary = %q{Fluentd filter plugin to add Kubernetes metadata}
+   gem.homepage = ""
+   gem.license = "Apache-2.0"
+
+   gem.files = `git ls-files`.split($/)
+   gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
+   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
+   gem.require_paths = ["lib"]
+
+   gem.required_ruby_version = '>= 2.1.0'
+
+   gem.add_runtime_dependency 'fluentd', ['>= 0.14.0', '< 2']
+   gem.add_runtime_dependency "lru_redux"
+   gem.add_runtime_dependency "kubeclient", "~> 1.1.4"
+
+   gem.add_development_dependency "bundler", "~> 1.3"
+   gem.add_development_dependency "rake"
+   gem.add_development_dependency "minitest", "~> 4.0"
+   gem.add_development_dependency "test-unit", "~> 3.0.2"
+   gem.add_development_dependency "test-unit-rr", "~> 1.0.3"
+   gem.add_development_dependency "copyright-header"
+   gem.add_development_dependency "webmock"
+   gem.add_development_dependency "vcr"
+   gem.add_development_dependency "bump"
+   gem.add_development_dependency "yajl-ruby"
+ end
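
As a usage sketch (the rubygems.org source line and the Gemfile itself are assumptions, not part of this package), a deployment could pull this release in with a Gemfile entry that mirrors the gemspec's runtime constraints:

# Hypothetical Gemfile entry; Bundler resolves the gem's other runtime
# dependencies (lru_redux, kubeclient ~> 1.1.4) automatically.
source 'https://rubygems.org'

gem 'fluentd', '>= 0.14.0', '< 2'
gem 'fluent-plugin-kubernetes_metadata_filter_splunk', '2.2.0'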
data/lib/fluent/plugin/filter_kubernetes_metadata.rb
@@ -0,0 +1,446 @@
+ #
+ # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
+ # Kubernetes metadata
+ #
+ # Copyright 2017 Red Hat, Inc.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ require_relative 'kubernetes_metadata_cache_strategy'
+ require_relative 'kubernetes_metadata_common'
+ require_relative 'kubernetes_metadata_stats'
+ require_relative 'kubernetes_metadata_watch_namespaces'
+ require_relative 'kubernetes_metadata_watch_pods'
+
+ require 'fluent/plugin/filter'
+
+ module Fluent::Plugin
+   class KubernetesMetadataFilter < Fluent::Plugin::Filter
+     @pod_mapping
+     K8_POD_CA_CERT = 'ca.crt'
+     K8_POD_TOKEN = 'token'
+
+     include KubernetesMetadata::CacheStrategy
+     include KubernetesMetadata::Common
+     include KubernetesMetadata::WatchNamespaces
+     include KubernetesMetadata::WatchPods
+
+     Fluent::Plugin.register_filter('kubernetes_metadata', self)
+
+     config_param :kubernetes_url, :string, default: nil
+     config_param :cache_size, :integer, default: 1000
+     config_param :cache_ttl, :integer, default: 60 * 60
+     config_param :watch, :bool, default: true
+     config_param :apiVersion, :string, default: 'v1'
+     config_param :client_cert, :string, default: nil
+     config_param :client_key, :string, default: nil
+     config_param :ca_file, :string, default: nil
+     config_param :verify_ssl, :bool, default: true
+     config_param :tag_to_kubernetes_name_regexp,
+                  :string,
+                  :default => 'var\.log\.containers\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
+     config_param :bearer_token_file, :string, default: nil
+     config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
+     config_param :de_dot, :bool, default: true
+     config_param :de_dot_separator, :string, default: '_'
+     # if reading from the journal, the record will contain the following fields in the following
+     # format:
+     # CONTAINER_NAME=k8s_$containername.$containerhash_$podname_$namespacename_$poduuid_$rand32bitashex
+     # CONTAINER_FULL_ID=dockeridassha256hexvalue
+     config_param :use_journal, :bool, default: nil
+     # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
+     # I would have included them as named groups, but you can't have named groups that are
+     # non-capturing :P
+     # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockertools/docker.go#L317
+     config_param :container_name_to_kubernetes_regexp,
+                  :string,
+                  :default => '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
+
+     config_param :annotation_match, :array, default: []
+     config_param :stats_interval, :integer, default: 30
+     config_param :allow_orphans, :bool, default: true
+     config_param :orphaned_namespace_name, :string, default: '.orphaned'
+     config_param :orphaned_namespace_id, :string, default: 'orphaned'
+     config_param :lookup_from_k8s_field, :bool, default: true
+     # if `ca_file` is for an intermediate CA, or otherwise we do not have the root CA and want
+     # to trust the intermediate CA certs we do have, set this to `true` - this corresponds to
+     # the openssl s_client -partial_chain flag and X509_V_FLAG_PARTIAL_CHAIN
+     config_param :ssl_partial_chain, :bool, default: false
+     config_param :skip_labels, :bool, default: false
+     config_param :skip_container_metadata, :bool, default: false
+     config_param :skip_master_url, :bool, default: false
+     config_param :skip_namespace_metadata, :bool, default: false
+
+     def fetch_pod_metadata(namespace_name, pod_name)
+       log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}") if log.trace?
+       begin
+         metadata = @client.get_pod(pod_name, namespace_name)
+         unless metadata
+           log.trace("no metadata returned for: #{namespace_name}/#{pod_name}") if log.trace?
+           @stats.bump(:pod_cache_api_nil_not_found)
+         else
+           begin
+             log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
+             metadata = parse_pod_metadata(metadata)
+             @stats.bump(:pod_cache_api_updates)
+             log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
+             @cache[metadata['pod_id']] = metadata
+             return metadata
+           rescue Exception=>e
+             log.debug(e)
+             @stats.bump(:pod_cache_api_nil_bad_resp_payload)
+             log.trace("returning empty metadata for #{namespace_name}/#{pod_name} due to error '#{e}'") if log.trace?
+           end
+         end
+       rescue Exception=>e
+         @stats.bump(:pod_cache_api_nil_error)
+         log.debug "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
+       end
+       {}
+     end
+
+     def dump_stats
+       @curr_time = Time.now
+       return if @curr_time.to_i - @prev_time.to_i < @stats_interval
+       @prev_time = @curr_time
+       @stats.set(:pod_cache_size, @cache.count)
+       @stats.set(:namespace_cache_size, @namespace_cache.count) if @namespace_cache
+       log.info(@stats)
+       if log.level == Fluent::Log::LEVEL_TRACE
+         log.trace(" id cache: #{@id_cache.to_a}")
+         log.trace(" pod cache: #{@cache.to_a}")
+         log.trace("namespace cache: #{@namespace_cache.to_a}")
+       end
+     end
+
+     def fetch_namespace_metadata(namespace_name)
+       log.trace("fetching namespace metadata: #{namespace_name}") if log.trace?
+       begin
+         metadata = @client.get_namespace(namespace_name)
+         unless metadata
+           log.trace("no metadata returned for: #{namespace_name}") if log.trace?
+           @stats.bump(:namespace_cache_api_nil_not_found)
+         else
+           begin
+             log.trace("raw metadata for #{namespace_name}: #{metadata}") if log.trace?
+             metadata = parse_namespace_metadata(metadata)
+             @stats.bump(:namespace_cache_api_updates)
+             log.trace("parsed metadata for #{namespace_name}: #{metadata}") if log.trace?
+             @namespace_cache[metadata['namespace_id']] = metadata
+             return metadata
+           rescue Exception => e
+             log.debug(e)
+             @stats.bump(:namespace_cache_api_nil_bad_resp_payload)
+             log.trace("returning empty metadata for #{namespace_name} due to error '#{e}'") if log.trace?
+           end
+         end
+       rescue Exception => kube_error
+         @stats.bump(:namespace_cache_api_nil_error)
+         log.debug "Exception '#{kube_error}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
+       end
+       {}
+     end
+
+     def initialize
+       super
+       @prev_time = Time.now
+     end
+
+     def configure(conf)
+       super
+
+       def log.trace?
+         level == Fluent::Log::LEVEL_TRACE
+       end
+
+       require 'kubeclient'
+       require 'active_support/core_ext/object/blank'
+       require 'lru_redux'
+       @stats = KubernetesMetadata::Stats.new
+
+       if @de_dot && (@de_dot_separator =~ /\./).present?
+         raise Fluent::ConfigError, "Invalid de_dot_separator: cannot be or contain '.'"
+       end
+
+       if @cache_ttl < 0
+         log.info "Setting the cache TTL to :none because it was <= 0"
+         @cache_ttl = :none
+       end
+
+       # Caches pod/namespace UID tuples for a given container UID.
+       @id_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)
+
+       # Use the container UID as the key to fetch a hash containing pod metadata
+       @cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)
+
+       # Use the namespace UID as the key to fetch a hash containing namespace metadata
+       @namespace_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)
+
+       @tag_to_kubernetes_name_regexp_compiled = Regexp.compile(@tag_to_kubernetes_name_regexp)
+       @tag_to_customlog_kubernetes_name_regexp_compiled = Regexp.compile('^tail\.(?<tag_prefix>[^\.]+)\.var\.lib\.origin\.openshift\.local\.volumes\.pods\.(?<pod_uuid>[-a-z0-9]+)\.volumes\.kubernetes.io~empty-dir\.(?<volume_name>[^\.]+)\.(?<file_name>.*)\.log$')
+       @customlog_string_to_kubernetes_compiled = Regexp.compile('^(?<namespace>[^[:space:]]+)[[:space:]]+(?<pod_name>[^[:space:]]+)[[:space:]]+(?<container_name>[^[:space:]]+)[[:space:]]+(?<docker_id>[^[:space:]]+)$')
+       @pod_log_filename_to_uuid_regexp_compiled = Regexp.compile('^\/var\/log\/pods\/(?<pod_uuid>[-a-z0-9]*)\/.*\/.*\.log$')
+       @container_filename_to_uuid_regexp_compiled = Regexp.compile('^\/var\/log\/containers\/(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$')
+       @container_name_to_kubernetes_regexp_compiled = Regexp.compile(@container_name_to_kubernetes_regexp)
+
+       # Use Kubernetes default service account if we're in a pod.
+       if @kubernetes_url.nil?
+         log.debug "Kubernetes URL is not set - inspecting environ"
+
+         env_host = ENV['KUBERNETES_SERVICE_HOST']
+         env_port = ENV['KUBERNETES_SERVICE_PORT']
+         if env_host.present? && env_port.present?
+           @kubernetes_url = "https://#{env_host}:#{env_port}/api"
+           log.debug "Kubernetes URL is now '#{@kubernetes_url}'"
+         end
+       end
+
+       # Use SSL certificate and bearer token from Kubernetes service account.
+       if Dir.exist?(@secret_dir)
+         log.debug "Found directory with secrets: #{@secret_dir}"
+         ca_cert = File.join(@secret_dir, K8_POD_CA_CERT)
+         pod_token = File.join(@secret_dir, K8_POD_TOKEN)
+
+         if !@ca_file.present? and File.exist?(ca_cert)
+           log.debug "Found CA certificate: #{ca_cert}"
+           @ca_file = ca_cert
+         end
+
+         if !@bearer_token_file.present? and File.exist?(pod_token)
+           log.debug "Found pod token: #{pod_token}"
+           @bearer_token_file = pod_token
+         end
+       end
+
+       if @kubernetes_url.present?
+
+         ssl_options = {
+           client_cert: @client_cert.present? ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
+           client_key: @client_key.present? ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
+           ca_file: @ca_file,
+           verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
+         }
+
+         if @ssl_partial_chain
+           # taken from the ssl.rb OpenSSL::SSL::SSLContext code for DEFAULT_CERT_STORE
+           require 'openssl'
+           ssl_store = OpenSSL::X509::Store.new
+           ssl_store.set_default_paths
+           if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
+             flagval = OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
+           else
+             # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
+             flagval = 0x80000
+           end
+           ssl_store.flags = OpenSSL::X509::V_FLAG_CRL_CHECK_ALL | flagval
+           ssl_options[:cert_store] = ssl_store
+         end
+
+         auth_options = {}
+
+         if @bearer_token_file.present?
+           bearer_token = File.read(@bearer_token_file)
+           auth_options[:bearer_token] = bearer_token
+         end
+
+         log.debug "Creating K8S client"
+         @client = Kubeclient::Client.new @kubernetes_url, @apiVersion,
+                                          ssl_options: ssl_options,
+                                          auth_options: auth_options
+
+         begin
+           @client.api_valid?
+         rescue KubeException => kube_error
+           raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{kube_error.message}"
+         end
+
+         if @watch
+           thread = Thread.new(self) { |this| this.start_pod_watch }
+           thread.abort_on_exception = true
+           namespace_thread = Thread.new(self) { |this| this.start_namespace_watch }
+           namespace_thread.abort_on_exception = true
+         end
+       end
+       @time_fields = []
+       @time_fields.push('_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP') if @use_journal || @use_journal.nil?
+       @time_fields.push('time') unless @use_journal
+       @time_fields.push('@timestamp') if @lookup_from_k8s_field
+
+       @annotations_regexps = []
+       @annotation_match.each do |regexp|
+         begin
+           @annotations_regexps << Regexp.compile(regexp)
+         rescue RegexpError => e
+           log.error "Error: invalid regular expression in annotation_match: #{e}"
+         end
+       end
+
+       @pod_mapping = self.generate_map()
+     end
+
+     def get_metadata_for_record(namespace_name, pod_name, container_name, container_id, create_time, batch_miss_cache)
+       metadata = {
+         'docker' => {'container_id' => container_id},
+         'kubernetes' => {
+           'container_name' => container_name,
+           'namespace_name' => namespace_name,
+           'pod_name' => pod_name
+         }
+       }
+       if @kubernetes_url.present?
+         pod_metadata = get_pod_metadata(container_id, namespace_name, pod_name, create_time, batch_miss_cache)
+
+         if (pod_metadata.include? 'containers') && (pod_metadata['containers'].include? container_id) && !@skip_container_metadata
+           metadata['kubernetes']['container_image'] = pod_metadata['containers'][container_id]['image']
+           metadata['kubernetes']['container_image_id'] = pod_metadata['containers'][container_id]['image_id']
+         end
+
+         metadata['kubernetes'].merge!(pod_metadata) if pod_metadata
+         metadata['kubernetes'].delete('containers')
+       end
+       metadata
+     end
+
+     def create_time_from_record(record, internal_time)
+       time_key = @time_fields.detect{ |ii| record.has_key?(ii) }
+       time = record[time_key]
+       if time.nil? || time.chop.empty?
+         # `internal_time` is a Fluent::EventTime, it can't compare with Time.
+         return Time.at(internal_time.to_f)
+       end
+       if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
+         timei= time.to_i
+         return Time.at(timei / 1000000, timei % 1000000)
+       end
+       return Time.parse(time)
+     end
+
+     def filter_stream(tag, es)
+       return es if (es.respond_to?(:empty?) && es.empty?) || !es.is_a?(Fluent::EventStream)
+       new_es = Fluent::MultiEventStream.new
+       tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled) unless @use_journal
+       tag_metadata = nil
+       batch_miss_cache = {}
+       unless tag_match_data
+         customlog_vol, customlog_file, tag_match_data = filter_customlog(tag)
+       end
+       es.each do |time, record|
+         if tag_match_data && tag_metadata.nil?
+           tag_metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
+                                                  tag_match_data['docker_id'], create_time_from_record(record, time), batch_miss_cache)
+         end
+         metadata = Marshal.load(Marshal.dump(tag_metadata)) if tag_metadata
+         if (@use_journal || @use_journal.nil?) &&
+            (j_metadata = get_metadata_for_journal_record(record, time, batch_miss_cache))
+           metadata = j_metadata
+         end
+         if @lookup_from_k8s_field && record.has_key?('kubernetes') && record.has_key?('docker') &&
+            record['kubernetes'].respond_to?(:has_key?) && record['docker'].respond_to?(:has_key?) &&
+            record['kubernetes'].has_key?('namespace_name') &&
+            record['kubernetes'].has_key?('pod_name') &&
+            record['kubernetes'].has_key?('container_name') &&
+            record['docker'].has_key?('container_id') &&
+            (k_metadata = get_metadata_for_record(record['kubernetes']['namespace_name'], record['kubernetes']['pod_name'],
+                                                  record['kubernetes']['container_name'], record['docker']['container_id'],
+                                                  create_time_from_record(record, time), batch_miss_cache))
+           metadata = k_metadata
+         end
+
+         record['customlog_vol'] = customlog_vol if defined?(customlog_vol)
+         record['customlog_file'] = customlog_file if defined?(customlog_file)
+         record = record.merge(metadata) if metadata
+         new_es.add(time, record)
+       end
+       dump_stats
+       new_es
+     end
+
+     def filter_customlog(tag)
+       tag_match_data = {}
+       customlog_match = tag.match(@tag_to_customlog_kubernetes_name_regexp_compiled)
+       if customlog_match
+         pod_uuid = customlog_match["pod_uuid"]
+         file_name = customlog_match["file_name"]
+         volume_name = customlog_match["volume_name"]
+
+         unless @pod_mapping[pod_uuid]
+           @pod_mapping = self.generate_map()
+           log.info "Info: #{pod_uuid} not found, regenerating map"
+         end
+
+         if @pod_mapping[pod_uuid]
+           string = @pod_mapping[pod_uuid][:namespace] + " " + @pod_mapping[pod_uuid][:pod_name] + " customlog " + @pod_mapping[pod_uuid][:docker_id]
+           tag_match_data = string.match(@customlog_string_to_kubernetes_compiled)
+         else
+           log.error "Error: #{pod_uuid} not found"
+         end
+       end
+       return volume_name, file_name, tag_match_data
+     end
+
+     def generate_map()
+       pod_mapping = {}
+       files = Dir["/var/log/containers/*.log"]
+
+       files.each do |item|
+         match = item.match(@container_filename_to_uuid_regexp_compiled)
+         if match
+           pod_name = match["pod_name"]
+           namespace = match["namespace"]
+           container_name = match["container_name"]
+           docker_id = match["docker_id"]
+
+           link_target = File.readlink(item)
+           pods_match = link_target.match(@pod_log_filename_to_uuid_regexp_compiled)
+           if pods_match
+             pod_uuid = pods_match["pod_uuid"]
+           end
+
+           log.info "Mapping uuid #{pod_uuid} to #{namespace}/#{pod_name}"
+           pod_mapping[pod_uuid] = { namespace: namespace, pod_name: pod_name, container_name: container_name, docker_id: docker_id }
+         end
+       end
+       pod_mapping
+     end
+
+     def get_metadata_for_journal_record(record, time, batch_miss_cache)
+       metadata = nil
+       if record.has_key?('CONTAINER_NAME') && record.has_key?('CONTAINER_ID_FULL')
+         metadata = record['CONTAINER_NAME'].match(@container_name_to_kubernetes_regexp_compiled) do |match_data|
+           get_metadata_for_record(match_data['namespace'], match_data['pod_name'], match_data['container_name'],
+                                   record['CONTAINER_ID_FULL'], create_time_from_record(record, time), batch_miss_cache)
+         end
+         unless metadata
+           log.debug "Error: could not match CONTAINER_NAME from record #{record}"
+           @stats.bump(:container_name_match_failed)
+         end
+       elsif record.has_key?('CONTAINER_NAME') && record['CONTAINER_NAME'].start_with?('k8s_')
+         log.debug "Error: no container name and id in record #{record}"
+         @stats.bump(:container_name_id_missing)
+       end
+       metadata
+     end
+
+     def de_dot!(h)
+       h.keys.each do |ref|
+         if h[ref] && ref =~ /\./
+           v = h.delete(ref)
+           newref = ref.to_s.gsub('.', @de_dot_separator)
+           h[newref] = v
+         end
+       end
+     end
+
+   end
+ end
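
To make the two default patterns above concrete, here is a standalone Ruby sketch. The regular expressions are copied verbatim from the plugin's defaults for tag_to_kubernetes_name_regexp and container_name_to_kubernetes_regexp; the sample tag and CONTAINER_NAME values are invented purely for illustration.

# Patterns copied from the plugin's defaults; sample inputs are invented.
tag_re = Regexp.new('var\.log\.containers\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$')
name_re = Regexp.new('^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$')

# A tail-sourced tag for /var/log/containers/<pod>_<namespace>_<container>-<docker_id>.log
tag = "var.log.containers.mypod-1234567890-abcde_default_mycontainer-#{'0123456789abcdef' * 4}.log"
m = tag.match(tag_re)
puts m[:namespace]       # => "default"
puts m[:pod_name]        # => "mypod-1234567890-abcde"
puts m[:container_name]  # => "mycontainer"
puts m[:docker_id].size  # => 64

# Journald records are matched on CONTAINER_NAME instead, using the k8s_ naming
# scheme described in the comments next to use_journal above.
container_name = 'k8s_mycontainer.deadbeef_mypod-1234567890-abcde_default_0123-4567_0123456789abcdef'
j = container_name.match(name_re)
puts j[:namespace]       # => "default"
puts j[:pod_name]        # => "mypod-1234567890-abcde"
puts j[:container_name]  # => "mycontainer"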