fluent-plugin-kubernetes_metadata_filter_splunk 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.circleci/config.yml +56 -0
  3. data/.gitignore +20 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE.txt +201 -0
  6. data/README.md +209 -0
  7. data/Rakefile +37 -0
  8. data/fluent-plugin-kubernetes_metadata_filter.gemspec +36 -0
  9. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +446 -0
  10. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +98 -0
  11. data/lib/fluent/plugin/kubernetes_metadata_common.rb +113 -0
  12. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +46 -0
  13. data/lib/fluent/plugin/kubernetes_metadata_watch_namespaces.rb +65 -0
  14. data/lib/fluent/plugin/kubernetes_metadata_watch_pods.rb +68 -0
  15. data/test/cassettes/invalid_api_server_config.yml +53 -0
  16. data/test/cassettes/kubernetes_docker_metadata.yml +228 -0
  17. data/test/cassettes/kubernetes_docker_metadata_annotations.yml +239 -0
  18. data/test/cassettes/kubernetes_docker_metadata_dotted_labels.yml +231 -0
  19. data/test/cassettes/kubernetes_docker_metadata_using_bearer_token.yml +248 -0
  20. data/test/cassettes/kubernetes_get_api_v1.yml +193 -0
  21. data/test/cassettes/kubernetes_get_api_v1_using_token.yml +195 -0
  22. data/test/cassettes/kubernetes_get_namespace_default.yml +69 -0
  23. data/test/cassettes/kubernetes_get_namespace_default_using_token.yml +71 -0
  24. data/test/cassettes/kubernetes_get_pod.yml +146 -0
  25. data/test/cassettes/kubernetes_get_pod_using_token.yml +148 -0
  26. data/test/cassettes/metadata_from_tag_and_journald_fields.yml +408 -0
  27. data/test/cassettes/metadata_from_tag_journald_and_kubernetes_fields.yml +540 -0
  28. data/test/cassettes/metadata_with_namespace_id.yml +276 -0
  29. data/test/cassettes/non_kubernetes_docker_metadata.yml +97 -0
  30. data/test/cassettes/valid_kubernetes_api_server.yml +55 -0
  31. data/test/cassettes/valid_kubernetes_api_server_using_token.yml +57 -0
  32. data/test/helper.rb +64 -0
  33. data/test/plugin/test.token +1 -0
  34. data/test/plugin/test_cache_stats.rb +36 -0
  35. data/test/plugin/test_cache_strategy.rb +196 -0
  36. data/test/plugin/test_filter_kubernetes_metadata.rb +970 -0
  37. data/test/plugin/test_watch_namespaces.rb +91 -0
  38. data/test/plugin/test_watch_pods.rb +145 -0
  39. data/test/plugin/watch_test.rb +57 -0
  40. metadata +295 -0
@@ -0,0 +1,37 @@
1
require 'bundler/gem_tasks'
require 'rake/testtask'
require 'bump/tasks'

# `rake test` is an alias for the test_unit based suite below.
task test: [:base_test]

# Running plain `rake` runs the tests and then builds the gem.
task default: [:test, :build]

desc 'Run test_unit based test'
Rake::TestTask.new(:base_test) do |test_task|
  # To run test for only one file (or file path pattern)
  # $ bundle exec rake base_test TEST=test/test_specified_path.rb
  # $ bundle exec rake base_test TEST=test/test_*.rb
  test_task.libs << 'test'
  test_task.test_files = Dir['test/**/test_*.rb'].sort
  #test_task.verbose = true
  test_task.warning = false
end

desc 'Add copyright headers'
task :headers do
  # Loaded lazily so the gem is only needed when this task actually runs.
  require 'rubygems'
  require 'copyright_header'

  header_options = {
    license: 'Apache-2.0',
    copyright_software: 'Fluentd Kubernetes Metadata Filter Plugin',
    copyright_software_description: 'Enrich Fluentd events with Kubernetes metadata',
    copyright_holders: ['Red Hat, Inc.'],
    copyright_years: ['2015-2017'],
    add_path: 'lib:test',
    output_dir: '.'
  }

  CopyrightHeader::CommandLine.new(header_options).execute
end
@@ -0,0 +1,36 @@
1
# coding: utf-8
# Make the gem's own lib/ directory loadable while the specification is evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |spec|
  # Identity
  spec.name = "fluent-plugin-kubernetes_metadata_filter_splunk"
  spec.version = "2.2.0"
  spec.authors = ["Rock Baek"]
  spec.email = ["rockb1017@gmail.com"]
  spec.description = "Filter plugin to add Kubernetes metadata with custom caching algorithm by Cisco"
  spec.summary = "Fluentd filter plugin to add Kubernetes metadata"
  spec.homepage = ""
  spec.license = "Apache-2.0"

  # Packaged files are whatever git tracks; executables and tests are derived from that list.
  tracked_files = `git ls-files`.split($/)
  spec.files = tracked_files
  spec.executables = tracked_files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files = tracked_files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.required_ruby_version = '>= 2.1.0'

  # Runtime dependencies
  spec.add_runtime_dependency 'fluentd', ['>= 0.14.0', '< 2']
  spec.add_runtime_dependency "lru_redux"
  spec.add_runtime_dependency "kubeclient", "~> 1.1.4"

  # Development and test dependencies
  spec.add_development_dependency "bundler", "~> 1.3"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "minitest", "~> 4.0"
  spec.add_development_dependency "test-unit", "~> 3.0.2"
  spec.add_development_dependency "test-unit-rr", "~> 1.0.3"
  spec.add_development_dependency "copyright-header"
  spec.add_development_dependency "webmock"
  spec.add_development_dependency "vcr"
  spec.add_development_dependency "bump"
  spec.add_development_dependency "yajl-ruby"
end
@@ -0,0 +1,446 @@
1
+ #
2
+ # Fluentd Kubernetes Metadata Filter Plugin - Enrich Fluentd events with
3
+ # Kubernetes metadata
4
+ #
5
+ # Copyright 2017 Red Hat, Inc.
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+
20
+ require_relative 'kubernetes_metadata_cache_strategy'
21
+ require_relative 'kubernetes_metadata_common'
22
+ require_relative 'kubernetes_metadata_stats'
23
+ require_relative 'kubernetes_metadata_watch_namespaces'
24
+ require_relative 'kubernetes_metadata_watch_pods'
25
+
26
+ require 'fluent/plugin/filter'
27
+
28
+ module Fluent::Plugin
29
+ class KubernetesMetadataFilter < Fluent::Plugin::Filter
30
+ @pod_mapping
31
+ K8_POD_CA_CERT = 'ca.crt'
32
+ K8_POD_TOKEN = 'token'
33
+
34
+ include KubernetesMetadata::CacheStrategy
35
+ include KubernetesMetadata::Common
36
+ include KubernetesMetadata::WatchNamespaces
37
+ include KubernetesMetadata::WatchPods
38
+
39
+ Fluent::Plugin.register_filter('kubernetes_metadata', self)
40
+
41
+ config_param :kubernetes_url, :string, default: nil
42
+ config_param :cache_size, :integer, default: 1000
43
+ config_param :cache_ttl, :integer, default: 60 * 60
44
+ config_param :watch, :bool, default: true
45
+ config_param :apiVersion, :string, default: 'v1'
46
+ config_param :client_cert, :string, default: nil
47
+ config_param :client_key, :string, default: nil
48
+ config_param :ca_file, :string, default: nil
49
+ config_param :verify_ssl, :bool, default: true
50
+ config_param :tag_to_kubernetes_name_regexp,
51
+ :string,
52
+ :default => 'var\.log\.containers\.(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$'
53
+ config_param :bearer_token_file, :string, default: nil
54
+ config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
55
+ config_param :de_dot, :bool, default: true
56
+ config_param :de_dot_separator, :string, default: '_'
57
+ # if reading from the journal, the record will contain the following fields in the following
58
+ # format:
59
+ # CONTAINER_NAME=k8s_$containername.$containerhash_$podname_$namespacename_$poduuid_$rand32bitashex
60
+ # CONTAINER_ID_FULL=dockeridassha256hexvalue
61
+ config_param :use_journal, :bool, default: nil
62
+ # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
63
+ # I would have included them as named groups, but you can't have named groups that are
64
+ # non-capturing :P
65
+ # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/master/pkg/kubelet/dockertools/docker.go#L317
66
+ config_param :container_name_to_kubernetes_regexp,
67
+ :string,
68
+ :default => '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
69
+
70
+ config_param :annotation_match, :array, default: []
71
+ config_param :stats_interval, :integer, default: 30
72
+ config_param :allow_orphans, :bool, default: true
73
+ config_param :orphaned_namespace_name, :string, default: '.orphaned'
74
+ config_param :orphaned_namespace_id, :string, default: 'orphaned'
75
+ config_param :lookup_from_k8s_field, :bool, default: true
76
+ # if `ca_file` is for an intermediate CA, or otherwise we do not have the root CA and want
77
+ # to trust the intermediate CA certs we do have, set this to `true` - this corresponds to
78
+ # the openssl s_client -partial_chain flag and X509_V_FLAG_PARTIAL_CHAIN
79
+ config_param :ssl_partial_chain, :bool, default: false
80
+ config_param :skip_labels, :bool, default: false
81
+ config_param :skip_container_metadata, :bool, default: false
82
+ config_param :skip_master_url, :bool, default: false
83
+ config_param :skip_namespace_metadata, :bool, default: false
84
+
85
# Fetches metadata for one pod from the Kubernetes API client and caches it.
#
# namespace_name - namespace the pod lives in
# pod_name       - name of the pod
#
# Returns the parsed metadata hash (also stored in @cache under its 'pod_id')
# on success, or an empty hash when the API returns nothing, the payload
# cannot be parsed, or the API call fails. Every outcome bumps a counter in
# @stats.
#
# Only StandardError is rescued, so Interrupt, SystemExit, NoMemoryError and
# similar process-level exceptions propagate instead of being swallowed.
def fetch_pod_metadata(namespace_name, pod_name)
  log.trace("fetching pod metadata: #{namespace_name}/#{pod_name}") if log.trace?
  begin
    metadata = @client.get_pod(pod_name, namespace_name)
    if metadata
      begin
        log.trace("raw metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
        metadata = parse_pod_metadata(metadata)
        @stats.bump(:pod_cache_api_updates)
        log.trace("parsed metadata for #{namespace_name}/#{pod_name}: #{metadata}") if log.trace?
        @cache[metadata['pod_id']] = metadata
        return metadata
      rescue StandardError => e
        # The API answered, but the payload could not be parsed or cached.
        log.debug(e)
        @stats.bump(:pod_cache_api_nil_bad_resp_payload)
        log.trace("returning empty metadata for #{namespace_name}/#{pod_name} due to error '#{e}'") if log.trace?
      end
    else
      log.trace("no metadata returned for: #{namespace_name}/#{pod_name}") if log.trace?
      @stats.bump(:pod_cache_api_nil_not_found)
    end
  rescue StandardError => e
    # The API call itself failed.
    @stats.bump(:pod_cache_api_nil_error)
    log.debug "Exception '#{e}' encountered fetching pod metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
  end
  {}
end
112
+
113
# Logs cache statistics at most once per @stats_interval seconds.
#
# Does nothing until the interval has elapsed since @prev_time. Otherwise it
# records the current cache sizes in @stats, logs @stats at info level and,
# when the logger is at trace level, also logs the full cache contents.
def dump_stats
  @curr_time = Time.now
  elapsed_seconds = @curr_time.to_i - @prev_time.to_i
  return if elapsed_seconds < @stats_interval

  @prev_time = @curr_time
  @stats.set(:pod_cache_size, @cache.count)
  @stats.set(:namespace_cache_size, @namespace_cache.count) if @namespace_cache
  log.info(@stats)
  return unless log.level == Fluent::Log::LEVEL_TRACE

  log.trace(" id cache: #{@id_cache.to_a}")
  log.trace(" pod cache: #{@cache.to_a}")
  log.trace("namespace cache: #{@namespace_cache.to_a}")
end
126
+
127
# Fetches metadata for one namespace from the Kubernetes API client and
# caches it.
#
# namespace_name - name of the namespace to look up
#
# Returns the parsed metadata hash (also stored in @namespace_cache under its
# 'namespace_id') on success, or an empty hash when the API returns nothing,
# the payload cannot be parsed, or the API call fails. Every outcome bumps a
# counter in @stats.
#
# Only StandardError is rescued, so Interrupt, SystemExit, NoMemoryError and
# similar process-level exceptions propagate instead of being swallowed.
def fetch_namespace_metadata(namespace_name)
  log.trace("fetching namespace metadata: #{namespace_name}") if log.trace?
  begin
    metadata = @client.get_namespace(namespace_name)
    if metadata
      begin
        log.trace("raw metadata for #{namespace_name}: #{metadata}") if log.trace?
        metadata = parse_namespace_metadata(metadata)
        @stats.bump(:namespace_cache_api_updates)
        log.trace("parsed metadata for #{namespace_name}: #{metadata}") if log.trace?
        @namespace_cache[metadata['namespace_id']] = metadata
        return metadata
      rescue StandardError => e
        # The API answered, but the payload could not be parsed or cached.
        log.debug(e)
        @stats.bump(:namespace_cache_api_nil_bad_resp_payload)
        log.trace("returning empty metadata for #{namespace_name} due to error '#{e}'") if log.trace?
      end
    else
      log.trace("no metadata returned for: #{namespace_name}") if log.trace?
      @stats.bump(:namespace_cache_api_nil_not_found)
    end
  rescue StandardError => kube_error
    # The API call itself failed.
    @stats.bump(:namespace_cache_api_nil_error)
    log.debug "Exception '#{kube_error}' encountered fetching namespace metadata from Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}"
  end
  {}
end
154
+
155
# Builds the filter via the parent initializer and records the construction
# time in @prev_time, the timestamp dump_stats compares against to decide
# whether the stats interval has elapsed.
def initialize
  super
  @prev_time = Time.now
end
159
+
160
# Validates the configuration and prepares all runtime state for the filter.
#
# conf - configuration element handed to the parent class via `super`
#
# In order, this method:
#   * defines `log.trace?` on the plugin logger (other methods guard trace
#     output with it),
#   * validates de_dot_separator and normalises a negative cache_ttl to :none,
#   * creates the id, pod and namespace LRU/TTL caches,
#   * compiles the regular expressions used to parse tags and file names,
#   * derives the API URL from KUBERNETES_SERVICE_HOST/PORT when none is set,
#   * picks up the CA certificate and token from @secret_dir when present,
#   * creates and validates the Kubernetes client and, if @watch is set,
#     starts the pod and namespace watch threads,
#   * builds the list of record time fields, the annotation regexps and the
#     pod-UUID mapping.
#
# Raises Fluent::ConfigError when de_dot is enabled with a separator
# containing '.', or when the Kubernetes API endpoint fails validation.
def configure(conf)
  super

  def log.trace?
    level == Fluent::Log::LEVEL_TRACE
  end

  require 'kubeclient'
  require 'active_support/core_ext/object/blank'
  require 'lru_redux'
  # Required up front: OpenSSL constants are used below whether or not
  # ssl_partial_chain is enabled.
  require 'openssl'
  @stats = KubernetesMetadata::Stats.new

  if @de_dot && @de_dot_separator.include?('.')
    raise Fluent::ConfigError, "Invalid de_dot_separator: cannot be or contain '.'"
  end

  if @cache_ttl < 0
    log.info "Setting the cache TTL to :none because it was < 0"
    @cache_ttl = :none
  end

  # Caches pod/namespace UID tuples for a given container UID.
  @id_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)

  # Use the container UID as the key to fetch a hash containing pod metadata
  @cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)

  # Use the namespace UID as the key to fetch a hash containing namespace metadata
  @namespace_cache = LruRedux::TTL::ThreadSafeCache.new(@cache_size, @cache_ttl)

  @tag_to_kubernetes_name_regexp_compiled = Regexp.compile(@tag_to_kubernetes_name_regexp)
  @tag_to_customlog_kubernetes_name_regexp_compiled = Regexp.compile('^tail\.(?<tag_prefix>[^\.]+)\.var\.lib\.origin\.openshift\.local\.volumes\.pods\.(?<pod_uuid>[-a-z0-9]+)\.volumes\.kubernetes.io~empty-dir\.(?<volume_name>[^\.]+)\.(?<file_name>.*)\.log$')
  @customlog_string_to_kubernetes_compiled = Regexp.compile('^(?<namespace>[^[:space:]]+)[[:space:]]+(?<pod_name>[^[:space:]]+)[[:space:]]+(?<container_name>[^[:space:]]+)[[:space:]]+(?<docker_id>[^[:space:]]+)$')
  @pod_log_filename_to_uuid_regexp_compiled = Regexp.compile('^\/var\/log\/pods\/(?<pod_uuid>[-a-z0-9]*)\/.*\/.*\.log$')
  @container_filename_to_uuid_regexp_compiled = Regexp.compile('^\/var\/log\/containers\/(?<pod_name>[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*)_(?<namespace>[^_]+)_(?<container_name>.+)-(?<docker_id>[a-z0-9]{64})\.log$')
  @container_name_to_kubernetes_regexp_compiled = Regexp.compile(@container_name_to_kubernetes_regexp)

  # Use Kubernetes default service account if we're in a pod.
  if @kubernetes_url.nil?
    log.debug "Kubernetes URL is not set - inspecting environ"

    env_host = ENV['KUBERNETES_SERVICE_HOST']
    env_port = ENV['KUBERNETES_SERVICE_PORT']
    if env_host.present? && env_port.present?
      @kubernetes_url = "https://#{env_host}:#{env_port}/api"
      log.debug "Kubernetes URL is now '#{@kubernetes_url}'"
    end
  end

  # Use SSL certificate and bearer token from Kubernetes service account.
  if Dir.exist?(@secret_dir)
    log.debug "Found directory with secrets: #{@secret_dir}"
    ca_cert = File.join(@secret_dir, K8_POD_CA_CERT)
    pod_token = File.join(@secret_dir, K8_POD_TOKEN)

    # Explicitly configured files take precedence over the service account ones.
    if !@ca_file.present? && File.exist?(ca_cert)
      log.debug "Found CA certificate: #{ca_cert}"
      @ca_file = ca_cert
    end

    if !@bearer_token_file.present? && File.exist?(pod_token)
      log.debug "Found pod token: #{pod_token}"
      @bearer_token_file = pod_token
    end
  end

  if @kubernetes_url.present?

    ssl_options = {
      client_cert: @client_cert.present? ? OpenSSL::X509::Certificate.new(File.read(@client_cert)) : nil,
      client_key: @client_key.present? ? OpenSSL::PKey::RSA.new(File.read(@client_key)) : nil,
      ca_file: @ca_file,
      verify_ssl: @verify_ssl ? OpenSSL::SSL::VERIFY_PEER : OpenSSL::SSL::VERIFY_NONE
    }

    if @ssl_partial_chain
      # taken from the ssl.rb OpenSSL::SSL::SSLContext code for DEFAULT_CERT_STORE
      ssl_store = OpenSSL::X509::Store.new
      ssl_store.set_default_paths
      if defined? OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
        flagval = OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
      else
        # this version of ruby does not define OpenSSL::X509::V_FLAG_PARTIAL_CHAIN
        flagval = 0x80000
      end
      ssl_store.flags = OpenSSL::X509::V_FLAG_CRL_CHECK_ALL | flagval
      ssl_options[:cert_store] = ssl_store
    end

    auth_options = {}

    if @bearer_token_file.present?
      bearer_token = File.read(@bearer_token_file)
      auth_options[:bearer_token] = bearer_token
    end

    log.debug "Creating K8S client"
    @client = Kubeclient::Client.new @kubernetes_url, @apiVersion,
                                     ssl_options: ssl_options,
                                     auth_options: auth_options

    begin
      @client.api_valid?
    rescue KubeException => kube_error
      raise Fluent::ConfigError, "Invalid Kubernetes API #{@apiVersion} endpoint #{@kubernetes_url}: #{kube_error.message}"
    end

    if @watch
      thread = Thread.new(self) { |this| this.start_pod_watch }
      thread.abort_on_exception = true
      namespace_thread = Thread.new(self) { |this| this.start_namespace_watch }
      namespace_thread.abort_on_exception = true
    end
  end

  # Record fields consulted, in this order, when deriving an event's creation time.
  @time_fields = []
  @time_fields.push('_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP') if @use_journal || @use_journal.nil?
  @time_fields.push('time') unless @use_journal
  @time_fields.push('@timestamp') if @lookup_from_k8s_field

  @annotations_regexps = []
  @annotation_match.each do |regexp|
    begin
      @annotations_regexps << Regexp.compile(regexp)
    rescue RegexpError => e
      # An invalid pattern is reported and skipped; the remaining patterns still apply.
      log.error "Error: invalid regular expression in annotation_match: #{e}"
    end
  end

  @pod_mapping = generate_map
end
291
+
292
# Builds the 'docker'/'kubernetes' metadata hash for one record.
#
# namespace_name, pod_name, container_name, container_id - identifiers of the
#   container that produced the record
# create_time      - creation time of the record, passed on to get_pod_metadata
# batch_miss_cache - per-batch cache, passed on to get_pod_metadata
#
# The result always carries the identifiers it was given. When a Kubernetes
# URL is configured, the hash returned by get_pod_metadata (defined outside
# this method) is merged into the 'kubernetes' section. Unless
# @skip_container_metadata is set, the image and image id of the matching
# entry under 'containers' are copied in first. The 'containers' entry itself
# is never returned.
#
# A nil result from get_pod_metadata, or a missing/nil 'containers' entry, is
# tolerated and simply leaves the basic metadata untouched.
def get_metadata_for_record(namespace_name, pod_name, container_name, container_id, create_time, batch_miss_cache)
  metadata = {
    'docker' => { 'container_id' => container_id },
    'kubernetes' => {
      'container_name' => container_name,
      'namespace_name' => namespace_name,
      'pod_name' => pod_name
    }
  }
  return metadata unless @kubernetes_url.present?

  pod_metadata = get_pod_metadata(container_id, namespace_name, pod_name, create_time, batch_miss_cache)
  if pod_metadata
    containers = pod_metadata['containers']
    if !@skip_container_metadata && containers && containers.include?(container_id)
      container = containers[container_id]
      metadata['kubernetes']['container_image'] = container['image']
      metadata['kubernetes']['container_image_id'] = container['image_id']
    end
    metadata['kubernetes'].merge!(pod_metadata)
  end
  metadata['kubernetes'].delete('containers')
  metadata
end
314
+
315
# Derives the creation time of a record as a Time.
#
# record        - the event record (hash)
# internal_time - the event's own time; used whenever the record carries no
#                 usable time value
#
# The first entry of @time_fields that is present in the record is used:
#   * journald timestamp fields hold microseconds since the epoch,
#   * any other field is parsed with Time.parse.
#
# Falls back to the event time when the field is missing, nil, a string of at
# most one character, not a String (for non-journald fields), or cannot be
# parsed, instead of raising and aborting the whole batch.
def create_time_from_record(record, internal_time)
  time_key = @time_fields.detect { |field| record.key?(field) }
  time = record[time_key]
  # `internal_time` is a Fluent::EventTime, it can't compare with Time.
  event_time = -> { Time.at(internal_time.to_f) }
  return event_time.call if time.nil? || (time.is_a?(String) && time.chop.empty?)

  if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
    return event_time.call unless time.respond_to?(:to_i)

    micros = time.to_i
    return Time.at(micros / 1_000_000, micros % 1_000_000)
  end

  # Only strings can be handed to Time.parse.
  return event_time.call unless time.is_a?(String)

  begin
    Time.parse(time)
  rescue ArgumentError => e
    log.debug "could not parse time '#{time}' from field '#{time_key}' (#{e}); using the event time instead"
    event_time.call
  end
end
328
+
329
# Enriches every record of an event stream with Kubernetes metadata.
#
# tag - the Fluentd tag of the stream
# es  - the event stream; returned untouched when it is empty or is not a
#       Fluent::EventStream
#
# Metadata is looked up from up to three sources, each later one replacing the
# earlier result when it yields something:
#   1. the tag (the container log tag pattern, or the custom-log tag pattern
#      handled by filter_customlog), resolved once per batch;
#   2. journald fields of the record, unless use_journal is false;
#   3. existing 'kubernetes'/'docker' fields of the record, when
#      lookup_from_k8s_field is enabled.
#
# For custom-log tags the volume and file name are added to the record as
# 'customlog_vol' and 'customlog_file'. These keys are only written when a
# value exists; records from other tags are not given nil-valued keys.
#
# Returns a new Fluent::MultiEventStream holding the enriched records.
def filter_stream(tag, es)
  return es if (es.respond_to?(:empty?) && es.empty?) || !es.is_a?(Fluent::EventStream)

  new_es = Fluent::MultiEventStream.new
  tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled) unless @use_journal
  tag_metadata = nil
  batch_miss_cache = {}
  customlog_vol = nil
  customlog_file = nil
  customlog_vol, customlog_file, tag_match_data = filter_customlog(tag) unless tag_match_data

  es.each do |time, record|
    if tag_match_data && tag_metadata.nil?
      tag_metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
                                             tag_match_data['docker_id'], create_time_from_record(record, time), batch_miss_cache)
    end
    # Deep copy so that records of the same batch never share metadata hashes.
    metadata = Marshal.load(Marshal.dump(tag_metadata)) if tag_metadata
    if (@use_journal || @use_journal.nil?) &&
       (j_metadata = get_metadata_for_journal_record(record, time, batch_miss_cache))
      metadata = j_metadata
    end
    if @lookup_from_k8s_field && record.has_key?('kubernetes') && record.has_key?('docker') &&
       record['kubernetes'].respond_to?(:has_key?) && record['docker'].respond_to?(:has_key?) &&
       record['kubernetes'].has_key?('namespace_name') &&
       record['kubernetes'].has_key?('pod_name') &&
       record['kubernetes'].has_key?('container_name') &&
       record['docker'].has_key?('container_id') &&
       (k_metadata = get_metadata_for_record(record['kubernetes']['namespace_name'], record['kubernetes']['pod_name'],
                                             record['kubernetes']['container_name'], record['docker']['container_id'],
                                             create_time_from_record(record, time), batch_miss_cache))
      metadata = k_metadata
    end

    # `defined?` is always truthy for a local the parser has seen assigned, so
    # test the values themselves.
    record['customlog_vol'] = customlog_vol if customlog_vol
    record['customlog_file'] = customlog_file if customlog_file
    record = record.merge(metadata) if metadata
    new_es.add(time, record)
  end
  dump_stats
  new_es
end
368
+
369
# Resolves a custom-log tag (a file tailed from a pod's empty-dir volume) to
# the pod it belongs to.
#
# tag - the Fluentd tag to inspect
#
# Returns a three-element array [volume_name, file_name, tag_match_data]:
#   * all three are nil when the tag is not a custom-log tag;
#   * volume_name and file_name are taken from the tag when it matches;
#   * tag_match_data is a MatchData exposing 'namespace', 'pod_name',
#     'container_name' (always "customlog") and 'docker_id', or nil when the
#     pod UUID from the tag is unknown even after regenerating @pod_mapping.
#
# nil (rather than an empty hash) is returned for "no match" so that callers
# testing the result for truthiness do not treat it as a successful match.
def filter_customlog(tag)
  customlog_match = tag.match(@tag_to_customlog_kubernetes_name_regexp_compiled)
  return nil, nil, nil unless customlog_match

  pod_uuid = customlog_match['pod_uuid']
  file_name = customlog_match['file_name']
  volume_name = customlog_match['volume_name']

  unless @pod_mapping[pod_uuid]
    # The pod may have started after the map was last built.
    @pod_mapping = generate_map
    log.info "Info: #{pod_uuid} not found, regenerating map"
  end

  tag_match_data = nil
  pod = @pod_mapping[pod_uuid]
  if pod
    # Re-match a synthetic string so the caller gets the same MatchData
    # interface as for regular container log tags.
    string = pod[:namespace] + " " + pod[:pod_name] + " customlog " + pod[:docker_id]
    tag_match_data = string.match(@customlog_string_to_kubernetes_compiled)
  else
    log.error "Error: #{pod_uuid} not found"
  end

  return volume_name, file_name, tag_match_data
end
391
+
392
# Builds a mapping from pod UUID to the pod's identifiers by scanning the
# container log symlinks on disk.
#
# container_log_glob - glob of container log symlinks to scan
#                      (default: '/var/log/containers/*.log')
#
# For each file whose path matches @container_filename_to_uuid_regexp_compiled
# the symlink target is read and matched against
# @pod_log_filename_to_uuid_regexp_compiled to obtain the pod UUID.
#
# Entries are skipped (not stored under a nil key, and not aborting the scan)
# when the file is not a readable symlink or its target carries no pod UUID.
#
# Returns a hash of pod_uuid => { namespace:, pod_name:, container_name:,
# docker_id: }. When several files resolve to the same UUID the last one
# scanned wins.
def generate_map(container_log_glob = '/var/log/containers/*.log')
  pod_mapping = {}

  Dir[container_log_glob].each do |item|
    match = item.match(@container_filename_to_uuid_regexp_compiled)
    next unless match

    begin
      link_target = File.readlink(item)
    rescue SystemCallError => e
      # Not a symlink, or the file vanished between listing and reading.
      log.debug "Skipping #{item}: cannot read link target (#{e})"
      next
    end

    pods_match = link_target.match(@pod_log_filename_to_uuid_regexp_compiled)
    next unless pods_match

    pod_uuid = pods_match['pod_uuid']
    namespace = match['namespace']
    pod_name = match['pod_name']

    log.info "Mapping uuid #{pod_uuid} to #{namespace}/#{pod_name}"
    pod_mapping[pod_uuid] = {
      namespace: namespace,
      pod_name: pod_name,
      container_name: match['container_name'],
      docker_id: match['docker_id']
    }
  end
  pod_mapping
end
416
+
417
# Looks up Kubernetes metadata for a record read from the journal.
#
# record           - the event record; its CONTAINER_NAME and
#                    CONTAINER_ID_FULL fields are inspected
# time             - the event time, used to derive the record's creation time
# batch_miss_cache - per-batch cache, passed on to get_metadata_for_record
#
# Returns the metadata produced by get_metadata_for_record when both fields
# are present and CONTAINER_NAME matches the configured pattern; otherwise a
# falsy value. A failed match, or a "k8s_" container name without a container
# id, is logged at debug level and counted in @stats.
def get_metadata_for_journal_record(record, time, batch_miss_cache)
  return nil unless record.has_key?('CONTAINER_NAME')

  unless record.has_key?('CONTAINER_ID_FULL')
    if record['CONTAINER_NAME'].start_with?('k8s_')
      log.debug "Error: no container name and id in record #{record}"
      @stats.bump(:container_name_id_missing)
    end
    return nil
  end

  name_match = record['CONTAINER_NAME'].match(@container_name_to_kubernetes_regexp_compiled)
  metadata = name_match && get_metadata_for_record(name_match['namespace'], name_match['pod_name'], name_match['container_name'],
                                                   record['CONTAINER_ID_FULL'], create_time_from_record(record, time), batch_miss_cache)
  unless metadata
    log.debug "Error: could not match CONTAINER_NAME from record #{record}"
    @stats.bump(:container_name_match_failed)
  end
  metadata
end
434
+
435
# Replaces every '.' in the top-level keys of a hash with @de_dot_separator,
# modifying the hash in place.
#
# h - the hash to rewrite; nested hashes are not visited
#
# Keys are compared and rewritten through their string form, so a renamed key
# is always a String (a Symbol key containing '.' becomes a String key) and
# keys of other types are handled without error. Entries whose value is nil
# or false are left untouched.
def de_dot!(h)
  # Iterate over a snapshot of the keys because the hash is modified inside the loop.
  h.keys.each do |ref|
    next unless h[ref] && ref.to_s.include?('.')

    v = h.delete(ref)
    newref = ref.to_s.gsub('.', @de_dot_separator)
    h[newref] = v
  end
end
444
+
445
+ end
446
+ end