fluent-plugin-kubernetes_metadata_filter 2.13.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -1
  3. data/Gemfile.lock +1 -1
  4. data/README.md +3 -52
  5. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/cpu.png +0 -0
  6. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/latency.png +0 -0
  7. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/loss.png +0 -0
  8. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/mem.png +0 -0
  9. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/readme.md +88 -0
  10. data/doc/benchmark/5m-1-2500lps-256b-baseline-01/results.html +127 -0
  11. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/cpu.png +0 -0
  12. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/latency.png +0 -0
  13. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/loss.png +0 -0
  14. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/mem.png +0 -0
  15. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/readme.md +97 -0
  16. data/doc/benchmark/5m-1-2500lps-256b-kube-01-01/results.html +136 -0
  17. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/cpu.png +0 -0
  18. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/latency.png +0 -0
  19. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/loss.png +0 -0
  20. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/mem.png +0 -0
  21. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/readme.md +97 -0
  22. data/doc/benchmark/5m-1-2500lps-256b-kube-01-marshal-02/results.html +136 -0
  23. data/fluent-plugin-kubernetes_metadata_filter.gemspec +1 -1
  24. data/lib/fluent/plugin/filter_kubernetes_metadata.rb +9 -78
  25. data/lib/fluent/plugin/kubernetes_metadata_cache_strategy.rb +2 -2
  26. data/lib/fluent/plugin/kubernetes_metadata_common.rb +1 -16
  27. data/lib/fluent/plugin/kubernetes_metadata_stats.rb +16 -0
  28. data/lib/fluent/plugin/kubernetes_metadata_util.rb +0 -20
  29. data/test/plugin/test_filter_kubernetes_metadata.rb +23 -411
  30. metadata +20 -3
  31. data/test/plugin/test_utils.rb +0 -56
@@ -0,0 +1,97 @@
1
+
2
+ # collector Functional Benchmark Results
3
+ ## Options
4
+ * Image: quay.io/jcantril/fluentd:marshal
5
+ * Total Log Stressors: 1
6
+ * Lines Per Second: 2500
7
+ * Run Duration: 5m
8
+ * Payload Source: synthetic
9
+
10
+ ## Latency of logs collected based on the time the log was generated and ingested
11
+
12
+ Total Msg| Size | Elapsed (s) | Mean (s)| Min(s) | Max (s)| Median (s)
13
+ ---------|------|-------------|---------|--------|--------|---
14
+ 722919|256|5m0s|14.322|1.077|32.992|13.513
15
+
16
+ ![](cpu.png)
17
+
18
+ ![](mem.png)
19
+
20
+ ![](latency.png)
21
+
22
+ ![](loss.png)
23
+
24
+ ## Percent logs lost between first and last collected sequence ids
25
+ Stream | Min Seq | Max Seq | Purged | Collected | Percent Collected |
26
+ -------| ---------| --------| -------|-----------|--------------|
27
+ | loader-0|0|722918|0|722919|100.0%
28
+
29
+
30
+ ## Config
31
+
32
+ ```
33
+ <system>
34
+ log_level debug
35
+ </system>
36
+
37
+ <source>
38
+ @type tail
39
+ @id container-input
40
+ path /var/log/pods/testhack-ksowqntt_*/loader-*/*
41
+ exclude_path ["/var/log/pods/**/*/*.gz","/var/log/pods/**/*/*.tmp"]
42
+ pos_file "/var/lib/fluentd/pos/containers-app"
43
+ refresh_interval 5
44
+ rotate_wait 5
45
+ tag kubernetes.*
46
+ read_from_head "true"
47
+ <parse>
48
+ @type regexp
49
+ expression /^(?<@timestamp>[^\s]+) (?<stream>stdout|stderr) (?<logtag>[F|P]) (?<message>.*)$/
50
+ time_format '%Y-%m-%dT%H:%M:%S.%N%:z'
51
+ keep_time_key true
52
+ </parse>
53
+ </source>
54
+
55
+ <filter kubernetes.**>
56
+ @type concat
57
+ key message
58
+ partial_key logtag
59
+ partial_value P
60
+ separator ''
61
+ </filter>
62
+
63
+ <filter kubernetes.**>
64
+ @id kubernetes-metadata
65
+ @type kubernetes_metadata
66
+ kubernetes_url 'https://kubernetes.default.svc'
67
+ annotation_match ["^containerType\.logging\.openshift\.io\/.*$"]
68
+ allow_orphans false
69
+ cache_size '1000'
70
+ use_journal 'nil'
71
+ ssl_partial_chain 'true'
72
+ </filter>
73
+
74
+ <match **>
75
+ @type forward
76
+ heartbeat_type none
77
+ keepalive true
78
+
79
+ <buffer>
80
+ flush_mode interval
81
+ flush_interval 5s
82
+ flush_at_shutdown true
83
+ flush_thread_count 2
84
+ retry_type exponential_backoff
85
+ retry_wait 1s
86
+ retry_max_interval 60s
87
+ retry_forever true
88
+ overflow_action block
89
+ </buffer>
90
+
91
+ <server>
92
+ host 0.0.0.0
93
+ port 24224
94
+ </server>
95
+ </match>
96
+ ```
97
+
@@ -0,0 +1,136 @@
1
+
2
+ <html>
3
+ <div>
4
+ <div><b>Options</b></div>
5
+ <div>Image: quay.io/jcantril/fluentd:marshal</div>
6
+ <div>Total Log Stressors: 1</div>
7
+ <div>Lines Per Second: 2500</div>
8
+ <div>Run Duration: 5m</div>
9
+ <div>Payload Source: synthetic</div>
10
+ </div>
11
+ <div>
12
+ Latency of logs collected based on the time the log was generated and ingested
13
+ </div>
14
+ <table border="1">
15
+ <tr>
16
+ <th>Total</th>
17
+ <th>Size</th>
18
+ <th>Elapsed</th>
19
+ <th>Mean</th>
20
+ <th>Min</th>
21
+ <th>Max</th>
22
+ <th>Median</th>
23
+ </tr>
24
+ <tr>
25
+ <th>Msg</th>
26
+ <th></th>
27
+ <th>(s)</th>
28
+ <th>(s)</th>
29
+ <th>(s)</th>
30
+ <th>(s)</th>
31
+ <th>(s)</th>
32
+ </tr>
33
+ <tr>
34
+ <td>722919</td>
35
+ <td>256</td>
36
+ <td>5m0s</td>
37
+ <td>14.322</td>
38
+ <td>1.077</td>
39
+ <td>32.992</td>
40
+ <td>13.513</td>
41
+ </tr>
42
+ </table>
43
+ <div>
44
+ <img src="cpu.png">
45
+ </div>
46
+ <div>
47
+ <img src="mem.png">
48
+ </div>
49
+ <div>
50
+ <img src="latency.png">
51
+ </div>
52
+ <div>
53
+ <img src="loss.png">
54
+ </div>
55
+ <div>
56
+ <table border="1">
57
+ <tr>
58
+ <th>Stream</th>
59
+ <th>Min Seq</th>
60
+ <th>Max Seq</th>
61
+ <th>Purged</th>
62
+ <th>Collected</th>
63
+ <th>Percent Collected</th>
64
+ </tr>
65
+ <tr>
66
+ <tr><td>loader-0</td><td>0</td><td>722918</td><td>0</td><td>722919</td><td>100.0%</td></tr>
67
+ </table>
68
+ </div>
69
+ <div>
70
+ <code style="display:block;white-space:pre-wrap">
71
+ &lt;system&gt;
72
+ log_level debug
73
+ &lt;/system&gt;
74
+
75
+ &lt;source&gt;
76
+ @type tail
77
+ @id container-input
78
+ path /var/log/pods/testhack-ksowqntt_*/loader-*/*
79
+ exclude_path [&#34;/var/log/pods/**/*/*.gz&#34;,&#34;/var/log/pods/**/*/*.tmp&#34;]
80
+ pos_file &#34;/var/lib/fluentd/pos/containers-app&#34;
81
+ refresh_interval 5
82
+ rotate_wait 5
83
+ tag kubernetes.*
84
+ read_from_head &#34;true&#34;
85
+ &lt;parse&gt;
86
+ @type regexp
87
+ expression /^(?&lt;@timestamp&gt;[^\s]+) (?&lt;stream&gt;stdout|stderr) (?&lt;logtag&gt;[F|P]) (?&lt;message&gt;.*)$/
88
+ time_format &#39;%Y-%m-%dT%H:%M:%S.%N%:z&#39;
89
+ keep_time_key true
90
+ &lt;/parse&gt;
91
+ &lt;/source&gt;
92
+
93
+ &lt;filter kubernetes.**&gt;
94
+ @type concat
95
+ key message
96
+ partial_key logtag
97
+ partial_value P
98
+ separator &#39;&#39;
99
+ &lt;/filter&gt;
100
+
101
+ &lt;filter kubernetes.**&gt;
102
+ @id kubernetes-metadata
103
+ @type kubernetes_metadata
104
+ kubernetes_url &#39;https://kubernetes.default.svc&#39;
105
+ annotation_match [&#34;^containerType\.logging\.openshift\.io\/.*$&#34;]
106
+ allow_orphans false
107
+ cache_size &#39;1000&#39;
108
+ use_journal &#39;nil&#39;
109
+ ssl_partial_chain &#39;true&#39;
110
+ &lt;/filter&gt;
111
+
112
+ &lt;match **&gt;
113
+ @type forward
114
+ heartbeat_type none
115
+ keepalive true
116
+
117
+ &lt;buffer&gt;
118
+ flush_mode interval
119
+ flush_interval 5s
120
+ flush_at_shutdown true
121
+ flush_thread_count 2
122
+ retry_type exponential_backoff
123
+ retry_wait 1s
124
+ retry_max_interval 60s
125
+ retry_forever true
126
+ overflow_action block
127
+ &lt;/buffer&gt;
128
+
129
+ &lt;server&gt;
130
+ host 0.0.0.0
131
+ port 24224
132
+ &lt;/server&gt;
133
+ &lt;/match&gt;
134
+ </code>
135
+ </div>
136
+ </html>
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  Gem::Specification.new do |gem|
7
7
  gem.name = 'fluent-plugin-kubernetes_metadata_filter'
8
- gem.version = '2.13.0'
8
+ gem.version = '3.0.0'
9
9
  gem.authors = ['OpenShift Cluster Logging','Jimmi Dyson']
10
10
  gem.email = ['team-logging@redhat.com','jimmidyson@gmail.com']
11
11
  gem.description = 'Filter plugin to add Kubernetes metadata'
@@ -22,7 +22,6 @@
22
22
  require_relative 'kubernetes_metadata_cache_strategy'
23
23
  require_relative 'kubernetes_metadata_common'
24
24
  require_relative 'kubernetes_metadata_stats'
25
- require_relative 'kubernetes_metadata_util'
26
25
  require_relative 'kubernetes_metadata_watch_namespaces'
27
26
  require_relative 'kubernetes_metadata_watch_pods'
28
27
 
@@ -36,7 +35,6 @@ module Fluent::Plugin
36
35
 
37
36
  include KubernetesMetadata::CacheStrategy
38
37
  include KubernetesMetadata::Common
39
- include KubernetesMetadata::Util
40
38
  include KubernetesMetadata::WatchNamespaces
41
39
  include KubernetesMetadata::WatchPods
42
40
 
@@ -64,22 +62,6 @@ module Fluent::Plugin
64
62
 
65
63
  config_param :bearer_token_file, :string, default: nil
66
64
  config_param :secret_dir, :string, default: '/var/run/secrets/kubernetes.io/serviceaccount'
67
- config_param :de_dot, :bool, default: true
68
- config_param :de_dot_separator, :string, default: '_'
69
- config_param :de_slash, :bool, default: false
70
- config_param :de_slash_separator, :string, default: '__'
71
- # if reading from the journal, the record will contain the following fields in the following
72
- # format:
73
- # CONTAINER_NAME=k8s_$containername.$containerhash_$podname_$namespacename_$poduuid_$rand32bitashex
74
- # CONTAINER_FULL_ID=dockeridassha256hexvalue
75
- config_param :use_journal, :bool, default: nil
76
- # Field 2 is the container_hash, field 5 is the pod_id, and field 6 is the pod_randhex
77
- # I would have included them as named groups, but you can't have named groups that are
78
- # non-capturing :P
79
- # parse format is defined here: https://github.com/kubernetes/kubernetes/blob/release-1.6/pkg/kubelet/dockertools/docker.go#L317
80
- config_param :container_name_to_kubernetes_regexp,
81
- :string,
82
- default: '^(?<name_prefix>[^_]+)_(?<container_name>[^\._]+)(\.(?<container_hash>[^_]+))?_(?<pod_name>[^_]+)_(?<namespace>[^_]+)_[^_]+_[^_]+$'
83
65
 
84
66
  config_param :annotation_match, :array, default: []
85
67
  config_param :stats_interval, :integer, default: 30
@@ -188,14 +170,11 @@ module Fluent::Plugin
188
170
 
189
171
  require 'kubeclient'
190
172
  require 'lru_redux'
191
- @stats = KubernetesMetadata::Stats.new
192
-
193
- if @de_dot && @de_dot_separator.include?('.')
194
- raise Fluent::ConfigError, "Invalid de_dot_separator: cannot be or contain '.'"
195
- end
196
173
 
197
- if @de_slash && @de_slash_separator.include?('/')
198
- raise Fluent::ConfigError, "Invalid de_slash_separator: cannot be or contain '/'"
174
+ @stats = KubernetesMetadata::Stats.new
175
+ if @stats_interval <= 0
176
+ @stats = KubernetesMetadata::NoOpStats.new
177
+ self.define_singleton_method(:dump_stats) {}
199
178
  end
200
179
 
201
180
  if @cache_ttl < 0
@@ -214,8 +193,6 @@ module Fluent::Plugin
214
193
 
215
194
  @tag_to_kubernetes_name_regexp_compiled = Regexp.compile(@tag_to_kubernetes_name_regexp)
216
195
 
217
- @container_name_to_kubernetes_regexp_compiled = Regexp.compile(@container_name_to_kubernetes_regexp)
218
-
219
196
  # Use Kubernetes default service account if we're in a pod.
220
197
  if @kubernetes_url.nil?
221
198
  log.debug 'Kubernetes URL is not set - inspecting environ'
@@ -304,10 +281,6 @@ module Fluent::Plugin
304
281
  namespace_thread.abort_on_exception = true
305
282
  end
306
283
  end
307
- @time_fields = []
308
- @time_fields.push('_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP') if @use_journal || @use_journal.nil?
309
- @time_fields.push('time') unless @use_journal
310
- @time_fields.push('@timestamp') if @lookup_from_k8s_field
311
284
 
312
285
  @annotations_regexps = []
313
286
  @annotation_match.each do |regexp|
@@ -356,7 +329,7 @@ module Fluent::Plugin
356
329
  end
357
330
 
358
331
  def filter(tag, time, record)
359
- tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled) unless @use_journal
332
+ tag_match_data = tag.match(@tag_to_kubernetes_name_regexp_compiled)
360
333
  batch_miss_cache = {}
361
334
  if tag_match_data
362
335
  cache_key = if tag_match_data.names.include?('pod_uuid') && !tag_match_data['pod_uuid'].nil?
@@ -365,13 +338,8 @@ module Fluent::Plugin
365
338
  tag_match_data['docker_id']
366
339
  end
367
340
  docker_id = tag_match_data.names.include?('docker_id') ? tag_match_data['docker_id'] : nil
368
- tag_metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
369
- cache_key, create_time_from_record(record, time), batch_miss_cache, docker_id)
370
- end
371
- metadata = Marshal.load(Marshal.dump(tag_metadata)) if tag_metadata
372
- if (@use_journal || @use_journal.nil?) &&
373
- (j_metadata = get_metadata_for_journal_record(record, time, batch_miss_cache))
374
- metadata = j_metadata
341
+ metadata = get_metadata_for_record(tag_match_data['namespace'], tag_match_data['pod_name'], tag_match_data['container_name'],
342
+ cache_key, time, batch_miss_cache, docker_id)
375
343
  end
376
344
  if @lookup_from_k8s_field && record.key?('kubernetes') && record.key?('docker') &&
377
345
  record['kubernetes'].respond_to?(:has_key?) && record['docker'].respond_to?(:has_key?) &&
@@ -381,50 +349,13 @@ module Fluent::Plugin
381
349
  record['docker'].key?('container_id') &&
382
350
  (k_metadata = get_metadata_for_record(record['kubernetes']['namespace_name'], record['kubernetes']['pod_name'],
383
351
  record['kubernetes']['container_name'], record['docker']['container_id'],
384
- create_time_from_record(record, time), batch_miss_cache, record['docker']['container_id']))
352
+ time, batch_miss_cache, record['docker']['container_id']))
385
353
  metadata = k_metadata
386
354
  end
355
+ dump_stats
387
356
  metadata ? record.merge(metadata) : record
388
357
  end
389
358
 
390
- def get_metadata_for_journal_record(record, time, batch_miss_cache)
391
- metadata = nil
392
- if record.key?('CONTAINER_NAME') && record.key?('CONTAINER_ID_FULL')
393
- metadata = record['CONTAINER_NAME'].match(@container_name_to_kubernetes_regexp_compiled) do |match_data|
394
- get_metadata_for_record(match_data['namespace'], match_data['pod_name'], match_data['container_name'],
395
- record['CONTAINER_ID_FULL'], create_time_from_record(record, time), batch_miss_cache, record['CONTAINER_ID_FULL'])
396
- end
397
- unless metadata
398
- log.debug "Error: could not match CONTAINER_NAME from record #{record}"
399
- @stats.bump(:container_name_match_failed)
400
- end
401
- elsif record.key?('CONTAINER_NAME') && record['CONTAINER_NAME'].start_with?('k8s_')
402
- log.debug "Error: no container name and id in record #{record}"
403
- @stats.bump(:container_name_id_missing)
404
- end
405
- metadata
406
- end
407
-
408
- def de_dot!(h)
409
- h.keys.each do |ref|
410
- next unless h[ref] && ref =~ /\./
411
-
412
- v = h.delete(ref)
413
- newref = ref.to_s.gsub('.', @de_dot_separator)
414
- h[newref] = v
415
- end
416
- end
417
-
418
- def de_slash!(h)
419
- h.keys.each do |ref|
420
- next unless h[ref] && ref =~ /\//
421
-
422
- v = h.delete(ref)
423
- newref = ref.to_s.gsub('/', @de_slash_separator)
424
- h[newref] = v
425
- end
426
- end
427
-
428
359
  # copied from activesupport
429
360
  def present?(object)
430
361
  object.respond_to?(:empty?) ? !object.empty? : !!object
@@ -20,7 +20,7 @@
20
20
  #
21
21
  module KubernetesMetadata
22
22
  module CacheStrategy
23
- def get_pod_metadata(key, namespace_name, pod_name, record_create_time, batch_miss_cache)
23
+ def get_pod_metadata(key, namespace_name, pod_name, time, batch_miss_cache)
24
24
  metadata = {}
25
25
  ids = @id_cache[key]
26
26
  if ids.nil?
@@ -44,7 +44,7 @@ module KubernetesMetadata
44
44
  # pod not found, but namespace found
45
45
  @stats.bump(:id_cache_pod_not_found_namespace)
46
46
  ns_time = Time.parse(namespace_metadata['creation_timestamp'])
47
- if ns_time <= record_create_time
47
+ if ns_time <= Time.at(time.to_f)
48
48
  # namespace is older than record for pod
49
49
  ids[:pod_id] = key
50
50
  metadata = @cache.fetch(ids[:pod_id]) do
@@ -43,14 +43,7 @@ module KubernetesMetadata
43
43
  labels = syms_to_strs(namespace_object[:metadata][:labels].to_h) unless @skip_labels
44
44
 
45
45
  annotations = match_annotations(syms_to_strs(namespace_object[:metadata][:annotations].to_h))
46
- if @de_dot
47
- de_dot!(labels) unless @skip_labels
48
- de_dot!(annotations)
49
- end
50
- if @de_slash
51
- de_slash!(labels) unless @skip_labels
52
- de_slash!(annotations)
53
- end
46
+
54
47
  kubernetes_metadata = {
55
48
  'namespace_id' => namespace_object[:metadata][:uid],
56
49
  'creation_timestamp' => namespace_object[:metadata][:creationTimestamp]
@@ -65,14 +58,6 @@ module KubernetesMetadata
65
58
  labels = syms_to_strs(pod_object[:metadata][:labels].to_h) unless @skip_labels
66
59
 
67
60
  annotations = match_annotations(syms_to_strs(pod_object[:metadata][:annotations].to_h))
68
- if @de_dot
69
- de_dot!(labels) unless @skip_labels
70
- de_dot!(annotations)
71
- end
72
- if @de_slash
73
- de_slash!(labels) unless @skip_labels
74
- de_slash!(annotations)
75
- end
76
61
 
77
62
  # collect container information
78
63
  container_meta = {}
@@ -43,4 +43,20 @@ module KubernetesMetadata
43
43
  end.join(', ')
44
44
  end
45
45
  end
46
+ class NoOpStats
47
+ def initialize
48
+ end
49
+
50
+ def bump(key)
51
+ end
52
+
53
+ def set(key, value)
54
+ end
55
+
56
+ def [](key)
57
+ end
58
+
59
+ def to_s
60
+ end
61
+ end
46
62
  end
@@ -18,26 +18,6 @@
18
18
  # See the License for the specific language governing permissions and
19
19
  # limitations under the License.
20
20
  #
21
- module KubernetesMetadata
22
- module Util
23
- def create_time_from_record(record, internal_time)
24
- time_key = @time_fields.detect { |ii| record.key?(ii) }
25
- time = record[time_key]
26
- if time.nil? || time.is_a?(String) && time.chop.empty?
27
- # `internal_time` is a Fluent::EventTime, it can't compare with Time.
28
- return Time.at(internal_time.to_f)
29
- end
30
-
31
- if ['_SOURCE_REALTIME_TIMESTAMP', '__REALTIME_TIMESTAMP'].include?(time_key)
32
- timei = time.to_i
33
- return Time.at(timei / 1_000_000, timei % 1_000_000)
34
- end
35
- return Time.at(time) if time.is_a?(Numeric)
36
-
37
- Time.parse(time)
38
- end
39
- end
40
- end
41
21
 
42
22
  #https://stackoverflow.com/questions/5622435/how-do-i-convert-a-ruby-class-name-to-a-underscore-delimited-symbol
43
23
  class String