fluent-plugin-sampling-filter 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 649911c9296c2b6a15a2a4a24a9762a577be6c40
4
- data.tar.gz: 2449647d5f8aedc8516d8dc3777aad753a956262
2
+ SHA256:
3
+ metadata.gz: 2830d5959dab3375606dd4733b95b1ecc4e7cec666791937a708791b6e841bf6
4
+ data.tar.gz: 0767a563091fa6674f7f37158857decdaef378c92b181c14054ccd8a183c6586
5
5
  SHA512:
6
- metadata.gz: 30583bdb0e00e4e99f4eccc342fa52989c652b88e0f9cfc2eab0c214e67620f826fc4e231ca02635b66f36d455854ec647bef7e2ad4e1856a8c81463260947c2
7
- data.tar.gz: cbcfb440312c6925cc2a7857f0c445265c12c60471fd5f1f99ff4a4828d46f5c60c686f5b17e8f3c35e72b6e9fefaf3f8580969423c01079b4bc47697d0642cf
6
+ metadata.gz: a65c1919e2e774326a3ef25e3f9348a530566434c2aca03803b5b63804b5aada9fa40ef3b4a1e137a00e6204b7f09f5d85d746f45c7f0a2e4177dbbe05afdb8b
7
+ data.tar.gz: c5602c44dc0dfd4007082e2c9138745423a1bed774ff0d453eed9e00c3b9941840fe6a3245b5cdd4b656fcd6960e014602c2a716a6d08653858dfb339cc07410
data/README.md CHANGED
@@ -2,7 +2,14 @@
2
2
 
3
3
  This is a [Fluentd](http://fluentd.org) plugin that samples matching messages, so that message behavior can be analysed and reported, and emits the sampled messages with a modified tag.
4
4
 
5
- * sampling rate per tags, or for all
5
+ * sampling rate per tag, per message field, or for all events
6
+
7
+ ## Requirements
8
+
9
+ | fluent-plugin-sampling-filter | fluentd | ruby |
10
+ |-------------------------------|------------|--------|
11
+ | >= 1.0.0 | >= v0.14.0 | >= 2.1 |
12
+ | < 1.0.0 | < v0.14.0 | >= 1.9 |
6
13
 
7
14
  ## Configuration
8
15
 
@@ -14,13 +21,14 @@ This filter passes a specified part of whole events to following filter/output p
14
21
  @type any_great_input
15
22
  @label @mydata
16
23
  </source>
17
-
24
+
18
25
  <label @mydata>
19
26
  <filter **>
20
27
  @type sampling
28
+ sample_unit all
21
29
  interval 10 # pass 1/10 events to following plugins
22
30
  </filter>
23
-
31
+
24
32
  <match **>
25
33
  @type ...
26
34
  </match>
@@ -32,14 +40,35 @@ Sampling is done for all events, but we can do it per matched tags:
32
40
  @type any_great_input
33
41
  @label @mydata
34
42
  </source>
35
-
43
+
36
44
  <label @mydata>
37
45
  <filter **>
38
46
  @type sampling
39
47
  interval 10
40
48
  sample_unit tag # 1/10 events for each tags
41
49
  </filter>
42
-
50
+
51
+ <match **>
52
+ @type ...
53
+ </match>
54
+ </label>
55
+
56
+
57
+ We can also sample based on a value in the message:
58
+
59
+ <source>
60
+ @type any_great_input
61
+ @label @mydata
62
+ </source>
63
+
64
+ <label @mydata>
65
+ <filter **>
66
+ @type sampling
67
+ interval 10
68
+ # pass 1/10 events per user given events like: { user: { name: "Bob" }, ... }
69
+ sample_unit $.user.name
70
+ </filter>
71
+
43
72
  <match **>
44
73
  @type ...
45
74
  </match>
@@ -47,6 +76,9 @@ Sampling is done for all events, but we can do it per matched tags:
47
76
 
48
77
  `minimum_rate_per_min` option(integer) configures this plugin to pass events at the specified minimum rate per minute, no matter how small the total number of events is.
49
78
 
79
+ `sample_unit` option(string) configures this plugin to sample data per tag (default), across all events ('all'), or per field value
80
+ using the [record accessor syntax](https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor).
81
+
50
82
  ### SamplingFilterOutput
51
83
 
52
84
  **NOTE: This plugin is deprecated. Use filter plugin instead.**
@@ -58,7 +90,7 @@ Pickup 1/10 messages about each tags(default: `sample_unit tag`), and add tag pr
58
90
  interval 10
59
91
  add_prefix sampled
60
92
  </match>
61
-
93
+
62
94
  <match sampled.**>
63
95
  # output configurations where to send sampled messages
64
96
  </match>
@@ -72,7 +104,7 @@ Pickup 1/100 messages of all matched messages, and modify tags from `input.**` t
72
104
  remove_prefix input
73
105
  add_prefix output
74
106
  </match>
75
-
107
+
76
108
  <match sampled.**>
77
109
  # output configurations where to send sampled messages
78
110
  </match>
@@ -2,7 +2,7 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-sampling-filter"
5
- gem.version = "1.1.0"
5
+ gem.version = "1.2.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.description = %q{fluentd plugin to pickup sample data from matched massages}
@@ -6,14 +6,17 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
6
6
  Fluent::Plugin.register_filter('sampling_filter', self)
7
7
 
8
8
  config_param :interval, :integer
9
- config_param :sample_unit, :enum, list: [:tag, :all], default: :tag
9
+ config_param :sample_unit, :string, default: 'tag'
10
10
  config_param :minimum_rate_per_min, :integer, default: nil
11
11
 
12
+ helpers :record_accessor
13
+
12
14
  def configure(conf)
13
15
  super
14
16
 
15
17
  @counts = {}
16
18
  @resets = {} if @minimum_rate_per_min
19
+ @accessor = record_accessor_create(@sample_unit) unless %w(all tag).include?(@sample_unit)
17
20
  end
18
21
 
19
22
  # Access to @counts SHOULD be protected by mutex, with a heavy penalty.
@@ -22,7 +25,7 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
22
25
  # then i let here as it is now.
23
26
 
24
27
  def filter(tag, _time, record)
25
- t = @sample_unit == :all ? 'all' : tag
28
+ t = record_key(tag, record)
26
29
  if @minimum_rate_per_min
27
30
  filter_with_minimum_rate(t, record)
28
31
  else
@@ -54,4 +57,15 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
54
57
  nil
55
58
  end
56
59
  end
60
+
61
+ def record_key(tag, record)
62
+ case @sample_unit
63
+ when 'all'
64
+ 'all'
65
+ when 'tag'
66
+ tag
67
+ else
68
+ @accessor.call(record)
69
+ end
70
+ end
57
71
  end
@@ -24,14 +24,21 @@ class SamplingFilterTest < Test::Unit::TestCase
24
24
  ]
25
25
 
26
26
  assert_equal 5, d.instance.interval
27
- assert_equal :tag, d.instance.sample_unit
27
+ assert_equal 'tag', d.instance.sample_unit
28
28
 
29
29
  d = create_driver %[
30
30
  interval 1000
31
31
  sample_unit all
32
32
  ]
33
33
  assert_equal 1000, d.instance.interval
34
- assert_equal :all, d.instance.sample_unit
34
+ assert_equal 'all', d.instance.sample_unit
35
+
36
+ d = create_driver %[
37
+ interval 1000
38
+ sample_unit $fake
39
+ ]
40
+ assert_equal 1000, d.instance.interval
41
+ assert_equal "$fake", d.instance.sample_unit
35
42
  end
36
43
 
37
44
  def test_filter
@@ -123,4 +130,57 @@ minimum_rate_per_min 10
123
130
  assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|_time,r| r['times']}
124
131
  assert_equal (['x']*12), filtered.map{|_time,r| r['data']}
125
132
  end
133
+
134
+ def test_filer_with_record_accessor
135
+ d2 = create_driver(%[
136
+ interval 3
137
+ sample_unit field3
138
+ ])
139
+ time = Time.parse("2012-01-02 13:14:15").to_i
140
+ d2.run(default_tag: 'input.hoge2') do
141
+ (1..12).each do |i|
142
+ [1,2].each do |sample_vaule|
143
+ d2.feed({'field1' => "record#{i}", 'field2' => i, 'field3' => sample_vaule})
144
+ end
145
+ end
146
+ end
147
+ filtered = d2.filtered
148
+ assert_equal 8, filtered.length
149
+
150
+ assert_equal 'record3', filtered[0][1]['field1']
151
+ assert_equal 1, filtered[0][1]['field3']
152
+ assert_equal 'record3', filtered[1][1]['field1']
153
+ assert_equal 2, filtered[1][1]['field3']
154
+ assert_equal 'record6', filtered[2][1]['field1']
155
+ assert_equal 1, filtered[2][1]['field3']
156
+ assert_equal 'record6', filtered[3][1]['field1']
157
+ assert_equal 2, filtered[3][1]['field3']
158
+ assert_equal 'record9', filtered[4][1]['field1']
159
+ assert_equal 1, filtered[4][1]['field3']
160
+ assert_equal 'record9', filtered[5][1]['field1']
161
+ assert_equal 2, filtered[5][1]['field3']
162
+ assert_equal 'record12', filtered[6][1]['field1']
163
+ assert_equal 1, filtered[6][1]['field3']
164
+ assert_equal 'record12', filtered[7][1]['field1']
165
+ assert_equal 2, filtered[7][1]['field3']
166
+ end
167
+
168
+ def test_filter_all
169
+ config = %[
170
+ interval 10
171
+ sample_unit all
172
+ ]
173
+ d1 = create_driver(config)
174
+ time = Time.parse("2012-01-02 13:14:15").to_i
175
+ d1.run do
176
+ 6.times do |i|
177
+ [0,1].each do |j|
178
+ d1.feed("input.hoge#{2*i+j}", time, {'field1' => "record#{2*i+j+1}"})
179
+ end
180
+ end
181
+ end
182
+ filtered = d1.filtered
183
+ assert_equal 1, filtered.length
184
+ assert_equal 'record10', filtered[0][1]['field1']
185
+ end
126
186
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-sampling-filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-01 00:00:00.000000000 Z
11
+ date: 2021-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -83,7 +83,7 @@ homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
83
83
  licenses:
84
84
  - Apache-2.0
85
85
  metadata: {}
86
- post_install_message:
86
+ post_install_message:
87
87
  rdoc_options: []
88
88
  require_paths:
89
89
  - lib
@@ -98,9 +98,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
98
98
  - !ruby/object:Gem::Version
99
99
  version: '0'
100
100
  requirements: []
101
- rubyforge_project:
102
- rubygems_version: 2.6.8
103
- signing_key:
101
+ rubygems_version: 3.2.3
102
+ signing_key:
104
103
  specification_version: 4
105
104
  summary: fluentd plugin to pickup sample data from matched massages
106
105
  test_files: