RubyGems - fluent-plugin-sampling-filter - Versions diffs - 1.1.0 → 1.2.0 - Mend

fluent-plugin-sampling-filter 1.1.0 → 1.2.0

Files changed (6) hide show

checksums.yaml +5 -5
data/README.md +39 -7
data/fluent-plugin-sampling-filter.gemspec +1 -1
data/lib/fluent/plugin/filter_sampling.rb +16 -2
data/test/plugin/test_filter_sampling.rb +62 -2
metadata +6 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 649911c9296c2b6a15a2a4a24a9762a577be6c40
-  data.tar.gz: 2449647d5f8aedc8516d8dc3777aad753a956262
+SHA256:
+  metadata.gz: 2830d5959dab3375606dd4733b95b1ecc4e7cec666791937a708791b6e841bf6
+  data.tar.gz: 0767a563091fa6674f7f37158857decdaef378c92b181c14054ccd8a183c6586
 SHA512:
-  metadata.gz: 30583bdb0e00e4e99f4eccc342fa52989c652b88e0f9cfc2eab0c214e67620f826fc4e231ca02635b66f36d455854ec647bef7e2ad4e1856a8c81463260947c2
-  data.tar.gz: cbcfb440312c6925cc2a7857f0c445265c12c60471fd5f1f99ff4a4828d46f5c60c686f5b17e8f3c35e72b6e9fefaf3f8580969423c01079b4bc47697d0642cf
+  metadata.gz: a65c1919e2e774326a3ef25e3f9348a530566434c2aca03803b5b63804b5aada9fa40ef3b4a1e137a00e6204b7f09f5d85d746f45c7f0a2e4177dbbe05afdb8b
+  data.tar.gz: c5602c44dc0dfd4007082e2c9138745423a1bed774ff0d453eed9e00c3b9941840fe6a3245b5cdd4b656fcd6960e014602c2a716a6d08653858dfb339cc07410

data/README.md CHANGED Viewed

@@ -2,7 +2,14 @@
 This is a [Fluentd](http://fluentd.org) plugin that samples matching messages, so that message behavior can be analysed and reported, and emits the sampled messages with a modified tag.
-* sampling rate per tags, or for all
+* sampling rate per tag, per message field, or for all events
+## Requirements
+| fluent-plugin-sampling-filter | fluentd    | ruby   |
+|-------------------------------|------------|--------|
+| >= 1.0.0                      | >= v0.14.0 | >= 2.1 |
+| <  1.0.0                      | <  v0.14.0 | >= 1.9 |
 ## Configuration
@@ -14,13 +21,14 @@ This filter passes a specified part of whole events to following filter/output p
       @type any_great_input
       @label @mydata
     </source>
     <label @mydata>
       <filter **>
         @type sampling
+        sample_unit all
         interval 10    # pass 1/10 events to following plugins
       </filter>
       <match **>
         @type ...
       </match>
@@ -32,14 +40,35 @@ Sampling is done for all events, but we can do it per matched tags:
       @type any_great_input
       @label @mydata
     </source>
     <label @mydata>
       <filter **>
         @type sampling
         interval 10
         sample_unit tag # 1/10 events for each tags
       </filter>
+      <match **>
+        @type ...
+      </match>
+    </label>
+We can also sample based on a value in the message:
+    <source>
+      @type any_great_input
+      @label @mydata
+    </source>
+    <label @mydata>
+      <filter **>
+        @type sampling
+        interval 10
+        # pass 1/10 events per user given events like: { user: { name: "Bob" }, ... }
+        sample_unit $.user.name
+      </filter>
       <match **>
         @type ...
       </match>
@@ -47,6 +76,9 @@ Sampling is done for all events, but we can do it per matched tags:
 `minimum_rate_per_min` option (integer) configures this plugin to pass events at the specified rate no matter how small the total number of events is.
+`sample_unit` option (string) configures this plugin to sample data per tag (`tag`, the default), across all events (`all`), or per field value
+using the [record accessor syntax](https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor).
 ### SamplingFilterOutput
 **NOTE: This plugin is deprecated. Use filter plugin instead.**
@@ -58,7 +90,7 @@ Pickup 1/10 messages about each tags(default: `sample_unit tag`), and add tag pr
       interval 10
       add_prefix sampled
     </match>
     <match sampled.**>
       # output configurations where to send sampled messages
     </match>
@@ -72,7 +104,7 @@ Pickup 1/100 messages of all matched messages, and modify tags from `input.**` t
       remove_prefix input
       add_prefix output
     </match>
     <match sampled.**>
       # output configurations where to send sampled messages
     </match>

data/fluent-plugin-sampling-filter.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |gem|
   gem.name          = "fluent-plugin-sampling-filter"
-  gem.version       = "1.1.0"
+  gem.version       = "1.2.0"
   gem.authors       = ["TAGOMORI Satoshi"]
   gem.email         = ["tagomoris@gmail.com"]
   gem.description   = %q{fluentd plugin to pickup sample data from matched massages}

data/lib/fluent/plugin/filter_sampling.rb CHANGED Viewed

@@ -6,14 +6,17 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
   Fluent::Plugin.register_filter('sampling_filter', self)
   config_param :interval, :integer
-  config_param :sample_unit, :enum, list: [:tag, :all], default: :tag
+  config_param :sample_unit, :string, default: 'tag'
   config_param :minimum_rate_per_min, :integer, default: nil
+  helpers :record_accessor
   def configure(conf)
     super
     @counts = {}
     @resets = {} if @minimum_rate_per_min
+    @accessor = record_accessor_create(@sample_unit) unless %w(all tag).include?(@sample_unit)
   end
   # Access to @counts SHOULD be protected by mutex, with a heavy penalty.
@@ -22,7 +25,7 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
   # then i let here as it is now.
   def filter(tag, _time, record)
-    t = @sample_unit == :all ? 'all' : tag
+    t = record_key(tag, record)
     if @minimum_rate_per_min
       filter_with_minimum_rate(t, record)
     else
@@ -54,4 +57,15 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
       nil
     end
   end
+  def record_key(tag, record)
+    case @sample_unit
+    when 'all'
+      'all'
+    when 'tag'
+      tag
+    else
+      @accessor.call(record)
+    end
+  end
 end

data/test/plugin/test_filter_sampling.rb CHANGED Viewed

@@ -24,14 +24,21 @@ class SamplingFilterTest < Test::Unit::TestCase
     ]
     assert_equal 5, d.instance.interval
-    assert_equal :tag, d.instance.sample_unit
+    assert_equal 'tag', d.instance.sample_unit
     d = create_driver %[
       interval 1000
       sample_unit all
     ]
     assert_equal 1000, d.instance.interval
-    assert_equal :all, d.instance.sample_unit
+    assert_equal 'all', d.instance.sample_unit
+    d = create_driver %[
+      interval 1000
+      sample_unit $fake
+    ]
+    assert_equal 1000, d.instance.interval
+    assert_equal "$fake", d.instance.sample_unit
   end
   def test_filter
@@ -123,4 +130,57 @@ minimum_rate_per_min 10
     assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|_time,r| r['times']}
     assert_equal (['x']*12), filtered.map{|_time,r| r['data']}
   end
+  def test_filer_with_record_accessor
+    d2 = create_driver(%[
+      interval 3
+      sample_unit field3
+    ])
+    time = Time.parse("2012-01-02 13:14:15").to_i
+    d2.run(default_tag: 'input.hoge2') do
+      (1..12).each do |i|
+        [1,2].each do |sample_vaule|
+          d2.feed({'field1' => "record#{i}", 'field2' => i, 'field3' => sample_vaule})
+        end
+      end
+    end
+    filtered = d2.filtered
+    assert_equal 8, filtered.length
+    assert_equal 'record3', filtered[0][1]['field1']
+    assert_equal 1, filtered[0][1]['field3']
+    assert_equal 'record3', filtered[1][1]['field1']
+    assert_equal 2, filtered[1][1]['field3']
+    assert_equal 'record6', filtered[2][1]['field1']
+    assert_equal 1, filtered[2][1]['field3']
+    assert_equal 'record6', filtered[3][1]['field1']
+    assert_equal 2, filtered[3][1]['field3']
+    assert_equal 'record9', filtered[4][1]['field1']
+    assert_equal 1, filtered[4][1]['field3']
+    assert_equal 'record9', filtered[5][1]['field1']
+    assert_equal 2, filtered[5][1]['field3']
+    assert_equal 'record12', filtered[6][1]['field1']
+    assert_equal 1, filtered[6][1]['field3']
+    assert_equal 'record12', filtered[7][1]['field1']
+    assert_equal 2, filtered[7][1]['field3']
+  end
+  def test_filter_all
+    config = %[
+      interval 10
+      sample_unit all
+    ]
+    d1 = create_driver(config)
+    time = Time.parse("2012-01-02 13:14:15").to_i
+    d1.run do
+      6.times do |i|
+        [0,1].each do |j|
+          d1.feed("input.hoge#{2*i+j}", time, {'field1' => "record#{2*i+j+1}"})
+        end
+      end
+    end
+    filtered = d1.filtered
+    assert_equal 1, filtered.length
+    assert_equal 'record10', filtered[0][1]['field1']
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-sampling-filter
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.2.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-02-01 00:00:00.000000000 Z
+date: 2021-06-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -83,7 +83,7 @@ homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -98,9 +98,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.8
-signing_key:
+rubygems_version: 3.2.3
+signing_key:
 specification_version: 4
 summary: fluentd plugin to pickup sample data from matched massages
 test_files: