RubyGems - fluent-plugin-sampling-filter - Versions diffs - 1.1.0 → 1.2.0 - Mend

fluent-plugin-sampling-filter 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +5 -5
data/README.md +39 -7
data/fluent-plugin-sampling-filter.gemspec +1 -1
data/lib/fluent/plugin/filter_sampling.rb +16 -2
data/test/plugin/test_filter_sampling.rb +62 -2
metadata +6 -7

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 649911c9296c2b6a15a2a4a24a9762a577be6c40
-  data.tar.gz: 2449647d5f8aedc8516d8dc3777aad753a956262
+SHA256:
+  metadata.gz: 2830d5959dab3375606dd4733b95b1ecc4e7cec666791937a708791b6e841bf6
+  data.tar.gz: 0767a563091fa6674f7f37158857decdaef378c92b181c14054ccd8a183c6586
 SHA512:
-  metadata.gz: 30583bdb0e00e4e99f4eccc342fa52989c652b88e0f9cfc2eab0c214e67620f826fc4e231ca02635b66f36d455854ec647bef7e2ad4e1856a8c81463260947c2
-  data.tar.gz: cbcfb440312c6925cc2a7857f0c445265c12c60471fd5f1f99ff4a4828d46f5c60c686f5b17e8f3c35e72b6e9fefaf3f8580969423c01079b4bc47697d0642cf
+  metadata.gz: a65c1919e2e774326a3ef25e3f9348a530566434c2aca03803b5b63804b5aada9fa40ef3b4a1e137a00e6204b7f09f5d85d746f45c7f0a2e4177dbbe05afdb8b
+  data.tar.gz: c5602c44dc0dfd4007082e2c9138745423a1bed774ff0d453eed9e00c3b9941840fe6a3245b5cdd4b656fcd6960e014602c2a716a6d08653858dfb339cc07410

data/README.md CHANGED Viewed

@@ -2,7 +2,14 @@
 This is a [Fluentd](http://fluentd.org) plugin to sample matching messages to analyse and report messages behavior and emit sampled messages with modified tag.
-* sampling rate per tags, or for all
+* sampling rate per tag, per message field, or for all events
+## Requirements
+| fluent-plugin-sampling-filter | fluentd    | ruby   |
+|-------------------------------|------------|--------|
+| >= 1.0.0                      | >= v0.14.0 | >= 2.1 |
+| <  1.0.0                      | <  v0.14.0 | >= 1.9 |
 ## Configuration
@@ -14,13 +21,14 @@ This filter passes a specified part of whole events to following filter/output p
       @type any_great_input
       @label @mydata
     </source>
     <label @mydata>
       <filter **>
         @type sampling
+        sample_unit all
         interval 10    # pass 1/10 events to following plugins
       </filter>
       <match **>
         @type ...
       </match>
@@ -32,14 +40,35 @@ Sampling is done for all events, but we can do it per matched tags:
       @type any_great_input
       @label @mydata
     </source>
     <label @mydata>
       <filter **>
         @type sampling
         interval 10
         sample_unit tag # 1/10 events for each tags
       </filter>
+      <match **>
+        @type ...
+      </match>
+    </label>
+We can also sample based on a value in the message:
+    <source>
+      @type any_great_input
+      @label @mydata
+    </source>
+    <label @mydata>
+      <filter **>
+        @type sampling
+        interval 10
+        # pass 1/10 events per user given events like: { user: { name: "Bob" }, ... }
+        sample_unit $.user.name
+      </filter>
       <match **>
         @type ...
       </match>
@@ -47,6 +76,9 @@ Sampling is done for all events, but we can do it per matched tags:
 `minimum_rate_per_min` option (integer) configures this plugin to pass events at the specified minimum rate per minute, no matter how small the total number of events is.
+`sample_unit` option (string) configures this plugin to sample events per tag (`tag`, the default), across all events (`all`), or per value of a record field
+specified using the [record accessor syntax](https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor).
 ### SamplingFilterOutput
 **NOTE: This plugin is deprecated. Use filter plugin instead.**
@@ -58,7 +90,7 @@ Pickup 1/10 messages about each tags(default: `sample_unit tag`), and add tag pr
       interval 10
       add_prefix sampled
     </match>
     <match sampled.**>
       # output configurations where to send sampled messages
     </match>
@@ -72,7 +104,7 @@ Pickup 1/100 messages of all matched messages, and modify tags from `input.**` t
       remove_prefix input
       add_prefix output
     </match>
     <match sampled.**>
       # output configurations where to send sampled messages
     </match>

data/fluent-plugin-sampling-filter.gemspec CHANGED Viewed

@@ -2,7 +2,7 @@
 Gem::Specification.new do |gem|
   gem.name          = "fluent-plugin-sampling-filter"
-  gem.version       = "1.1.0"
+  gem.version       = "1.2.0"
   gem.authors       = ["TAGOMORI Satoshi"]
   gem.email         = ["tagomoris@gmail.com"]
   gem.description   = %q{fluentd plugin to pickup sample data from matched massages}

data/lib/fluent/plugin/filter_sampling.rb CHANGED Viewed

@@ -6,14 +6,17 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
   Fluent::Plugin.register_filter('sampling_filter', self)
   config_param :interval, :integer
-  config_param :sample_unit, :enum, list: [:tag, :all], default: :tag
+  config_param :sample_unit, :string, default: 'tag'
   config_param :minimum_rate_per_min, :integer, default: nil
+  helpers :record_accessor
   def configure(conf)
     super
     @counts = {}
     @resets = {} if @minimum_rate_per_min
+    @accessor = record_accessor_create(@sample_unit) unless %w(all tag).include?(@sample_unit)
   end
   # Access to @counts SHOULD be protected by mutex, with a heavy penalty.
@@ -22,7 +25,7 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
   # then i let here as it is now.
   def filter(tag, _time, record)
-    t = @sample_unit == :all ? 'all' : tag
+    t = record_key(tag, record)
     if @minimum_rate_per_min
       filter_with_minimum_rate(t, record)
     else
@@ -54,4 +57,15 @@ class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
       nil
     end
   end
+  def record_key(tag, record)
+    case @sample_unit
+    when 'all'
+      'all'
+    when 'tag'
+      tag
+    else
+      @accessor.call(record)
+    end
+  end
 end

data/test/plugin/test_filter_sampling.rb CHANGED Viewed

@@ -24,14 +24,21 @@ class SamplingFilterTest < Test::Unit::TestCase
     ]
     assert_equal 5, d.instance.interval
-    assert_equal :tag, d.instance.sample_unit
+    assert_equal 'tag', d.instance.sample_unit
     d = create_driver %[
       interval 1000
       sample_unit all
     ]
     assert_equal 1000, d.instance.interval
-    assert_equal :all, d.instance.sample_unit
+    assert_equal 'all', d.instance.sample_unit
+    d = create_driver %[
+      interval 1000
+      sample_unit $fake
+    ]
+    assert_equal 1000, d.instance.interval
+    assert_equal "$fake", d.instance.sample_unit
   end
   def test_filter
@@ -123,4 +130,57 @@ minimum_rate_per_min 10
     assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|_time,r| r['times']}
     assert_equal (['x']*12), filtered.map{|_time,r| r['data']}
   end
+  def test_filer_with_record_accessor
+    d2 = create_driver(%[
+      interval 3
+      sample_unit field3
+    ])
+    time = Time.parse("2012-01-02 13:14:15").to_i
+    d2.run(default_tag: 'input.hoge2') do
+      (1..12).each do |i|
+        [1,2].each do |sample_vaule|
+          d2.feed({'field1' => "record#{i}", 'field2' => i, 'field3' => sample_vaule})
+        end
+      end
+    end
+    filtered = d2.filtered
+    assert_equal 8, filtered.length
+    assert_equal 'record3', filtered[0][1]['field1']
+    assert_equal 1, filtered[0][1]['field3']
+    assert_equal 'record3', filtered[1][1]['field1']
+    assert_equal 2, filtered[1][1]['field3']
+    assert_equal 'record6', filtered[2][1]['field1']
+    assert_equal 1, filtered[2][1]['field3']
+    assert_equal 'record6', filtered[3][1]['field1']
+    assert_equal 2, filtered[3][1]['field3']
+    assert_equal 'record9', filtered[4][1]['field1']
+    assert_equal 1, filtered[4][1]['field3']
+    assert_equal 'record9', filtered[5][1]['field1']
+    assert_equal 2, filtered[5][1]['field3']
+    assert_equal 'record12', filtered[6][1]['field1']
+    assert_equal 1, filtered[6][1]['field3']
+    assert_equal 'record12', filtered[7][1]['field1']
+    assert_equal 2, filtered[7][1]['field3']
+  end
+  def test_filter_all
+    config = %[
+      interval 10
+      sample_unit all
+    ]
+    d1 = create_driver(config)
+    time = Time.parse("2012-01-02 13:14:15").to_i
+    d1.run do
+      6.times do |i|
+        [0,1].each do |j|
+          d1.feed("input.hoge#{2*i+j}", time, {'field1' => "record#{2*i+j+1}"})
+        end
+      end
+    end
+    filtered = d1.filtered
+    assert_equal 1, filtered.length
+    assert_equal 'record10', filtered[0][1]['field1']
+  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: fluent-plugin-sampling-filter
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.2.0
 platform: ruby
 authors:
 - TAGOMORI Satoshi
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2017-02-01 00:00:00.000000000 Z
+date: 2021-06-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
@@ -83,7 +83,7 @@ homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
 licenses:
 - Apache-2.0
 metadata: {}
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -98,9 +98,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.6.8
-signing_key:
+rubygems_version: 3.2.3
+signing_key:
 specification_version: 4
 summary: fluentd plugin to pickup sample data from matched massages
 test_files: