fluent-plugin-sampling-filter 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/fluent-plugin-sampling-filter.gemspec +4 -3
- data/lib/fluent/plugin/filter_sampling.rb +62 -0
- data/lib/fluent/plugin/out_sampling_filter.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/plugin/test_filter_sampling.rb +129 -0
- data/test/plugin/test_out_sampling_filter.rb +1 -2
- metadata +30 -6
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5a5722d8c7acf064693b07707aec8ea21d9f2475
         | 
| 4 | 
            +
              data.tar.gz: a486b0d0db3cf4911ecb91721c694fceee9c345f
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 80d72b30ff51aa9bc12a397782d19acecaf611c3a7f059c9aa831f2d2d489c9eb4a91a1761a579e2d7c801bf13bcdf01ba74fc35a04be7e3c503492e39afdce9
         | 
| 7 | 
            +
              data.tar.gz: 1c0320e23f30de131122a8ab249a7eedd9580361c246eca7a9010cf7d3d310ac843cd0dbe9acae971fc36f4828d188f34545807abdc62e02f83988649269f74f
         | 
    
        data/fluent-plugin-sampling-filter.gemspec
    CHANGED
    
    | @@ -2,13 +2,13 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |gem|
         | 
| 4 4 | 
             
              gem.name          = "fluent-plugin-sampling-filter"
         | 
| 5 | 
            -
              gem.version       = "0.1.3"
         | 
| 5 | 
            +
              gem.version       = "0.2.0"
         | 
| 6 6 | 
             
              gem.authors       = ["TAGOMORI Satoshi"]
         | 
| 7 7 | 
             
              gem.email         = ["tagomoris@gmail.com"]
         | 
| 8 8 | 
             
              gem.description   = %q{fluentd plugin to pickup sample data from matched massages}
         | 
| 9 9 | 
             
              gem.summary       = %q{fluentd plugin to pickup sample data from matched massages}
         | 
| 10 10 | 
             
              gem.homepage      = "https://github.com/tagomoris/fluent-plugin-sampling-filter"
         | 
| 11 | 
            -
              gem.license       = " | 
| 11 | 
            +
              gem.license       = "Apache-2.0"
         | 
| 12 12 |  | 
| 13 13 | 
             
              gem.files         = `git ls-files`.split($\)
         | 
| 14 14 | 
             
              gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
         | 
| @@ -16,5 +16,6 @@ Gem::Specification.new do |gem| | |
| 16 16 | 
             
              gem.require_paths = ["lib"]
         | 
| 17 17 |  | 
| 18 18 | 
             
              gem.add_development_dependency "rake"
         | 
| 19 | 
            -
              gem.add_runtime_dependency " | 
| 19 | 
            +
              gem.add_runtime_dependency "test-unit", "~> 3.1.0"
         | 
| 20 | 
            +
              gem.add_runtime_dependency "fluentd", [">= 0.12.0", "< 2"]
         | 
| 20 21 | 
             
            end
         | 
    
        data/lib/fluent/plugin/filter_sampling.rb
    ADDED
    
    | @@ -0,0 +1,62 @@ | |
| 1 | 
            +
            class Fluent::SamplingFilter < Fluent::Filter
         | 
| 2 | 
            +
              Fluent::Plugin.register_filter('sampling_filter', self)
         | 
| 3 | 
            +
             | 
| 4 | 
            +
              config_param :interval, :integer
         | 
| 5 | 
            +
              config_param :sample_unit, :string, default: 'tag'
         | 
| 6 | 
            +
              config_param :minimum_rate_per_min, :integer, default: nil
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              def configure(conf)
         | 
| 9 | 
            +
                super
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                @sample_unit = case @sample_unit
         | 
| 12 | 
            +
                               when 'tag'
         | 
| 13 | 
            +
                                 :tag
         | 
| 14 | 
            +
                               when 'all'
         | 
| 15 | 
            +
                                 :all
         | 
| 16 | 
            +
                               else
         | 
| 17 | 
            +
                                 raise Fluent::ConfigError, "sample_unit allows only 'tag' or 'all'"
         | 
| 18 | 
            +
                               end
         | 
| 19 | 
            +
                @counts = {}
         | 
| 20 | 
            +
                @resets = {} if @minimum_rate_per_min
         | 
| 21 | 
            +
              end
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              def filter_stream(tag, es)
         | 
| 24 | 
            +
                t = if @sample_unit == :all
         | 
| 25 | 
            +
                      'all'
         | 
| 26 | 
            +
                    else
         | 
| 27 | 
            +
                      tag
         | 
| 28 | 
            +
                    end
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                new_es = Fluent::MultiEventStream.new
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                # Access to @counts SHOULD be protected by mutex, with a heavy penalty.
         | 
| 33 | 
            +
                # Code below is not thread safe, but @counts (counter for sampling rate) is not
         | 
| 34 | 
            +
                # so serious value (and probably will not be broken...),
         | 
| 35 | 
            +
                # then i let here as it is now.
         | 
| 36 | 
            +
                if @minimum_rate_per_min
         | 
| 37 | 
            +
                  unless @resets[t]
         | 
| 38 | 
            +
                    @resets[t] = Fluent::Engine.now + (60 - rand(30))
         | 
| 39 | 
            +
                  end
         | 
| 40 | 
            +
                  if Fluent::Engine.now > @resets[t]
         | 
| 41 | 
            +
                    @resets[t] = Fluent::Engine.now + 60
         | 
| 42 | 
            +
                    @counts[t] = 0
         | 
| 43 | 
            +
                  end
         | 
| 44 | 
            +
                  es.each do |time,record|
         | 
| 45 | 
            +
                    c = (@counts[t] = @counts.fetch(t, 0) + 1)
         | 
| 46 | 
            +
                    if c < @minimum_rate_per_min or c % @interval == 0
         | 
| 47 | 
            +
                      new_es.add(time, record.dup)
         | 
| 48 | 
            +
                    end
         | 
| 49 | 
            +
                  end
         | 
| 50 | 
            +
                else
         | 
| 51 | 
            +
                  es.each do |time,record|
         | 
| 52 | 
            +
                    c = (@counts[t] = @counts.fetch(t, 0) + 1)
         | 
| 53 | 
            +
                    if c % @interval == 0
         | 
| 54 | 
            +
                      new_es.add(time, record.dup)
         | 
| 55 | 
            +
                      # reset only just before @counts[t] is to be bignum from fixnum
         | 
| 56 | 
            +
                      @counts[t] = 0 if c > 0x6fffffff
         | 
| 57 | 
            +
                    end
         | 
| 58 | 
            +
                  end
         | 
| 59 | 
            +
                end
         | 
| 60 | 
            +
                new_es
         | 
| 61 | 
            +
              end
         | 
| 62 | 
            +
            end
         | 
    
        data/test/helper.rb
    CHANGED
    
    | @@ -13,6 +13,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib')) | |
| 13 13 | 
             
            $LOAD_PATH.unshift(File.dirname(__FILE__))
         | 
| 14 14 | 
             
            require 'fluent/test'
         | 
| 15 15 | 
             
            require 'fluent/plugin/out_sampling_filter'
         | 
| 16 | 
            +
            require 'fluent/plugin/filter_sampling'
         | 
| 16 17 |  | 
| 17 18 | 
             
            class Test::Unit::TestCase
         | 
| 18 19 | 
             
            end
         | 
    
        data/test/plugin/test_filter_sampling.rb
    ADDED
    
    | @@ -0,0 +1,129 @@ | |
| 1 | 
            +
            require 'helper'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class SamplingFilterTest < Test::Unit::TestCase
         | 
| 4 | 
            +
              def setup
         | 
| 5 | 
            +
                Fluent::Test.setup
         | 
| 6 | 
            +
              end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
              CONFIG = %[
         | 
| 9 | 
            +
                interval 10
         | 
| 10 | 
            +
                sample_unit tag
         | 
| 11 | 
            +
              ]
         | 
| 12 | 
            +
             | 
| 13 | 
            +
              def create_driver(conf=CONFIG,tag='test')
         | 
| 14 | 
            +
                Fluent::Test::FilterTestDriver.new(Fluent::SamplingFilter, tag).configure(conf)
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              def test_configure
         | 
| 18 | 
            +
                assert_raise(Fluent::ConfigError) {
         | 
| 19 | 
            +
                  d = create_driver('')
         | 
| 20 | 
            +
                }
         | 
| 21 | 
            +
                d = create_driver %[
         | 
| 22 | 
            +
                  interval 5
         | 
| 23 | 
            +
                ]
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                assert_equal 5, d.instance.interval
         | 
| 26 | 
            +
                assert_equal :tag, d.instance.sample_unit
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                d = create_driver %[
         | 
| 29 | 
            +
                  interval 1000
         | 
| 30 | 
            +
                  sample_unit all
         | 
| 31 | 
            +
                ]
         | 
| 32 | 
            +
                assert_equal 1000, d.instance.interval
         | 
| 33 | 
            +
                assert_equal :all, d.instance.sample_unit
         | 
| 34 | 
            +
              end
         | 
| 35 | 
            +
             | 
| 36 | 
            +
              def test_filter
         | 
| 37 | 
            +
                d1 = create_driver(CONFIG, 'input.hoge1')
         | 
| 38 | 
            +
                time = Time.parse("2012-01-02 13:14:15").to_i
         | 
| 39 | 
            +
                d1.run do
         | 
| 40 | 
            +
                  d1.filter({'field1' => 'record1', 'field2' => 1})
         | 
| 41 | 
            +
                  d1.filter({'field1' => 'record2', 'field2' => 2})
         | 
| 42 | 
            +
                  d1.filter({'field1' => 'record3', 'field2' => 3})
         | 
| 43 | 
            +
                  d1.filter({'field1' => 'record4', 'field2' => 4})
         | 
| 44 | 
            +
                  d1.filter({'field1' => 'record5', 'field2' => 5})
         | 
| 45 | 
            +
                  d1.filter({'field1' => 'record6', 'field2' => 6})
         | 
| 46 | 
            +
                  d1.filter({'field1' => 'record7', 'field2' => 7})
         | 
| 47 | 
            +
                  d1.filter({'field1' => 'record8', 'field2' => 8})
         | 
| 48 | 
            +
                  d1.filter({'field1' => 'record9', 'field2' => 9})
         | 
| 49 | 
            +
                  d1.filter({'field1' => 'record10', 'field2' => 10})
         | 
| 50 | 
            +
                  d1.filter({'field1' => 'record11', 'field2' => 11})
         | 
| 51 | 
            +
                  d1.filter({'field1' => 'record12', 'field2' => 12})
         | 
| 52 | 
            +
                end
         | 
| 53 | 
            +
                filtered = d1.filtered_as_array
         | 
| 54 | 
            +
                assert_equal 1, filtered.length
         | 
| 55 | 
            +
                assert_equal 'input.hoge1', filtered[0][0] # tag
         | 
| 56 | 
            +
                assert_equal 'record10', filtered[0][2]['field1']
         | 
| 57 | 
            +
                assert_equal 10, filtered[0][2]['field2']
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                d2 = create_driver(%[
         | 
| 60 | 
            +
                  interval 3
         | 
| 61 | 
            +
                ], 'input.hoge2')
         | 
| 62 | 
            +
                time = Time.parse("2012-01-02 13:14:15").to_i
         | 
| 63 | 
            +
                d2.run do
         | 
| 64 | 
            +
                  d2.filter({'field1' => 'record1', 'field2' => 1})
         | 
| 65 | 
            +
                  d2.filter({'field1' => 'record2', 'field2' => 2})
         | 
| 66 | 
            +
                  d2.filter({'field1' => 'record3', 'field2' => 3})
         | 
| 67 | 
            +
                  d2.filter({'field1' => 'record4', 'field2' => 4})
         | 
| 68 | 
            +
                  d2.filter({'field1' => 'record5', 'field2' => 5})
         | 
| 69 | 
            +
                  d2.filter({'field1' => 'record6', 'field2' => 6})
         | 
| 70 | 
            +
                  d2.filter({'field1' => 'record7', 'field2' => 7})
         | 
| 71 | 
            +
                  d2.filter({'field1' => 'record8', 'field2' => 8})
         | 
| 72 | 
            +
                  d2.filter({'field1' => 'record9', 'field2' => 9})
         | 
| 73 | 
            +
                  d2.filter({'field1' => 'record10', 'field2' => 10})
         | 
| 74 | 
            +
                  d2.filter({'field1' => 'record11', 'field2' => 11})
         | 
| 75 | 
            +
                  d2.filter({'field1' => 'record12', 'field2' => 12})
         | 
| 76 | 
            +
                end
         | 
| 77 | 
            +
                filtered = d2.filtered_as_array
         | 
| 78 | 
            +
                assert_equal 4, filtered.length
         | 
| 79 | 
            +
                assert_equal 'input.hoge2', filtered[0][0] # tag
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                assert_equal 'record3', filtered[0][2]['field1']
         | 
| 82 | 
            +
                assert_equal 'record6', filtered[1][2]['field1']
         | 
| 83 | 
            +
                assert_equal 'record9', filtered[2][2]['field1']
         | 
| 84 | 
            +
                assert_equal 'record12', filtered[3][2]['field1']
         | 
| 85 | 
            +
              end
         | 
| 86 | 
            +
             | 
| 87 | 
            +
              def test_filter_minimum_rate
         | 
| 88 | 
            +
                config = %[
         | 
| 89 | 
            +
            interval 10
         | 
| 90 | 
            +
            sample_unit tag
         | 
| 91 | 
            +
            minimum_rate_per_min 100
         | 
| 92 | 
            +
            ]
         | 
| 93 | 
            +
                d = create_driver(config, 'input.hoge3')
         | 
| 94 | 
            +
                time = Time.parse("2012-01-02 13:14:15").to_i
         | 
| 95 | 
            +
                d.run do
         | 
| 96 | 
            +
                  (1..100).each do |t|
         | 
| 97 | 
            +
                    d.filter({'times' => t, 'data' => 'x'})
         | 
| 98 | 
            +
                  end
         | 
| 99 | 
            +
                  (101..130).each do |t|
         | 
| 100 | 
            +
                    d.filter({'times' => t, 'data' => 'y'})
         | 
| 101 | 
            +
                  end
         | 
| 102 | 
            +
                end
         | 
| 103 | 
            +
                filtered = d.filtered_as_array
         | 
| 104 | 
            +
                assert_equal 103, filtered.length
         | 
| 105 | 
            +
                assert_equal 'input.hoge3', filtered[0][0]
         | 
| 106 | 
            +
                assert_equal ((1..100).map(&:to_i) + [110, 120, 130]), filtered.map{|t,time,r| r['times']}
         | 
| 107 | 
            +
                assert_equal (['x']*100 + ['y']*3), filtered.map{|t,time,r| r['data']}
         | 
| 108 | 
            +
              end
         | 
| 109 | 
            +
             | 
| 110 | 
            +
              def test_filter_minimum_rate_expire
         | 
| 111 | 
            +
                config = %[
         | 
| 112 | 
            +
            interval 10
         | 
| 113 | 
            +
            sample_unit tag
         | 
| 114 | 
            +
            minimum_rate_per_min 10
         | 
| 115 | 
            +
            ]
         | 
| 116 | 
            +
                d = create_driver(config, 'input.hoge4')
         | 
| 117 | 
            +
                time = Time.parse("2012-01-02 13:14:15").to_i
         | 
| 118 | 
            +
                d.run do
         | 
| 119 | 
            +
                  (1..30).each do |t|
         | 
| 120 | 
            +
                    d.filter({'times' => t, 'data' => 'x'})
         | 
| 121 | 
            +
                  end
         | 
| 122 | 
            +
                end
         | 
| 123 | 
            +
                filtered = d.filtered_as_array
         | 
| 124 | 
            +
                assert_equal 12, filtered.length
         | 
| 125 | 
            +
                assert_equal 'input.hoge4', filtered[0][0]
         | 
| 126 | 
            +
                assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|t,time,r| r['times']}
         | 
| 127 | 
            +
                assert_equal (['x']*12), filtered.map{|t,time,r| r['data']}
         | 
| 128 | 
            +
              end
         | 
| 129 | 
            +
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: fluent-plugin-sampling-filter
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.3
         | 
| 4 | 
            +
              version: 0.2.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - TAGOMORI Satoshi
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2015-10-18 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rake
         | 
| @@ -24,20 +24,40 @@ dependencies: | |
| 24 24 | 
             
                - - ">="
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 26 | 
             
                    version: '0'
         | 
| 27 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 28 | 
            +
              name: test-unit
         | 
| 29 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 | 
            +
                requirements:
         | 
| 31 | 
            +
                - - "~>"
         | 
| 32 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            +
                    version: 3.1.0
         | 
| 34 | 
            +
              type: :runtime
         | 
| 35 | 
            +
              prerelease: false
         | 
| 36 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 | 
            +
                requirements:
         | 
| 38 | 
            +
                - - "~>"
         | 
| 39 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            +
                    version: 3.1.0
         | 
| 27 41 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 28 42 | 
             
              name: fluentd
         | 
| 29 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 44 | 
             
                requirements:
         | 
| 31 45 | 
             
                - - ">="
         | 
| 32 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 | 
            -
                    version:  | 
| 47 | 
            +
                    version: 0.12.0
         | 
| 48 | 
            +
                - - "<"
         | 
| 49 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 50 | 
            +
                    version: '2'
         | 
| 34 51 | 
             
              type: :runtime
         | 
| 35 52 | 
             
              prerelease: false
         | 
| 36 53 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 54 | 
             
                requirements:
         | 
| 38 55 | 
             
                - - ">="
         | 
| 39 56 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 | 
            -
                    version:  | 
| 57 | 
            +
                    version: 0.12.0
         | 
| 58 | 
            +
                - - "<"
         | 
| 59 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 60 | 
            +
                    version: '2'
         | 
| 41 61 | 
             
            description: fluentd plugin to pickup sample data from matched massages
         | 
| 42 62 | 
             
            email:
         | 
| 43 63 | 
             
            - tagomoris@gmail.com
         | 
| @@ -54,12 +74,14 @@ files: | |
| 54 74 | 
             
            - README.md
         | 
| 55 75 | 
             
            - Rakefile
         | 
| 56 76 | 
             
            - fluent-plugin-sampling-filter.gemspec
         | 
| 77 | 
            +
            - lib/fluent/plugin/filter_sampling.rb
         | 
| 57 78 | 
             
            - lib/fluent/plugin/out_sampling_filter.rb
         | 
| 58 79 | 
             
            - test/helper.rb
         | 
| 80 | 
            +
            - test/plugin/test_filter_sampling.rb
         | 
| 59 81 | 
             
            - test/plugin/test_out_sampling_filter.rb
         | 
| 60 82 | 
             
            homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
         | 
| 61 83 | 
             
            licenses:
         | 
| 62 | 
            -
            -  | 
| 84 | 
            +
            - Apache-2.0
         | 
| 63 85 | 
             
            metadata: {}
         | 
| 64 86 | 
             
            post_install_message: 
         | 
| 65 87 | 
             
            rdoc_options: []
         | 
| @@ -77,10 +99,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 77 99 | 
             
                  version: '0'
         | 
| 78 100 | 
             
            requirements: []
         | 
| 79 101 | 
             
            rubyforge_project: 
         | 
| 80 | 
            -
            rubygems_version: 2. | 
| 102 | 
            +
            rubygems_version: 2.4.5
         | 
| 81 103 | 
             
            signing_key: 
         | 
| 82 104 | 
             
            specification_version: 4
         | 
| 83 105 | 
             
            summary: fluentd plugin to pickup sample data from matched massages
         | 
| 84 106 | 
             
            test_files:
         | 
| 85 107 | 
             
            - test/helper.rb
         | 
| 108 | 
            +
            - test/plugin/test_filter_sampling.rb
         | 
| 86 109 | 
             
            - test/plugin/test_out_sampling_filter.rb
         | 
| 110 | 
            +
            has_rdoc: 
         |