fluent-plugin-sampling-filter 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bc7e8e8abf7ba9ab745bd507bda6cba1d6b0eb3
4
- data.tar.gz: 73e288aaf2416e2c120ee5009ca05497e4f4896d
3
+ metadata.gz: 5a5722d8c7acf064693b07707aec8ea21d9f2475
4
+ data.tar.gz: a486b0d0db3cf4911ecb91721c694fceee9c345f
5
5
  SHA512:
6
- metadata.gz: f17bcc17d7d9b8593e7c6d85c8ec5fa567484625ae43c9fd9e8cb1de0f3c0234151f46643e3645cff1457ddea98763601a87f3bd98755869a073a0c7e53fbb7b
7
- data.tar.gz: 477ed760d0a90510ab1bbc8694fb8da4fd2be91a58668826a79d261b3e312f29b25b77b628f6603e2c8425cad1b462ec854395f7e334bb2e2824af203760c57d
6
+ metadata.gz: 80d72b30ff51aa9bc12a397782d19acecaf611c3a7f059c9aa831f2d2d489c9eb4a91a1761a579e2d7c801bf13bcdf01ba74fc35a04be7e3c503492e39afdce9
7
+ data.tar.gz: 1c0320e23f30de131122a8ab249a7eedd9580361c246eca7a9010cf7d3d310ac843cd0dbe9acae971fc36f4828d188f34545807abdc62e02f83988649269f74f
@@ -2,13 +2,13 @@
2
2
 
3
3
  Gem::Specification.new do |gem|
4
4
  gem.name = "fluent-plugin-sampling-filter"
5
- gem.version = "0.1.3"
5
+ gem.version = "0.2.0"
6
6
  gem.authors = ["TAGOMORI Satoshi"]
7
7
  gem.email = ["tagomoris@gmail.com"]
8
8
  gem.description = %q{fluentd plugin to pickup sample data from matched massages}
9
9
  gem.summary = %q{fluentd plugin to pickup sample data from matched massages}
10
10
  gem.homepage = "https://github.com/tagomoris/fluent-plugin-sampling-filter"
11
- gem.license = "APLv2"
11
+ gem.license = "Apache-2.0"
12
12
 
13
13
  gem.files = `git ls-files`.split($\)
14
14
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
@@ -16,5 +16,6 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
 
18
18
  gem.add_development_dependency "rake"
19
- gem.add_runtime_dependency "fluentd"
19
+ gem.add_runtime_dependency "test-unit", "~> 3.1.0"
20
+ gem.add_runtime_dependency "fluentd", [">= 0.12.0", "< 2"]
20
21
  end
@@ -0,0 +1,62 @@
1
+ class Fluent::SamplingFilter < Fluent::Filter
2
+ Fluent::Plugin.register_filter('sampling_filter', self)
3
+
4
+ config_param :interval, :integer
5
+ config_param :sample_unit, :string, default: 'tag'
6
+ config_param :minimum_rate_per_min, :integer, default: nil
7
+
8
+ def configure(conf)
9
+ super
10
+
11
+ @sample_unit = case @sample_unit
12
+ when 'tag'
13
+ :tag
14
+ when 'all'
15
+ :all
16
+ else
17
+ raise Fluent::ConfigError, "sample_unit allows only 'tag' or 'all'"
18
+ end
19
+ @counts = {}
20
+ @resets = {} if @minimum_rate_per_min
21
+ end
22
+
23
+ def filter_stream(tag, es)
24
+ t = if @sample_unit == :all
25
+ 'all'
26
+ else
27
+ tag
28
+ end
29
+
30
+ new_es = Fluent::MultiEventStream.new
31
+
32
+ # Access to @counts SHOULD be protected by mutex, with a heavy penalty.
33
+ # Code below is not thread safe, but @counts (counter for sampling rate) is not
34
+ # so serious value (and probably will not be broken...),
35
+ # then i let here as it is now.
36
+ if @minimum_rate_per_min
37
+ unless @resets[t]
38
+ @resets[t] = Fluent::Engine.now + (60 - rand(30))
39
+ end
40
+ if Fluent::Engine.now > @resets[t]
41
+ @resets[t] = Fluent::Engine.now + 60
42
+ @counts[t] = 0
43
+ end
44
+ es.each do |time,record|
45
+ c = (@counts[t] = @counts.fetch(t, 0) + 1)
46
+ if c < @minimum_rate_per_min or c % @interval == 0
47
+ new_es.add(time, record.dup)
48
+ end
49
+ end
50
+ else
51
+ es.each do |time,record|
52
+ c = (@counts[t] = @counts.fetch(t, 0) + 1)
53
+ if c % @interval == 0
54
+ new_es.add(time, record.dup)
55
+ # reset only just before @counts[t] is to be bignum from fixnum
56
+ @counts[t] = 0 if c > 0x6fffffff
57
+ end
58
+ end
59
+ end
60
+ new_es
61
+ end
62
+ end
@@ -47,7 +47,7 @@ class Fluent::SamplingFilterOutput < Fluent::Output
47
47
  end
48
48
 
49
49
  time_record_pairs.each {|t,r|
50
- Fluent::Engine.emit(tag, t, r)
50
+ router.emit(tag, t, r)
51
51
  }
52
52
  end
53
53
 
data/test/helper.rb CHANGED
@@ -13,6 +13,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
13
  $LOAD_PATH.unshift(File.dirname(__FILE__))
14
14
  require 'fluent/test'
15
15
  require 'fluent/plugin/out_sampling_filter'
16
+ require 'fluent/plugin/filter_sampling'
16
17
 
17
18
  class Test::Unit::TestCase
18
19
  end
@@ -0,0 +1,129 @@
1
+ require 'helper'
2
+
3
+ class SamplingFilterTest < Test::Unit::TestCase
4
+ def setup
5
+ Fluent::Test.setup
6
+ end
7
+
8
+ CONFIG = %[
9
+ interval 10
10
+ sample_unit tag
11
+ ]
12
+
13
+ def create_driver(conf=CONFIG,tag='test')
14
+ Fluent::Test::FilterTestDriver.new(Fluent::SamplingFilter, tag).configure(conf)
15
+ end
16
+
17
+ def test_configure
18
+ assert_raise(Fluent::ConfigError) {
19
+ d = create_driver('')
20
+ }
21
+ d = create_driver %[
22
+ interval 5
23
+ ]
24
+
25
+ assert_equal 5, d.instance.interval
26
+ assert_equal :tag, d.instance.sample_unit
27
+
28
+ d = create_driver %[
29
+ interval 1000
30
+ sample_unit all
31
+ ]
32
+ assert_equal 1000, d.instance.interval
33
+ assert_equal :all, d.instance.sample_unit
34
+ end
35
+
36
+ def test_filter
37
+ d1 = create_driver(CONFIG, 'input.hoge1')
38
+ time = Time.parse("2012-01-02 13:14:15").to_i
39
+ d1.run do
40
+ d1.filter({'field1' => 'record1', 'field2' => 1})
41
+ d1.filter({'field1' => 'record2', 'field2' => 2})
42
+ d1.filter({'field1' => 'record3', 'field2' => 3})
43
+ d1.filter({'field1' => 'record4', 'field2' => 4})
44
+ d1.filter({'field1' => 'record5', 'field2' => 5})
45
+ d1.filter({'field1' => 'record6', 'field2' => 6})
46
+ d1.filter({'field1' => 'record7', 'field2' => 7})
47
+ d1.filter({'field1' => 'record8', 'field2' => 8})
48
+ d1.filter({'field1' => 'record9', 'field2' => 9})
49
+ d1.filter({'field1' => 'record10', 'field2' => 10})
50
+ d1.filter({'field1' => 'record11', 'field2' => 11})
51
+ d1.filter({'field1' => 'record12', 'field2' => 12})
52
+ end
53
+ filtered = d1.filtered_as_array
54
+ assert_equal 1, filtered.length
55
+ assert_equal 'input.hoge1', filtered[0][0] # tag
56
+ assert_equal 'record10', filtered[0][2]['field1']
57
+ assert_equal 10, filtered[0][2]['field2']
58
+
59
+ d2 = create_driver(%[
60
+ interval 3
61
+ ], 'input.hoge2')
62
+ time = Time.parse("2012-01-02 13:14:15").to_i
63
+ d2.run do
64
+ d2.filter({'field1' => 'record1', 'field2' => 1})
65
+ d2.filter({'field1' => 'record2', 'field2' => 2})
66
+ d2.filter({'field1' => 'record3', 'field2' => 3})
67
+ d2.filter({'field1' => 'record4', 'field2' => 4})
68
+ d2.filter({'field1' => 'record5', 'field2' => 5})
69
+ d2.filter({'field1' => 'record6', 'field2' => 6})
70
+ d2.filter({'field1' => 'record7', 'field2' => 7})
71
+ d2.filter({'field1' => 'record8', 'field2' => 8})
72
+ d2.filter({'field1' => 'record9', 'field2' => 9})
73
+ d2.filter({'field1' => 'record10', 'field2' => 10})
74
+ d2.filter({'field1' => 'record11', 'field2' => 11})
75
+ d2.filter({'field1' => 'record12', 'field2' => 12})
76
+ end
77
+ filtered = d2.filtered_as_array
78
+ assert_equal 4, filtered.length
79
+ assert_equal 'input.hoge2', filtered[0][0] # tag
80
+
81
+ assert_equal 'record3', filtered[0][2]['field1']
82
+ assert_equal 'record6', filtered[1][2]['field1']
83
+ assert_equal 'record9', filtered[2][2]['field1']
84
+ assert_equal 'record12', filtered[3][2]['field1']
85
+ end
86
+
87
+ def test_filter_minimum_rate
88
+ config = %[
89
+ interval 10
90
+ sample_unit tag
91
+ minimum_rate_per_min 100
92
+ ]
93
+ d = create_driver(config, 'input.hoge3')
94
+ time = Time.parse("2012-01-02 13:14:15").to_i
95
+ d.run do
96
+ (1..100).each do |t|
97
+ d.filter({'times' => t, 'data' => 'x'})
98
+ end
99
+ (101..130).each do |t|
100
+ d.filter({'times' => t, 'data' => 'y'})
101
+ end
102
+ end
103
+ filtered = d.filtered_as_array
104
+ assert_equal 103, filtered.length
105
+ assert_equal 'input.hoge3', filtered[0][0]
106
+ assert_equal ((1..100).map(&:to_i) + [110, 120, 130]), filtered.map{|t,time,r| r['times']}
107
+ assert_equal (['x']*100 + ['y']*3), filtered.map{|t,time,r| r['data']}
108
+ end
109
+
110
+ def test_filter_minimum_rate_expire
111
+ config = %[
112
+ interval 10
113
+ sample_unit tag
114
+ minimum_rate_per_min 10
115
+ ]
116
+ d = create_driver(config, 'input.hoge4')
117
+ time = Time.parse("2012-01-02 13:14:15").to_i
118
+ d.run do
119
+ (1..30).each do |t|
120
+ d.filter({'times' => t, 'data' => 'x'})
121
+ end
122
+ end
123
+ filtered = d.filtered_as_array
124
+ assert_equal 12, filtered.length
125
+ assert_equal 'input.hoge4', filtered[0][0]
126
+ assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|t,time,r| r['times']}
127
+ assert_equal (['x']*12), filtered.map{|t,time,r| r['data']}
128
+ end
129
+ end
@@ -118,8 +118,7 @@ minimum_rate_per_min 100
118
118
  end
119
119
  def test_minimum_rate_expire
120
120
  # hey, this test needs 60 seconds....
121
- assert_equal 1, 1
122
- return
121
+ omit("this test needs 60 seconds....") unless ENV["EXECLONGTEST"]
123
122
 
124
123
  config = %[
125
124
  interval 10
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-sampling-filter
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.3
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - TAGOMORI Satoshi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-07 00:00:00.000000000 Z
11
+ date: 2015-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -24,20 +24,40 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: test-unit
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 3.1.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 3.1.0
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: fluentd
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - ">="
32
46
  - !ruby/object:Gem::Version
33
- version: '0'
47
+ version: 0.12.0
48
+ - - "<"
49
+ - !ruby/object:Gem::Version
50
+ version: '2'
34
51
  type: :runtime
35
52
  prerelease: false
36
53
  version_requirements: !ruby/object:Gem::Requirement
37
54
  requirements:
38
55
  - - ">="
39
56
  - !ruby/object:Gem::Version
40
- version: '0'
57
+ version: 0.12.0
58
+ - - "<"
59
+ - !ruby/object:Gem::Version
60
+ version: '2'
41
61
  description: fluentd plugin to pickup sample data from matched massages
42
62
  email:
43
63
  - tagomoris@gmail.com
@@ -54,12 +74,14 @@ files:
54
74
  - README.md
55
75
  - Rakefile
56
76
  - fluent-plugin-sampling-filter.gemspec
77
+ - lib/fluent/plugin/filter_sampling.rb
57
78
  - lib/fluent/plugin/out_sampling_filter.rb
58
79
  - test/helper.rb
80
+ - test/plugin/test_filter_sampling.rb
59
81
  - test/plugin/test_out_sampling_filter.rb
60
82
  homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
61
83
  licenses:
62
- - APLv2
84
+ - Apache-2.0
63
85
  metadata: {}
64
86
  post_install_message:
65
87
  rdoc_options: []
@@ -77,10 +99,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
77
99
  version: '0'
78
100
  requirements: []
79
101
  rubyforge_project:
80
- rubygems_version: 2.2.2
102
+ rubygems_version: 2.4.5
81
103
  signing_key:
82
104
  specification_version: 4
83
105
  summary: fluentd plugin to pickup sample data from matched massages
84
106
  test_files:
85
107
  - test/helper.rb
108
+ - test/plugin/test_filter_sampling.rb
86
109
  - test/plugin/test_out_sampling_filter.rb
110
+ has_rdoc: