fluent-plugin-sampling-filter 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/fluent-plugin-sampling-filter.gemspec +4 -3
- data/lib/fluent/plugin/filter_sampling.rb +62 -0
- data/lib/fluent/plugin/out_sampling_filter.rb +1 -1
- data/test/helper.rb +1 -0
- data/test/plugin/test_filter_sampling.rb +129 -0
- data/test/plugin/test_out_sampling_filter.rb +1 -2
- metadata +30 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a5722d8c7acf064693b07707aec8ea21d9f2475
|
4
|
+
data.tar.gz: a486b0d0db3cf4911ecb91721c694fceee9c345f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80d72b30ff51aa9bc12a397782d19acecaf611c3a7f059c9aa831f2d2d489c9eb4a91a1761a579e2d7c801bf13bcdf01ba74fc35a04be7e3c503492e39afdce9
|
7
|
+
data.tar.gz: 1c0320e23f30de131122a8ab249a7eedd9580361c246eca7a9010cf7d3d310ac843cd0dbe9acae971fc36f4828d188f34545807abdc62e02f83988649269f74f
|
data/fluent-plugin-sampling-filter.gemspec
CHANGED
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-sampling-filter"
|
5
|
-
gem.version = "0.1.3"
|
5
|
+
gem.version = "0.2.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.description = %q{fluentd plugin to pickup sample data from matched massages}
|
9
9
|
gem.summary = %q{fluentd plugin to pickup sample data from matched massages}
|
10
10
|
gem.homepage = "https://github.com/tagomoris/fluent-plugin-sampling-filter"
|
11
|
-
gem.license = "
|
11
|
+
gem.license = "Apache-2.0"
|
12
12
|
|
13
13
|
gem.files = `git ls-files`.split($\)
|
14
14
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -16,5 +16,6 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
18
|
gem.add_development_dependency "rake"
|
19
|
-
gem.add_runtime_dependency "
|
19
|
+
gem.add_runtime_dependency "test-unit", "~> 3.1.0"
|
20
|
+
gem.add_runtime_dependency "fluentd", [">= 0.12.0", "< 2"]
|
20
21
|
end
|
data/lib/fluent/plugin/filter_sampling.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
class Fluent::SamplingFilter < Fluent::Filter
|
2
|
+
Fluent::Plugin.register_filter('sampling_filter', self)
|
3
|
+
|
4
|
+
config_param :interval, :integer
|
5
|
+
config_param :sample_unit, :string, default: 'tag'
|
6
|
+
config_param :minimum_rate_per_min, :integer, default: nil
|
7
|
+
|
8
|
+
def configure(conf)
|
9
|
+
super
|
10
|
+
|
11
|
+
@sample_unit = case @sample_unit
|
12
|
+
when 'tag'
|
13
|
+
:tag
|
14
|
+
when 'all'
|
15
|
+
:all
|
16
|
+
else
|
17
|
+
raise Fluent::ConfigError, "sample_unit allows only 'tag' or 'all'"
|
18
|
+
end
|
19
|
+
@counts = {}
|
20
|
+
@resets = {} if @minimum_rate_per_min
|
21
|
+
end
|
22
|
+
|
23
|
+
def filter_stream(tag, es)
|
24
|
+
t = if @sample_unit == :all
|
25
|
+
'all'
|
26
|
+
else
|
27
|
+
tag
|
28
|
+
end
|
29
|
+
|
30
|
+
new_es = Fluent::MultiEventStream.new
|
31
|
+
|
32
|
+
# Access to @counts SHOULD be protected by mutex, with a heavy penalty.
|
33
|
+
# Code below is not thread safe, but @counts (counter for sampling rate) is not
|
34
|
+
# so serious value (and probably will not be broken...),
|
35
|
+
# then i let here as it is now.
|
36
|
+
if @minimum_rate_per_min
|
37
|
+
unless @resets[t]
|
38
|
+
@resets[t] = Fluent::Engine.now + (60 - rand(30))
|
39
|
+
end
|
40
|
+
if Fluent::Engine.now > @resets[t]
|
41
|
+
@resets[t] = Fluent::Engine.now + 60
|
42
|
+
@counts[t] = 0
|
43
|
+
end
|
44
|
+
es.each do |time,record|
|
45
|
+
c = (@counts[t] = @counts.fetch(t, 0) + 1)
|
46
|
+
if c < @minimum_rate_per_min or c % @interval == 0
|
47
|
+
new_es.add(time, record.dup)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
else
|
51
|
+
es.each do |time,record|
|
52
|
+
c = (@counts[t] = @counts.fetch(t, 0) + 1)
|
53
|
+
if c % @interval == 0
|
54
|
+
new_es.add(time, record.dup)
|
55
|
+
# reset only just before @counts[t] is to be bignum from fixnum
|
56
|
+
@counts[t] = 0 if c > 0x6fffffff
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
new_es
|
61
|
+
end
|
62
|
+
end
|
data/test/helper.rb
CHANGED
@@ -13,6 +13,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
13
13
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
14
|
require 'fluent/test'
|
15
15
|
require 'fluent/plugin/out_sampling_filter'
|
16
|
+
require 'fluent/plugin/filter_sampling'
|
16
17
|
|
17
18
|
class Test::Unit::TestCase
|
18
19
|
end
|
data/test/plugin/test_filter_sampling.rb
ADDED
@@ -0,0 +1,129 @@
|
|
1
|
+
require 'helper'
|
2
|
+
|
3
|
+
class SamplingFilterTest < Test::Unit::TestCase
|
4
|
+
def setup
|
5
|
+
Fluent::Test.setup
|
6
|
+
end
|
7
|
+
|
8
|
+
CONFIG = %[
|
9
|
+
interval 10
|
10
|
+
sample_unit tag
|
11
|
+
]
|
12
|
+
|
13
|
+
def create_driver(conf=CONFIG,tag='test')
|
14
|
+
Fluent::Test::FilterTestDriver.new(Fluent::SamplingFilter, tag).configure(conf)
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_configure
|
18
|
+
assert_raise(Fluent::ConfigError) {
|
19
|
+
d = create_driver('')
|
20
|
+
}
|
21
|
+
d = create_driver %[
|
22
|
+
interval 5
|
23
|
+
]
|
24
|
+
|
25
|
+
assert_equal 5, d.instance.interval
|
26
|
+
assert_equal :tag, d.instance.sample_unit
|
27
|
+
|
28
|
+
d = create_driver %[
|
29
|
+
interval 1000
|
30
|
+
sample_unit all
|
31
|
+
]
|
32
|
+
assert_equal 1000, d.instance.interval
|
33
|
+
assert_equal :all, d.instance.sample_unit
|
34
|
+
end
|
35
|
+
|
36
|
+
def test_filter
|
37
|
+
d1 = create_driver(CONFIG, 'input.hoge1')
|
38
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
39
|
+
d1.run do
|
40
|
+
d1.filter({'field1' => 'record1', 'field2' => 1})
|
41
|
+
d1.filter({'field1' => 'record2', 'field2' => 2})
|
42
|
+
d1.filter({'field1' => 'record3', 'field2' => 3})
|
43
|
+
d1.filter({'field1' => 'record4', 'field2' => 4})
|
44
|
+
d1.filter({'field1' => 'record5', 'field2' => 5})
|
45
|
+
d1.filter({'field1' => 'record6', 'field2' => 6})
|
46
|
+
d1.filter({'field1' => 'record7', 'field2' => 7})
|
47
|
+
d1.filter({'field1' => 'record8', 'field2' => 8})
|
48
|
+
d1.filter({'field1' => 'record9', 'field2' => 9})
|
49
|
+
d1.filter({'field1' => 'record10', 'field2' => 10})
|
50
|
+
d1.filter({'field1' => 'record11', 'field2' => 11})
|
51
|
+
d1.filter({'field1' => 'record12', 'field2' => 12})
|
52
|
+
end
|
53
|
+
filtered = d1.filtered_as_array
|
54
|
+
assert_equal 1, filtered.length
|
55
|
+
assert_equal 'input.hoge1', filtered[0][0] # tag
|
56
|
+
assert_equal 'record10', filtered[0][2]['field1']
|
57
|
+
assert_equal 10, filtered[0][2]['field2']
|
58
|
+
|
59
|
+
d2 = create_driver(%[
|
60
|
+
interval 3
|
61
|
+
], 'input.hoge2')
|
62
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
63
|
+
d2.run do
|
64
|
+
d2.filter({'field1' => 'record1', 'field2' => 1})
|
65
|
+
d2.filter({'field1' => 'record2', 'field2' => 2})
|
66
|
+
d2.filter({'field1' => 'record3', 'field2' => 3})
|
67
|
+
d2.filter({'field1' => 'record4', 'field2' => 4})
|
68
|
+
d2.filter({'field1' => 'record5', 'field2' => 5})
|
69
|
+
d2.filter({'field1' => 'record6', 'field2' => 6})
|
70
|
+
d2.filter({'field1' => 'record7', 'field2' => 7})
|
71
|
+
d2.filter({'field1' => 'record8', 'field2' => 8})
|
72
|
+
d2.filter({'field1' => 'record9', 'field2' => 9})
|
73
|
+
d2.filter({'field1' => 'record10', 'field2' => 10})
|
74
|
+
d2.filter({'field1' => 'record11', 'field2' => 11})
|
75
|
+
d2.filter({'field1' => 'record12', 'field2' => 12})
|
76
|
+
end
|
77
|
+
filtered = d2.filtered_as_array
|
78
|
+
assert_equal 4, filtered.length
|
79
|
+
assert_equal 'input.hoge2', filtered[0][0] # tag
|
80
|
+
|
81
|
+
assert_equal 'record3', filtered[0][2]['field1']
|
82
|
+
assert_equal 'record6', filtered[1][2]['field1']
|
83
|
+
assert_equal 'record9', filtered[2][2]['field1']
|
84
|
+
assert_equal 'record12', filtered[3][2]['field1']
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_filter_minimum_rate
|
88
|
+
config = %[
|
89
|
+
interval 10
|
90
|
+
sample_unit tag
|
91
|
+
minimum_rate_per_min 100
|
92
|
+
]
|
93
|
+
d = create_driver(config, 'input.hoge3')
|
94
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
95
|
+
d.run do
|
96
|
+
(1..100).each do |t|
|
97
|
+
d.filter({'times' => t, 'data' => 'x'})
|
98
|
+
end
|
99
|
+
(101..130).each do |t|
|
100
|
+
d.filter({'times' => t, 'data' => 'y'})
|
101
|
+
end
|
102
|
+
end
|
103
|
+
filtered = d.filtered_as_array
|
104
|
+
assert_equal 103, filtered.length
|
105
|
+
assert_equal 'input.hoge3', filtered[0][0]
|
106
|
+
assert_equal ((1..100).map(&:to_i) + [110, 120, 130]), filtered.map{|t,time,r| r['times']}
|
107
|
+
assert_equal (['x']*100 + ['y']*3), filtered.map{|t,time,r| r['data']}
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_filter_minimum_rate_expire
|
111
|
+
config = %[
|
112
|
+
interval 10
|
113
|
+
sample_unit tag
|
114
|
+
minimum_rate_per_min 10
|
115
|
+
]
|
116
|
+
d = create_driver(config, 'input.hoge4')
|
117
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
118
|
+
d.run do
|
119
|
+
(1..30).each do |t|
|
120
|
+
d.filter({'times' => t, 'data' => 'x'})
|
121
|
+
end
|
122
|
+
end
|
123
|
+
filtered = d.filtered_as_array
|
124
|
+
assert_equal 12, filtered.length
|
125
|
+
assert_equal 'input.hoge4', filtered[0][0]
|
126
|
+
assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|t,time,r| r['times']}
|
127
|
+
assert_equal (['x']*12), filtered.map{|t,time,r| r['data']}
|
128
|
+
end
|
129
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sampling-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-10-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -24,20 +24,40 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.1.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.1.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: fluentd
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
47
|
+
version: 0.12.0
|
48
|
+
- - "<"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '2'
|
34
51
|
type: :runtime
|
35
52
|
prerelease: false
|
36
53
|
version_requirements: !ruby/object:Gem::Requirement
|
37
54
|
requirements:
|
38
55
|
- - ">="
|
39
56
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
57
|
+
version: 0.12.0
|
58
|
+
- - "<"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '2'
|
41
61
|
description: fluentd plugin to pickup sample data from matched massages
|
42
62
|
email:
|
43
63
|
- tagomoris@gmail.com
|
@@ -54,12 +74,14 @@ files:
|
|
54
74
|
- README.md
|
55
75
|
- Rakefile
|
56
76
|
- fluent-plugin-sampling-filter.gemspec
|
77
|
+
- lib/fluent/plugin/filter_sampling.rb
|
57
78
|
- lib/fluent/plugin/out_sampling_filter.rb
|
58
79
|
- test/helper.rb
|
80
|
+
- test/plugin/test_filter_sampling.rb
|
59
81
|
- test/plugin/test_out_sampling_filter.rb
|
60
82
|
homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
|
61
83
|
licenses:
|
62
|
-
-
|
84
|
+
- Apache-2.0
|
63
85
|
metadata: {}
|
64
86
|
post_install_message:
|
65
87
|
rdoc_options: []
|
@@ -77,10 +99,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
77
99
|
version: '0'
|
78
100
|
requirements: []
|
79
101
|
rubyforge_project:
|
80
|
-
rubygems_version: 2.
|
102
|
+
rubygems_version: 2.4.5
|
81
103
|
signing_key:
|
82
104
|
specification_version: 4
|
83
105
|
summary: fluentd plugin to pickup sample data from matched massages
|
84
106
|
test_files:
|
85
107
|
- test/helper.rb
|
108
|
+
- test/plugin/test_filter_sampling.rb
|
86
109
|
- test/plugin/test_out_sampling_filter.rb
|
110
|
+
has_rdoc:
|