fluent-plugin-sampling-filter 0.1.3 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +4 -2
- data/README.md +79 -10
- data/fluent-plugin-sampling-filter.gemspec +4 -3
- data/lib/fluent/plugin/filter_sampling.rb +71 -0
- data/lib/fluent/plugin/out_sampling_filter.rb +18 -27
- data/test/helper.rb +1 -0
- data/test/plugin/test_filter_sampling.rb +186 -0
- data/test/plugin/test_out_sampling_filter.rb +72 -72
- metadata +32 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 2830d5959dab3375606dd4733b95b1ecc4e7cec666791937a708791b6e841bf6
|
4
|
+
data.tar.gz: 0767a563091fa6674f7f37158857decdaef378c92b181c14054ccd8a183c6586
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a65c1919e2e774326a3ef25e3f9348a530566434c2aca03803b5b63804b5aada9fa40ef3b4a1e137a00e6204b7f09f5d85d746f45c7f0a2e4177dbbe05afdb8b
|
7
|
+
data.tar.gz: c5602c44dc0dfd4007082e2c9138745423a1bed774ff0d453eed9e00c3b9941840fe6a3245b5cdd4b656fcd6960e014602c2a716a6d08653858dfb339cc07410
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -1,26 +1,96 @@
|
|
1
1
|
# fluent-plugin-sampling-filter
|
2
2
|
|
3
|
-
|
3
|
+
This is a [Fluentd](http://fluentd.org) plugin that samples matching messages, so that message behavior can be analysed and reported, and emits the sampled messages with a modified tag.
|
4
4
|
|
5
|
-
|
5
|
+
* sampling rate per tag, per message field, or across all messages
|
6
6
|
|
7
|
-
|
7
|
+
## Requirements
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
| fluent-plugin-sampling-filter | fluentd | ruby |
|
10
|
+
|-------------------------------|------------|--------|
|
11
|
+
| >= 1.0.0 | >= v0.14.0 | >= 2.1 |
|
12
|
+
| < 1.0.0 | < v0.14.0 | >= 1.9 |
|
11
13
|
|
12
14
|
## Configuration
|
13
15
|
|
16
|
+
### SamplingFilter
|
17
|
+
|
18
|
+
This filter passes a specified fraction of all events to the following filter/output plugins:
|
19
|
+
|
20
|
+
<source>
|
21
|
+
@type any_great_input
|
22
|
+
@label @mydata
|
23
|
+
</source>
|
24
|
+
|
25
|
+
<label @mydata>
|
26
|
+
<filter **>
|
27
|
+
@type sampling
|
28
|
+
sample_unit all
|
29
|
+
interval 10 # pass 1/10 events to following plugins
|
30
|
+
</filter>
|
31
|
+
|
32
|
+
<match **>
|
33
|
+
@type ...
|
34
|
+
</match>
|
35
|
+
</label>
|
36
|
+
|
37
|
+
In the example above, sampling is done across all events, but it can also be done per matched tag:
|
38
|
+
|
39
|
+
<source>
|
40
|
+
@type any_great_input
|
41
|
+
@label @mydata
|
42
|
+
</source>
|
43
|
+
|
44
|
+
<label @mydata>
|
45
|
+
<filter **>
|
46
|
+
@type sampling
|
47
|
+
interval 10
|
48
|
+
sample_unit tag # 1/10 events for each tags
|
49
|
+
</filter>
|
50
|
+
|
51
|
+
<match **>
|
52
|
+
@type ...
|
53
|
+
</match>
|
54
|
+
</label>
|
55
|
+
|
56
|
+
|
57
|
+
We can also sample based on a value in the message:
|
58
|
+
|
59
|
+
<source>
|
60
|
+
@type any_great_input
|
61
|
+
@label @mydata
|
62
|
+
</source>
|
63
|
+
|
64
|
+
<label @mydata>
|
65
|
+
<filter **>
|
66
|
+
@type sampling
|
67
|
+
interval 10
|
68
|
+
# pass 1/10 events per user given events like: { user: { name: "Bob" }, ... }
|
69
|
+
sample_unit $.user.name
|
70
|
+
</filter>
|
71
|
+
|
72
|
+
<match **>
|
73
|
+
@type ...
|
74
|
+
</match>
|
75
|
+
</label>
|
76
|
+
|
77
|
+
`minimum_rate_per_min` option (integer) configures this plugin to pass events at the specified rate per minute, no matter how small the total number of events is.
|
78
|
+
|
79
|
+
`sample_unit` option (string) configures this plugin to sample data based on tag (default), 'all', or a field value
|
80
|
+
using the [record accessor syntax](https://docs.fluentd.org/plugin-helper-overview/api-plugin-helper-record_accessor).
|
81
|
+
|
14
82
|
### SamplingFilterOutput
|
15
83
|
|
84
|
+
**NOTE: This plugin is deprecated. Use filter plugin instead.**
|
85
|
+
|
16
86
|
Pick up 1/10 of the messages for each tag (default: `sample_unit tag`), and add the tag prefix `sampled`.
|
17
87
|
|
18
88
|
<match **>
|
19
|
-
type sampling_filter
|
89
|
+
@type sampling_filter
|
20
90
|
interval 10
|
21
91
|
add_prefix sampled
|
22
92
|
</match>
|
23
|
-
|
93
|
+
|
24
94
|
<match sampled.**>
|
25
95
|
# output configurations where to send sampled messages
|
26
96
|
</match>
|
@@ -28,20 +98,19 @@ Pickup 1/10 messages about each tags(default: `sample_unit tag`), and add tag pr
|
|
28
98
|
Pick up 1/100 of all matched messages, and modify tags from `input.**` to `output.**`.
|
29
99
|
|
30
100
|
<match input.**>
|
31
|
-
type sampling_filter
|
101
|
+
@type sampling_filter
|
32
102
|
interval 100
|
33
103
|
sample_unit all
|
34
104
|
remove_prefix input
|
35
105
|
add_prefix output
|
36
106
|
</match>
|
37
|
-
|
107
|
+
|
38
108
|
<match sampled.**>
|
39
109
|
# output configurations where to send sampled messages
|
40
110
|
</match>
|
41
111
|
|
42
112
|
## TODO
|
43
113
|
|
44
|
-
* consider what to do next
|
45
114
|
* patches welcome!
|
46
115
|
|
47
116
|
## Copyright
|
@@ -2,13 +2,13 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-sampling-filter"
|
5
|
-
gem.version = "
|
5
|
+
gem.version = "1.2.0"
|
6
6
|
gem.authors = ["TAGOMORI Satoshi"]
|
7
7
|
gem.email = ["tagomoris@gmail.com"]
|
8
8
|
gem.description = %q{fluentd plugin to pickup sample data from matched massages}
|
9
9
|
gem.summary = %q{fluentd plugin to pickup sample data from matched massages}
|
10
10
|
gem.homepage = "https://github.com/tagomoris/fluent-plugin-sampling-filter"
|
11
|
-
gem.license = "
|
11
|
+
gem.license = "Apache-2.0"
|
12
12
|
|
13
13
|
gem.files = `git ls-files`.split($\)
|
14
14
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
@@ -16,5 +16,6 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.require_paths = ["lib"]
|
17
17
|
|
18
18
|
gem.add_development_dependency "rake"
|
19
|
-
gem.add_runtime_dependency "
|
19
|
+
gem.add_runtime_dependency "test-unit", "~> 3.1.0"
|
20
|
+
gem.add_runtime_dependency "fluentd", [">= 0.14.12", "< 2"]
|
20
21
|
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'fluent/plugin/filter'
|
2
|
+
require 'fluent/clock'
|
3
|
+
|
4
|
+
class Fluent::Plugin::SamplingFilter < Fluent::Plugin::Filter
|
5
|
+
Fluent::Plugin.register_filter('sampling', self)
|
6
|
+
Fluent::Plugin.register_filter('sampling_filter', self)
|
7
|
+
|
8
|
+
config_param :interval, :integer
|
9
|
+
config_param :sample_unit, :string, default: 'tag'
|
10
|
+
config_param :minimum_rate_per_min, :integer, default: nil
|
11
|
+
|
12
|
+
helpers :record_accessor
|
13
|
+
|
14
|
+
def configure(conf)
|
15
|
+
super
|
16
|
+
|
17
|
+
@counts = {}
|
18
|
+
@resets = {} if @minimum_rate_per_min
|
19
|
+
@accessor = record_accessor_create(@sample_unit) unless %w(all tag).include?(@sample_unit)
|
20
|
+
end
|
21
|
+
|
22
|
+
# Access to @counts SHOULD be protected by mutex, with a heavy penalty.
|
23
|
+
# Code below is not thread safe, but @counts (counter for sampling rate) is not
|
24
|
+
# so serious value (and probably will not be broken...),
|
25
|
+
# then i let here as it is now.
|
26
|
+
|
27
|
+
def filter(tag, _time, record)
|
28
|
+
t = record_key(tag, record)
|
29
|
+
if @minimum_rate_per_min
|
30
|
+
filter_with_minimum_rate(t, record)
|
31
|
+
else
|
32
|
+
filter_simple(t, record)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def filter_simple(t, record)
|
37
|
+
c = (@counts[t] = @counts.fetch(t, 0) + 1)
|
38
|
+
# reset only just before @counts[t] is to be bignum from fixnum
|
39
|
+
@counts[t] = 0 if c > 0x6fffffff
|
40
|
+
if c % @interval == 0
|
41
|
+
record
|
42
|
+
else
|
43
|
+
nil
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def filter_with_minimum_rate(t, record)
|
48
|
+
@resets[t] ||= Fluent::Clock.now + (60 - rand(30))
|
49
|
+
if Fluent::Clock.now > @resets[t]
|
50
|
+
@resets[t] = Fluent::Clock.now + 60
|
51
|
+
@counts[t] = 0
|
52
|
+
end
|
53
|
+
c = (@counts[t] = @counts.fetch(t, 0) + 1)
|
54
|
+
if c < @minimum_rate_per_min || c % @interval == 0
|
55
|
+
record.dup
|
56
|
+
else
|
57
|
+
nil
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
def record_key(tag, record)
|
62
|
+
case @sample_unit
|
63
|
+
when 'all'
|
64
|
+
'all'
|
65
|
+
when 'tag'
|
66
|
+
tag
|
67
|
+
else
|
68
|
+
@accessor.call(record)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -1,36 +1,31 @@
|
|
1
|
-
|
1
|
+
require 'fluent/plugin/output'
|
2
|
+
require 'fluent/clock'
|
3
|
+
|
4
|
+
class Fluent::Plugin::SamplingFilterOutput < Fluent::Plugin::Output
|
2
5
|
Fluent::Plugin.register_output('sampling_filter', self)
|
3
6
|
|
4
|
-
|
5
|
-
config_param :sample_unit, :string, :default => 'tag'
|
6
|
-
config_param :remove_prefix, :string, :default => nil
|
7
|
-
config_param :add_prefix, :string, :default => 'sampled'
|
8
|
-
config_param :minimum_rate_per_min, :integer, :default => nil
|
7
|
+
helpers :event_emitter
|
9
8
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
config_param :interval, :integer
|
10
|
+
config_param :sample_unit, :enum, list: [:tag, :all], default: :tag
|
11
|
+
config_param :remove_prefix, :string, default: nil
|
12
|
+
config_param :add_prefix, :string, default: 'sampled'
|
13
|
+
config_param :minimum_rate_per_min, :integer, default: nil
|
14
14
|
|
15
15
|
def configure(conf)
|
16
16
|
super
|
17
17
|
|
18
|
+
log.warn "sampling_filter output plugin is deprecated. use sampling_filter filter plugin instead with <label> routing."
|
19
|
+
|
18
20
|
if @remove_prefix
|
19
21
|
@removed_prefix_string = @remove_prefix + '.'
|
20
22
|
@removed_length = @removed_prefix_string.length
|
21
23
|
elsif @add_prefix.empty?
|
22
24
|
raise Fluent::ConfigError, "either of 'add_prefix' or 'remove_prefix' must be specified"
|
23
25
|
end
|
26
|
+
@added_prefix_string = nil
|
24
27
|
@added_prefix_string = @add_prefix + '.' unless @add_prefix.empty?
|
25
28
|
|
26
|
-
@sample_unit = case @sample_unit
|
27
|
-
when 'tag'
|
28
|
-
:tag
|
29
|
-
when 'all'
|
30
|
-
:all
|
31
|
-
else
|
32
|
-
raise Fluent::ConfigError, "sample_unit allows only 'tag' or 'all'"
|
33
|
-
end
|
34
29
|
@counts = {}
|
35
30
|
@resets = {} if @minimum_rate_per_min
|
36
31
|
end
|
@@ -47,11 +42,11 @@ class Fluent::SamplingFilterOutput < Fluent::Output
|
|
47
42
|
end
|
48
43
|
|
49
44
|
time_record_pairs.each {|t,r|
|
50
|
-
|
45
|
+
router.emit(tag, t, r)
|
51
46
|
}
|
52
47
|
end
|
53
48
|
|
54
|
-
def
|
49
|
+
def process(tag, es)
|
55
50
|
t = if @sample_unit == :all
|
56
51
|
'all'
|
57
52
|
else
|
@@ -65,11 +60,9 @@ class Fluent::SamplingFilterOutput < Fluent::Output
|
|
65
60
|
# so serious value (and probably will not be broken...),
|
66
61
|
# then i let here as it is now.
|
67
62
|
if @minimum_rate_per_min
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
if Fluent::Engine.now > @resets[t]
|
72
|
-
@resets[t] = Fluent::Engine.now + 60
|
63
|
+
@resets[t] ||= Fluent::Clock.now + (60 - rand(30))
|
64
|
+
if Fluent::Clock.now > @resets[t]
|
65
|
+
@resets[t] = Fluent::Clock.now + 60
|
73
66
|
@counts[t] = 0
|
74
67
|
end
|
75
68
|
es.each do |time,record|
|
@@ -90,7 +83,5 @@ class Fluent::SamplingFilterOutput < Fluent::Output
|
|
90
83
|
end
|
91
84
|
|
92
85
|
emit_sampled(tag, pairs)
|
93
|
-
|
94
|
-
chain.next
|
95
86
|
end
|
96
87
|
end
|
data/test/helper.rb
CHANGED
@@ -13,6 +13,7 @@ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
13
13
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
14
14
|
require 'fluent/test'
|
15
15
|
require 'fluent/plugin/out_sampling_filter'
|
16
|
+
require 'fluent/plugin/filter_sampling'
|
16
17
|
|
17
18
|
class Test::Unit::TestCase
|
18
19
|
end
|
@@ -0,0 +1,186 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'fluent/test/driver/filter'
|
3
|
+
|
4
|
+
class SamplingFilterTest < Test::Unit::TestCase
|
5
|
+
def setup
|
6
|
+
Fluent::Test.setup
|
7
|
+
end
|
8
|
+
|
9
|
+
CONFIG = %[
|
10
|
+
interval 10
|
11
|
+
sample_unit tag
|
12
|
+
]
|
13
|
+
|
14
|
+
def create_driver(conf=CONFIG)
|
15
|
+
Fluent::Test::Driver::Filter.new(Fluent::Plugin::SamplingFilter).configure(conf)
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_configure
|
19
|
+
assert_raise(Fluent::ConfigError) {
|
20
|
+
create_driver('')
|
21
|
+
}
|
22
|
+
d = create_driver %[
|
23
|
+
interval 5
|
24
|
+
]
|
25
|
+
|
26
|
+
assert_equal 5, d.instance.interval
|
27
|
+
assert_equal 'tag', d.instance.sample_unit
|
28
|
+
|
29
|
+
d = create_driver %[
|
30
|
+
interval 1000
|
31
|
+
sample_unit all
|
32
|
+
]
|
33
|
+
assert_equal 1000, d.instance.interval
|
34
|
+
assert_equal 'all', d.instance.sample_unit
|
35
|
+
|
36
|
+
d = create_driver %[
|
37
|
+
interval 1000
|
38
|
+
sample_unit $fake
|
39
|
+
]
|
40
|
+
assert_equal 1000, d.instance.interval
|
41
|
+
assert_equal "$fake", d.instance.sample_unit
|
42
|
+
end
|
43
|
+
|
44
|
+
def test_filter
|
45
|
+
d1 = create_driver(CONFIG)
|
46
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
47
|
+
d1.run(default_tag: 'input.hoge1') do
|
48
|
+
d1.feed({'field1' => 'record1', 'field2' => 1})
|
49
|
+
d1.feed({'field1' => 'record2', 'field2' => 2})
|
50
|
+
d1.feed({'field1' => 'record3', 'field2' => 3})
|
51
|
+
d1.feed({'field1' => 'record4', 'field2' => 4})
|
52
|
+
d1.feed({'field1' => 'record5', 'field2' => 5})
|
53
|
+
d1.feed({'field1' => 'record6', 'field2' => 6})
|
54
|
+
d1.feed({'field1' => 'record7', 'field2' => 7})
|
55
|
+
d1.feed({'field1' => 'record8', 'field2' => 8})
|
56
|
+
d1.feed({'field1' => 'record9', 'field2' => 9})
|
57
|
+
d1.feed({'field1' => 'record10', 'field2' => 10})
|
58
|
+
d1.feed({'field1' => 'record11', 'field2' => 11})
|
59
|
+
d1.feed({'field1' => 'record12', 'field2' => 12})
|
60
|
+
end
|
61
|
+
filtered = d1.filtered
|
62
|
+
assert_equal 1, filtered.length
|
63
|
+
assert_equal 'record10', filtered[0][1]['field1']
|
64
|
+
assert_equal 10, filtered[0][1]['field2']
|
65
|
+
|
66
|
+
d2 = create_driver(%[
|
67
|
+
interval 3
|
68
|
+
])
|
69
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
70
|
+
d2.run(default_tag: 'input.hoge2') do
|
71
|
+
d2.feed({'field1' => 'record1', 'field2' => 1})
|
72
|
+
d2.feed({'field1' => 'record2', 'field2' => 2})
|
73
|
+
d2.feed({'field1' => 'record3', 'field2' => 3})
|
74
|
+
d2.feed({'field1' => 'record4', 'field2' => 4})
|
75
|
+
d2.feed({'field1' => 'record5', 'field2' => 5})
|
76
|
+
d2.feed({'field1' => 'record6', 'field2' => 6})
|
77
|
+
d2.feed({'field1' => 'record7', 'field2' => 7})
|
78
|
+
d2.feed({'field1' => 'record8', 'field2' => 8})
|
79
|
+
d2.feed({'field1' => 'record9', 'field2' => 9})
|
80
|
+
d2.feed({'field1' => 'record10', 'field2' => 10})
|
81
|
+
d2.feed({'field1' => 'record11', 'field2' => 11})
|
82
|
+
d2.feed({'field1' => 'record12', 'field2' => 12})
|
83
|
+
end
|
84
|
+
filtered = d2.filtered
|
85
|
+
assert_equal 4, filtered.length
|
86
|
+
|
87
|
+
assert_equal 'record3', filtered[0][1]['field1']
|
88
|
+
assert_equal 'record6', filtered[1][1]['field1']
|
89
|
+
assert_equal 'record9', filtered[2][1]['field1']
|
90
|
+
assert_equal 'record12', filtered[3][1]['field1']
|
91
|
+
end
|
92
|
+
|
93
|
+
def test_filter_minimum_rate
|
94
|
+
config = %[
|
95
|
+
interval 10
|
96
|
+
sample_unit tag
|
97
|
+
minimum_rate_per_min 100
|
98
|
+
]
|
99
|
+
d = create_driver(config)
|
100
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
101
|
+
d.run(default_tag: 'input.hoge3') do
|
102
|
+
(1..100).each do |t|
|
103
|
+
d.feed(time, {'times' => t, 'data' => 'x'})
|
104
|
+
end
|
105
|
+
(101..130).each do |t|
|
106
|
+
d.feed(time, {'times' => t, 'data' => 'y'})
|
107
|
+
end
|
108
|
+
end
|
109
|
+
filtered = d.filtered
|
110
|
+
assert_equal 103, filtered.length
|
111
|
+
assert_equal ((1..100).map(&:to_i) + [110, 120, 130]), filtered.map{|_time,r| r['times']}
|
112
|
+
assert_equal (['x']*100 + ['y']*3), filtered.map{|_time,r| r['data']}
|
113
|
+
end
|
114
|
+
|
115
|
+
def test_filter_minimum_rate_expire
|
116
|
+
config = %[
|
117
|
+
interval 10
|
118
|
+
sample_unit tag
|
119
|
+
minimum_rate_per_min 10
|
120
|
+
]
|
121
|
+
d = create_driver(config)
|
122
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
123
|
+
d.run(default_tag: 'input.hoge4') do
|
124
|
+
(1..30).each do |t|
|
125
|
+
d.feed(time, {'times' => t, 'data' => 'x'})
|
126
|
+
end
|
127
|
+
end
|
128
|
+
filtered = d.filtered
|
129
|
+
assert_equal 12, filtered.length
|
130
|
+
assert_equal ((1..10).map(&:to_i)+[20,30]), filtered.map{|_time,r| r['times']}
|
131
|
+
assert_equal (['x']*12), filtered.map{|_time,r| r['data']}
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_filer_with_record_accessor
|
135
|
+
d2 = create_driver(%[
|
136
|
+
interval 3
|
137
|
+
sample_unit field3
|
138
|
+
])
|
139
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
140
|
+
d2.run(default_tag: 'input.hoge2') do
|
141
|
+
(1..12).each do |i|
|
142
|
+
[1,2].each do |sample_vaule|
|
143
|
+
d2.feed({'field1' => "record#{i}", 'field2' => i, 'field3' => sample_vaule})
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
filtered = d2.filtered
|
148
|
+
assert_equal 8, filtered.length
|
149
|
+
|
150
|
+
assert_equal 'record3', filtered[0][1]['field1']
|
151
|
+
assert_equal 1, filtered[0][1]['field3']
|
152
|
+
assert_equal 'record3', filtered[1][1]['field1']
|
153
|
+
assert_equal 2, filtered[1][1]['field3']
|
154
|
+
assert_equal 'record6', filtered[2][1]['field1']
|
155
|
+
assert_equal 1, filtered[2][1]['field3']
|
156
|
+
assert_equal 'record6', filtered[3][1]['field1']
|
157
|
+
assert_equal 2, filtered[3][1]['field3']
|
158
|
+
assert_equal 'record9', filtered[4][1]['field1']
|
159
|
+
assert_equal 1, filtered[4][1]['field3']
|
160
|
+
assert_equal 'record9', filtered[5][1]['field1']
|
161
|
+
assert_equal 2, filtered[5][1]['field3']
|
162
|
+
assert_equal 'record12', filtered[6][1]['field1']
|
163
|
+
assert_equal 1, filtered[6][1]['field3']
|
164
|
+
assert_equal 'record12', filtered[7][1]['field1']
|
165
|
+
assert_equal 2, filtered[7][1]['field3']
|
166
|
+
end
|
167
|
+
|
168
|
+
def test_filter_all
|
169
|
+
config = %[
|
170
|
+
interval 10
|
171
|
+
sample_unit all
|
172
|
+
]
|
173
|
+
d1 = create_driver(config)
|
174
|
+
time = Time.parse("2012-01-02 13:14:15").to_i
|
175
|
+
d1.run do
|
176
|
+
6.times do |i|
|
177
|
+
[0,1].each do |j|
|
178
|
+
d1.feed("input.hoge#{2*i+j}", time, {'field1' => "record#{2*i+j+1}"})
|
179
|
+
end
|
180
|
+
end
|
181
|
+
end
|
182
|
+
filtered = d1.filtered
|
183
|
+
assert_equal 1, filtered.length
|
184
|
+
assert_equal 'record10', filtered[0][1]['field1']
|
185
|
+
end
|
186
|
+
end
|
@@ -1,10 +1,11 @@
|
|
1
1
|
require 'helper'
|
2
|
+
require 'fluent/test/driver/output'
|
2
3
|
|
3
4
|
class SamplingFilterOutputTest < Test::Unit::TestCase
|
4
5
|
def setup
|
5
6
|
Fluent::Test.setup
|
6
7
|
end
|
7
|
-
|
8
|
+
|
8
9
|
CONFIG = %[
|
9
10
|
interval 10
|
10
11
|
sample_unit tag
|
@@ -12,13 +13,13 @@ class SamplingFilterOutputTest < Test::Unit::TestCase
|
|
12
13
|
add_prefix sampled
|
13
14
|
]
|
14
15
|
|
15
|
-
def create_driver(conf=CONFIG
|
16
|
-
Fluent::Test::
|
16
|
+
def create_driver(conf=CONFIG)
|
17
|
+
Fluent::Test::Driver::Output.new(Fluent::Plugin::SamplingFilterOutput).configure(conf)
|
17
18
|
end
|
18
19
|
|
19
20
|
def test_configure
|
20
21
|
assert_raise(Fluent::ConfigError) {
|
21
|
-
|
22
|
+
create_driver('')
|
22
23
|
}
|
23
24
|
d = create_driver %[
|
24
25
|
interval 5
|
@@ -33,7 +34,7 @@ class SamplingFilterOutputTest < Test::Unit::TestCase
|
|
33
34
|
interval 1000
|
34
35
|
sample_unit all
|
35
36
|
remove_prefix test
|
36
|
-
add_prefix output
|
37
|
+
add_prefix output
|
37
38
|
]
|
38
39
|
assert_equal 1000, d.instance.interval
|
39
40
|
assert_equal :all, d.instance.sample_unit
|
@@ -42,54 +43,54 @@ class SamplingFilterOutputTest < Test::Unit::TestCase
|
|
42
43
|
end
|
43
44
|
|
44
45
|
def test_emit
|
45
|
-
d1 = create_driver(CONFIG
|
46
|
+
d1 = create_driver(CONFIG)
|
46
47
|
time = Time.parse("2012-01-02 13:14:15").to_i
|
47
|
-
d1.run do
|
48
|
-
d1.
|
49
|
-
d1.
|
50
|
-
d1.
|
51
|
-
d1.
|
52
|
-
d1.
|
53
|
-
d1.
|
54
|
-
d1.
|
55
|
-
d1.
|
56
|
-
d1.
|
57
|
-
d1.
|
58
|
-
d1.
|
59
|
-
d1.
|
48
|
+
d1.run(default_tag: 'input.hoge1') do
|
49
|
+
d1.feed(time, {'field1' => 'record1', 'field2' => 1})
|
50
|
+
d1.feed(time, {'field1' => 'record2', 'field2' => 2})
|
51
|
+
d1.feed(time, {'field1' => 'record3', 'field2' => 3})
|
52
|
+
d1.feed(time, {'field1' => 'record4', 'field2' => 4})
|
53
|
+
d1.feed(time, {'field1' => 'record5', 'field2' => 5})
|
54
|
+
d1.feed(time, {'field1' => 'record6', 'field2' => 6})
|
55
|
+
d1.feed(time, {'field1' => 'record7', 'field2' => 7})
|
56
|
+
d1.feed(time, {'field1' => 'record8', 'field2' => 8})
|
57
|
+
d1.feed(time, {'field1' => 'record9', 'field2' => 9})
|
58
|
+
d1.feed(time, {'field1' => 'record10', 'field2' => 10})
|
59
|
+
d1.feed(time, {'field1' => 'record11', 'field2' => 11})
|
60
|
+
d1.feed(time, {'field1' => 'record12', 'field2' => 12})
|
60
61
|
end
|
61
|
-
|
62
|
-
assert_equal 1,
|
63
|
-
assert_equal 'sampled.hoge1',
|
64
|
-
assert_equal 'record10',
|
65
|
-
assert_equal 10,
|
62
|
+
events = d1.events
|
63
|
+
assert_equal 1, events.length
|
64
|
+
assert_equal 'sampled.hoge1', events[0][0] # tag
|
65
|
+
assert_equal 'record10', events[0][2]['field1']
|
66
|
+
assert_equal 10, events[0][2]['field2']
|
66
67
|
|
67
68
|
d2 = create_driver(%[
|
68
69
|
interval 3
|
69
|
-
]
|
70
|
+
])
|
70
71
|
time = Time.parse("2012-01-02 13:14:15").to_i
|
71
|
-
d2.run do
|
72
|
-
d2.
|
73
|
-
d2.
|
74
|
-
d2.
|
75
|
-
d2.
|
76
|
-
d2.
|
77
|
-
d2.
|
78
|
-
d2.
|
79
|
-
d2.
|
80
|
-
d2.
|
81
|
-
d2.
|
82
|
-
d2.
|
83
|
-
d2.
|
72
|
+
d2.run(default_tag: 'input.hoge2') do
|
73
|
+
d2.feed(time, {'field1' => 'record1', 'field2' => 1})
|
74
|
+
d2.feed(time, {'field1' => 'record2', 'field2' => 2})
|
75
|
+
d2.feed(time, {'field1' => 'record3', 'field2' => 3})
|
76
|
+
d2.feed(time, {'field1' => 'record4', 'field2' => 4})
|
77
|
+
d2.feed(time, {'field1' => 'record5', 'field2' => 5})
|
78
|
+
d2.feed(time, {'field1' => 'record6', 'field2' => 6})
|
79
|
+
d2.feed(time, {'field1' => 'record7', 'field2' => 7})
|
80
|
+
d2.feed(time, {'field1' => 'record8', 'field2' => 8})
|
81
|
+
d2.feed(time, {'field1' => 'record9', 'field2' => 9})
|
82
|
+
d2.feed(time, {'field1' => 'record10', 'field2' => 10})
|
83
|
+
d2.feed(time, {'field1' => 'record11', 'field2' => 11})
|
84
|
+
d2.feed(time, {'field1' => 'record12', 'field2' => 12})
|
84
85
|
end
|
85
|
-
|
86
|
-
assert_equal 4,
|
87
|
-
assert_equal 'sampled.input.hoge2',
|
86
|
+
events = d2.events
|
87
|
+
assert_equal 4, events.length
|
88
|
+
assert_equal 'sampled.input.hoge2', events[0][0] # tag
|
88
89
|
|
89
|
-
assert_equal 'record3',
|
90
|
-
assert_equal 'record6',
|
91
|
-
assert_equal 'record9',
|
92
|
-
assert_equal 'record12',
|
90
|
+
assert_equal 'record3', events[0][2]['field1']
|
91
|
+
assert_equal 'record6', events[1][2]['field1']
|
92
|
+
assert_equal 'record9', events[2][2]['field1']
|
93
|
+
assert_equal 'record12', events[3][2]['field1']
|
93
94
|
end
|
94
95
|
|
95
96
|
def test_minimum_rate
|
@@ -99,27 +100,26 @@ sample_unit tag
|
|
99
100
|
remove_prefix input
|
100
101
|
minimum_rate_per_min 100
|
101
102
|
]
|
102
|
-
d = create_driver(config
|
103
|
+
d = create_driver(config)
|
103
104
|
time = Time.parse("2012-01-02 13:14:15").to_i
|
104
|
-
d.run do
|
105
|
+
d.run(default_tag: 'input.hoge3') do
|
105
106
|
(1..100).each do |t|
|
106
|
-
d.
|
107
|
+
d.feed(time, {'times' => t, 'data' => 'x'})
|
107
108
|
end
|
108
109
|
(101..130).each do |t|
|
109
|
-
d.
|
110
|
+
d.feed(time, {'times' => t, 'data' => 'y'})
|
110
111
|
end
|
111
112
|
end
|
112
|
-
|
113
|
-
assert_equal 103,
|
114
|
-
assert_equal 'sampled.hoge3',
|
115
|
-
assert_equal ((1..100).map(&:to_i) + [110, 120, 130]),
|
116
|
-
assert_equal (['x']*100 + ['y']*3),
|
113
|
+
events = d.events
|
114
|
+
assert_equal 103, events.length
|
115
|
+
assert_equal 'sampled.hoge3', events[0][0]
|
116
|
+
assert_equal ((1..100).map(&:to_i) + [110, 120, 130]), events.map{|_tag,_time,r| r['times']}
|
117
|
+
assert_equal (['x']*100 + ['y']*3), events.map{|_tag,_time,r| r['data']}
|
117
118
|
|
118
119
|
end
|
119
120
|
def test_minimum_rate_expire
|
120
121
|
# hey, this test needs 60 seconds....
|
121
|
-
|
122
|
-
return
|
122
|
+
omit("this test needs 60 seconds....") unless ENV["EXECLONGTEST"]
|
123
123
|
|
124
124
|
config = %[
|
125
125
|
interval 10
|
@@ -127,39 +127,39 @@ sample_unit tag
|
|
127
127
|
remove_prefix input
|
128
128
|
minimum_rate_per_min 10
|
129
129
|
]
|
130
|
-
d = create_driver(config
|
130
|
+
d = create_driver(config)
|
131
131
|
time = Time.parse("2012-01-02 13:14:15").to_i
|
132
|
-
d.run do
|
132
|
+
d.run(default_tag: 'input.hoge4') do
|
133
133
|
(1..100).each do |t|
|
134
|
-
d.
|
134
|
+
d.feed(time, {'times' => t, 'data' => 'x'})
|
135
135
|
end
|
136
136
|
sleep 60
|
137
137
|
(101..130).each do |t|
|
138
|
-
d.
|
138
|
+
d.feed(time+60, {'times' => t, 'data' => 'y'})
|
139
139
|
end
|
140
140
|
end
|
141
|
-
|
142
|
-
# assert_equal (19 + 12),
|
143
|
-
assert_equal 'sampled.hoge4',
|
144
|
-
assert_equal ((1..10).map(&:to_i)+[20,30,40,50,60,70,80,90,100]+(101..110).map(&:to_i)+[120,130]),
|
145
|
-
assert_equal (['x']*19 + ['y']*12),
|
141
|
+
events = d.events
|
142
|
+
# assert_equal (19 + 12), events.length
|
143
|
+
assert_equal 'sampled.hoge4', events[0][0]
|
144
|
+
assert_equal ((1..10).map(&:to_i)+[20,30,40,50,60,70,80,90,100]+(101..110).map(&:to_i)+[120,130]), events.map{|_tag,_time,r| r['times']}
|
145
|
+
assert_equal (['x']*19 + ['y']*12), events.map{|_tag,_time,r| r['data']}
|
146
146
|
end
|
147
147
|
|
148
148
|
def test_without_add_prefix_but_remove_prefix
|
149
149
|
config = %[
|
150
150
|
interval 10
|
151
|
-
add_prefix
|
151
|
+
add_prefix
|
152
152
|
remove_prefix input
|
153
153
|
]
|
154
|
-
d = create_driver(config
|
154
|
+
d = create_driver(config)
|
155
155
|
time = Time.parse("2012-01-02 13:14:15").to_i
|
156
|
-
d.run do
|
156
|
+
d.run(default_tag: 'input.hoge3') do
|
157
157
|
(1..100).each do |t|
|
158
|
-
d.
|
158
|
+
d.feed(time, {'times' => t, 'data' => 'x'})
|
159
159
|
end
|
160
160
|
end
|
161
|
-
|
162
|
-
assert_equal 10,
|
163
|
-
assert_equal 'hoge3',
|
161
|
+
events = d.events
|
162
|
+
assert_equal 10, events.length
|
163
|
+
assert_equal 'hoge3', events[0][0]
|
164
164
|
end
|
165
165
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-sampling-filter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- TAGOMORI Satoshi
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -24,20 +24,40 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: test-unit
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 3.1.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 3.1.0
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: fluentd
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - ">="
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
47
|
+
version: 0.14.12
|
48
|
+
- - "<"
|
49
|
+
- !ruby/object:Gem::Version
|
50
|
+
version: '2'
|
34
51
|
type: :runtime
|
35
52
|
prerelease: false
|
36
53
|
version_requirements: !ruby/object:Gem::Requirement
|
37
54
|
requirements:
|
38
55
|
- - ">="
|
39
56
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
57
|
+
version: 0.14.12
|
58
|
+
- - "<"
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '2'
|
41
61
|
description: fluentd plugin to pickup sample data from matched massages
|
42
62
|
email:
|
43
63
|
- tagomoris@gmail.com
|
@@ -54,14 +74,16 @@ files:
|
|
54
74
|
- README.md
|
55
75
|
- Rakefile
|
56
76
|
- fluent-plugin-sampling-filter.gemspec
|
77
|
+
- lib/fluent/plugin/filter_sampling.rb
|
57
78
|
- lib/fluent/plugin/out_sampling_filter.rb
|
58
79
|
- test/helper.rb
|
80
|
+
- test/plugin/test_filter_sampling.rb
|
59
81
|
- test/plugin/test_out_sampling_filter.rb
|
60
82
|
homepage: https://github.com/tagomoris/fluent-plugin-sampling-filter
|
61
83
|
licenses:
|
62
|
-
-
|
84
|
+
- Apache-2.0
|
63
85
|
metadata: {}
|
64
|
-
post_install_message:
|
86
|
+
post_install_message:
|
65
87
|
rdoc_options: []
|
66
88
|
require_paths:
|
67
89
|
- lib
|
@@ -76,11 +98,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
76
98
|
- !ruby/object:Gem::Version
|
77
99
|
version: '0'
|
78
100
|
requirements: []
|
79
|
-
|
80
|
-
|
81
|
-
signing_key:
|
101
|
+
rubygems_version: 3.2.3
|
102
|
+
signing_key:
|
82
103
|
specification_version: 4
|
83
104
|
summary: fluentd plugin to pickup sample data from matched massages
|
84
105
|
test_files:
|
85
106
|
- test/helper.rb
|
107
|
+
- test/plugin/test_filter_sampling.rb
|
86
108
|
- test/plugin/test_out_sampling_filter.rb
|