fluent-plugin-histogram 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +11 -11
- data/fluent-plugin-histogram.gemspec +1 -1
- data/lib/fluent/plugin/out_histogram.rb +19 -17
- data/test/plugin/test_out_histogram.rb +30 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
Yzc2ZDFiNDY3NmVlZjk5ZGE5MWU1NWFiYzRlZmM5MjcwOWYwZDIxNw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2E5NWRkMDBmYjIxZjc4NDcwZWI5NjA5OGM1NTMyNmY3MTAwZTg2OQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDllNzkzOTE0YzE0MzUzNjE1MTQ4YTFmOGNlYzk5MWQ4NTc0YTYzMDU0NWZl
|
10
|
+
MWVkZDU5MzczMzMzMDE5Y2FlODVmMTM0NWJjNGE3ZmE0NWI5NmRhY2U4Njg3
|
11
|
+
YjYyNzc3ODFhMGNjMjI1OTk3OGIzNjMyM2VkNjk4MWFiM2E2NWE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGE4Y2MyYzM1YjBhZjc4ZjYzNzU0MmJlNTg4YTI5ZDM5Zjk1ZmJjODQ0YTVl
|
14
|
+
NmExZGJmNmFhOTBlMDc4YjFiOTE2MzNmOGE5NGNjOTQwMWVmZDhhYjBkYzc3
|
15
|
+
NTk3ZjZjNTIzOWI1YmVkZjkxMmJjMzc5YzEwZDczMjE0YzA0MmY=
|
data/README.md
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
# fluent-plugin-histogram
|
2
2
|
|
3
|
-
Fluentd output plugin.
|
3
|
+
Fluentd output plugin.
|
4
4
|
|
5
5
|
Count up input keys, and make **scalable and rough histogram** to help detecting hotspot problems.
|
6
6
|
|
7
7
|
"Scalable rough histogram" fit for cases there are an enormous variety of keys.
|
8
8
|
|
9
|
-
We refered ["Strauss, O.: Rough histograms for robust statistics, Pattern Recogniti, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for "rough histogram".
|
9
|
+
We refered ["Strauss, O.: Rough histograms for robust statistics, Pattern Recogniti, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for "rough histogram".
|
10
10
|
In this approarch, a increment unit is not one value(`.`), increment some values like this shape `△ `.
|
11
11
|
To use this, please set `alpha >= 1`(default 1) option in fluent.conf.
|
12
12
|
|
13
13
|
Moreover, we optimized that histogram for enormous variety of keys by fix histogram width.
|
14
|
-
To use this, please set `bin_num`(default 100) in fluent.conf.
|
14
|
+
To use this, please set `bin_num`(default 100) in fluent.conf.
|
15
15
|
|
16
|
-
Be careful, our plugin's output histogram is not correct count-up
|
16
|
+
Be careful, our plugin's output histogram is not correct count-up results about provided data. But this plugin can scale out - can handle 25,000 records/sec inputs data -, and that output histogram is enough to use for detecting a hotspot problem.
|
17
17
|
|
18
18
|
## Examples
|
19
19
|
|
20
20
|
##### Example 1
|
21
21
|
|
22
|
-
if run below commands,
|
22
|
+
if run below commands,
|
23
23
|
```
|
24
24
|
$ echo '{"keys":["A", "B", "C", "A"]}' | fluent-cat input.sample
|
25
25
|
$ echo '{"keys":["A", "B", "D"]}' | fluent-cat input.sample
|
@@ -45,13 +45,13 @@ run bench
|
|
45
45
|
$ ruby bench/genload.rb input.sample 5000
|
46
46
|
```
|
47
47
|
|
48
|
-
output is,
|
48
|
+
output is,
|
49
49
|
```
|
50
|
-
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
50
|
+
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
51
51
|
{"hist":
|
52
|
-
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
-
"sum":100000,
|
54
|
-
"avg":1000,
|
52
|
+
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
+
"sum":100000,
|
54
|
+
"avg":1000,
|
55
55
|
"sd":193}
|
56
56
|
```
|
57
57
|
|
@@ -66,7 +66,7 @@ output is,
|
|
66
66
|
tag_suffix __HOSTNAME__ # this plugin mixined fluent-mixin-config-placeholders
|
67
67
|
input_tag_remove_prefix input
|
68
68
|
alpha 1 # count up like this, (■ = +1)
|
69
|
-
# ■
|
69
|
+
# ■
|
70
70
|
# ■ ■ ■ ■
|
71
71
|
# ■ ■ ■ ■ ■ ■ ■ ■ ■
|
72
72
|
# alpha: 0, 1, 2
|
data/fluent-plugin-histogram.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-histogram"
|
5
|
-
gem.version = "0.1.2"
|
5
|
+
gem.version = "0.1.3"
|
6
6
|
gem.authors = ["Yusuke SHIMIZU"]
|
7
7
|
gem.email = "a.ryuklnm@gmail.com"
|
8
8
|
gem.description = "Combine inputs data and make histogram which helps to detect a hotspot."
|
data/lib/fluent/plugin/out_histogram.rb
CHANGED
@@ -14,7 +14,7 @@ module Fluent
|
|
14
14
|
config_param :count_key, :string, :default => 'keys'
|
15
15
|
config_param :bin_num, :integer, :default => 100
|
16
16
|
config_param :alpha, :integer, :default => 1
|
17
|
-
config_param :sampling_rate, :integer, :default =>
|
17
|
+
config_param :sampling_rate, :integer, :default => 10
|
18
18
|
|
19
19
|
include Fluent::Mixin::ConfigPlaceholders
|
20
20
|
|
@@ -32,8 +32,10 @@ module Fluent
|
|
32
32
|
def configure(conf)
|
33
33
|
super
|
34
34
|
|
35
|
-
raise Fluent::ConfigError,
|
35
|
+
raise Fluent::ConfigError, 'bin_num must be > 0' if @bin_num <= 0
|
36
|
+
raise Fluent::ConfigError, 'sampling_rate must be >= 1' if @sampling_rate < 1
|
36
37
|
$log.warn %Q[too small "bin_num(=#{@bin_num})" may raise unexpected outcome] if @bin_num < 100
|
38
|
+
@sampling = true if !!conf['sampling_rate']
|
37
39
|
|
38
40
|
@tag_prefix_string = @tag_prefix + '.' if @tag_prefix
|
39
41
|
@tag_suffix_string = '.' + @tag_suffix if @tag_suffix
|
@@ -46,6 +48,8 @@ module Fluent
|
|
46
48
|
|
47
49
|
@hists = initialize_hists
|
48
50
|
@sampling_counter = 0
|
51
|
+
@tick = @sampling ? @sampling_rate.to_i : 1
|
52
|
+
|
49
53
|
@mutex = Mutex.new
|
50
54
|
|
51
55
|
end
|
@@ -91,8 +95,8 @@ module Fluent
|
|
91
95
|
id = key.hash % @bin_num
|
92
96
|
@mutex.synchronize {
|
93
97
|
(0..@alpha).each do |alpha|
|
94
|
-
(-alpha..alpha).each do |
|
95
|
-
@hists[tag][(id +
|
98
|
+
(-alpha..alpha).each do |al|
|
99
|
+
@hists[tag][(id + al) % @bin_num] += @tick
|
96
100
|
end
|
97
101
|
end
|
98
102
|
}
|
@@ -104,7 +108,7 @@ module Fluent
|
|
104
108
|
es.each do |time, record|
|
105
109
|
keys = record[@count_key]
|
106
110
|
[keys].flatten.each do |k|
|
107
|
-
if
|
111
|
+
if !@sampling
|
108
112
|
increment(tag, k)
|
109
113
|
else
|
110
114
|
@sampling_counter += 1
|
@@ -120,27 +124,25 @@ module Fluent
|
|
120
124
|
def tagging(flushed)
|
121
125
|
tagged = {}
|
122
126
|
tagged = Hash[ flushed.map do |tag, hist|
|
127
|
+
tagged_tag = tag.dup
|
123
128
|
if @tag
|
124
|
-
|
129
|
+
tagged_tag = @tag
|
125
130
|
else
|
126
131
|
if @input_tag_remove_prefix &&
|
127
132
|
( ( tag.start_with?(@remove_prefix_string) &&
|
128
133
|
tag.length > @remove_prefix_length ) ||
|
129
134
|
tag == @input_tag_remove_prefix)
|
130
|
-
|
131
|
-
tag.gsub!(/^\.|\.$/, "")
|
132
|
-
end
|
133
|
-
if @tag_prefix
|
134
|
-
tag = @tag_prefix_string + tag
|
135
|
-
tag.gsub!(/^\.|\.$/, "")
|
136
|
-
end
|
137
|
-
if @tag_suffix
|
138
|
-
tag += @tag_suffix_string
|
139
|
-
tag.gsub!(/^\.|\.$/, "")
|
135
|
+
tagged_tag = tagged_tag[@input_tag_remove_prefix.length..-1]
|
140
136
|
end
|
137
|
+
|
138
|
+
tagged_tag = @tag_prefix_string + tagged_tag if @tag_prefix
|
139
|
+
tagged_tag << @tag_suffix_string if @tag_suffix
|
140
|
+
|
141
|
+
tagged_tag.gsub!(/(^\.+)|(\.+$)/, '')
|
142
|
+
tagged_tag.gsub!(/(\.\.+)/, '.')
|
141
143
|
end
|
142
144
|
|
143
|
-
[
|
145
|
+
[tagged_tag, hist]
|
144
146
|
end ]
|
145
147
|
tagged
|
146
148
|
end
|
data/test/plugin/test_out_histogram.rb
CHANGED
@@ -23,6 +23,9 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
23
23
|
assert_raise(Fluent::ConfigError) {
|
24
24
|
create_driver %[ bin_num 0]
|
25
25
|
}
|
26
|
+
assert_raise(Fluent::ConfigError) {
|
27
|
+
create_driver %[ sampling_rate -1 ]
|
28
|
+
}
|
26
29
|
end
|
27
30
|
|
28
31
|
def test_small_increment_no_alpha
|
@@ -155,6 +158,7 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
155
158
|
tag_prefix histo
|
156
159
|
tag_suffix __HOSTNAME__
|
157
160
|
hostname localhost
|
161
|
+
alpha 1
|
158
162
|
input_tag_remove_prefix test])
|
159
163
|
# ("A".."ZZ").to_a.size == 702
|
160
164
|
data = ("A".."ZZ").to_a.shuffle
|
@@ -165,7 +169,7 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
165
169
|
end
|
166
170
|
end
|
167
171
|
end
|
168
|
-
flushed_even = f.instance.flush
|
172
|
+
flushed_even = f.instance.flush
|
169
173
|
|
170
174
|
#('A'..'ZZ').to_a.shuffle.size == 702
|
171
175
|
# In here, replace 7 values of ('A'..'ZZ') to 'D' as example hotspot.
|
@@ -177,26 +181,45 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
177
181
|
end
|
178
182
|
end
|
179
183
|
end
|
180
|
-
|
181
|
-
|
182
|
-
assert_equal(true, flushed_even[:sd] <
|
184
|
+
flushed_bias = f.instance.flush
|
185
|
+
|
186
|
+
assert_equal(true, flushed_even["histo.localhost"][:sd] < flushed_bias["histo.localhost"][:sd],
|
187
|
+
"expected
|
188
|
+
even:#{flushed_even["histo.localhost"]}
|
189
|
+
<
|
190
|
+
bias:#{flushed_bias["histo.localhost"]}")
|
183
191
|
end
|
184
192
|
|
185
193
|
def test_sampling
|
186
194
|
bin_num = 100
|
187
195
|
sampling_rate = 10
|
188
196
|
f = create_driver(%[
|
189
|
-
bin_num
|
197
|
+
bin_num #{bin_num}
|
190
198
|
sampling_rate #{sampling_rate}
|
191
199
|
alpha 0 ])
|
200
|
+
f.run do
|
201
|
+
sampling_rate.times do
|
202
|
+
f.emit({"keys" => ["A"]})
|
203
|
+
end
|
204
|
+
end
|
205
|
+
flushed = f.instance.flush
|
206
|
+
assert_equal(sampling_rate, flushed["test"][:sum])
|
207
|
+
|
208
|
+
f.run do
|
209
|
+
1.times do # 1 < sampling_rate
|
210
|
+
f.emit({"keys" => ["A"]})
|
211
|
+
end
|
212
|
+
end
|
213
|
+
flushed = f.instance.flush
|
214
|
+
assert_equal(0, flushed["test"][:sum])
|
215
|
+
|
192
216
|
f.run do
|
193
217
|
100.times do
|
194
218
|
f.emit({"keys" => ["A", "B", "C"]})
|
195
219
|
end
|
196
220
|
end
|
197
221
|
flushed = f.instance.flush
|
198
|
-
assert_equal(
|
199
|
-
assert_equal(300/bin_num, flushed["test"][:avg])
|
222
|
+
assert_equal(100*3, flushed["test"][:sum])
|
200
223
|
end
|
201
224
|
|
202
225
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yusuke SHIMIZU
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|