fluent-plugin-histogram 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/README.md +11 -11
- data/fluent-plugin-histogram.gemspec +1 -1
- data/lib/fluent/plugin/out_histogram.rb +19 -17
- data/test/plugin/test_out_histogram.rb +30 -7
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
Yzc2ZDFiNDY3NmVlZjk5ZGE5MWU1NWFiYzRlZmM5MjcwOWYwZDIxNw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
N2E5NWRkMDBmYjIxZjc4NDcwZWI5NjA5OGM1NTMyNmY3MTAwZTg2OQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MDllNzkzOTE0YzE0MzUzNjE1MTQ4YTFmOGNlYzk5MWQ4NTc0YTYzMDU0NWZl
|
10
|
+
MWVkZDU5MzczMzMzMDE5Y2FlODVmMTM0NWJjNGE3ZmE0NWI5NmRhY2U4Njg3
|
11
|
+
YjYyNzc3ODFhMGNjMjI1OTk3OGIzNjMyM2VkNjk4MWFiM2E2NWE=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZGE4Y2MyYzM1YjBhZjc4ZjYzNzU0MmJlNTg4YTI5ZDM5Zjk1ZmJjODQ0YTVl
|
14
|
+
NmExZGJmNmFhOTBlMDc4YjFiOTE2MzNmOGE5NGNjOTQwMWVmZDhhYjBkYzc3
|
15
|
+
NTk3ZjZjNTIzOWI1YmVkZjkxMmJjMzc5YzEwZDczMjE0YzA0MmY=
|
data/README.md
CHANGED
@@ -1,25 +1,25 @@
|
|
1
1
|
# fluent-plugin-histogram
|
2
2
|
|
3
|
-
Fluentd output plugin.
|
3
|
+
Fluentd output plugin.
|
4
4
|
|
5
5
|
Counts up input keys and builds a **scalable and rough histogram** to help detect hotspot problems.
|
6
6
|
|
7
7
|
"Scalable rough histogram" fits cases where there is an enormous variety of keys.
|
8
8
|
|
9
|
-
We refered ["Strauss, O.: Rough histograms for robust statistics, Pattern Recogniti, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for "rough histogram".
|
9
|
+
We refered ["Strauss, O.: Rough histograms for robust statistics, Pattern Recogniti, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for "rough histogram".
|
10
10
|
In this approach, the increment unit is not a single value (`.`); instead, several neighboring values are incremented in a shape like `△`.
|
11
11
|
To use this, please set `alpha >= 1`(default 1) option in fluent.conf.
|
12
12
|
|
13
13
|
Moreover, we optimized the histogram for an enormous variety of keys by fixing the histogram width.
|
14
|
-
To use this, please set `bin_num`(default 100) in fluent.conf.
|
14
|
+
To use this, please set `bin_num`(default 100) in fluent.conf.
|
15
15
|
|
16
|
-
Be careful, our plugin's output histogram is not correct count-up
|
16
|
+
Be careful: our plugin's output histogram is not an exact count of the provided data. However, this plugin can scale out - it can handle input of 25,000 records/sec - and the output histogram is sufficient for detecting a hotspot problem.
|
17
17
|
|
18
18
|
## Examples
|
19
19
|
|
20
20
|
##### Example 1
|
21
21
|
|
22
|
-
if run below commands,
|
22
|
+
if run below commands,
|
23
23
|
```
|
24
24
|
$ echo '{"keys":["A", "B", "C", "A"]}' | fluent-cat input.sample
|
25
25
|
$ echo '{"keys":["A", "B", "D"]}' | fluent-cat input.sample
|
@@ -45,13 +45,13 @@ run bench
|
|
45
45
|
$ ruby bench/genload.rb input.sample 5000
|
46
46
|
```
|
47
47
|
|
48
|
-
output is,
|
48
|
+
output is,
|
49
49
|
```
|
50
|
-
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
50
|
+
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
51
51
|
{"hist":
|
52
|
-
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
-
"sum":100000,
|
54
|
-
"avg":1000,
|
52
|
+
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
+
"sum":100000,
|
54
|
+
"avg":1000,
|
55
55
|
"sd":193}
|
56
56
|
```
|
57
57
|
|
@@ -66,7 +66,7 @@ output is,
|
|
66
66
|
tag_suffix __HOSTNAME__ # this plugin mixined fluent-mixin-config-placeholders
|
67
67
|
input_tag_remove_prefix input
|
68
68
|
alpha 1 # count up like this, (■ = +1)
|
69
|
-
# ■
|
69
|
+
# ■
|
70
70
|
# ■ ■ ■ ■
|
71
71
|
# ■ ■ ■ ■ ■ ■ ■ ■ ■
|
72
72
|
# alpha: 0, 1, 2
|
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |gem|
|
4
4
|
gem.name = "fluent-plugin-histogram"
|
5
|
-
gem.version = "0.1.
|
5
|
+
gem.version = "0.1.3"
|
6
6
|
gem.authors = ["Yusuke SHIMIZU"]
|
7
7
|
gem.email = "a.ryuklnm@gmail.com"
|
8
8
|
gem.description = "Combine inputs data and make histogram which helps to detect a hotspot."
|
@@ -14,7 +14,7 @@ module Fluent
|
|
14
14
|
config_param :count_key, :string, :default => 'keys'
|
15
15
|
config_param :bin_num, :integer, :default => 100
|
16
16
|
config_param :alpha, :integer, :default => 1
|
17
|
-
config_param :sampling_rate, :integer, :default =>
|
17
|
+
config_param :sampling_rate, :integer, :default => 10
|
18
18
|
|
19
19
|
include Fluent::Mixin::ConfigPlaceholders
|
20
20
|
|
@@ -32,8 +32,10 @@ module Fluent
|
|
32
32
|
def configure(conf)
|
33
33
|
super
|
34
34
|
|
35
|
-
raise Fluent::ConfigError,
|
35
|
+
raise Fluent::ConfigError, 'bin_num must be > 0' if @bin_num <= 0
|
36
|
+
raise Fluent::ConfigError, 'sampling_rate must be >= 1' if @sampling_rate < 1
|
36
37
|
$log.warn %Q[too small "bin_num(=#{@bin_num})" may raise unexpected outcome] if @bin_num < 100
|
38
|
+
@sampling = true if !!conf['sampling_rate']
|
37
39
|
|
38
40
|
@tag_prefix_string = @tag_prefix + '.' if @tag_prefix
|
39
41
|
@tag_suffix_string = '.' + @tag_suffix if @tag_suffix
|
@@ -46,6 +48,8 @@ module Fluent
|
|
46
48
|
|
47
49
|
@hists = initialize_hists
|
48
50
|
@sampling_counter = 0
|
51
|
+
@tick = @sampling ? @sampling_rate.to_i : 1
|
52
|
+
|
49
53
|
@mutex = Mutex.new
|
50
54
|
|
51
55
|
end
|
@@ -91,8 +95,8 @@ module Fluent
|
|
91
95
|
id = key.hash % @bin_num
|
92
96
|
@mutex.synchronize {
|
93
97
|
(0..@alpha).each do |alpha|
|
94
|
-
(-alpha..alpha).each do |
|
95
|
-
@hists[tag][(id +
|
98
|
+
(-alpha..alpha).each do |al|
|
99
|
+
@hists[tag][(id + al) % @bin_num] += @tick
|
96
100
|
end
|
97
101
|
end
|
98
102
|
}
|
@@ -104,7 +108,7 @@ module Fluent
|
|
104
108
|
es.each do |time, record|
|
105
109
|
keys = record[@count_key]
|
106
110
|
[keys].flatten.each do |k|
|
107
|
-
if
|
111
|
+
if !@sampling
|
108
112
|
increment(tag, k)
|
109
113
|
else
|
110
114
|
@sampling_counter += 1
|
@@ -120,27 +124,25 @@ module Fluent
|
|
120
124
|
def tagging(flushed)
|
121
125
|
tagged = {}
|
122
126
|
tagged = Hash[ flushed.map do |tag, hist|
|
127
|
+
tagged_tag = tag.dup
|
123
128
|
if @tag
|
124
|
-
|
129
|
+
tagged_tag = @tag
|
125
130
|
else
|
126
131
|
if @input_tag_remove_prefix &&
|
127
132
|
( ( tag.start_with?(@remove_prefix_string) &&
|
128
133
|
tag.length > @remove_prefix_length ) ||
|
129
134
|
tag == @input_tag_remove_prefix)
|
130
|
-
|
131
|
-
tag.gsub!(/^\.|\.$/, "")
|
132
|
-
end
|
133
|
-
if @tag_prefix
|
134
|
-
tag = @tag_prefix_string + tag
|
135
|
-
tag.gsub!(/^\.|\.$/, "")
|
136
|
-
end
|
137
|
-
if @tag_suffix
|
138
|
-
tag += @tag_suffix_string
|
139
|
-
tag.gsub!(/^\.|\.$/, "")
|
135
|
+
tagged_tag = tagged_tag[@input_tag_remove_prefix.length..-1]
|
140
136
|
end
|
137
|
+
|
138
|
+
tagged_tag = @tag_prefix_string + tagged_tag if @tag_prefix
|
139
|
+
tagged_tag << @tag_suffix_string if @tag_suffix
|
140
|
+
|
141
|
+
tagged_tag.gsub!(/(^\.+)|(\.+$)/, '')
|
142
|
+
tagged_tag.gsub!(/(\.\.+)/, '.')
|
141
143
|
end
|
142
144
|
|
143
|
-
[
|
145
|
+
[tagged_tag, hist]
|
144
146
|
end ]
|
145
147
|
tagged
|
146
148
|
end
|
@@ -23,6 +23,9 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
23
23
|
assert_raise(Fluent::ConfigError) {
|
24
24
|
create_driver %[ bin_num 0]
|
25
25
|
}
|
26
|
+
assert_raise(Fluent::ConfigError) {
|
27
|
+
create_driver %[ sampling_rate -1 ]
|
28
|
+
}
|
26
29
|
end
|
27
30
|
|
28
31
|
def test_small_increment_no_alpha
|
@@ -155,6 +158,7 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
155
158
|
tag_prefix histo
|
156
159
|
tag_suffix __HOSTNAME__
|
157
160
|
hostname localhost
|
161
|
+
alpha 1
|
158
162
|
input_tag_remove_prefix test])
|
159
163
|
# ("A".."ZZ").to_a.size == 702
|
160
164
|
data = ("A".."ZZ").to_a.shuffle
|
@@ -165,7 +169,7 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
165
169
|
end
|
166
170
|
end
|
167
171
|
end
|
168
|
-
flushed_even = f.instance.flush
|
172
|
+
flushed_even = f.instance.flush
|
169
173
|
|
170
174
|
#('A'..'ZZ').to_a.shuffle.size == 702
|
171
175
|
# In here, replace 7 values of ('A'..'ZZ') to 'D' as example hotspot.
|
@@ -177,26 +181,45 @@ class HistogramOutputTest < Test::Unit::TestCase
|
|
177
181
|
end
|
178
182
|
end
|
179
183
|
end
|
180
|
-
|
181
|
-
|
182
|
-
assert_equal(true, flushed_even[:sd] <
|
184
|
+
flushed_bias = f.instance.flush
|
185
|
+
|
186
|
+
assert_equal(true, flushed_even["histo.localhost"][:sd] < flushed_bias["histo.localhost"][:sd],
|
187
|
+
"expected
|
188
|
+
even:#{flushed_even["histo.localhost"]}
|
189
|
+
<
|
190
|
+
bias:#{flushed_bias["histo.localhost"]}")
|
183
191
|
end
|
184
192
|
|
185
193
|
def test_sampling
|
186
194
|
bin_num = 100
|
187
195
|
sampling_rate = 10
|
188
196
|
f = create_driver(%[
|
189
|
-
bin_num
|
197
|
+
bin_num #{bin_num}
|
190
198
|
sampling_rate #{sampling_rate}
|
191
199
|
alpha 0 ])
|
200
|
+
f.run do
|
201
|
+
sampling_rate.times do
|
202
|
+
f.emit({"keys" => ["A"]})
|
203
|
+
end
|
204
|
+
end
|
205
|
+
flushed = f.instance.flush
|
206
|
+
assert_equal(sampling_rate, flushed["test"][:sum])
|
207
|
+
|
208
|
+
f.run do
|
209
|
+
1.times do # 1 < sampling_rate
|
210
|
+
f.emit({"keys" => ["A"]})
|
211
|
+
end
|
212
|
+
end
|
213
|
+
flushed = f.instance.flush
|
214
|
+
assert_equal(0, flushed["test"][:sum])
|
215
|
+
|
192
216
|
f.run do
|
193
217
|
100.times do
|
194
218
|
f.emit({"keys" => ["A", "B", "C"]})
|
195
219
|
end
|
196
220
|
end
|
197
221
|
flushed = f.instance.flush
|
198
|
-
assert_equal(
|
199
|
-
assert_equal(300/bin_num, flushed["test"][:avg])
|
222
|
+
assert_equal(100*3, flushed["test"][:sum])
|
200
223
|
end
|
201
224
|
|
202
225
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fluent-plugin-histogram
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Yusuke SHIMIZU
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-01-
|
11
|
+
date: 2014-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|