fluent-plugin-histogram 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/bench/README.md +30 -0
- data/bench/genload.rb +152 -0
- data/fluent-plugin-histogram.gemspec +25 -0
- data/lib/fluent/plugin/out_histogram.rb +179 -0
- data/test/helper.rb +30 -0
- data/test/plugin/test_out_histogram.rb +202 -0
- metadata +112 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NmJkY2ZiNTU3NjcyMzRhN2EzNzA1MWFiMzJlMjgxNGY4ODI2Mjc4Zg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YjVhOGEzNWQzY2YyZTBhYjYyMjVlYjkxMDdhZDczMjg1YzA5MzQ1Nw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDNlZGU3OWYwYzFlYjhiOTNjMzU4MjU3MTU0MzZjZmUzMzY4OGQ4NDg2ODky
|
10
|
+
NjBjNDJlNmM1MTE4MmE1YjcwNWNlMThlN2Y0OTQ3ZGMyN2VkNTQxMThlNmIx
|
11
|
+
YmUzMjQ1MzU2MDk0NzEwNjNlYmRjOGFlZjNlZTZiOTQ0YjkwMTY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWE0MmNjOWQ2MjI5N2EzZTVmNTNiZWY1NzNkNzgzOTNkOGM2NmJmMzU0YWNh
|
14
|
+
OGZmNjQxNzc0OTFmZGZjZmNjYTUxMzdiNTYyOTYzNjI4NGFmYWE4MjdhMTRj
|
15
|
+
ZDM5NmY0MzM2ZDhmOTBlY2UzZDk3MjA2MmJiYzhkODE3ZDJiOTE=
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.swp
|
19
|
+
.conf
|
20
|
+
.idea
|
21
|
+
vendor
|
22
|
+
.ruby-version
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2013 SHIMIZU Yusuke
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# fluent-plugin-histogram
|
2
|
+
|
3
|
+
Fluentd output plugin.
|
4
|
+
|
5
|
+
Counts input keys and builds a **scalable, rough histogram** to help detect hotspot problems.
|
6
|
+
|
7
|
+
"Scalable rough histogram" fit for cases there are an enormous variety of keys.
|
8
|
+
|
9
|
+
We referred to ["Strauss, O.: Rough histograms for robust statistics, Pattern Recognition, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for the "rough histogram" approach.
|
10
|
+
In this approach, the increment unit is not a single value (`.`); instead, several neighboring values are incremented in a shape like `△`.
|
11
|
+
To use this, please set `alpha >= 1`(default 1) option in fluent.conf.
|
12
|
+
|
13
|
+
Moreover, we optimized the histogram for an enormous variety of keys by fixing the histogram width.
|
14
|
+
To use this, please set `bin_num`(default 100) in fluent.conf.
|
15
|
+
|
16
|
+
Be careful: the histogram this plugin outputs is not an exact count of the provided data. However, the plugin can handle input at 25,000 records/sec, and the resulting histogram is accurate enough for detecting hotspot problems.
|
17
|
+
|
18
|
+
## Examples
|
19
|
+
|
20
|
+
##### Example 1
|
21
|
+
|
22
|
+
If you run the commands below,
|
23
|
+
```
|
24
|
+
$ echo '{"keys":["A", "B", "C", "A"]}' | fluent-cat input.sample
|
25
|
+
$ echo '{"keys":["A", "B", "D"]}' | fluent-cat input.sample
|
26
|
+
```
|
27
|
+
|
28
|
+
output is
|
29
|
+
```
|
30
|
+
2013-12-21T11:08:25+09:00 histo.sample.localhost {"hist":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 2, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0], "sum":28, "avg":0, "sd":0}
|
31
|
+
```
|
32
|
+
|
33
|
+
The plugin counts the values of the key you specified and produces a **histogram-like summary**.
|
34
|
+
|
35
|
+
And calculate,
|
36
|
+
|
37
|
+
* Sum(**sum**)
|
38
|
+
* Average(**avg**)
|
39
|
+
* Standard Deviation(**sd**)
|
40
|
+
|
41
|
+
##### Example 2
|
42
|
+
|
43
|
+
run bench
|
44
|
+
```
|
45
|
+
$ ruby bench/genload.rb input.sample 5000
|
46
|
+
```
|
47
|
+
|
48
|
+
output is,
|
49
|
+
```
|
50
|
+
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
51
|
+
{"hist":
|
52
|
+
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
+
"sum":100000,
|
54
|
+
"avg":1000,
|
55
|
+
"sd":193}
|
56
|
+
```
|
57
|
+
|
58
|
+
## Configuration
|
59
|
+
|
60
|
+
```
|
61
|
+
<match input.**>
|
62
|
+
type histogram
|
63
|
+
count_key keys # record key whose values are counted (default: keys)
|
64
|
+
flush_interval 10s # flush interval[s] (:default 60s)
|
65
|
+
tag_prefix histo
|
66
|
+
tag_suffix __HOSTNAME__ # placeholders are expanded by fluent-mixin-config-placeholders
|
67
|
+
input_tag_remove_prefix input
|
68
|
+
alpha 1 # count up like this, (■ = +1)
|
69
|
+
# ■
|
70
|
+
# ■ ■ ■ ■
|
71
|
+
# ■ ■ ■ ■ ■ ■ ■ ■ ■
|
72
|
+
# alpha: 0, 1, 2
|
73
|
+
|
74
|
+
sampling_rate 10 # input data is thinned out to 1/10; each sampled key is counted with weight 10.
|
75
|
+
</match>
|
76
|
+
```
|
77
|
+
|
78
|
+
## Installation
|
79
|
+
|
80
|
+
Add this line to your application's Gemfile:
|
81
|
+
|
82
|
+
gem 'fluent-plugin-histogram'
|
83
|
+
|
84
|
+
And then execute:
|
85
|
+
|
86
|
+
$ bundle
|
87
|
+
|
88
|
+
Or install it yourself as:
|
89
|
+
|
90
|
+
$ gem install fluent-plugin-histogram
|
91
|
+
|
92
|
+
## Contributing
|
93
|
+
|
94
|
+
1. Fork it
|
95
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
96
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
97
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
98
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bench/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
Benchmark tool for Fluent event collector
|
2
|
+
=========================================
|
3
|
+
|
4
|
+
## Install
|
5
|
+
|
6
|
+
# genload.rb depends on the fluentd gem
|
7
|
+
$ gem install fluentd
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
Usage: genload [options] <tag> <num>
|
12
|
+
-p, --port PORT fluent tcp port (default: 24224)
|
13
|
+
-h, --host HOST fluent host (default: 127.0.0.1)
|
14
|
+
-u, --unix use unix socket instead of tcp
|
15
|
+
-P, --path PATH unix socket path (default: /var/run/fluent/fluent.sock)
|
16
|
+
-r, --repeat NUM repeat number (default: 1)
|
17
|
+
-m, --multi NUM send multiple records at once (default: 1)
|
18
|
+
-c, --concurrent NUM number of threads (default: 1)
|
19
|
+
-l, --record_len NUM number of keys in each record (default: 5)
|
20
|
+
-G, --no-packed don't use lazy deserialization optimize
|
21
|
+
|
22
|
+
|
23
|
+
## Examples
|
24
|
+
|
25
|
+
# uses "benchmark.buffered" tag and sends 50,000 records
|
26
|
+
# -c: uses 10 threads/connections;
|
27
|
+
# -m: one message includes 20 record
|
28
|
+
# -r: repeats 100 times
|
29
|
+
ruby genload.rb benchmark.buffered 50000 -c 10 -m 20 -r 100
|
30
|
+
|
data/bench/genload.rb
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'fluent/env'
|
3
|
+
|
4
|
+
op = OptionParser.new
|
5
|
+
|
6
|
+
op.banner += " <tag> <num>"
|
7
|
+
|
8
|
+
port = Fluent::DEFAULT_LISTEN_PORT
|
9
|
+
host = '127.0.0.1'
|
10
|
+
unix = false
|
11
|
+
socket_path = Fluent::DEFAULT_SOCKET_PATH
|
12
|
+
send_timeout = 20.0
|
13
|
+
repeat = 1
|
14
|
+
para = 1
|
15
|
+
multi = 1
|
16
|
+
record_len = 5
|
17
|
+
packed = true
|
18
|
+
|
19
|
+
config_path = Fluent::DEFAULT_CONFIG_PATH
|
20
|
+
|
21
|
+
# -p/--port: TCP port of the fluentd input to connect to.
# OptionParser coerces the argument to Integer before yielding it, so the
# block parameter is stored as-is. (The previous body assigned an undefined
# local `s`, which raised NameError whenever -p was supplied.)
op.on('-p', '--port PORT', "fluent tcp port (default: #{port})", Integer) {|i|
  port = i
}
|
24
|
+
|
25
|
+
op.on('-h', '--host HOST', "fluent host (default: #{host})") {|s|
|
26
|
+
host = s
|
27
|
+
}
|
28
|
+
|
29
|
+
op.on('-u', '--unix', "use unix socket instead of tcp", TrueClass) {|b|
|
30
|
+
unix = b
|
31
|
+
}
|
32
|
+
|
33
|
+
op.on('-P', '--path PATH', "unix socket path (default: #{socket_path})") {|s|
|
34
|
+
socket_path = s
|
35
|
+
}
|
36
|
+
|
37
|
+
op.on('-r', '--repeat NUM', "repeat number (default: 1)", Integer) {|i|
|
38
|
+
repeat = i
|
39
|
+
}
|
40
|
+
|
41
|
+
op.on('-m', '--multi NUM', "send multiple records at once (default: 1)", Integer) {|i|
|
42
|
+
multi = i
|
43
|
+
}
|
44
|
+
|
45
|
+
op.on('-l', '--record_len NUM', "a record to be send have NUM keys (default: 5)", Integer) {|i|
|
46
|
+
record_len = i
|
47
|
+
}
|
48
|
+
|
49
|
+
op.on('-c', '--concurrent NUM', "number of threads (default: 1)", Integer) {|i|
|
50
|
+
para = i
|
51
|
+
}
|
52
|
+
|
53
|
+
op.on('-G', '--no-packed', "don't use lazy deserialization optimize") {|i|
|
54
|
+
packed = false
|
55
|
+
}
|
56
|
+
|
57
|
+
(class<<self;self;end).module_eval do
|
58
|
+
define_method(:usage) do |msg|
|
59
|
+
puts op.to_s
|
60
|
+
puts "error: #{msg}" if msg
|
61
|
+
exit 1
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
begin
|
66
|
+
op.parse!(ARGV)
|
67
|
+
|
68
|
+
if ARGV.length != 2
|
69
|
+
usage nil
|
70
|
+
end
|
71
|
+
|
72
|
+
tag = ARGV.shift
|
73
|
+
num = ARGV.shift.to_i
|
74
|
+
|
75
|
+
rescue
|
76
|
+
usage $!.to_s
|
77
|
+
end
|
78
|
+
|
79
|
+
require 'socket'
|
80
|
+
require 'msgpack'
|
81
|
+
require 'benchmark'
|
82
|
+
|
83
|
+
# Returns a random base-36 token (digits and lowercase letters).
#
# len - upper bound on the token length; when omitted (nil) a bound between
#       1 and 5 is drawn at random. The token may be shorter than len because
#       leading zeros are not rendered.
def gen_word(len=nil)
  len ||= rand(5) + 1
  upper_bound = 36**len
  rand(upper_bound).to_s(36)
end

# Returns an Array of `num` random tokens, each produced by gen_word(w_len).
#
# num   - how many tokens to generate (default 5).
# w_len - length bound forwarded to gen_word; nil lets gen_word choose.
def gen_record(num=5, w_len=nil)
  words = []
  (1..num).each { words << gen_word(w_len) }
  words
end
|
91
|
+
|
92
|
+
|
93
|
+
# Opens one connection to fluentd and returns the configured socket.
# A unix-domain socket is used when `unix` is set, otherwise TCP to host:port.
connector = Proc.new {
  sock = unix ? UNIXSocket.open(socket_path) : TCPSocket.new(host, port)
  timeout_secs = send_timeout.to_i

  # SO_LINGER takes { int l_onoff; int l_linger; }
  linger = [1, timeout_secs].pack('I!I!')
  sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, linger)

  # SO_SNDTIMEO takes a struct timeval (seconds, microseconds)
  send_tv = [timeout_secs, 0].pack('L!L!')
  sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, send_tv)

  sock
}
|
108
|
+
|
109
|
+
# Builds one msgpack-encoded message: [tag, <packed entries>].
#
# tag   - tag placed at the head of the message.
# multi - number of [time, record] entries packed into the message.
# r_len - number of random keys in each record's "keys" array.
#
# All entries share the timestamp taken when the method is called.
def gen_data(tag, multi=1, r_len=5)
  now = Time.now.to_i
  packed = ''
  multi.times do
    entry = [now, {"keys"=>gen_record(r_len)}]
    entry.to_msgpack(packed)
  end
  [tag, packed].to_msgpack
end
|
118
|
+
|
119
|
+
size = 0 # sum of data.bytesize over every repetition

# Runs the load `repeat` times. Each repetition starts `para` threads; every
# thread opens its own connection and writes `lo` messages of `multi` records.
repeat.times do
  puts "--- #{Time.now}"
  Benchmark.bm do |x|
    start = Time.now

    # messages per thread; always send at least one
    lo = num / para / multi
    lo = 1 if lo == 0

    sent = 0 # bytes written during this repetition only

    x.report do
      workers = (1..para).map {
        Thread.new do
          sock = connector.call
          begin
            written = 0
            lo.times do
              data = gen_data(tag, multi, record_len)
              written += data.bytesize
              sock.write data
            end
            written # becomes Thread#value
          ensure
            # close the connection even when a write fails
            sock.close
          end
        end
      }
      # Thread#value joins the thread, so counters are merged without
      # several threads updating one shared variable.
      sent = workers.map {|t| t.value }.inject(0, :+)
    end

    finish = Time.now
    elapsed = finish - start
    size += sent

    # `sent` already covers every message of every thread, so it must not be
    # multiplied by lo*para again (that inflated the reported throughput).
    # NOTE(review): the figure is derived from bytes, so the "Mbps" label
    # reads as megabytes/sec -- confirm the intended unit.
    puts "% 10.3f Mbps" % [sent/elapsed/1000/1000]
    puts "% 10.3f records/sec" % [lo*para*multi/elapsed]
  end

end
|
152
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = "fluent-plugin-histogram"
|
5
|
+
gem.version = "0.1.2"
|
6
|
+
gem.authors = ["Yusuke SHIMIZU"]
|
7
|
+
gem.email = "a.ryuklnm@gmail.com"
|
8
|
+
gem.description = "Combine inputs data and make histogram which helps to detect a hotspot."
|
9
|
+
gem.summary = "Combine inputs data and make histogram which helps to detect a hotspot."
|
10
|
+
gem.homepage = "https://github.com/karahiyo/fluent-plugin-histogram"
|
11
|
+
gem.license = "APLv2"
|
12
|
+
|
13
|
+
gem.rubyforge_project = "fluent-plugin-histogram"
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($/)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
|
+
gem.require_paths = ["lib"]
|
19
|
+
|
20
|
+
gem.add_development_dependency "bundler", "~> 1.3"
|
21
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
22
|
+
gem.add_development_dependency "fluentd", "~> 0.10.9"
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.3"
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'fluent/mixin/config_placeholders'
|
4
|
+
|
5
|
+
module Fluent
  # Fluentd output plugin that folds the values found under +count_key+ into
  # one fixed-width histogram per input tag. A background thread emits every
  # histogram, with its sum, average and standard deviation, each
  # +flush_interval+ seconds.
  class HistogramOutput < Fluent::Output
    Fluent::Plugin.register_output('histogram', self)

    config_param :tag, :string, :default => nil
    config_param :tag_prefix, :string, :default => nil
    config_param :tag_suffix, :string, :default => nil
    config_param :input_tag_remove_prefix, :string, :default => nil
    config_param :flush_interval, :time, :default => 60
    config_param :count_key, :string, :default => 'keys'
    config_param :bin_num, :integer, :default => 100
    config_param :alpha, :integer, :default => 1
    config_param :sampling_rate, :integer, :default => 1

    include Fluent::Mixin::ConfigPlaceholders

    attr_accessor :flush_interval
    attr_accessor :hists
    attr_accessor :zero_hist
    attr_accessor :remove_prefix_string

    ## fluentd output plugin's methods

    def initialize
      super
    end

    # Validates the configuration and prepares the per-tag histogram state.
    #
    # Raises Fluent::ConfigError when bin_num is not positive, alpha is
    # negative, or sampling_rate is not positive. The latter two would
    # otherwise silently yield empty or negative histograms.
    def configure(conf)
      super

      raise Fluent::ConfigError, "bin_num must be > 0" if @bin_num <= 0
      raise Fluent::ConfigError, "alpha must be >= 0" if @alpha < 0
      raise Fluent::ConfigError, "sampling_rate must be > 0" if @sampling_rate <= 0
      $log.warn %Q[too small "bin_num(=#{@bin_num})" may raise unexpected outcome] if @bin_num < 100

      @tag_prefix_string = @tag_prefix + '.' if @tag_prefix
      @tag_suffix_string = '.' + @tag_suffix if @tag_suffix
      if @input_tag_remove_prefix
        @remove_prefix_string = @input_tag_remove_prefix + '.'
        @remove_prefix_length = @remove_prefix_string.length
      end

      # template copied for every new tag
      @zero_hist = [0] * @bin_num

      @hists = initialize_hists
      @sampling_counter = 0
      @mutex = Mutex.new

    end

    # Starts the background thread that periodically flushes histograms.
    def start
      super
      @watcher = Thread.new(&method(:watch))
    end

    # Body of the watcher thread: polls every 0.5s and calls flush_emit once
    # flush_interval seconds have passed since the previous flush.
    def watch
      @last_checked = Fluent::Engine.now
      while true
        sleep 0.5
        if Fluent::Engine.now - @last_checked >= @flush_interval
          now = Fluent::Engine.now
          flush_emit(now)
          @last_checked = now
        end
      end
    end

    # Stops the watcher thread and waits for it to finish.
    def shutdown
      super
      @watcher.terminate
      @watcher.join
    end


    ## Histogram plugin's method

    # Returns a Hash mapping each given tag to a fresh all-zero histogram;
    # an empty Hash when tags is nil.
    def initialize_hists(tags=nil)
      hists = {}
      if tags
        tags.each do |tag|
          hists[tag] = @zero_hist.dup
        end
      end
      hists
    end

    # Adds one observation of +key+ to the histogram of +tag+.
    #
    # The bin is key.hash % bin_num. For each level 0..alpha the bins within
    # that distance of the target (wrapping around) gain sampling_rate, so a
    # single call adds (alpha + 1)**2 * sampling_rate in total.
    #
    # NOTE: Ruby's Object#hash is not guaranteed to be identical across
    # processes, so bin positions are only comparable within one process.
    def increment(tag, key)
      id = key.hash % @bin_num
      @mutex.synchronize {
        # create the histogram under the lock so a concurrent flush cannot
        # swap @hists between the lookup and the update
        hist = (@hists[tag] ||= @zero_hist.dup)
        (0..@alpha).each do |alpha|
          (-alpha..alpha).each do |a|
            hist[(id + a) % @bin_num] += 1 * @sampling_rate
          end
        end
      }
    end

    # Fluentd entry point: counts every value found under count_key in each
    # record of the event stream. A scalar value is treated as a one-element
    # list. Records without the key are skipped instead of being counted as
    # a nil key.
    #
    # When sampling_rate > 1 only every sampling_rate-th key is counted.
    def emit(tag, es, chain)
      chain.next

      es.each do |time, record|
        keys = record[@count_key]
        next if keys.nil?
        [keys].flatten.each do |k|
          if @sampling_rate == 1
            increment(tag, k)
          else
            @sampling_counter += 1
            if @sampling_counter >= @sampling_rate
              increment(tag, k)
              @sampling_counter = 0
            end
          end
        end
      end
    end

    # Rewrites the keys (input tags) of +flushed+ into output tags and returns
    # the resulting Hash.
    #
    # When +tag+ is configured it replaces every input tag. Otherwise
    # input_tag_remove_prefix is stripped, then tag_prefix / tag_suffix are
    # attached, trimming a leading or trailing dot after each step.
    #
    # NOTE(review): input tags that map to the same output tag overwrite one
    # another (always the case when +tag+ is set and several input tags were
    # seen) -- confirm whether the histograms should be merged instead.
    def tagging(flushed)
      pairs = flushed.map do |tag, hist|
        if @tag
          tag = @tag
        else
          strip_prefix = @input_tag_remove_prefix &&
            ((tag.start_with?(@remove_prefix_string) &&
              tag.length > @remove_prefix_length) ||
             tag == @input_tag_remove_prefix)
          if strip_prefix
            tag = tag[@input_tag_remove_prefix.length..-1].gsub(/^\.|\.$/, "")
          end
          if @tag_prefix
            tag = (@tag_prefix_string + tag).gsub(/^\.|\.$/, "")
          end
          if @tag_suffix
            tag = (tag + @tag_suffix_string).gsub(/^\.|\.$/, "")
          end
        end

        [tag, hist]
      end
      Hash[pairs]
    end

    # Builds the emitted record for every histogram in +flushed+:
    #   :hist - the bin array itself
    #   :sum  - total of all bins
    #   :avg  - sum / number of bins (integer division)
    #   :sd   - standard deviation of the bins, truncated to an integer
    def generate_output(flushed)
      output = {}
      flushed.each do |tag, hist|
        sum = hist.inject(:+)
        avg = sum / hist.size
        # integer arithmetic is kept on purpose: results match the values
        # shown in the README examples
        variance = hist.map { |n| (avg - n)**2 }.inject(:+) / hist.size
        sd = Math.sqrt(variance)
        output[tag] = {
          :hist => hist,
          :sum => sum,
          :avg => avg,
          :sd => sd.to_i
        }
      end
      output
    end

    # Takes the current histograms, resets every known tag to zero and returns
    # the tagged output records.
    #
    # The swap happens under the same mutex increment uses, so no increment
    # can land in a histogram that has already been taken for output.
    def flush
      snapshot = @mutex.synchronize do
        current = @hists
        @hists = initialize_hists(current.keys)
        current
      end
      tagging(generate_output(snapshot))
    end

    # Flushes and emits every resulting record to the Fluentd engine with
    # timestamp +now+.
    def flush_emit(now)
      flushed = flush
      flushed.each do |tag, data|
        Fluent::Engine.emit(tag, now, data)
      end
    end

  end
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
|
4
|
+
begin
|
5
|
+
Bundler.setup(:default, :development)
|
6
|
+
rescue Bundler::BundlerError => e
|
7
|
+
$stderr.puts e.message
|
8
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
9
|
+
exit e.status_code
|
10
|
+
end
|
11
|
+
require 'test/unit'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '/../lib', ))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'fluent/test'
|
16
|
+
unless ENV.has_key? 'VERBOSE'
|
17
|
+
nulllogger = Object.new
|
18
|
+
nulllogger.instance_eval {|logj|
|
19
|
+
def method_missing(methos, *args)
|
20
|
+
# pass
|
21
|
+
end
|
22
|
+
}
|
23
|
+
$log = nulllogger
|
24
|
+
end
|
25
|
+
|
26
|
+
require 'fluent/plugin/out_histogram'
|
27
|
+
|
28
|
+
class Test::Unit::TestCase
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'helper'
|
4
|
+
|
5
|
+
class HistogramOutputTest < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
Fluent::Test.setup
|
8
|
+
end
|
9
|
+
|
10
|
+
CONFIG = %[
|
11
|
+
count_key keys
|
12
|
+
flush_interval 60s
|
13
|
+
bin_num 100
|
14
|
+
tag_prefix histo
|
15
|
+
input_tag_remove_prefix test.input
|
16
|
+
]
|
17
|
+
|
18
|
+
def create_driver(conf = CONFIG, tag='test')
|
19
|
+
Fluent::Test::OutputTestDriver.new(Fluent::HistogramOutput, tag).configure(conf)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_configure
|
23
|
+
assert_raise(Fluent::ConfigError) {
|
24
|
+
create_driver %[ bin_num 0]
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_small_increment_no_alpha
|
29
|
+
bin_num = 100
|
30
|
+
alpha = 0
|
31
|
+
f = create_driver(%[
|
32
|
+
bin_num #{bin_num}
|
33
|
+
alpha #{alpha}])
|
34
|
+
f.instance.increment("test.input", "A")
|
35
|
+
f.instance.increment("test.input", "B")
|
36
|
+
zero = f.instance.zero_hist.dup
|
37
|
+
id = "A".hash % bin_num
|
38
|
+
zero[id] += 1
|
39
|
+
id = "B".hash % bin_num
|
40
|
+
zero[id] += 1
|
41
|
+
assert_equal({"test.input" => {:hist => zero, :sum => 2, :avg => 2/bin_num, :sd=>0}},
|
42
|
+
f.instance.flush)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_small_increment_with_alpha
|
46
|
+
bin_num = 100
|
47
|
+
alpha = 1
|
48
|
+
f = create_driver(%[
|
49
|
+
bin_num #{bin_num}
|
50
|
+
alpha #{alpha}])
|
51
|
+
f.instance.increment("test.input", "A")
|
52
|
+
f.instance.increment("test.input", "B")
|
53
|
+
zero = f.instance.zero_hist.dup
|
54
|
+
id = "A".hash % bin_num
|
55
|
+
zero[id] += 2
|
56
|
+
zero[(id + alpha) % bin_num] += 1
|
57
|
+
zero[id - alpha] += 1
|
58
|
+
id = "B".hash % bin_num
|
59
|
+
zero[id] += 2
|
60
|
+
zero[(id + alpha) % bin_num] += 1
|
61
|
+
zero[id - alpha] += 1
|
62
|
+
assert_equal({"test.input" => {:hist => zero, :sum => 2*3+2, :avg => (2*3+2)/bin_num, :sd=>0}},
|
63
|
+
f.instance.flush)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_tagging_with_flush
|
67
|
+
f = create_driver(%[tag_prefix histo])
|
68
|
+
f.instance.increment("test", "A")
|
69
|
+
flushed = f.instance.flush
|
70
|
+
assert_equal("histo.test", flushed.keys.join(''))
|
71
|
+
|
72
|
+
f = create_driver(%[
|
73
|
+
tag_prefix histo
|
74
|
+
input_tag_remove_prefix test])
|
75
|
+
f.instance.increment("test", "A")
|
76
|
+
flushed = f.instance.flush
|
77
|
+
assert_equal("histo", flushed.keys.join(''))
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_tagging
|
81
|
+
f = create_driver(%[
|
82
|
+
hostname localhost
|
83
|
+
tag_prefix histo
|
84
|
+
input_tag_remove_prefix test
|
85
|
+
tag_suffix __HOSTNAME__ ])
|
86
|
+
|
87
|
+
# input tag is one
|
88
|
+
data = {"test.input" => [1, 2, 3, 4, 5]}
|
89
|
+
tagged = f.instance.tagging(data)
|
90
|
+
assert_equal("histo.input.localhost", tagged.keys.join(''))
|
91
|
+
|
92
|
+
# input tag is more than one
|
93
|
+
data = {"test.a" => [1, 2, 3], "test.b" => [1, 2]}
|
94
|
+
tagged = f.instance.tagging(data)
|
95
|
+
assert_equal(true, tagged.key?("histo.a.localhost"))
|
96
|
+
assert_equal(true, tagged.key?("histo.b.localhost"))
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_tagging_use_tag
|
100
|
+
f = create_driver(%[ tag histo ])
|
101
|
+
data = {"test.input" => [1, 2, 3, 4, 5]}
|
102
|
+
tagged = f.instance.tagging(data)
|
103
|
+
assert_equal("histo", tagged.keys.join(''))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_increment_sum
|
107
|
+
bin_num = 100
|
108
|
+
f = create_driver(%[
|
109
|
+
bin_num #{bin_num}
|
110
|
+
alpha 1 ])
|
111
|
+
1000.times do |i|
|
112
|
+
f.instance.increment("test.input", i.to_s)
|
113
|
+
end
|
114
|
+
flushed = f.instance.flush
|
115
|
+
assert_equal(1000*4, flushed["test.input"][:sum])
|
116
|
+
assert_equal(1000*4/bin_num, flushed["test.input"][:avg])
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_emit
|
120
|
+
bin_num = 100
|
121
|
+
f = create_driver(%[
|
122
|
+
bin_num #{bin_num}
|
123
|
+
alpha 1 ])
|
124
|
+
f.run do
|
125
|
+
100.times do
|
126
|
+
f.emit({"keys" => ["A", "B", "C"]})
|
127
|
+
end
|
128
|
+
end
|
129
|
+
flushed = f.instance.flush
|
130
|
+
assert_equal(300*4, flushed["test"][:sum])
|
131
|
+
assert_equal(300*4/bin_num, flushed["test"][:avg])
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_some_hist_exist_case_tagging_with_emit
|
135
|
+
f = create_driver
|
136
|
+
data = {"keys" => ["A", "B", "C"]}
|
137
|
+
f.run do
|
138
|
+
["test.a", "test.b", "test.c"].each do |tag|
|
139
|
+
f.instance.increment(tag, data)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
f.instance.flush # clear hist
|
144
|
+
flushed = f.instance.flush
|
145
|
+
assert_equal(true, flushed.key?("histo.test.a"))
|
146
|
+
assert_equal(true, flushed.key?("histo.test.b"))
|
147
|
+
assert_equal(true, flushed.key?("histo.test.c"))
|
148
|
+
end
|
149
|
+
|
150
|
+
def test_can_detect_hotspot
|
151
|
+
f = create_driver(%[
|
152
|
+
count_key keys
|
153
|
+
flush_interval 10s
|
154
|
+
bin_num 100
|
155
|
+
tag_prefix histo
|
156
|
+
tag_suffix __HOSTNAME__
|
157
|
+
hostname localhost
|
158
|
+
input_tag_remove_prefix test])
|
159
|
+
# ("A".."ZZ").to_a.size == 702
|
160
|
+
data = ("A".."ZZ").to_a.shuffle
|
161
|
+
f.run do
|
162
|
+
100.times do
|
163
|
+
data.each_slice(10) do |d|
|
164
|
+
f.emit({"keys" => d})
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
flushed_even = f.instance.flush["histo.localhost"]
|
169
|
+
|
170
|
+
#('A'..'ZZ').to_a.shuffle.size == 702
|
171
|
+
# In here, replace 7 values of ('A'..'ZZ') to 'D' as example hotspot.
|
172
|
+
data.size.times {|i| data[i] = 'D' if i%100 == 0 }
|
173
|
+
f.run do
|
174
|
+
100.times do
|
175
|
+
data.each_slice(10) do |d|
|
176
|
+
f.emit({"keys" => d})
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
flushed_uneven = f.instance.flush["histo.localhost"]
|
181
|
+
|
182
|
+
assert_equal(true, flushed_even[:sd] < flushed_uneven[:sd])
|
183
|
+
end
|
184
|
+
|
185
|
+
def test_sampling
|
186
|
+
bin_num = 100
|
187
|
+
sampling_rate = 10
|
188
|
+
f = create_driver(%[
|
189
|
+
bin_num #{bin_num}
|
190
|
+
sampling_rate #{sampling_rate}
|
191
|
+
alpha 0 ])
|
192
|
+
f.run do
|
193
|
+
100.times do
|
194
|
+
f.emit({"keys" => ["A", "B", "C"]})
|
195
|
+
end
|
196
|
+
end
|
197
|
+
flushed = f.instance.flush
|
198
|
+
assert_equal(300, flushed["test"][:sum])
|
199
|
+
assert_equal(300/bin_num, flushed["test"][:avg])
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-histogram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yusuke SHIMIZU
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: fluentd
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.10.9
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.10.9
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: fluent-mixin-config-placeholders
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.2.3
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.2.3
|
69
|
+
description: Combine inputs data and make histogram which helps to detect a hotspot.
|
70
|
+
email: a.ryuklnm@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- .gitignore
|
76
|
+
- Gemfile
|
77
|
+
- LICENSE.txt
|
78
|
+
- README.md
|
79
|
+
- Rakefile
|
80
|
+
- bench/README.md
|
81
|
+
- bench/genload.rb
|
82
|
+
- fluent-plugin-histogram.gemspec
|
83
|
+
- lib/fluent/plugin/out_histogram.rb
|
84
|
+
- test/helper.rb
|
85
|
+
- test/plugin/test_out_histogram.rb
|
86
|
+
homepage: https://github.com/karahiyo/fluent-plugin-histogram
|
87
|
+
licenses:
|
88
|
+
- APLv2
|
89
|
+
metadata: {}
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirements: []
|
105
|
+
rubyforge_project: fluent-plugin-histogram
|
106
|
+
rubygems_version: 2.2.1
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: Combine inputs data and make histogram which helps to detect a hotspot.
|
110
|
+
test_files:
|
111
|
+
- test/helper.rb
|
112
|
+
- test/plugin/test_out_histogram.rb
|