fluent-plugin-histogram 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +98 -0
- data/Rakefile +10 -0
- data/bench/README.md +30 -0
- data/bench/genload.rb +152 -0
- data/fluent-plugin-histogram.gemspec +25 -0
- data/lib/fluent/plugin/out_histogram.rb +179 -0
- data/test/helper.rb +30 -0
- data/test/plugin/test_out_histogram.rb +202 -0
- metadata +112 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NmJkY2ZiNTU3NjcyMzRhN2EzNzA1MWFiMzJlMjgxNGY4ODI2Mjc4Zg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
YjVhOGEzNWQzY2YyZTBhYjYyMjVlYjkxMDdhZDczMjg1YzA5MzQ1Nw==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZDNlZGU3OWYwYzFlYjhiOTNjMzU4MjU3MTU0MzZjZmUzMzY4OGQ4NDg2ODky
|
10
|
+
NjBjNDJlNmM1MTE4MmE1YjcwNWNlMThlN2Y0OTQ3ZGMyN2VkNTQxMThlNmIx
|
11
|
+
YmUzMjQ1MzU2MDk0NzEwNjNlYmRjOGFlZjNlZTZiOTQ0YjkwMTY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YWE0MmNjOWQ2MjI5N2EzZTVmNTNiZWY1NzNkNzgzOTNkOGM2NmJmMzU0YWNh
|
14
|
+
OGZmNjQxNzc0OTFmZGZjZmNjYTUxMzdiNTYyOTYzNjI4NGFmYWE4MjdhMTRj
|
15
|
+
ZDM5NmY0MzM2ZDhmOTBlY2UzZDk3MjA2MmJiYzhkODE3ZDJiOTE=
|
data/.gitignore
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
.bundle
|
4
|
+
.config
|
5
|
+
.yardoc
|
6
|
+
Gemfile.lock
|
7
|
+
InstalledFiles
|
8
|
+
_yardoc
|
9
|
+
coverage
|
10
|
+
doc/
|
11
|
+
lib/bundler/man
|
12
|
+
pkg
|
13
|
+
rdoc
|
14
|
+
spec/reports
|
15
|
+
test/tmp
|
16
|
+
test/version_tmp
|
17
|
+
tmp
|
18
|
+
*.swp
|
19
|
+
.conf
|
20
|
+
.idea
|
21
|
+
vendor
|
22
|
+
.ruby-version
|
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2013 SHIMIZU Yusuke
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# fluent-plugin-histogram
|
2
|
+
|
3
|
+
Fluentd output plugin.
|
4
|
+
|
5
|
+
Counts up input keys and builds a **scalable, rough histogram** to help detect hotspot problems.
|
6
|
+
|
7
|
+
A "scalable rough histogram" fits cases where there is an enormous variety of keys.
|
8
|
+
|
9
|
+
We referred to ["Strauss, O.: Rough histograms for robust statistics, Pattern Recognition, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for the "rough histogram".
|
10
|
+
In this approach, an increment does not touch a single value (`.`); it increments several neighboring values in a shape like `△`.
|
11
|
+
To use this, please set `alpha >= 1`(default 1) option in fluent.conf.
|
12
|
+
|
13
|
+
Moreover, we optimized the histogram for an enormous variety of keys by fixing the histogram width.
|
14
|
+
To use this, please set `bin_num`(default 100) in fluent.conf.
|
15
|
+
|
16
|
+
Be careful: the histogram this plugin outputs is not an exact count of the provided data. However, the plugin can handle 25,000 records/sec of input, and the resulting histogram is good enough for detecting hotspot problems.
|
17
|
+
|
18
|
+
## Examples
|
19
|
+
|
20
|
+
##### Example 1
|
21
|
+
|
22
|
+
If you run the commands below,
|
23
|
+
```
|
24
|
+
$ echo '{"keys":["A", "B", "C", "A"]}' | fluent-cat input.sample
|
25
|
+
$ echo '{"keys":["A", "B", "D"]}' | fluent-cat input.sample
|
26
|
+
```
|
27
|
+
|
28
|
+
output is
|
29
|
+
```
|
30
|
+
2013-12-21T11:08:25+09:00 histo.sample.localhost {"hist":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 2, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0], "sum":28, "avg":0, "sd":0}
|
31
|
+
```
|
32
|
+
|
33
|
+
The plugin counts up the values of the key you specified and makes **something histogram-like**.
|
34
|
+
|
35
|
+
And calculate,
|
36
|
+
|
37
|
+
* Sum(**sum**)
|
38
|
+
* Average(**avg**)
|
39
|
+
* Standard Deviation(**sd**)
|
40
|
+
|
41
|
+
##### Example 2
|
42
|
+
|
43
|
+
run bench
|
44
|
+
```
|
45
|
+
$ ruby bench/genload.rb input.sample 5000
|
46
|
+
```
|
47
|
+
|
48
|
+
output is,
|
49
|
+
```
|
50
|
+
2013-12-21T11:09:52+09:00 histo.sample.localhost
|
51
|
+
{"hist":
|
52
|
+
[859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
|
53
|
+
"sum":100000,
|
54
|
+
"avg":1000,
|
55
|
+
"sd":193}
|
56
|
+
```
|
57
|
+
|
58
|
+
## Configuration
|
59
|
+
|
60
|
+
```
|
61
|
+
<match input.**>
|
62
|
+
type histogram
|
63
|
+
count_key keys # record key whose values are counted
|
64
|
+
flush_interval 10s # flush interval[s] (:default 60s)
|
65
|
+
tag_prefix histo
|
66
|
+
tag_suffix __HOSTNAME__ # this plugin mixes in fluent-mixin-config-placeholders
|
67
|
+
input_tag_remove_prefix input
|
68
|
+
alpha 1 # count up like this, (■ = +1)
|
69
|
+
# ■
|
70
|
+
# ■ ■ ■ ■
|
71
|
+
# ■ ■ ■ ■ ■ ■ ■ ■ ■
|
72
|
+
# alpha: 0, 1, 2
|
73
|
+
|
74
|
+
sampling_rate 10 # input data is thinned out to 1/10.
|
75
|
+
</match>
|
76
|
+
```
|
77
|
+
|
78
|
+
## Installation
|
79
|
+
|
80
|
+
Add this line to your application's Gemfile:
|
81
|
+
|
82
|
+
gem 'fluent-plugin-histogram'
|
83
|
+
|
84
|
+
And then execute:
|
85
|
+
|
86
|
+
$ bundle
|
87
|
+
|
88
|
+
Or install it yourself as:
|
89
|
+
|
90
|
+
$ gem install fluent-plugin-histogram
|
91
|
+
|
92
|
+
## Contributing
|
93
|
+
|
94
|
+
1. Fork it
|
95
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
96
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
97
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
98
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
data/bench/README.md
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
Benchmark tool for Fluent event collector
|
2
|
+
=========================================
|
3
|
+
|
4
|
+
## Install
|
5
|
+
|
6
|
+
# genload.rb depends on fluent gem
|
7
|
+
$ gem install fluent
|
8
|
+
|
9
|
+
## Usage
|
10
|
+
|
11
|
+
Usage: genload [options] <tag> <num>
|
12
|
+
-p, --port PORT fluent tcp port (default: 24224)
|
13
|
+
-h, --host HOST fluent host (default: 127.0.0.1)
|
14
|
+
-u, --unix use unix socket instead of tcp
|
15
|
+
-P, --path PATH unix socket path (default: /var/run/fluent/fluent.sock)
|
16
|
+
-r, --repeat NUM repeat number (default: 1)
|
17
|
+
-m, --multi NUM send multiple records at once (default: 1)
|
18
|
+
-c, --concurrent NUM number of threads (default: 1)
|
19
|
+
-s, --size SIZE size of a record (default: 100)
|
20
|
+
-G, --no-packed don't use lazy deserialization optimize
|
21
|
+
|
22
|
+
|
23
|
+
## Examples
|
24
|
+
|
25
|
+
# uses "benchmark.buffered" tag and sends 50,000 records
|
26
|
+
# -c: uses 10 threads/connections;
|
27
|
+
# -m: one message includes 20 record
|
28
|
+
# -r: repeats 100 times
|
29
|
+
ruby genload.rb benchmark.buffered 50000 -c 10 -m 20 -r 100
|
30
|
+
|
data/bench/genload.rb
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'fluent/env'
|
3
|
+
|
4
|
+
op = OptionParser.new
|
5
|
+
|
6
|
+
op.banner += " <tag> <num>"
|
7
|
+
|
8
|
+
port = Fluent::DEFAULT_LISTEN_PORT
|
9
|
+
host = '127.0.0.1'
|
10
|
+
unix = false
|
11
|
+
socket_path = Fluent::DEFAULT_SOCKET_PATH
|
12
|
+
send_timeout = 20.0
|
13
|
+
repeat = 1
|
14
|
+
para = 1
|
15
|
+
multi = 1
|
16
|
+
record_len = 5
|
17
|
+
packed = true
|
18
|
+
|
19
|
+
config_path = Fluent::DEFAULT_CONFIG_PATH
|
20
|
+
|
21
|
+
# -p/--port: OptionParser coerces the argument to Integer and yields it to the
# block. The block parameter was named `i` while the body assigned an undefined
# `s`, so passing -p could never set the port.
op.on('-p', '--port PORT', "fluent tcp port (default: #{port})", Integer) {|i|
  port = i
}
|
24
|
+
|
25
|
+
op.on('-h', '--host HOST', "fluent host (default: #{host})") {|s|
|
26
|
+
host = s
|
27
|
+
}
|
28
|
+
|
29
|
+
op.on('-u', '--unix', "use unix socket instead of tcp", TrueClass) {|b|
|
30
|
+
unix = b
|
31
|
+
}
|
32
|
+
|
33
|
+
op.on('-P', '--path PATH', "unix socket path (default: #{socket_path})") {|s|
|
34
|
+
socket_path = s
|
35
|
+
}
|
36
|
+
|
37
|
+
op.on('-r', '--repeat NUM', "repeat number (default: 1)", Integer) {|i|
|
38
|
+
repeat = i
|
39
|
+
}
|
40
|
+
|
41
|
+
op.on('-m', '--multi NUM', "send multiple records at once (default: 1)", Integer) {|i|
|
42
|
+
multi = i
|
43
|
+
}
|
44
|
+
|
45
|
+
op.on('-l', '--record_len NUM', "a record to be send have NUM keys (default: 5)", Integer) {|i|
|
46
|
+
record_len = i
|
47
|
+
}
|
48
|
+
|
49
|
+
op.on('-c', '--concurrent NUM', "number of threads (default: 1)", Integer) {|i|
|
50
|
+
para = i
|
51
|
+
}
|
52
|
+
|
53
|
+
op.on('-G', '--no-packed', "don't use lazy deserialization optimize") {|i|
|
54
|
+
packed = false
|
55
|
+
}
|
56
|
+
|
57
|
+
(class<<self;self;end).module_eval do
|
58
|
+
define_method(:usage) do |msg|
|
59
|
+
puts op.to_s
|
60
|
+
puts "error: #{msg}" if msg
|
61
|
+
exit 1
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
begin
|
66
|
+
op.parse!(ARGV)
|
67
|
+
|
68
|
+
if ARGV.length != 2
|
69
|
+
usage nil
|
70
|
+
end
|
71
|
+
|
72
|
+
tag = ARGV.shift
|
73
|
+
num = ARGV.shift.to_i
|
74
|
+
|
75
|
+
rescue
|
76
|
+
usage $!.to_s
|
77
|
+
end
|
78
|
+
|
79
|
+
require 'socket'
|
80
|
+
require 'msgpack'
|
81
|
+
require 'benchmark'
|
82
|
+
|
83
|
+
# Returns a random lowercase base-36 string.
#
# len - upper bound on the number of base-36 digits; when omitted a bound
#       between 1 and 5 is drawn at random. The result may be shorter than
#       len because leading zeros are not printed.
def gen_word(len=nil)
  len ||= rand(5) + 1
  upper = 36**len
  rand(upper).to_s(36)
end

# Returns an Array of num random words, each produced by gen_word(w_len).
def gen_record(num=5, w_len=nil)
  (1..num).map { gen_word(w_len) }
end
|
91
|
+
|
92
|
+
|
93
|
+
connector = Proc.new {
|
94
|
+
if unix
|
95
|
+
sock = UNIXSocket.open(socket_path)
|
96
|
+
else
|
97
|
+
sock = TCPSocket.new(host, port)
|
98
|
+
end
|
99
|
+
|
100
|
+
opt = [1, send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
101
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
102
|
+
|
103
|
+
opt = [send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
104
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
105
|
+
|
106
|
+
sock
|
107
|
+
}
|
108
|
+
|
109
|
+
def gen_data(tag, multi=1, r_len=5)
|
110
|
+
time = Time.now.to_i
|
111
|
+
data = ''
|
112
|
+
multi.times do
|
113
|
+
record = {"keys"=>gen_record(r_len)}
|
114
|
+
[time, record].to_msgpack(data)
|
115
|
+
end
|
116
|
+
data = [tag, data].to_msgpack
|
117
|
+
end
|
118
|
+
|
119
|
+
# Run the benchmark `repeat` times. Each run starts `para` sender threads;
# every thread opens its own connection and writes `lo` freshly generated
# messages of `multi` records each.
repeat.times do
  puts "--- #{Time.now}"
  Benchmark.bm do |x|
    start = Time.now

    lo = num / para / multi
    lo = 1 if lo == 0

    size = 0 # bytes written during this run, summed over all threads

    x.report do
      senders = (1..para).map {
        Thread.new do
          sent = 0 # thread-local, so no shared counter is updated concurrently
          sock = connector.call
          begin
            lo.times do
              data = gen_data(tag, multi, record_len)
              sent += data.bytesize
              sock.write data
            end
          ensure
            # Close the connection even when a write fails.
            sock.close
          end
          sent
        end
      }
      # Thread#value joins the thread and returns the block's result.
      size = senders.map {|t| t.value }.inject(0, :+)
    end

    finish = Time.now
    elapsed = finish - start

    # `size` is already the total for this run, so it must not be scaled by
    # lo*para again (that was only right when one fixed payload was reused).
    # NOTE(review): the figure is bytes/10^6 per second although the label
    # reads "Mbps" -- confirm which unit is intended.
    puts "% 10.3f Mbps" % [size/elapsed/1000/1000]
    puts "% 10.3f records/sec" % [lo*para*multi/elapsed]
  end

end
|
152
|
+
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
Gem::Specification.new do |gem|
|
4
|
+
gem.name = "fluent-plugin-histogram"
|
5
|
+
gem.version = "0.1.2"
|
6
|
+
gem.authors = ["Yusuke SHIMIZU"]
|
7
|
+
gem.email = "a.ryuklnm@gmail.com"
|
8
|
+
gem.description = "Combine inputs data and make histogram which helps to detect a hotspot."
|
9
|
+
gem.summary = "Combine inputs data and make histogram which helps to detect a hotspot."
|
10
|
+
gem.homepage = "https://github.com/karahiyo/fluent-plugin-histogram"
|
11
|
+
gem.license = "APLv2"
|
12
|
+
|
13
|
+
gem.rubyforge_project = "fluent-plugin-histogram"
|
14
|
+
|
15
|
+
gem.files = `git ls-files`.split($/)
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
18
|
+
gem.require_paths = ["lib"]
|
19
|
+
|
20
|
+
gem.add_development_dependency "bundler", "~> 1.3"
|
21
|
+
gem.add_development_dependency "rake", ">= 0.9.2"
|
22
|
+
gem.add_development_dependency "fluentd", "~> 0.10.9"
|
23
|
+
gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.3"
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'fluent/mixin/config_placeholders'
|
4
|
+
|
5
|
+
module Fluent
|
6
|
+
class HistogramOutput < Fluent::Output
|
7
|
+
Fluent::Plugin.register_output('histogram', self)
|
8
|
+
|
9
|
+
config_param :tag, :string, :default => nil
|
10
|
+
config_param :tag_prefix, :string, :default => nil
|
11
|
+
config_param :tag_suffix, :string, :default => nil
|
12
|
+
config_param :input_tag_remove_prefix, :string, :default => nil
|
13
|
+
config_param :flush_interval, :time, :default => 60
|
14
|
+
config_param :count_key, :string, :default => 'keys'
|
15
|
+
config_param :bin_num, :integer, :default => 100
|
16
|
+
config_param :alpha, :integer, :default => 1
|
17
|
+
config_param :sampling_rate, :integer, :default => 1
|
18
|
+
|
19
|
+
include Fluent::Mixin::ConfigPlaceholders
|
20
|
+
|
21
|
+
attr_accessor :flush_interval
|
22
|
+
attr_accessor :hists
|
23
|
+
attr_accessor :zero_hist
|
24
|
+
attr_accessor :remove_prefix_string
|
25
|
+
|
26
|
+
## fluentd output plugin's methods
|
27
|
+
|
28
|
+
def initialize
|
29
|
+
super
|
30
|
+
end
|
31
|
+
|
32
|
+
def configure(conf)
|
33
|
+
super
|
34
|
+
|
35
|
+
raise Fluent::ConfigError, "bin_num must be > 0" if @bin_num <= 0
|
36
|
+
$log.warn %Q[too small "bin_num(=#{@bin_num})" may raise unexpected outcome] if @bin_num < 100
|
37
|
+
|
38
|
+
@tag_prefix_string = @tag_prefix + '.' if @tag_prefix
|
39
|
+
@tag_suffix_string = '.' + @tag_suffix if @tag_suffix
|
40
|
+
if @input_tag_remove_prefix
|
41
|
+
@remove_prefix_string = @input_tag_remove_prefix + '.'
|
42
|
+
@remove_prefix_length = @remove_prefix_string.length
|
43
|
+
end
|
44
|
+
|
45
|
+
@zero_hist = [0] * @bin_num
|
46
|
+
|
47
|
+
@hists = initialize_hists
|
48
|
+
@sampling_counter = 0
|
49
|
+
@mutex = Mutex.new
|
50
|
+
|
51
|
+
end
|
52
|
+
|
53
|
+
def start
|
54
|
+
super
|
55
|
+
@watcher = Thread.new(&method(:watch))
|
56
|
+
end
|
57
|
+
|
58
|
+
def watch
|
59
|
+
@last_checked = Fluent::Engine.now
|
60
|
+
while true
|
61
|
+
sleep 0.5
|
62
|
+
if Fluent::Engine.now - @last_checked >= @flush_interval
|
63
|
+
now = Fluent::Engine.now
|
64
|
+
flush_emit(now)
|
65
|
+
@last_checked = now
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def shutdown
|
71
|
+
super
|
72
|
+
@watcher.terminate
|
73
|
+
@watcher.join
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
## Histogram plugin's method
|
78
|
+
|
79
|
+
# Builds a fresh tag => histogram Hash.
#
# tags - optional collection of tags; each one gets its own copy of
#        @zero_hist. With no tags an empty Hash is returned.
def initialize_hists(tags=nil)
  (tags || []).each_with_object({}) do |tag, fresh|
    fresh[tag] = @zero_hist.dup
  end
end
|
88
|
+
|
89
|
+
# Adds one observation of +key+ to the histogram kept for +tag+.
#
# The key is mapped to a bin with key.hash % @bin_num. For every width w in
# 0..@alpha, each bin from (id - w) to (id + w), wrapping around the ends, is
# raised by @sampling_rate. The hit bin therefore receives @alpha + 1
# increments and its neighbours progressively fewer.
#
# tag - key into @hists selecting the histogram to update
# key - object to count; only its #hash is used
def increment(tag, key)
  id = key.hash % @bin_num
  @mutex.synchronize {
    # Create the histogram lazily inside the lock so that two concurrent
    # callers cannot each install their own array for a new tag.
    hist = (@hists[tag] ||= @zero_hist.dup)
    (0..@alpha).each do |width|
      (-width..width).each do |offset|
        hist[(id + offset) % @bin_num] += @sampling_rate
      end
    end
  }
end
|
100
|
+
|
101
|
+
# Output entry point: counts the values found under @count_key in every
# record of the event stream.
#
# tag   - tag of the incoming events; selects the histogram to update
# es    - event stream; iterating it yields (time, record) pairs
# chain - output chain; advanced before any counting is done
#
# The value under @count_key may be a single key or a (nested) array of keys.
# With @sampling_rate == 1 every key is counted; otherwise only every
# @sampling_rate-th key seen is passed on to increment.
def emit(tag, es, chain)
  chain.next

  es.each do |time, record|
    keys = record[@count_key]
    # A record without the counted field used to be counted as the key nil,
    # inflating one arbitrary bin. Such a record has nothing to count.
    next if keys.nil?

    [keys].flatten.each do |k|
      if @sampling_rate == 1
        increment(tag, k)
      else
        @sampling_counter += 1
        if @sampling_counter >= @sampling_rate
          increment(tag, k)
          @sampling_counter = 0
        end
      end
    end
  end
end
|
119
|
+
|
120
|
+
# Rewrites the keys of a flushed result into the tags to emit under.
#
# flushed - Hash of input tag => payload
#
# Returns a new Hash of output tag => payload. When @tag is set every entry
# is keyed by @tag. Otherwise @input_tag_remove_prefix is stripped from the
# front of the input tag (when the tag equals it or starts with it followed
# by "."), @tag_prefix is put in front and @tag_suffix is appended. A leading
# or trailing "." left behind by any step is removed.
#
# NOTE(review): entries that map to the same output tag (always the case for
# several input tags when @tag is set) overwrite each other, so only the last
# one survives -- confirm this is intended.
def tagging(flushed)
  edge_dot = /^\.|\.$/
  pairs = flushed.map do |tag, hist|
    if @tag
      out_tag = @tag
    else
      out_tag = tag
      if @input_tag_remove_prefix &&
         ((tag.start_with?(@remove_prefix_string) && tag.length > @remove_prefix_length) ||
          tag == @input_tag_remove_prefix)
        out_tag = tag[@input_tag_remove_prefix.length..-1].gsub(edge_dot, "")
      end
      out_tag = (@tag_prefix_string + out_tag).gsub(edge_dot, "") if @tag_prefix
      out_tag = (out_tag + @tag_suffix_string).gsub(edge_dot, "") if @tag_suffix
    end
    [out_tag, hist]
  end
  Hash[pairs]
end
|
147
|
+
|
148
|
+
# Builds the per-tag output records from the raw histograms.
#
# flushed - Hash of tag => Array of bin counts
#
# Returns a Hash of tag => { :hist, :sum, :avg, :sd }. :hist is the same
# array object that was passed in, :sum is the total of all bins, :avg is
# sum / number of bins and :sd is the population standard deviation truncated
# to an Integer. With Integer bins every division here is Integer division.
# An empty histogram yields 0 for :sum, :avg and :sd.
def generate_output(flushed)
  output = {}
  flushed.each do |tag, hist|
    bins = hist.size
    # inject(0, :+) gives 0 rather than nil for an empty array, and the
    # bins > 0 guards avoid ZeroDivisionError in that case.
    sum = hist.inject(0, :+)
    avg = bins > 0 ? sum / bins : 0
    squared_diffs = hist.map { |n| (avg - n)**2 }
    variance = bins > 0 ? squared_diffs.inject(0, :+) / bins : 0
    output[tag] = {
      :hist => hist,
      :sum  => sum,
      :avg  => avg,
      :sd   => Math.sqrt(variance).to_i
    }
  end
  output
end
|
165
|
+
|
166
|
+
# Takes a snapshot of all histograms, replaces them with fresh ones and
# returns the tagged output.
#
# The snapshot and the replacement happen under @mutex, the lock increment
# holds while it updates bins, so an update made on another thread cannot
# land between the two steps and be dropped. The replacement is built by
# initialize_hists from the tags currently present in @hists.
#
# Returns the Hash produced by tagging.
def flush
  flushed = @mutex.synchronize {
    snapshot = generate_output(@hists)
    @hists = initialize_hists(@hists.keys.dup)
    snapshot
  }
  tagging(flushed)
end
|
170
|
+
|
171
|
+
def flush_emit(now)
|
172
|
+
flushed = flush
|
173
|
+
flushed.each do |tag, data|
|
174
|
+
Fluent::Engine.emit(tag, now, data)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
end
|
data/test/helper.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'bundler'
|
3
|
+
|
4
|
+
begin
|
5
|
+
Bundler.setup(:default, :development)
|
6
|
+
rescue Bundler::BundlerError => e
|
7
|
+
$stderr.puts e.message
|
8
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
9
|
+
exit e.status_code
|
10
|
+
end
|
11
|
+
require 'test/unit'
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '/../lib', ))
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
15
|
+
require 'fluent/test'
|
16
|
+
unless ENV.has_key? 'VERBOSE'
|
17
|
+
nulllogger = Object.new
|
18
|
+
nulllogger.instance_eval {|logj|
|
19
|
+
def method_missing(methos, *args)
|
20
|
+
# pass
|
21
|
+
end
|
22
|
+
}
|
23
|
+
$log = nulllogger
|
24
|
+
end
|
25
|
+
|
26
|
+
require 'fluent/plugin/out_histogram'
|
27
|
+
|
28
|
+
class Test::Unit::TestCase
|
29
|
+
|
30
|
+
end
|
@@ -0,0 +1,202 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
|
3
|
+
require 'helper'
|
4
|
+
|
5
|
+
class HistogramOutputTest < Test::Unit::TestCase
|
6
|
+
def setup
|
7
|
+
Fluent::Test.setup
|
8
|
+
end
|
9
|
+
|
10
|
+
CONFIG = %[
|
11
|
+
count_key keys
|
12
|
+
flush_interval 60s
|
13
|
+
bin_num 100
|
14
|
+
tag_prefix histo
|
15
|
+
input_tag_remove_prefix test.input
|
16
|
+
]
|
17
|
+
|
18
|
+
def create_driver(conf = CONFIG, tag='test')
|
19
|
+
Fluent::Test::OutputTestDriver.new(Fluent::HistogramOutput, tag).configure(conf)
|
20
|
+
end
|
21
|
+
|
22
|
+
def test_configure
|
23
|
+
assert_raise(Fluent::ConfigError) {
|
24
|
+
create_driver %[ bin_num 0]
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def test_small_increment_no_alpha
|
29
|
+
bin_num = 100
|
30
|
+
alpha = 0
|
31
|
+
f = create_driver(%[
|
32
|
+
bin_num #{bin_num}
|
33
|
+
alpha #{alpha}])
|
34
|
+
f.instance.increment("test.input", "A")
|
35
|
+
f.instance.increment("test.input", "B")
|
36
|
+
zero = f.instance.zero_hist.dup
|
37
|
+
id = "A".hash % bin_num
|
38
|
+
zero[id] += 1
|
39
|
+
id = "B".hash % bin_num
|
40
|
+
zero[id] += 1
|
41
|
+
assert_equal({"test.input" => {:hist => zero, :sum => 2, :avg => 2/bin_num, :sd=>0}},
|
42
|
+
f.instance.flush)
|
43
|
+
end
|
44
|
+
|
45
|
+
def test_small_increment_with_alpha
|
46
|
+
bin_num = 100
|
47
|
+
alpha = 1
|
48
|
+
f = create_driver(%[
|
49
|
+
bin_num #{bin_num}
|
50
|
+
alpha #{alpha}])
|
51
|
+
f.instance.increment("test.input", "A")
|
52
|
+
f.instance.increment("test.input", "B")
|
53
|
+
zero = f.instance.zero_hist.dup
|
54
|
+
id = "A".hash % bin_num
|
55
|
+
zero[id] += 2
|
56
|
+
zero[(id + alpha) % bin_num] += 1
|
57
|
+
zero[id - alpha] += 1
|
58
|
+
id = "B".hash % bin_num
|
59
|
+
zero[id] += 2
|
60
|
+
zero[(id + alpha) % bin_num] += 1
|
61
|
+
zero[id - alpha] += 1
|
62
|
+
assert_equal({"test.input" => {:hist => zero, :sum => 2*3+2, :avg => (2*3+2)/bin_num, :sd=>0}},
|
63
|
+
f.instance.flush)
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_tagging_with_flush
|
67
|
+
f = create_driver(%[tag_prefix histo])
|
68
|
+
f.instance.increment("test", "A")
|
69
|
+
flushed = f.instance.flush
|
70
|
+
assert_equal("histo.test", flushed.keys.join(''))
|
71
|
+
|
72
|
+
f = create_driver(%[
|
73
|
+
tag_prefix histo
|
74
|
+
input_tag_remove_prefix test])
|
75
|
+
f.instance.increment("test", "A")
|
76
|
+
flushed = f.instance.flush
|
77
|
+
assert_equal("histo", flushed.keys.join(''))
|
78
|
+
end
|
79
|
+
|
80
|
+
def test_tagging
|
81
|
+
f = create_driver(%[
|
82
|
+
hostname localhost
|
83
|
+
tag_prefix histo
|
84
|
+
input_tag_remove_prefix test
|
85
|
+
tag_suffix __HOSTNAME__ ])
|
86
|
+
|
87
|
+
# input tag is one
|
88
|
+
data = {"test.input" => [1, 2, 3, 4, 5]}
|
89
|
+
tagged = f.instance.tagging(data)
|
90
|
+
assert_equal("histo.input.localhost", tagged.keys.join(''))
|
91
|
+
|
92
|
+
# input tag is more than one
|
93
|
+
data = {"test.a" => [1, 2, 3], "test.b" => [1, 2]}
|
94
|
+
tagged = f.instance.tagging(data)
|
95
|
+
assert_equal(true, tagged.key?("histo.a.localhost"))
|
96
|
+
assert_equal(true, tagged.key?("histo.b.localhost"))
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_tagging_use_tag
|
100
|
+
f = create_driver(%[ tag histo ])
|
101
|
+
data = {"test.input" => [1, 2, 3, 4, 5]}
|
102
|
+
tagged = f.instance.tagging(data)
|
103
|
+
assert_equal("histo", tagged.keys.join(''))
|
104
|
+
end
|
105
|
+
|
106
|
+
def test_increment_sum
|
107
|
+
bin_num = 100
|
108
|
+
f = create_driver(%[
|
109
|
+
bin_num #{bin_num}
|
110
|
+
alpha 1 ])
|
111
|
+
1000.times do |i|
|
112
|
+
f.instance.increment("test.input", i.to_s)
|
113
|
+
end
|
114
|
+
flushed = f.instance.flush
|
115
|
+
assert_equal(1000*4, flushed["test.input"][:sum])
|
116
|
+
assert_equal(1000*4/bin_num, flushed["test.input"][:avg])
|
117
|
+
end
|
118
|
+
|
119
|
+
def test_emit
|
120
|
+
bin_num = 100
|
121
|
+
f = create_driver(%[
|
122
|
+
bin_num #{bin_num}
|
123
|
+
alpha 1 ])
|
124
|
+
f.run do
|
125
|
+
100.times do
|
126
|
+
f.emit({"keys" => ["A", "B", "C"]})
|
127
|
+
end
|
128
|
+
end
|
129
|
+
flushed = f.instance.flush
|
130
|
+
assert_equal(300*4, flushed["test"][:sum])
|
131
|
+
assert_equal(300*4/bin_num, flushed["test"][:avg])
|
132
|
+
end
|
133
|
+
|
134
|
+
def test_some_hist_exist_case_tagging_with_emit
|
135
|
+
f = create_driver
|
136
|
+
data = {"keys" => ["A", "B", "C"]}
|
137
|
+
f.run do
|
138
|
+
["test.a", "test.b", "test.c"].each do |tag|
|
139
|
+
f.instance.increment(tag, data)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|
143
|
+
f.instance.flush # clear hist
|
144
|
+
flushed = f.instance.flush
|
145
|
+
assert_equal(true, flushed.key?("histo.test.a"))
|
146
|
+
assert_equal(true, flushed.key?("histo.test.b"))
|
147
|
+
assert_equal(true, flushed.key?("histo.test.c"))
|
148
|
+
end
|
149
|
+
|
150
|
+
def test_can_detect_hotspot
|
151
|
+
f = create_driver(%[
|
152
|
+
count_key keys
|
153
|
+
flush_interval 10s
|
154
|
+
bin_num 100
|
155
|
+
tag_prefix histo
|
156
|
+
tag_suffix __HOSTNAME__
|
157
|
+
hostname localhost
|
158
|
+
input_tag_remove_prefix test])
|
159
|
+
# ("A".."ZZ").to_a.size == 702
|
160
|
+
data = ("A".."ZZ").to_a.shuffle
|
161
|
+
f.run do
|
162
|
+
100.times do
|
163
|
+
data.each_slice(10) do |d|
|
164
|
+
f.emit({"keys" => d})
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
168
|
+
flushed_even = f.instance.flush["histo.localhost"]
|
169
|
+
|
170
|
+
#('A'..'ZZ').to_a.shuffle.size == 702
|
171
|
+
# In here, replace 7 values of ('A'..'ZZ') to 'D' as example hotspot.
|
172
|
+
data.size.times {|i| data[i] = 'D' if i%100 == 0 }
|
173
|
+
f.run do
|
174
|
+
100.times do
|
175
|
+
data.each_slice(10) do |d|
|
176
|
+
f.emit({"keys" => d})
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
180
|
+
flushed_uneven = f.instance.flush["histo.localhost"]
|
181
|
+
|
182
|
+
assert_equal(true, flushed_even[:sd] < flushed_uneven[:sd])
|
183
|
+
end
|
184
|
+
|
185
|
+
def test_sampling
|
186
|
+
bin_num = 100
|
187
|
+
sampling_rate = 10
|
188
|
+
f = create_driver(%[
|
189
|
+
bin_num #{bin_num}
|
190
|
+
sampling_rate #{sampling_rate}
|
191
|
+
alpha 0 ])
|
192
|
+
f.run do
|
193
|
+
100.times do
|
194
|
+
f.emit({"keys" => ["A", "B", "C"]})
|
195
|
+
end
|
196
|
+
end
|
197
|
+
flushed = f.instance.flush
|
198
|
+
assert_equal(300, flushed["test"][:sum])
|
199
|
+
assert_equal(300/bin_num, flushed["test"][:avg])
|
200
|
+
end
|
201
|
+
|
202
|
+
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-histogram
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Yusuke SHIMIZU
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-01-28 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ~>
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.3'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ! '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ! '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.2
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: fluentd
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ~>
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.10.9
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ~>
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.10.9
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: fluent-mixin-config-placeholders
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ~>
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.2.3
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ~>
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 0.2.3
|
69
|
+
description: Combine inputs data and make histogram which helps to detect a hotspot.
|
70
|
+
email: a.ryuklnm@gmail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- .gitignore
|
76
|
+
- Gemfile
|
77
|
+
- LICENSE.txt
|
78
|
+
- README.md
|
79
|
+
- Rakefile
|
80
|
+
- bench/README.md
|
81
|
+
- bench/genload.rb
|
82
|
+
- fluent-plugin-histogram.gemspec
|
83
|
+
- lib/fluent/plugin/out_histogram.rb
|
84
|
+
- test/helper.rb
|
85
|
+
- test/plugin/test_out_histogram.rb
|
86
|
+
homepage: https://github.com/karahiyo/fluent-plugin-histogram
|
87
|
+
licenses:
|
88
|
+
- APLv2
|
89
|
+
metadata: {}
|
90
|
+
post_install_message:
|
91
|
+
rdoc_options: []
|
92
|
+
require_paths:
|
93
|
+
- lib
|
94
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ! '>='
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
requirements: []
|
105
|
+
rubyforge_project: fluent-plugin-histogram
|
106
|
+
rubygems_version: 2.2.1
|
107
|
+
signing_key:
|
108
|
+
specification_version: 4
|
109
|
+
summary: Combine inputs data and make histogram which helps to detect a hotspot.
|
110
|
+
test_files:
|
111
|
+
- test/helper.rb
|
112
|
+
- test/plugin/test_out_histogram.rb
|