fluent-plugin-histogram 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NmJkY2ZiNTU3NjcyMzRhN2EzNzA1MWFiMzJlMjgxNGY4ODI2Mjc4Zg==
5
+ data.tar.gz: !binary |-
6
+ YjVhOGEzNWQzY2YyZTBhYjYyMjVlYjkxMDdhZDczMjg1YzA5MzQ1Nw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZDNlZGU3OWYwYzFlYjhiOTNjMzU4MjU3MTU0MzZjZmUzMzY4OGQ4NDg2ODky
10
+ NjBjNDJlNmM1MTE4MmE1YjcwNWNlMThlN2Y0OTQ3ZGMyN2VkNTQxMThlNmIx
11
+ YmUzMjQ1MzU2MDk0NzEwNjNlYmRjOGFlZjNlZTZiOTQ0YjkwMTY=
12
+ data.tar.gz: !binary |-
13
+ YWE0MmNjOWQ2MjI5N2EzZTVmNTNiZWY1NzNkNzgzOTNkOGM2NmJmMzU0YWNh
14
+ OGZmNjQxNzc0OTFmZGZjZmNjYTUxMzdiNTYyOTYzNjI4NGFmYWE4MjdhMTRj
15
+ ZDM5NmY0MzM2ZDhmOTBlY2UzZDk3MjA2MmJiYzhkODE3ZDJiOTE=
data/.gitignore ADDED
@@ -0,0 +1,22 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ *.swp
19
+ .conf
20
+ .idea
21
+ vendor
22
+ .ruby-version
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-histogram.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2013 SHIMIZU Yusuke
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,98 @@
1
+ # fluent-plugin-histogram
2
+
3
+ Fluentd output plugin.
4
+
5
+ Counts up input keys and builds a **scalable and rough histogram** to help detect hotspot problems.
6
+
7
+ A "scalable rough histogram" fits cases where there is an enormous variety of keys.
8
+
9
+ We referred to ["Strauss, O.: Rough histograms for robust statistics, Pattern Recognition, 2000. Proceedings. 15th International Conference on (Volume:2)"](http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber=7237) for the "rough histogram".
10
+ In this approach, an increment is not applied to a single value (`.`); instead, several neighboring values are incremented in a shape like `△ `.
11
+ To use this, please set `alpha >= 1`(default 1) option in fluent.conf.
12
+
13
+ Moreover, we optimized the histogram for an enormous variety of keys by fixing the histogram width.
14
+ To use this, please set `bin_num`(default 100) in fluent.conf.
15
+
16
+ Be careful: the histogram this plugin outputs is not an exact count of the provided data. However, the plugin can handle 25,000 records/sec of input data, and the resulting histogram is good enough for detecting hotspot problems.
17
+
18
+ ## Examples
19
+
20
+ ##### Example 1
21
+
22
+ if run below commands,
23
+ ```
24
+ $ echo '{"keys":["A", "B", "C", "A"]}' | fluent-cat input.sample
25
+ $ echo '{"keys":["A", "B", "D"]}' | fluent-cat input.sample
26
+ ```
27
+
28
+ output is
29
+ ```
30
+ 2013-12-21T11:08:25+09:00 histo.sample.localhost {"hist":[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 6, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 4, 2, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0], "sum":28, "avg":0, "sd":0}
31
+ ```
32
+
33
+ The plugin counts up the values of the key you specified and makes **something histogram-like**.
34
+
35
+ And calculate,
36
+
37
+ * Sum(**sum**)
38
+ * Average(**avg**)
39
+ * Standard Deviation(**sd**)
40
+
41
+ ##### Example 2
42
+
43
+ run bench
44
+ ```
45
+ $ ruby bench/genload.rb input.sample 5000
46
+ ```
47
+
48
+ output is,
49
+ ```
50
+ 2013-12-21T11:09:52+09:00 histo.sample.localhost
51
+ {"hist":
52
+ [859, 963, 1224, 1252, 957, 764, 746, 929, 1406, 1519, 1072, 955, 1069, 916, 797, 948, 1090, 915, 727, 730, 898, 1051, 918, 780, 751, 890, 1104, 976, 949, 1138, 996, 959, 1100, 964, 840, 832, 1020, 1196, 969, 756, 750, 939, 1108, 928, 883, 1154, 1173, 951, 871, 837, 776, 896, 1048, 961, 825, 780, 959, 1113, 1034, 1019, 1090, 1274, 1370, 1207, 930, 898, 1029, 907, 951, 1113, 921, 992, 1422, 1509, 1253, 924, 941, 1099, 898, 775, 994, 1182, 1170, 1515, 1788, 1216, 870, 1038, 938, 744, 826, 969, 892, 843, 883, 840, 800, 966, 1115, 978],
53
+ "sum":100000,
54
+ "avg":1000,
55
+ "sd":193}
56
+ ```
57
+
58
+ ## Configuration
59
+
60
+ ```
61
+ <match input.**>
62
+ type histogram
63
+ count_key keys # key of the input record whose values are counted
64
+ flush_interval 10s # flush interval[s] (:default 60s)
65
+ tag_prefix histo
66
+ tag_suffix __HOSTNAME__ # this plugin mixes in fluent-mixin-config-placeholders
67
+ input_tag_remove_prefix input
68
+ alpha 1 # count up like this, (■ = +1)
69
+ # ■
70
+ # ■ ■ ■ ■
71
+ # ■ ■ ■ ■ ■ ■ ■ ■ ■
72
+ # alpha: 0, 1, 2
73
+
74
+ sampling_rate 10 # input data is thinned out to 1/10.
75
+ </match>
76
+ ```
77
+
78
+ ## Installation
79
+
80
+ Add this line to your application's Gemfile:
81
+
82
+ gem 'fluent-plugin-histogram'
83
+
84
+ And then execute:
85
+
86
+ $ bundle
87
+
88
+ Or install it yourself as:
89
+
90
+ $ gem install fluent-plugin-histogram
91
+
92
+ ## Contributing
93
+
94
+ 1. Fork it
95
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
96
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
97
+ 4. Push to the branch (`git push origin my-new-feature`)
98
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+ Rake::TestTask.new(:test) do |test|
5
+ test.libs << 'lib' << 'test'
6
+ test.pattern = 'test/**/test_*.rb'
7
+ test.verbose = true
8
+ end
9
+
10
+ task :default => :test
data/bench/README.md ADDED
@@ -0,0 +1,30 @@
1
+ Benchmark tool for Fluent event collector
2
+ =========================================
3
+
4
+ ## Install
5
+
6
+ # genload.rb depends on fluent gem
7
+ $ gem install fluent
8
+
9
+ ## Usage
10
+
11
+ Usage: genload [options] <tag> <num>
12
+ -p, --port PORT fluent tcp port (default: 24224)
13
+ -h, --host HOST fluent host (default: 127.0.0.1)
14
+ -u, --unix use unix socket instead of tcp
15
+ -P, --path PATH unix socket path (default: /var/run/fluent/fluent.sock)
16
+ -r, --repeat NUM repeat number (default: 1)
17
+ -m, --multi NUM send multiple records at once (default: 1)
18
+ -c, --concurrent NUM number of threads (default: 1)
19
+ -l, --record_len NUM number of keys in each record sent (default: 5)
20
+ -G, --no-packed don't use lazy deserialization optimize
21
+
22
+
23
+ ## Examples
24
+
25
+ # uses "benchmark.buffered" tag and sends 50,000 records
26
+ # -c: uses 10 threads/connections;
27
+ # -m: one message includes 20 record
28
+ # -r: repeats 100 times
29
+ ruby genload.rb benchmark.buffered 50000 -c 10 -m 20 -r 100
30
+
data/bench/genload.rb ADDED
@@ -0,0 +1,152 @@
1
+ require 'optparse'
2
+ require 'fluent/env'
3
+
4
+ op = OptionParser.new
5
+
6
+ op.banner += " <tag> <num>"
7
+
8
+ port = Fluent::DEFAULT_LISTEN_PORT
9
+ host = '127.0.0.1'
10
+ unix = false
11
+ socket_path = Fluent::DEFAULT_SOCKET_PATH
12
+ send_timeout = 20.0
13
+ repeat = 1
14
+ para = 1
15
+ multi = 1
16
+ record_len = 5
17
+ packed = true
18
+
19
+ config_path = Fluent::DEFAULT_CONFIG_PATH
20
+
21
+ op.on('-p', '--port PORT', "fluent tcp port (default: #{port})", Integer) {|i|
22
+ port = s
23
+ }
24
+
25
+ op.on('-h', '--host HOST', "fluent host (default: #{host})") {|s|
26
+ host = s
27
+ }
28
+
29
+ op.on('-u', '--unix', "use unix socket instead of tcp", TrueClass) {|b|
30
+ unix = b
31
+ }
32
+
33
+ op.on('-P', '--path PATH', "unix socket path (default: #{socket_path})") {|s|
34
+ socket_path = s
35
+ }
36
+
37
+ op.on('-r', '--repeat NUM', "repeat number (default: 1)", Integer) {|i|
38
+ repeat = i
39
+ }
40
+
41
+ op.on('-m', '--multi NUM', "send multiple records at once (default: 1)", Integer) {|i|
42
+ multi = i
43
+ }
44
+
45
+ op.on('-l', '--record_len NUM', "a record to be send have NUM keys (default: 5)", Integer) {|i|
46
+ record_len = i
47
+ }
48
+
49
+ op.on('-c', '--concurrent NUM', "number of threads (default: 1)", Integer) {|i|
50
+ para = i
51
+ }
52
+
53
+ op.on('-G', '--no-packed', "don't use lazy deserialization optimize") {|i|
54
+ packed = false
55
+ }
56
+
57
+ (class<<self;self;end).module_eval do
58
+ define_method(:usage) do |msg|
59
+ puts op.to_s
60
+ puts "error: #{msg}" if msg
61
+ exit 1
62
+ end
63
+ end
64
+
65
+ begin
66
+ op.parse!(ARGV)
67
+
68
+ if ARGV.length != 2
69
+ usage nil
70
+ end
71
+
72
+ tag = ARGV.shift
73
+ num = ARGV.shift.to_i
74
+
75
+ rescue
76
+ usage $!.to_s
77
+ end
78
+
79
+ require 'socket'
80
+ require 'msgpack'
81
+ require 'benchmark'
82
+
83
+ def gen_word(len=nil)
84
+ len = rand(5) + 1 unless len
85
+ rand(36**len).to_s(36)
86
+ end
87
+
88
+ def gen_record(num=5, w_len=nil)
89
+ (1..num).reduce([]) {|ret| ret << gen_word(w_len)}
90
+ end
91
+
92
+
93
+ connector = Proc.new {
94
+ if unix
95
+ sock = UNIXSocket.open(socket_path)
96
+ else
97
+ sock = TCPSocket.new(host, port)
98
+ end
99
+
100
+ opt = [1, send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
101
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
102
+
103
+ opt = [send_timeout.to_i, 0].pack('L!L!') # struct timeval
104
+ sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
105
+
106
+ sock
107
+ }
108
+
109
+ def gen_data(tag, multi=1, r_len=5)
110
+ time = Time.now.to_i
111
+ data = ''
112
+ multi.times do
113
+ record = {"keys"=>gen_record(r_len)}
114
+ [time, record].to_msgpack(data)
115
+ end
116
+ data = [tag, data].to_msgpack
117
+ end
118
+
119
+ size = 0 # sum of data.bytesize
120
+ repeat.times do
121
+ puts "--- #{Time.now}"
122
+ Benchmark.bm do |x|
123
+ start = Time.now
124
+
125
+ lo = num / para / multi
126
+ lo = 1 if lo == 0
127
+
128
+ x.report do
129
+ (1..para).map {
130
+ Thread.new do
131
+ sock = connector.call
132
+ lo.times do
133
+ data = gen_data(tag, multi, record_len)
134
+ size += data.bytesize
135
+ sock.write data
136
+ end
137
+ sock.close
138
+ end
139
+ }.each {|t|
140
+ t.join
141
+ }
142
+ end
143
+
144
+ finish = Time.now
145
+ elapsed = finish - start
146
+
147
+ puts "% 10.3f Mbps" % [size*lo*para/elapsed/1000/1000]
148
+ puts "% 10.3f records/sec" % [lo*para*multi/elapsed]
149
+ end
150
+
151
+ end
152
+
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = "fluent-plugin-histogram"
5
+ gem.version = "0.1.2"
6
+ gem.authors = ["Yusuke SHIMIZU"]
7
+ gem.email = "a.ryuklnm@gmail.com"
8
+ gem.description = "Combine inputs data and make histogram which helps to detect a hotspot."
9
+ gem.summary = "Combine inputs data and make histogram which helps to detect a hotspot."
10
+ gem.homepage = "https://github.com/karahiyo/fluent-plugin-histogram"
11
+ gem.license = "APLv2"
12
+
13
+ gem.rubyforge_project = "fluent-plugin-histogram"
14
+
15
+ gem.files = `git ls-files`.split($/)
16
+ gem.executables = gem.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
18
+ gem.require_paths = ["lib"]
19
+
20
+ gem.add_development_dependency "bundler", "~> 1.3"
21
+ gem.add_development_dependency "rake", ">= 0.9.2"
22
+ gem.add_development_dependency "fluentd", "~> 0.10.9"
23
+ gem.add_runtime_dependency "fluent-mixin-config-placeholders", "~> 0.2.3"
24
+
25
+ end
@@ -0,0 +1,179 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'fluent/mixin/config_placeholders'
4
+
5
+ module Fluent
6
+ class HistogramOutput < Fluent::Output
7
+ Fluent::Plugin.register_output('histogram', self)
8
+
9
+ config_param :tag, :string, :default => nil
10
+ config_param :tag_prefix, :string, :default => nil
11
+ config_param :tag_suffix, :string, :default => nil
12
+ config_param :input_tag_remove_prefix, :string, :default => nil
13
+ config_param :flush_interval, :time, :default => 60
14
+ config_param :count_key, :string, :default => 'keys'
15
+ config_param :bin_num, :integer, :default => 100
16
+ config_param :alpha, :integer, :default => 1
17
+ config_param :sampling_rate, :integer, :default => 1
18
+
19
+ include Fluent::Mixin::ConfigPlaceholders
20
+
21
+ attr_accessor :flush_interval
22
+ attr_accessor :hists
23
+ attr_accessor :zero_hist
24
+ attr_accessor :remove_prefix_string
25
+
26
+ ## fluentd output plugin's methods
27
+
28
+ def initialize
29
+ super
30
+ end
31
+
32
+ def configure(conf)
33
+ super
34
+
35
+ raise Fluent::ConfigError, "bin_num must be > 0" if @bin_num <= 0
36
+ $log.warn %Q[too small "bin_num(=#{@bin_num})" may raise unexpected outcome] if @bin_num < 100
37
+
38
+ @tag_prefix_string = @tag_prefix + '.' if @tag_prefix
39
+ @tag_suffix_string = '.' + @tag_suffix if @tag_suffix
40
+ if @input_tag_remove_prefix
41
+ @remove_prefix_string = @input_tag_remove_prefix + '.'
42
+ @remove_prefix_length = @remove_prefix_string.length
43
+ end
44
+
45
+ @zero_hist = [0] * @bin_num
46
+
47
+ @hists = initialize_hists
48
+ @sampling_counter = 0
49
+ @mutex = Mutex.new
50
+
51
+ end
52
+
53
+ def start
54
+ super
55
+ @watcher = Thread.new(&method(:watch))
56
+ end
57
+
58
+ def watch # body of the thread created in start: polls the clock and flushes every @flush_interval seconds
59
+ @last_checked = Fluent::Engine.now
60
+ while true # never returns on its own; shutdown terminates the thread
61
+ sleep 0.5
62
+ if Fluent::Engine.now - @last_checked >= @flush_interval
63
+ now = Fluent::Engine.now
64
+ flush_emit(now) # emits the accumulated histograms stamped with `now`
65
+ @last_checked = now
66
+ end
67
+ end
68
+ end
69
+
70
+ def shutdown
71
+ super
72
+ @watcher.terminate
73
+ @watcher.join
74
+ end
75
+
76
+
77
+ ## Histogram plugin's method
78
+
79
+ def initialize_hists(tags=nil)
80
+ hists = {}
81
+ if tags
82
+ tags.each do |tag|
83
+ hists[tag] = @zero_hist.dup
84
+ end
85
+ end
86
+ hists
87
+ end
88
+
89
+ def increment(tag, key)
90
+ @hists[tag] ||= @zero_hist.dup
91
+ id = key.hash % @bin_num
92
+ @mutex.synchronize {
93
+ (0..@alpha).each do |alpha|
94
+ (-alpha..alpha).each do |a|
95
+ @hists[tag][(id + a) % @bin_num] += 1 * @sampling_rate
96
+ end
97
+ end
98
+ }
99
+ end
100
+
101
+ def emit(tag, es, chain)
102
+ chain.next
103
+
104
+ es.each do |time, record|
105
+ keys = record[@count_key]
106
+ [keys].flatten.each do |k|
107
+ if @sampling_rate == 1
108
+ increment(tag, k)
109
+ else
110
+ @sampling_counter += 1
111
+ if @sampling_counter >= @sampling_rate
112
+ increment(tag, k)
113
+ @sampling_counter = 0
114
+ end
115
+ end
116
+ end
117
+ end
118
+ end
119
+
120
+ def tagging(flushed) # returns a new Hash whose keys are the output tags derived from the input tags
121
+ tagged = {}
122
+ tagged = Hash[ flushed.map do |tag, hist|
123
+ if @tag # a fixed output tag overrides prefix/suffix handling
124
+ tag = @tag
125
+ else
126
+ if @input_tag_remove_prefix &&
127
+ ( ( tag.start_with?(@remove_prefix_string) &&
128
+ tag.length > @remove_prefix_length ) ||
129
+ tag == @input_tag_remove_prefix)
130
+ tag = tag[@input_tag_remove_prefix.length..-1] # slicing yields a new String, so the caller's key is not mutated
131
+ tag.gsub!(/^\.|\.$/, "") # strip a leading/trailing dot left by the removal
132
+ end
133
+ if @tag_prefix
134
+ tag = @tag_prefix_string + tag
135
+ tag.gsub!(/^\.|\.$/, "") # drops the trailing dot when the remaining tag is empty
136
+ end
137
+ if @tag_suffix
138
+ tag += @tag_suffix_string
139
+ tag.gsub!(/^\.|\.$/, "")
140
+ end
141
+ end
142
+ # NOTE(review): when @tag is set every entry gets the same key, so Hash[] keeps only the last one - confirm this is intended
143
+ [tag, hist]
144
+ end ]
145
+ tagged
146
+ end
147
+
148
+ def generate_output(flushed)
149
+ output = {}
150
+ flushed.each do |tag, hist|
151
+ output[tag] = {}
152
+ sum = hist.inject(:+)
153
+ avg = sum / hist.size
154
+ sd = hist.instance_eval do
155
+ sigmas = map { |n| (avg - n)**2 }
156
+ Math.sqrt(sigmas.inject(:+) / size)
157
+ end
158
+ output[tag][:hist] = hist
159
+ output[tag][:sum] = sum
160
+ output[tag][:avg] = avg
161
+ output[tag][:sd] = sd.to_i
162
+ end
163
+ output
164
+ end
165
+
166
+ def flush # returns the statistics of the current histograms keyed by output tag, and starts a fresh counting window
167
+ flushed, @hists = generate_output(@hists), initialize_hists(@hists.keys.dup) # known tags are re-created with zeroed bins
168
+ tagging(flushed)
169
+ end
170
+
171
+ def flush_emit(now)
172
+ flushed = flush
173
+ flushed.each do |tag, data|
174
+ Fluent::Engine.emit(tag, now, data)
175
+ end
176
+ end
177
+
178
+ end
179
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,30 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+
4
+ begin
5
+ Bundler.setup(:default, :development)
6
+ rescue Bundler::BundlerError => e
7
+ $stderr.puts e.message
8
+ $stderr.puts "Run `bundle install` to install missing gems"
9
+ exit e.status_code
10
+ end
11
+ require 'test/unit'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '/../lib', ))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'fluent/test'
16
+ unless ENV.has_key? 'VERBOSE'
17
+ nulllogger = Object.new
18
+ nulllogger.instance_eval {|logj|
19
+ def method_missing(methos, *args)
20
+ # pass
21
+ end
22
+ }
23
+ $log = nulllogger
24
+ end
25
+
26
+ require 'fluent/plugin/out_histogram'
27
+
28
+ class Test::Unit::TestCase
29
+
30
+ end
@@ -0,0 +1,202 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'helper'
4
+
5
+ class HistogramOutputTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ end
9
+
10
+ CONFIG = %[
11
+ count_key keys
12
+ flush_interval 60s
13
+ bin_num 100
14
+ tag_prefix histo
15
+ input_tag_remove_prefix test.input
16
+ ]
17
+
18
+ def create_driver(conf = CONFIG, tag='test')
19
+ Fluent::Test::OutputTestDriver.new(Fluent::HistogramOutput, tag).configure(conf)
20
+ end
21
+
22
+ def test_configure
23
+ assert_raise(Fluent::ConfigError) {
24
+ create_driver %[ bin_num 0]
25
+ }
26
+ end
27
+
28
+ def test_small_increment_no_alpha
29
+ bin_num = 100
30
+ alpha = 0
31
+ f = create_driver(%[
32
+ bin_num #{bin_num}
33
+ alpha #{alpha}])
34
+ f.instance.increment("test.input", "A")
35
+ f.instance.increment("test.input", "B")
36
+ zero = f.instance.zero_hist.dup
37
+ id = "A".hash % bin_num
38
+ zero[id] += 1
39
+ id = "B".hash % bin_num
40
+ zero[id] += 1
41
+ assert_equal({"test.input" => {:hist => zero, :sum => 2, :avg => 2/bin_num, :sd=>0}},
42
+ f.instance.flush)
43
+ end
44
+
45
+ def test_small_increment_with_alpha
46
+ bin_num = 100
47
+ alpha = 1
48
+ f = create_driver(%[
49
+ bin_num #{bin_num}
50
+ alpha #{alpha}])
51
+ f.instance.increment("test.input", "A")
52
+ f.instance.increment("test.input", "B")
53
+ zero = f.instance.zero_hist.dup
54
+ id = "A".hash % bin_num
55
+ zero[id] += 2
56
+ zero[(id + alpha) % bin_num] += 1
57
+ zero[id - alpha] += 1
58
+ id = "B".hash % bin_num
59
+ zero[id] += 2
60
+ zero[(id + alpha) % bin_num] += 1
61
+ zero[id - alpha] += 1
62
+ assert_equal({"test.input" => {:hist => zero, :sum => 2*3+2, :avg => (2*3+2)/bin_num, :sd=>0}},
63
+ f.instance.flush)
64
+ end
65
+
66
+ def test_tagging_with_flush
67
+ f = create_driver(%[tag_prefix histo])
68
+ f.instance.increment("test", "A")
69
+ flushed = f.instance.flush
70
+ assert_equal("histo.test", flushed.keys.join(''))
71
+
72
+ f = create_driver(%[
73
+ tag_prefix histo
74
+ input_tag_remove_prefix test])
75
+ f.instance.increment("test", "A")
76
+ flushed = f.instance.flush
77
+ assert_equal("histo", flushed.keys.join(''))
78
+ end
79
+
80
+ def test_tagging
81
+ f = create_driver(%[
82
+ hostname localhost
83
+ tag_prefix histo
84
+ input_tag_remove_prefix test
85
+ tag_suffix __HOSTNAME__ ])
86
+
87
+ # input tag is one
88
+ data = {"test.input" => [1, 2, 3, 4, 5]}
89
+ tagged = f.instance.tagging(data)
90
+ assert_equal("histo.input.localhost", tagged.keys.join(''))
91
+
92
+ # input tag is more than one
93
+ data = {"test.a" => [1, 2, 3], "test.b" => [1, 2]}
94
+ tagged = f.instance.tagging(data)
95
+ assert_equal(true, tagged.key?("histo.a.localhost"))
96
+ assert_equal(true, tagged.key?("histo.b.localhost"))
97
+ end
98
+
99
+ def test_tagging_use_tag
100
+ f = create_driver(%[ tag histo ])
101
+ data = {"test.input" => [1, 2, 3, 4, 5]}
102
+ tagged = f.instance.tagging(data)
103
+ assert_equal("histo", tagged.keys.join(''))
104
+ end
105
+
106
+ def test_increment_sum
107
+ bin_num = 100
108
+ f = create_driver(%[
109
+ bin_num #{bin_num}
110
+ alpha 1 ])
111
+ 1000.times do |i|
112
+ f.instance.increment("test.input", i.to_s)
113
+ end
114
+ flushed = f.instance.flush
115
+ assert_equal(1000*4, flushed["test.input"][:sum])
116
+ assert_equal(1000*4/bin_num, flushed["test.input"][:avg])
117
+ end
118
+
119
+ def test_emit
120
+ bin_num = 100
121
+ f = create_driver(%[
122
+ bin_num #{bin_num}
123
+ alpha 1 ])
124
+ f.run do
125
+ 100.times do
126
+ f.emit({"keys" => ["A", "B", "C"]})
127
+ end
128
+ end
129
+ flushed = f.instance.flush
130
+ assert_equal(300*4, flushed["test"][:sum])
131
+ assert_equal(300*4/bin_num, flushed["test"][:avg])
132
+ end
133
+
134
+ def test_some_hist_exist_case_tagging_with_emit
135
+ f = create_driver
136
+ data = {"keys" => ["A", "B", "C"]}
137
+ f.run do
138
+ ["test.a", "test.b", "test.c"].each do |tag|
139
+ f.instance.increment(tag, data)
140
+ end
141
+ end
142
+
143
+ f.instance.flush # clear hist
144
+ flushed = f.instance.flush
145
+ assert_equal(true, flushed.key?("histo.test.a"))
146
+ assert_equal(true, flushed.key?("histo.test.b"))
147
+ assert_equal(true, flushed.key?("histo.test.c"))
148
+ end
149
+
150
+ def test_can_detect_hotspot
151
+ f = create_driver(%[
152
+ count_key keys
153
+ flush_interval 10s
154
+ bin_num 100
155
+ tag_prefix histo
156
+ tag_suffix __HOSTNAME__
157
+ hostname localhost
158
+ input_tag_remove_prefix test])
159
+ # ("A".."ZZ").to_a.size == 702
160
+ data = ("A".."ZZ").to_a.shuffle
161
+ f.run do
162
+ 100.times do
163
+ data.each_slice(10) do |d|
164
+ f.emit({"keys" => d})
165
+ end
166
+ end
167
+ end
168
+ flushed_even = f.instance.flush["histo.localhost"]
169
+
170
+ #('A'..'ZZ').to_a.shuffle.size == 702
171
+ # In here, replace 7 values of ('A'..'ZZ') to 'D' as example hotspot.
172
+ data.size.times {|i| data[i] = 'D' if i%100 == 0 }
173
+ f.run do
174
+ 100.times do
175
+ data.each_slice(10) do |d|
176
+ f.emit({"keys" => d})
177
+ end
178
+ end
179
+ end
180
+ flushed_uneven = f.instance.flush["histo.localhost"]
181
+
182
+ assert_equal(true, flushed_even[:sd] < flushed_uneven[:sd])
183
+ end
184
+
185
+ def test_sampling
186
+ bin_num = 100
187
+ sampling_rate = 10
188
+ f = create_driver(%[
189
+ bin_num #{bin_num}
190
+ sampling_rate #{sampling_rate}
191
+ alpha 0 ])
192
+ f.run do
193
+ 100.times do
194
+ f.emit({"keys" => ["A", "B", "C"]})
195
+ end
196
+ end
197
+ flushed = f.instance.flush
198
+ assert_equal(300, flushed["test"][:sum])
199
+ assert_equal(300/bin_num, flushed["test"][:avg])
200
+ end
201
+
202
+ end
metadata ADDED
@@ -0,0 +1,112 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-histogram
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.2
5
+ platform: ruby
6
+ authors:
7
+ - Yusuke SHIMIZU
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-01-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.9.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.9.2
41
+ - !ruby/object:Gem::Dependency
42
+ name: fluentd
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: 0.10.9
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: 0.10.9
55
+ - !ruby/object:Gem::Dependency
56
+ name: fluent-mixin-config-placeholders
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: 0.2.3
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ~>
67
+ - !ruby/object:Gem::Version
68
+ version: 0.2.3
69
+ description: Combine inputs data and make histogram which helps to detect a hotspot.
70
+ email: a.ryuklnm@gmail.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - .gitignore
76
+ - Gemfile
77
+ - LICENSE.txt
78
+ - README.md
79
+ - Rakefile
80
+ - bench/README.md
81
+ - bench/genload.rb
82
+ - fluent-plugin-histogram.gemspec
83
+ - lib/fluent/plugin/out_histogram.rb
84
+ - test/helper.rb
85
+ - test/plugin/test_out_histogram.rb
86
+ homepage: https://github.com/karahiyo/fluent-plugin-histogram
87
+ licenses:
88
+ - APLv2
89
+ metadata: {}
90
+ post_install_message:
91
+ rdoc_options: []
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ required_rubygems_version: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ! '>='
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ requirements: []
105
+ rubyforge_project: fluent-plugin-histogram
106
+ rubygems_version: 2.2.1
107
+ signing_key:
108
+ specification_version: 4
109
+ summary: Combine inputs data and make histogram which helps to detect a hotspot.
110
+ test_files:
111
+ - test/helper.rb
112
+ - test/plugin/test_out_histogram.rb