fluent-plugin-anomalydetect 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -65,6 +65,13 @@ If you want to watch a value for a target field <fieldname> in data, write below
65
65
 
66
66
  If you want to know the details of these parameters, see "Theory".
67
67
 
68
+ <match access.**>
69
+ type anomalydetect
70
+ ...
71
+ store_file /path/to/anomalydetect.dat
72
+ </match>
73
+
74
+ If the "store_file" option is specified, the historical statistics are stored to that file at shutdown and restored from it at startup.
68
75
 
69
76
  == Theory
70
77
  "データマイニングによる異常検知" http://amzn.to/XHXNun
@@ -3,7 +3,7 @@ lib = File.expand_path('../lib', __FILE__)
3
3
 
4
4
  Gem::Specification.new do |gem|
5
5
  gem.name = "fluent-plugin-anomalydetect"
6
- gem.version = "0.0.1"
6
+ gem.version = "0.1.0"
7
7
  gem.authors = ["Muddy Dixon"]
8
8
  gem.email = ["muddydixon@gmail.com"]
9
9
  gem.description = %q{detect anomal sequential input casually}
@@ -3,6 +3,7 @@ module Fluent
3
3
  Fluent::Plugin.register_output('anomalydetect', self)
4
4
 
5
5
  require 'fluent/plugin/change_finder'
6
+ require 'pathname'
6
7
 
7
8
  config_param :outlier_term, :integer, :default => 28
8
9
  config_param :outlier_discount, :float, :default => 0.05
@@ -12,12 +13,14 @@ module Fluent
12
13
  config_param :tick, :integer, :default => 60 * 5
13
14
  config_param :tag, :string, :default => "anomaly"
14
15
  config_param :target, :string, :default => nil
16
+ config_param :store_file, :string, :default => nil
17
+ config_param :threshold, :float, :default => -1.0
15
18
 
16
19
  attr_accessor :outlier
17
20
  attr_accessor :score
18
21
  attr_accessor :record_count
19
22
 
20
- attr_accessor :outliers
23
+ attr_accessor :outlier_buf
21
24
 
22
25
  attr_accessor :records
23
26
 
@@ -41,8 +44,13 @@ module Fluent
41
44
  if @tick < 1
42
45
  raise Fluent::ConfigError, "tick timer should be greater than 1 sec"
43
46
  end
44
-
45
- @outliers = []
47
+ if @store_file
48
+ f = Pathname.new(@store_file)
49
+ if (f.exist? && !f.writable_real?) || (!f.exist? && !f.parent.writable_real?)
50
+ raise Fluent::ConfigError, "#{@store_file} is not writable"
51
+ end
52
+ end
53
+ @outlier_buf = []
46
54
  @outlier = ChangeFinder.new(@outlier_term, @outlier_discount)
47
55
  @score = ChangeFinder.new(@score_term, @score_discount)
48
56
 
@@ -53,6 +61,7 @@ module Fluent
53
61
 
54
62
  def start
55
63
  super
64
+ load_from_file
56
65
  init_records
57
66
  start_watch
58
67
  end
@@ -63,6 +72,53 @@ module Fluent
63
72
  @watcher.terminate
64
73
  @watcher.join
65
74
  end
75
+ store_to_file
76
+ end
77
+
78
+ def load_from_file
79
+ return unless @store_file
80
+ f = Pathname.new(@store_file)
81
+ return unless f.exist?
82
+
83
+ begin
84
+ f.open('rb') do |f|
85
+ stored = Marshal.load(f)
86
+ if (( stored[:outlier_term] == @outlier_term ) &&
87
+ ( stored[:outlier_discount] == @outlier_discount ) &&
88
+ ( stored[:score_term] == @score_term ) &&
89
+ ( stored[:score_discount] == @score_discount ) &&
90
+ ( stored[:smooth_term] == @smooth_term ))
91
+ then
92
+ @outlier = stored[:outlier]
93
+ @outlier_buf = stored[:outlier_buf]
94
+ @score = stored[:score]
95
+ else
96
+ $log.warn "configuration param was changed. ignore stored data"
97
+ end
98
+ end
99
+ rescue => e
100
+ $log.warn "Can't load store_file #{e}"
101
+ end
102
+ end
103
+
104
+ def store_to_file
105
+ return unless @store_file
106
+ begin
107
+ Pathname.new(@store_file).open('wb') do |f|
108
+ Marshal.dump({
109
+ :outlier => @outlier,
110
+ :outlier_buf => @outlier_buf,
111
+ :score => @score,
112
+ :outlier_term => @outlier_term,
113
+ :outlier_discount => @outlier_discount,
114
+ :score_term => @score_term,
115
+ :score_discount => @score_discount,
116
+ :smooth_term => @smooth_term,
117
+ }, f)
118
+ end
119
+ rescue => e
120
+ $log.warn "Can't write store_file #{e}"
121
+ end
66
122
  end
67
123
 
68
124
  def start_watch
@@ -88,7 +144,9 @@ module Fluent
88
144
 
89
145
  def flush_emit(step)
90
146
  output = flush
91
- Fluent::Engine.emit(@tag, Fluent::Engine.now, output)
147
+ if output
148
+ Fluent::Engine.emit(@tag, Fluent::Engine.now, output)
149
+ end
92
150
  end
93
151
 
94
152
  def flush
@@ -101,12 +159,15 @@ module Fluent
101
159
  end
102
160
 
103
161
  outlier = @outlier.next(val)
104
- @outliers.push outlier
105
- @outliers.shift if @outliers.size > @smooth_term
106
- score = @score.next(@outliers.inject(0) { |sum, v| sum += v } / @outliers.size)
107
-
108
- {"outlier" => outlier, "score" => score, "target" => val}
109
-
162
+ @outlier_buf.push outlier
163
+ @outlier_buf.shift if @outlier_buf.size > @smooth_term
164
+ score = @score.next(@outlier_buf.inject(0) { |sum, v| sum += v } / @outlier_buf.size)
165
+
166
+ if @threshold < 0 or (@threshold >= 0 and score > @threshold)
167
+ {"outlier" => outlier, "score" => score, "target" => val}
168
+ else
169
+ nil
170
+ end
110
171
  end
111
172
 
112
173
  def tick_time(time)
@@ -87,7 +87,7 @@ class AnomalyDetectOutputTest < Test::Unit::TestCase
87
87
 
88
88
  def test_array_init
89
89
  d = create_driver
90
- assert_equal [], d.instance.outliers
90
+ assert_equal [], d.instance.outlier_buf
91
91
  assert_nil d.instance.records # @records is initialized at start, not configure
92
92
  end
93
93
 
@@ -137,4 +137,71 @@ class AnomalyDetectOutputTest < Test::Unit::TestCase
137
137
  end
138
138
  end
139
139
  end
140
+
141
+ def test_store_file
142
+ dir = "test/tmp"
143
+ Dir.mkdir dir unless Dir.exist? dir
144
+ file = "#{dir}/test.dat"
145
+ File.unlink file if File.exist? file
146
+
147
+ d = create_driver %[
148
+ store_file #{file}
149
+ ]
150
+
151
+ d.run do
152
+ assert_equal [], d.instance.outlier_buf
153
+ d.emit({'x' => 1})
154
+ d.emit({'x' => 1})
155
+ d.emit({'x' => 1})
156
+ d.instance.flush
157
+ d.emit({'x' => 1})
158
+ d.emit({'x' => 1})
159
+ d.emit({'x' => 1})
160
+ d.instance.flush
161
+ end
162
+ assert File.exist? file
163
+
164
+ d2 = create_driver %[
165
+ store_file #{file}
166
+ ]
167
+ d2.run do
168
+ assert_equal 2, d2.instance.outlier_buf.size
169
+ end
170
+
171
+ File.unlink file
172
+ end
173
+
174
+ def test_set_large_threshold
175
+ require 'csv'
176
+ reader = CSV.open("test/stock.2432.csv", "r")
177
+ header = reader.take(1)[0]
178
+ d = create_driver %[
179
+ threshold 1000
180
+ ]
181
+ d.run do
182
+ reader.each_with_index do |row, idx|
183
+ break if idx > 5
184
+ d.emit({'y' => row[4].to_i})
185
+ r = d.instance.flush
186
+ assert_equal nil, r
187
+ end
188
+ end
189
+ end
190
+
191
+ def test_set_small_threshold
192
+ require 'csv'
193
+ reader = CSV.open("test/stock.2432.csv", "r")
194
+ header = reader.take(1)[0]
195
+ d = create_driver %[
196
+ threshold 1
197
+ ]
198
+ d.run do
199
+ reader.each_with_index do |row, idx|
200
+ break if idx > 5
201
+ d.emit({'y' => row[4].to_i})
202
+ r = d.instance.flush
203
+ assert_not_equal nil, r
204
+ end
205
+ end
206
+ end
140
207
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-anomalydetect
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-01-13 00:00:00.000000000 Z
12
+ date: 2013-01-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd