anomaly 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Anomaly
2
2
 
3
- Anomaly detection using a normal distribution.
3
+ Easy-to-use anomaly detection
4
4
 
5
5
  ## Installation
6
6
 
@@ -16,6 +16,14 @@ And then execute:
16
16
  bundle install
17
17
  ```
18
18
 
19
+ For max performance (about 3x faster), also install the NArray gem:
20
+
21
+ ```ruby
22
+ gem "narray"
23
+ ```
24
+
25
+ Anomaly will automatically detect it and use it.
26
+
19
27
  ## How to Use
20
28
 
21
29
  Train the detector with **only non-anomalies**. Each row is a sample.
@@ -54,11 +62,18 @@ Here's sample code to help you find the best ε for your application.
54
62
  # TODO
55
63
  ```
56
64
 
57
- You can easily persist the detector in a file or database.
65
+ You can easily persist the detector to a file or database - its serialized form is very small.
58
66
 
59
67
  ```ruby
60
- # TODO Finish example
61
- Marshal.dump(ad)
68
+ serialized_ad = Marshal.dump(ad)
69
+
70
+ # Save to a file
71
+ File.open("anomaly_detector.dump", "w") {|f| f.write(serialized_ad) }
72
+
73
+ # ...
74
+
75
+ # Read it later
76
+ ad2 = Marshal.load(File.open("anomaly_detector.dump", "r").read)
62
77
  ```
63
78
 
64
79
  ## Contributing
@@ -4,8 +4,8 @@ require File.expand_path('../lib/anomaly/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Andrew Kane"]
6
6
  gem.email = ["andrew@getformidable.com"]
7
- gem.description = %q{Anomaly detection using a normal distribution.}
8
- gem.summary = %q{Anomaly detection using a normal distribution.}
7
+ gem.description = %q{Easy-to-use anomaly detection}
8
+ gem.summary = %q{Easy-to-use anomaly detection}
9
9
  gem.homepage = "https://github.com/ankane/anomaly"
10
10
 
11
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
@@ -1,25 +1,38 @@
1
1
  module Anomaly
2
2
  class Detector
3
3
 
4
- def initialize(data)
5
- # Use NMatrix if possible
6
- if defined?(NMatrix) and (!defined?(Matrix) or !data.is_a?(Matrix))
7
- d = data.is_a?(NMatrix) ? data : NMatrix.to_na(data)
4
+ def initialize(data = nil)
5
+ @trained = false
6
+ train(data) if data
7
+ end
8
8
 
9
+ def train(data)
10
+ if defined?(NMatrix)
11
+ d = NMatrix.to_na(data)
9
12
  # Convert these to an array for Marshal.dump
10
13
  @mean = d.mean(1).to_a
11
14
  @std = d.stddev(1).to_a
12
15
  else
13
- d = data.is_a?(Matrix) ? data : Matrix.rows(data)
14
- cols = d.column_size.times.map{|i| d.column(i)}
16
+ # Default to Array, since built-in Matrix does not give us a big performance advantage.
17
+ d = data.to_a
18
+ cols = d.first.size.times.map{|i| d.map{|r| r[i]}}
15
19
  @mean = cols.map{|c| mean(c)}
16
20
  @std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
17
21
  end
18
22
 
19
- raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
23
+ @std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
24
+
25
+ # raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
26
+
27
+ @trained = true
28
+ end
29
+
30
+ def trained?
31
+ @trained
20
32
  end
21
33
 
22
34
  def probability(x)
35
+ raise "Train me first" unless trained?
23
36
  raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
24
37
  x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
25
38
  end
@@ -32,8 +45,10 @@ module Anomaly
32
45
 
33
46
  SQRT2PI = Math.sqrt(2*Math::PI)
34
47
 
48
+ # Return 1 (exclude feature) when the computed density is NaN, which can happen if std ~ 0
35
49
  def normal_pdf(x, mean = 0, std = 1)
36
- 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
50
+ p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
51
+ p.nan? ? 1 : p
37
52
  end
38
53
 
39
54
  # Not used for NArray
@@ -1,3 +1,3 @@
1
1
  module Anomaly
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -14,18 +14,22 @@ describe Anomaly::Detector do
14
14
  end
15
15
 
16
16
  context "when standard deviation is 0" do
17
- let(:data) { [[1],[1]] }
17
+ let(:data) { [[0],[0]] }
18
18
 
19
- it "raises error" do
20
- expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
19
+ it "returns infinity for mean" do
20
+ ad.probability([0]).should == 1
21
+ end
22
+
23
+ it "returns 0 for not mean" do
24
+ ad.probability([1]).should == 0
21
25
  end
22
26
  end
23
27
 
24
28
  context "when one training example" do
25
- let(:data) { [[1]] }
29
+ let(:data) { [[0]] }
26
30
 
27
- it "raises error" do
28
- expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
31
+ it "returns infinity" do
32
+ ad.probability([0]).should == 1
29
33
  end
30
34
  end
31
35
 
@@ -34,7 +38,9 @@ describe Anomaly::Detector do
34
38
  let(:sample) { [rand, rand] }
35
39
 
36
40
  it "returns the same probability as an NMatrix" do
37
- ad.probability(sample).should == Anomaly::Detector.new(Matrix.rows(data)).probability(sample)
41
+ prob = ad.probability(sample)
42
+ Object.send(:remove_const, :NMatrix)
43
+ prob.should == Anomaly::Detector.new(data).probability(sample)
38
44
  end
39
45
  end
40
46
  end
@@ -2,7 +2,7 @@ require "rubygems"
2
2
  require "bundler/setup"
3
3
 
4
4
  require "anomaly"
5
- require "matrix"
5
+ require "narray"
6
6
 
7
7
  RSpec.configure do |config|
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-11 00:00:00.000000000Z
12
+ date: 2011-12-12 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2156676440 !ruby/object:Gem::Requirement
16
+ requirement: &2160640240 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156676440
24
+ version_requirements: *2160640240
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2156675180 !ruby/object:Gem::Requirement
27
+ requirement: &2160639580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,8 +32,8 @@ dependencies:
32
32
  version: 2.0.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156675180
36
- description: Anomaly detection using a normal distribution.
35
+ version_requirements: *2160639580
36
+ description: Easy-to-use anomaly detection
37
37
  email:
38
38
  - andrew@getformidable.com
39
39
  executables: []
@@ -75,7 +75,7 @@ rubyforge_project:
75
75
  rubygems_version: 1.8.10
76
76
  signing_key:
77
77
  specification_version: 3
78
- summary: Anomaly detection using a normal distribution.
78
+ summary: Easy-to-use anomaly detection
79
79
  test_files:
80
80
  - spec/anomaly/detector_spec.rb
81
81
  - spec/spec_helper.rb