anomaly 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Anomaly
2
2
 
3
- Anomaly detection using a normal distribution.
3
+ Easy-to-use anomaly detection
4
4
 
5
5
  ## Installation
6
6
 
@@ -16,6 +16,14 @@ And then execute:
16
16
  bundle install
17
17
  ```
18
18
 
19
+ For max performance (about 3x faster), also install the NArray gem:
20
+
21
+ ```ruby
22
+ gem "narray"
23
+ ```
24
+
25
+ Anomaly will automatically detect it and use it.
26
+
19
27
  ## How to Use
20
28
 
21
29
  Train the detector with **only non-anomalies**. Each row is a sample.
@@ -54,11 +62,18 @@ Here's sample code to help you find the best ε for your application.
54
62
  # TODO
55
63
  ```
56
64
 
57
- You can easily persist the detector in a file or database.
65
+ You can easily persist the detector to a file or database - it's very small.
58
66
 
59
67
  ```ruby
60
- # TODO Finish example
61
- Marshal.dump(ad)
68
+ serialized_ad = Marshal.dump(ad)
69
+
70
+ # Save to a file
71
+ File.open("anomaly_detector.dump", "w") {|f| f.write(serialized_ad) }
72
+
73
+ # ...
74
+
75
+ # Read it later
76
+ ad2 = Marshal.load(File.open("anomaly_detector.dump", "r").read)
62
77
  ```
63
78
 
64
79
  ## Contributing
@@ -4,8 +4,8 @@ require File.expand_path('../lib/anomaly/version', __FILE__)
4
4
  Gem::Specification.new do |gem|
5
5
  gem.authors = ["Andrew Kane"]
6
6
  gem.email = ["andrew@getformidable.com"]
7
- gem.description = %q{Anomaly detection using a normal distribution.}
8
- gem.summary = %q{Anomaly detection using a normal distribution.}
7
+ gem.description = %q{Easy-to-use anomaly detection}
8
+ gem.summary = %q{Easy-to-use anomaly detection}
9
9
  gem.homepage = "https://github.com/ankane/anomaly"
10
10
 
11
11
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
@@ -1,25 +1,38 @@
1
1
  module Anomaly
2
2
  class Detector
3
3
 
4
- def initialize(data)
5
- # Use NMatrix if possible
6
- if defined?(NMatrix) and (!defined?(Matrix) or !data.is_a?(Matrix))
7
- d = data.is_a?(NMatrix) ? data : NMatrix.to_na(data)
4
+ def initialize(data = nil)
5
+ @trained = false
6
+ train(data) if data
7
+ end
8
8
 
9
+ def train(data)
10
+ if defined?(NMatrix)
11
+ d = NMatrix.to_na(data)
9
12
  # Convert these to an array for Marshal.dump
10
13
  @mean = d.mean(1).to_a
11
14
  @std = d.stddev(1).to_a
12
15
  else
13
- d = data.is_a?(Matrix) ? data : Matrix.rows(data)
14
- cols = d.column_size.times.map{|i| d.column(i)}
16
+ # Default to Array, since built-in Matrix does not give us a big performance advantage.
17
+ d = data.to_a
18
+ cols = d.first.size.times.map{|i| d.map{|r| r[i]}}
15
19
  @mean = cols.map{|c| mean(c)}
16
20
  @std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
17
21
  end
18
22
 
19
- raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
23
+ @std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
24
+
25
+ # raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
26
+
27
+ @trained = true
28
+ end
29
+
30
+ def trained?
31
+ @trained
20
32
  end
21
33
 
22
34
  def probability(x)
35
+ raise "Train me first" unless trained?
23
36
  raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
24
37
  x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
25
38
  end
@@ -32,8 +45,10 @@ module Anomaly
32
45
 
33
46
  SQRT2PI = Math.sqrt(2*Math::PI)
34
47
 
48
+ # Return 1 (exclude feature) if std ~ 0
35
49
  def normal_pdf(x, mean = 0, std = 1)
36
- 1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
50
+ p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
51
+ p.nan? ? 1 : p
37
52
  end
38
53
 
39
54
  # Not used for NArray
@@ -1,3 +1,3 @@
1
1
  module Anomaly
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -14,18 +14,22 @@ describe Anomaly::Detector do
14
14
  end
15
15
 
16
16
  context "when standard deviation is 0" do
17
- let(:data) { [[1],[1]] }
17
+ let(:data) { [[0],[0]] }
18
18
 
19
- it "raises error" do
20
- expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
19
+ it "returns infinity for mean" do
20
+ ad.probability([0]).should == 1
21
+ end
22
+
23
+ it "returns 0 for not mean" do
24
+ ad.probability([1]).should == 0
21
25
  end
22
26
  end
23
27
 
24
28
  context "when one training example" do
25
- let(:data) { [[1]] }
29
+ let(:data) { [[0]] }
26
30
 
27
- it "raises error" do
28
- expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
31
+ it "returns infinity" do
32
+ ad.probability([0]).should == 1
29
33
  end
30
34
  end
31
35
 
@@ -34,7 +38,9 @@ describe Anomaly::Detector do
34
38
  let(:sample) { [rand, rand] }
35
39
 
36
40
  it "returns the same probability as an NMatrix" do
37
- ad.probability(sample).should == Anomaly::Detector.new(Matrix.rows(data)).probability(sample)
41
+ prob = ad.probability(sample)
42
+ Object.send(:remove_const, :NMatrix)
43
+ prob.should == Anomaly::Detector.new(data).probability(sample)
38
44
  end
39
45
  end
40
46
  end
@@ -2,7 +2,7 @@ require "rubygems"
2
2
  require "bundler/setup"
3
3
 
4
4
  require "anomaly"
5
- require "matrix"
5
+ require "narray"
6
6
 
7
7
  RSpec.configure do |config|
8
8
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: anomaly
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-12-11 00:00:00.000000000Z
12
+ date: 2011-12-12 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &2156676440 !ruby/object:Gem::Requirement
16
+ requirement: &2160640240 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *2156676440
24
+ version_requirements: *2160640240
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &2156675180 !ruby/object:Gem::Requirement
27
+ requirement: &2160639580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,8 +32,8 @@ dependencies:
32
32
  version: 2.0.0
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *2156675180
36
- description: Anomaly detection using a normal distribution.
35
+ version_requirements: *2160639580
36
+ description: Easy-to-use anomaly detection
37
37
  email:
38
38
  - andrew@getformidable.com
39
39
  executables: []
@@ -75,7 +75,7 @@ rubyforge_project:
75
75
  rubygems_version: 1.8.10
76
76
  signing_key:
77
77
  specification_version: 3
78
- summary: Anomaly detection using a normal distribution.
78
+ summary: Easy-to-use anomaly detection
79
79
  test_files:
80
80
  - spec/anomaly/detector_spec.rb
81
81
  - spec/spec_helper.rb