anomaly 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in anomaly.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2011 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,74 @@
1
+ # Anomaly
2
+
3
+ Anomaly detection using a normal distribution.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem "anomaly"
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```sh
16
+ bundle install
17
+ ```
18
+
19
+ ## How to Use
20
+
21
+ Train the detector with **only non-anomalies**. Each row is a sample.
22
+
23
+ ```ruby
24
+ train_data = [
25
+ [0.1, 100, 1.4],
26
+ [0.2, 101, 2.1],
27
+ [0.5, 102, 1.6]
28
+ ]
29
+ ad = Anomaly::Detector.new(train_data)
30
+ ```
31
+
32
+ That's it! Let's test for anomalies.
33
+
34
+ ```ruby
35
+ test_sample = [1.0, 100, 1.4]
36
+ ad.probability(test_sample)
37
+ # => 0.0007328491480297603
38
+ ```
39
+
40
+ **Super-important:** You must select a threshold for anomalies (which we denote with ε - "epsilon").
41
+
42
+ Probabilities less than ε are considered anomalies. If ε is higher, more things are considered anomalies.
43
+
44
+ ``` ruby
45
+ ad.anomaly?(test_sample, 1e-10)
46
+ # => false
47
+ ad.anomaly?(test_sample, 0.5)
48
+ # => true
49
+ ```
50
+
51
+ Here's sample code to help you find the best ε for your application.
52
+
53
+ ```ruby
54
+ # TODO
55
+ ```
56
+
57
+ You can easily persist the detector in a file or database.
58
+
59
+ ```ruby
60
+ # TODO Finish example
61
+ Marshal.dump(ad)
62
+ ```
63
+
64
+ ## Contributing
65
+
66
+ 1. Fork it
67
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
68
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
69
+ 4. Push to the branch (`git push origin my-new-feature`)
70
+ 5. Create new Pull Request
71
+
72
+ ## Thanks
73
+
74
+ A special thanks to [Andrew Ng](http://www.ml-class.org).
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+ RSpec::Core::RakeTask.new("spec")
@@ -0,0 +1,20 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for anomaly. The version is read from
# lib/anomaly/version.rb so it is defined in exactly one place.
require File.expand_path('../lib/anomaly/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = "anomaly"
  gem.version       = Anomaly::VERSION
  gem.authors       = ["Andrew Kane"]
  gem.email         = ["andrew@getformidable.com"]
  gem.description   = %q{Anomaly detection using a normal distribution.}
  gem.summary       = %q{Anomaly detection using a normal distribution.}
  gem.homepage      = "https://github.com/ankane/anomaly"
  # The repository ships an MIT LICENSE file; declare it so the built gem's
  # metadata no longer reports an empty license list.
  gem.licenses      = ["MIT"]

  # File lists are taken from git, so the gem must be built from a checkout.
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec", ">= 2.0.0"
end
@@ -0,0 +1,2 @@
1
+ require "anomaly/version"
2
+ require "anomaly/detector"
@@ -0,0 +1,50 @@
1
module Anomaly
  # Anomaly detector that models each feature (column) of the training data
  # as an independent normal distribution.
  #
  # Only the per-column means and standard deviations are stored, as plain
  # arrays, so a detector can be serialized with Marshal.dump.
  class Detector

    SQRT2PI = Math.sqrt(2*Math::PI)

    # Trains the detector.
    #
    # data - training samples, one row per sample. May be an Array of rows,
    #        a Matrix, or (when NArray's NMatrix is loaded) an NMatrix.
    #
    # Raises RuntimeError if any column's standard deviation is zero or NaN
    # (for example, when only one training sample is given).
    def initialize(data)
      # Use NMatrix if possible
      if defined?(NMatrix) && !(defined?(Matrix) && data.is_a?(Matrix))
        d = data.is_a?(NMatrix) ? data : NMatrix.to_na(data)

        # Convert these to an array for Marshal.dump
        @mean = d.mean(1).to_a
        @std = d.stddev(1).to_a
      else
        # Array#to_a and Matrix#to_a both yield the rows, so this branch
        # works without the Matrix class being loaded. Previously it
        # referenced Matrix unconditionally and raised NameError unless the
        # caller had required "matrix" themselves.
        cols = data.to_a.map(&:to_a).transpose
        @mean = cols.map{|c| mean(c)}
        @std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
      end

      raise "Standard deviation cannot be zero" if @std.any?{|s| s == 0 || s.nan?}
    end

    # Returns the product, over all features, of the normal density of
    # x[i] under the mean and standard deviation learned for column i.
    #
    # x - sample with one value per trained column.
    #
    # Raises ArgumentError if x does not have one element per column.
    def probability(x)
      raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
      x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
    end

    # Returns true when the probability of x is strictly below epsilon.
    def anomaly?(x, epsilon)
      probability(x) < epsilon
    end

    protected

    # Density of the normal distribution with the given mean and standard
    # deviation, evaluated at x.
    def normal_pdf(x, mean = 0, std = 1)
      1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
    end

    # The helpers below are only used by the non-NMatrix branch.

    # Arithmetic mean of the values in x, as a Float.
    def mean(x)
      x.inject(0.0){|a, i| a + i}/x.size
    end

    # Sample standard deviation (divides by n - 1) of x around mean.
    # Yields NaN for a single value, which initialize rejects.
    def std(x, mean)
      Math.sqrt(x.inject(0.0){|a, i| a + (i - mean) ** 2}/(x.size - 1))
    end

  end
end
@@ -0,0 +1,3 @@
1
module Anomaly
  # Release version of the gem; the gemspec reads this constant.
  # Frozen so the shared string cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,40 @@
1
require "spec_helper"

describe Anomaly::Detector do
  let(:data) { [[-1,-2],[0,0],[1,2]] }
  let(:ad) { Anomaly::Detector.new(data) }

  # mean = [0, 0], std = [1, 2]
  it "computes the right probability" do
    # Compare with a tolerance: exact float equality is brittle across
    # platforms and numeric backends.
    ad.probability([0,0]).should be_within(1e-12).of(0.079577471545947667)
  end

  it "marshalizes" do
    expect{ Marshal.dump(ad) }.to_not raise_error
  end

  context "when standard deviation is 0" do
    let(:data) { [[1],[1]] }

    it "raises error" do
      expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
    end
  end

  context "when one training example" do
    let(:data) { [[1]] }

    it "raises error" do
      expect{ ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
    end
  end

  context "when data is a matrix" do
    let(:data) { [[-1,-2],[0,0],[1,2]] }
    # Fixed sample so a failure is reproducible (previously [rand, rand]).
    let(:sample) { [0.25, 0.75] }

    it "returns the same probability as an NMatrix" do
      expected = Anomaly::Detector.new(Matrix.rows(data)).probability(sample)
      ad.probability(sample).should be_within(1e-12).of(expected)
    end
  end
end
@@ -0,0 +1,8 @@
1
+ require "rubygems"
2
+ require "bundler/setup"
3
+
4
+ require "anomaly"
5
+ require "matrix"
6
+
7
+ RSpec.configure do |config|
8
+ end
metadata ADDED
@@ -0,0 +1,81 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: anomaly
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Kane
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-12-11 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: &2156676440 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: *2156676440
25
+ - !ruby/object:Gem::Dependency
26
+ name: rspec
27
+ requirement: &2156675180 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: *2156675180
36
+ description: Anomaly detection using a normal distribution.
37
+ email:
38
+ - andrew@getformidable.com
39
+ executables: []
40
+ extensions: []
41
+ extra_rdoc_files: []
42
+ files:
43
+ - .gitignore
44
+ - .rspec
45
+ - Gemfile
46
+ - LICENSE
47
+ - README.md
48
+ - Rakefile
49
+ - anomaly.gemspec
50
+ - lib/anomaly.rb
51
+ - lib/anomaly/detector.rb
52
+ - lib/anomaly/version.rb
53
+ - spec/anomaly/detector_spec.rb
54
+ - spec/spec_helper.rb
55
+ homepage: https://github.com/ankane/anomaly
56
+ licenses: []
57
+ post_install_message:
58
+ rdoc_options: []
59
+ require_paths:
60
+ - lib
61
+ required_ruby_version: !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ! '>='
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ! '>='
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubyforge_project:
75
+ rubygems_version: 1.8.10
76
+ signing_key:
77
+ specification_version: 3
78
+ summary: Anomaly detection using a normal distribution.
79
+ test_files:
80
+ - spec/anomaly/detector_spec.rb
81
+ - spec/spec_helper.rb