anomaly 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +19 -4
- data/anomaly.gemspec +2 -2
- data/lib/anomaly/detector.rb +23 -8
- data/lib/anomaly/version.rb +1 -1
- data/spec/anomaly/detector_spec.rb +13 -7
- data/spec/spec_helper.rb +1 -1
- metadata +8 -8
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Anomaly
|
2
2
|
|
3
|
-
|
3
|
+
Easy-to-use anomaly detection
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -16,6 +16,14 @@ And then execute:
|
|
16
16
|
bundle install
|
17
17
|
```
|
18
18
|
|
19
|
+
For max performance (about 3x faster), also install the NArray gem:
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
gem "narray"
|
23
|
+
```
|
24
|
+
|
25
|
+
Anomaly will automatically detect it and use it.
|
26
|
+
|
19
27
|
## How to Use
|
20
28
|
|
21
29
|
Train the detector with **only non-anomalies**. Each row is a sample.
|
@@ -54,11 +62,18 @@ Here's sample code to help you find the best ε for your application.
|
|
54
62
|
# TODO
|
55
63
|
```
|
56
64
|
|
57
|
-
You can easily persist the detector
|
65
|
+
You can easily persist the detector to a file or database - it's very tiny.
|
58
66
|
|
59
67
|
```ruby
|
60
|
-
|
61
|
-
|
68
|
+
serialized_ad = Marshal.dump(ad)
|
69
|
+
|
70
|
+
# Save to a file
|
71
|
+
File.open("anomaly_detector.dump", "w") {|f| f.write(serialized_ad) }
|
72
|
+
|
73
|
+
# ...
|
74
|
+
|
75
|
+
# Read it later
|
76
|
+
ad2 = Marshal.load(File.open("anomaly_detector.dump", "r").read)
|
62
77
|
```
|
63
78
|
|
64
79
|
## Contributing
|
data/anomaly.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path('../lib/anomaly/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Andrew Kane"]
|
6
6
|
gem.email = ["andrew@getformidable.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{
|
7
|
+
gem.description = %q{Easy-to-use anomaly detection}
|
8
|
+
gem.summary = %q{Easy-to-use anomaly detection}
|
9
9
|
gem.homepage = "https://github.com/ankane/anomaly"
|
10
10
|
|
11
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
data/lib/anomaly/detector.rb
CHANGED
@@ -1,25 +1,38 @@
|
|
1
1
|
module Anomaly
|
2
2
|
class Detector
|
3
3
|
|
4
|
-
def initialize(data)
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
def initialize(data = nil)
|
5
|
+
@trained = false
|
6
|
+
train(data) if data
|
7
|
+
end
|
8
8
|
|
9
|
+
def train(data)
|
10
|
+
if defined?(NMatrix)
|
11
|
+
d = NMatrix.to_na(data)
|
9
12
|
# Convert these to an array for Marshal.dump
|
10
13
|
@mean = d.mean(1).to_a
|
11
14
|
@std = d.stddev(1).to_a
|
12
15
|
else
|
13
|
-
|
14
|
-
|
16
|
+
# Default to Array, since built-in Matrix does not give us a big performance advantage.
|
17
|
+
d = data.to_a
|
18
|
+
cols = d.first.size.times.map{|i| d.map{|r| r[i]}}
|
15
19
|
@mean = cols.map{|c| mean(c)}
|
16
20
|
@std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
|
17
21
|
end
|
18
22
|
|
19
|
-
|
23
|
+
@std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
|
24
|
+
|
25
|
+
# raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
|
26
|
+
|
27
|
+
@trained = true
|
28
|
+
end
|
29
|
+
|
30
|
+
def trained?
|
31
|
+
@trained
|
20
32
|
end
|
21
33
|
|
22
34
|
def probability(x)
|
35
|
+
raise "Train me first" unless trained?
|
23
36
|
raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
|
24
37
|
x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
|
25
38
|
end
|
@@ -32,8 +45,10 @@ module Anomaly
|
|
32
45
|
|
33
46
|
SQRT2PI = Math.sqrt(2*Math::PI)
|
34
47
|
|
48
|
+
# Return 1 (exclude feature) if std ~ 0
|
35
49
|
def normal_pdf(x, mean = 0, std = 1)
|
36
|
-
1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
|
50
|
+
p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
|
51
|
+
p.nan? ? 1 : p
|
37
52
|
end
|
38
53
|
|
39
54
|
# Not used for NArray
|
data/lib/anomaly/version.rb
CHANGED
data/spec/anomaly/detector_spec.rb
CHANGED
@@ -14,18 +14,22 @@ describe Anomaly::Detector do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
context "when standard deviation is 0" do
|
17
|
-
let(:data) { [[
|
17
|
+
let(:data) { [[0],[0]] }
|
18
18
|
|
19
|
-
it "
|
20
|
-
|
19
|
+
it "returns infinity for mean" do
|
20
|
+
ad.probability([0]).should == 1
|
21
|
+
end
|
22
|
+
|
23
|
+
it "returns 0 for not mean" do
|
24
|
+
ad.probability([1]).should == 0
|
21
25
|
end
|
22
26
|
end
|
23
27
|
|
24
28
|
context "when one training example" do
|
25
|
-
let(:data) { [[
|
29
|
+
let(:data) { [[0]] }
|
26
30
|
|
27
|
-
it "
|
28
|
-
|
31
|
+
it "returns infinity" do
|
32
|
+
ad.probability([0]).should == 1
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,9 @@ describe Anomaly::Detector do
|
|
34
38
|
let(:sample) { [rand, rand] }
|
35
39
|
|
36
40
|
it "returns the same probability as an NMatrix" do
|
37
|
-
|
41
|
+
prob = ad.probability(sample)
|
42
|
+
Object.send(:remove_const, :NMatrix)
|
43
|
+
prob.should == Anomaly::Detector.new(data).probability(sample)
|
38
44
|
end
|
39
45
|
end
|
40
46
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-12-
|
12
|
+
date: 2011-12-12 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160640240 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160640240
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160639580 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,8 +32,8 @@ dependencies:
|
|
32
32
|
version: 2.0.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
-
description:
|
35
|
+
version_requirements: *2160639580
|
36
|
+
description: Easy-to-use anomaly detection
|
37
37
|
email:
|
38
38
|
- andrew@getformidable.com
|
39
39
|
executables: []
|
@@ -75,7 +75,7 @@ rubyforge_project:
|
|
75
75
|
rubygems_version: 1.8.10
|
76
76
|
signing_key:
|
77
77
|
specification_version: 3
|
78
|
-
summary:
|
78
|
+
summary: Easy-to-use anomaly detection
|
79
79
|
test_files:
|
80
80
|
- spec/anomaly/detector_spec.rb
|
81
81
|
- spec/spec_helper.rb
|