anomaly 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -4
- data/anomaly.gemspec +2 -2
- data/lib/anomaly/detector.rb +23 -8
- data/lib/anomaly/version.rb +1 -1
- data/spec/anomaly/detector_spec.rb +13 -7
- data/spec/spec_helper.rb +1 -1
- metadata +8 -8
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# Anomaly
|
2
2
|
|
3
|
-
|
3
|
+
Easy-to-use anomaly detection
|
4
4
|
|
5
5
|
## Installation
|
6
6
|
|
@@ -16,6 +16,14 @@ And then execute:
|
|
16
16
|
bundle install
|
17
17
|
```
|
18
18
|
|
19
|
+
For max performance (about 3x faster), also install the NArray gem:
|
20
|
+
|
21
|
+
```ruby
|
22
|
+
gem "narray"
|
23
|
+
```
|
24
|
+
|
25
|
+
Anomaly will automatically detect it and use it.
|
26
|
+
|
19
27
|
## How to Use
|
20
28
|
|
21
29
|
Train the detector with **only non-anomalies**. Each row is a sample.
|
@@ -54,11 +62,18 @@ Here's sample code to help you find the best ε for your application.
|
|
54
62
|
# TODO
|
55
63
|
```
|
56
64
|
|
57
|
-
You can easily persist the detector
|
65
|
+
You can easily persist the detector to a file or database - it's very tiny.
|
58
66
|
|
59
67
|
```ruby
|
60
|
-
|
61
|
-
|
68
|
+
serialized_ad = Marshal.dump(ad)
|
69
|
+
|
70
|
+
# Save to a file
|
71
|
+
File.open("anomaly_detector.dump", "w") {|f| f.write(serialized_ad) }
|
72
|
+
|
73
|
+
# ...
|
74
|
+
|
75
|
+
# Read it later
|
76
|
+
ad2 = Marshal.load(File.open("anomaly_detector.dump", "r").read)
|
62
77
|
```
|
63
78
|
|
64
79
|
## Contributing
|
data/anomaly.gemspec
CHANGED
@@ -4,8 +4,8 @@ require File.expand_path('../lib/anomaly/version', __FILE__)
|
|
4
4
|
Gem::Specification.new do |gem|
|
5
5
|
gem.authors = ["Andrew Kane"]
|
6
6
|
gem.email = ["andrew@getformidable.com"]
|
7
|
-
gem.description = %q{
|
8
|
-
gem.summary = %q{
|
7
|
+
gem.description = %q{Easy-to-use anomaly detection}
|
8
|
+
gem.summary = %q{Easy-to-use anomaly detection}
|
9
9
|
gem.homepage = "https://github.com/ankane/anomaly"
|
10
10
|
|
11
11
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
data/lib/anomaly/detector.rb
CHANGED
@@ -1,25 +1,38 @@
|
|
1
1
|
module Anomaly
|
2
2
|
class Detector
|
3
3
|
|
4
|
-
def initialize(data)
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
def initialize(data = nil)
|
5
|
+
@trained = false
|
6
|
+
train(data) if data
|
7
|
+
end
|
8
8
|
|
9
|
+
def train(data)
|
10
|
+
if defined?(NMatrix)
|
11
|
+
d = NMatrix.to_na(data)
|
9
12
|
# Convert these to an array for Marshal.dump
|
10
13
|
@mean = d.mean(1).to_a
|
11
14
|
@std = d.stddev(1).to_a
|
12
15
|
else
|
13
|
-
|
14
|
-
|
16
|
+
# Default to Array, since built-in Matrix does not give us a big performance advantage.
|
17
|
+
d = data.to_a
|
18
|
+
cols = d.first.size.times.map{|i| d.map{|r| r[i]}}
|
15
19
|
@mean = cols.map{|c| mean(c)}
|
16
20
|
@std = cols.each_with_index.map{|c,i| std(c, @mean[i])}
|
17
21
|
end
|
18
22
|
|
19
|
-
|
23
|
+
@std.map!{|std| (std == 0 or std.nan?) ? Float::MIN : std}
|
24
|
+
|
25
|
+
# raise "Standard deviation cannot be zero" if @std.find_index{|i| i == 0 or i.nan?}
|
26
|
+
|
27
|
+
@trained = true
|
28
|
+
end
|
29
|
+
|
30
|
+
def trained?
|
31
|
+
@trained
|
20
32
|
end
|
21
33
|
|
22
34
|
def probability(x)
|
35
|
+
raise "Train me first" unless trained?
|
23
36
|
raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size
|
24
37
|
x.each_with_index.map{|a,i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
|
25
38
|
end
|
@@ -32,8 +45,10 @@ module Anomaly
|
|
32
45
|
|
33
46
|
SQRT2PI = Math.sqrt(2*Math::PI)
|
34
47
|
|
48
|
+
# Return 1 (exclude feature) if std ~ 0
|
35
49
|
def normal_pdf(x, mean = 0, std = 1)
|
36
|
-
1/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
|
50
|
+
p = 1.0/(SQRT2PI*std)*Math.exp(-((x - mean)**2/(2.0*(std**2))))
|
51
|
+
p.nan? ? 1 : p
|
37
52
|
end
|
38
53
|
|
39
54
|
# Not used for NArray
|
data/lib/anomaly/version.rb
CHANGED
data/spec/anomaly/detector_spec.rb
CHANGED
@@ -14,18 +14,22 @@ describe Anomaly::Detector do
|
|
14
14
|
end
|
15
15
|
|
16
16
|
context "when standard deviation is 0" do
|
17
|
-
let(:data) { [[
|
17
|
+
let(:data) { [[0],[0]] }
|
18
18
|
|
19
|
-
it "
|
20
|
-
|
19
|
+
it "returns infinity for mean" do
|
20
|
+
ad.probability([0]).should == 1
|
21
|
+
end
|
22
|
+
|
23
|
+
it "returns 0 for not mean" do
|
24
|
+
ad.probability([1]).should == 0
|
21
25
|
end
|
22
26
|
end
|
23
27
|
|
24
28
|
context "when one training example" do
|
25
|
-
let(:data) { [[
|
29
|
+
let(:data) { [[0]] }
|
26
30
|
|
27
|
-
it "
|
28
|
-
|
31
|
+
it "returns infinity" do
|
32
|
+
ad.probability([0]).should == 1
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -34,7 +38,9 @@ describe Anomaly::Detector do
|
|
34
38
|
let(:sample) { [rand, rand] }
|
35
39
|
|
36
40
|
it "returns the same probability as an NMatrix" do
|
37
|
-
|
41
|
+
prob = ad.probability(sample)
|
42
|
+
Object.send(:remove_const, :NMatrix)
|
43
|
+
prob.should == Anomaly::Detector.new(data).probability(sample)
|
38
44
|
end
|
39
45
|
end
|
40
46
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: anomaly
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-12-
|
12
|
+
date: 2011-12-12 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &2160640240 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2160640240
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: rspec
|
27
|
-
requirement: &
|
27
|
+
requirement: &2160639580 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ! '>='
|
@@ -32,8 +32,8 @@ dependencies:
|
|
32
32
|
version: 2.0.0
|
33
33
|
type: :development
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
-
description:
|
35
|
+
version_requirements: *2160639580
|
36
|
+
description: Easy-to-use anomaly detection
|
37
37
|
email:
|
38
38
|
- andrew@getformidable.com
|
39
39
|
executables: []
|
@@ -75,7 +75,7 @@ rubyforge_project:
|
|
75
75
|
rubygems_version: 1.8.10
|
76
76
|
signing_key:
|
77
77
|
specification_version: 3
|
78
|
-
summary:
|
78
|
+
summary: Easy-to-use anomaly detection
|
79
79
|
test_files:
|
80
80
|
- spec/anomaly/detector_spec.rb
|
81
81
|
- spec/spec_helper.rb
|