magnifier-ruby 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/magnifier.rb +78 -0
  3. metadata +86 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 10ef01bc786e90cbc1f33214573a27b28ef4f00e
4
+ data.tar.gz: 3144c715c7f118d5b7462be5741f6bad4a948948
5
+ SHA512:
6
+ metadata.gz: fa8f6619518a249fbd8d15f5aa504fd2a0726d451b58385dbe4741a4cfa8893d76e7a71219d4a5e3f870e5ecac48a6f3edc728eeb6ea27cb893ba6ca1627b7fd
7
+ data.tar.gz: e34d47c3359525e70066818f640df6e53017c36921219a753b71a2c6248b1f99c2b6f35fb534d20986aecf47abcbcdf4c8a1380a96eae4a526679fbc5be32cf5
@@ -0,0 +1,78 @@
1
+ require 'numo/narray'
2
+
3
# Anomaly detector that models each feature as an independent Gaussian.
#
# Typical flow: construct with a training set, call #train to fit the
# per-feature mean and variance, optionally call #optimize_threshold with a
# labelled cross-validation set, then query #anomaly? for new examples.
class Magnifier
  # Number of steps the probability range is divided into when
  # #optimize_threshold scans for the best threshold.
  # TODO: make configurable, or stop early once the score converges.
  LEARNING_STEPS = 1000

  attr_reader :training_set, :training_set_size, :features_count, :f1_score
  attr_accessor :mu_vector, :sigma_squared_vector, :threshold

  # Stores the training set and zero-initialises the model parameters.
  #
  # @param examples [Array<Array<Numeric>>] expected to be a 2-D array of
  #   real values (rows are examples, columns are features)
  # @param threshold [Numeric] probability below which an example is
  #   reported as an anomaly
  def initialize(examples, threshold = 0.01)
    @training_set = Numo::DFloat[*examples].freeze
    @training_set_size, @features_count = training_set.shape
    @threshold = threshold
    @mu_vector = Numo::DFloat.zeros(@features_count)
    @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
    @f1_score = 0
  end

  # Fits the per-feature mean and variance from the training set.
  #
  # The variance divides by the number of training examples (population
  # variance) and is stored as a plain Array.
  #
  # @return [Array<Float>] the fitted per-feature variances
  def train
    @mu_vector = @training_set.mean(0)
    @sigma_squared_vector = (((training_set - mu_vector)**2).sum(0) / training_set_size).to_a
  end

  # Picks the threshold that maximises the F1 score on a labelled set.
  #
  # Requires a cross-validation set (it should differ from the train set!).
  # Candidate thresholds are spread evenly between the smallest and largest
  # probability of the given examples. Each call starts from scratch, so the
  # returned pair always describes this call's data. When no candidate
  # yields a positive F1 score (including an empty example set) the
  # threshold is left at 0.
  #
  # @param examples [Array<Array<Numeric>>] cross-validation examples
  # @param base_truths [Array] one label per example; only 1 and true mark
  #   an anomaly, every other value is treated as normal
  # @return [Array(Numeric, Numeric)] the chosen threshold and its F1 score
  def optimize_threshold(examples, base_truths)
    # Explicit comparison on purpose: labels such as 0 are truthy in Ruby.
    boolean_base_truths = base_truths.map { |value| value == 1 || value == true }
    examples_prob = examples.map { |example| probability(example) }

    # Reset both results; a score kept from an earlier call would otherwise
    # block every candidate and be returned next to a threshold of 0.
    @threshold = 0
    @f1_score = 0

    threshold_candidates(examples_prob.min, examples_prob.max).each do |new_threshold|
      predictions = examples_prob.map { |prob| prob < new_threshold }
      current_f1 = compute_f1_score(predictions, boolean_base_truths)
      next unless current_f1 > @f1_score

      @f1_score = current_f1
      @threshold = new_threshold
    end

    [threshold, f1_score]
  end

  # Joint probability density of an example under the fitted model: the
  # product of one univariate Gaussian density per feature.
  #
  # A zero variance makes the density undefined (NaN with Float inputs).
  #
  # @param example [Array<Numeric>] one value per feature
  # @return [Numeric] the density; 1 for an empty example
  def probability(example)
    density = 1
    example.each_with_index do |feature, i|
      mean = mu_vector[i]
      variance = sigma_squared_vector[i]
      # 2.0 forces float division even when every operand is an Integer.
      exponent = -((feature - mean)**2 / (2.0 * variance))
      density *= Math.exp(exponent) / ((2 * Math::PI * variance)**0.5)
    end

    density
  end

  # @param example [Array<Numeric>] one value per feature
  # @return [Boolean] true when the example's probability is below threshold
  def anomaly?(example)
    probability(example) < threshold
  end

  private

  # Thresholds to evaluate between the lowest and highest probability.
  # Falls back to the distinct endpoints when the range is empty or too
  # narrow to be stepped through, because Range#step rejects a zero step.
  def threshold_candidates(lowest, highest)
    return [] if lowest.nil?

    step = (highest - lowest) / LEARNING_STEPS
    return [lowest, highest].uniq unless step > 0

    (lowest..highest).step(step)
  end

  # F1 score (harmonic mean of precision and recall) of boolean predictions
  # against boolean ground truth; 0 when there are no true positives.
  def compute_f1_score(predictions, base_truths)
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    predictions.each_with_index do |predicted, i|
      actual = base_truths[i]
      if predicted && actual
        true_positives += 1
      elsif predicted
        false_positives += 1
      elsif actual
        false_negatives += 1
      end
    end

    return 0 if true_positives.zero?

    precision = true_positives.to_f / (true_positives + false_positives)
    recall = true_positives.to_f / (true_positives + false_negatives)

    # Both terms are positive here, so the denominator cannot be zero.
    (2 * precision * recall) / (precision + recall)
  end
end
metadata ADDED
@@ -0,0 +1,86 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: magnifier-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Nick Grysimov
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-01-06 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: numo-narray
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.9'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 3.7.0
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 3.7.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: pry
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: Anomaly detection using gaussian distribution, written in ruby
56
+ email:
57
+ executables: []
58
+ extensions: []
59
+ extra_rdoc_files: []
60
+ files:
61
+ - lib/magnifier.rb
62
+ homepage: https://github.com/tuned-up/magnifier-ruby
63
+ licenses:
64
+ - MIT
65
+ metadata: {}
66
+ post_install_message:
67
+ rdoc_options: []
68
+ require_paths:
69
+ - lib
70
+ required_ruby_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: '2.0'
75
+ required_rubygems_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '0'
80
+ requirements: []
81
+ rubyforge_project:
82
+ rubygems_version: 2.6.14
83
+ signing_key:
84
+ specification_version: 4
85
+ summary: Anomaly Detection
86
+ test_files: []