magnifier-ruby 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/magnifier.rb +3 -78
- data/lib/magnifier/exporter.rb +37 -0
- data/lib/magnifier/importer.rb +29 -0
- data/lib/magnifier/magnifier.rb +89 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 91fa371b828c434f81d74f542a76e905f15382c6
|
4
|
+
data.tar.gz: e02c99be650c6d4e133caa5080ad79859d153737
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b06e954d69d67474975819b3fca01a8d0eb1eb14c658a4f7c7a3955f75b01a97f42992ea36c1a2cb58ed5c3f01a36212328b9ea85955492fe308f365f4ce5611
|
7
|
+
data.tar.gz: 27dc5211d0fc5ad0bc057dc139d454cc724a501760d25c28bf58889a37e5c88bc8221ea801d9c0d386e17f1fb6ffa10a478c9953066f78b1f20c460d1c40c1a8
|
data/lib/magnifier.rb
CHANGED
@@ -1,78 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# make configurable
|
5
|
-
# or check for convergence
|
6
|
-
LEARNING_STEPS = 1000
|
7
|
-
|
8
|
-
attr_reader :training_set, :training_set_size, :features_count, :f1_score
|
9
|
-
attr_accessor :mu_vector, :sigma_squared_vector, :threshold
|
10
|
-
|
11
|
-
# examples is exepcted to be 2-D array of real values
|
12
|
-
def initialize(examples, threshold = 0.01)
|
13
|
-
@training_set = Numo::DFloat[*examples].freeze
|
14
|
-
@training_set_size, @features_count = training_set.shape
|
15
|
-
@threshold = threshold
|
16
|
-
@mu_vector = Numo::DFloat.zeros(@features_count)
|
17
|
-
@sigma_squared_vector = Numo::DFloat.zeros(@features_count)
|
18
|
-
@f1_score = 0
|
19
|
-
end
|
20
|
-
|
21
|
-
def train
|
22
|
-
@mu_vector = @training_set.mean(0)
|
23
|
-
@sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
|
24
|
-
end
|
25
|
-
|
26
|
-
# optimize using F1 score
|
27
|
-
# requires cross-validation set (should differ from train set!)
|
28
|
-
# todo: convert base truth to boolean
|
29
|
-
def optimize_threshold(examples, base_truths)
|
30
|
-
boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
|
31
|
-
examples_prob = examples.map { |example| probability(example) }
|
32
|
-
|
33
|
-
threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
|
34
|
-
@threshold = 0
|
35
|
-
|
36
|
-
(examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
|
37
|
-
predictions = examples_prob.map { |probability| probability < new_threshold }
|
38
|
-
current_f1 = compute_f1_score(predictions, boolean_base_thruths)
|
39
|
-
|
40
|
-
if current_f1 > @f1_score
|
41
|
-
@f1_score = current_f1
|
42
|
-
@threshold = new_threshold
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
[threshold, f1_score]
|
47
|
-
end
|
48
|
-
|
49
|
-
def probability(example)
|
50
|
-
probability = 1
|
51
|
-
example.each_with_index do |feature, i|
|
52
|
-
feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
|
53
|
-
|
54
|
-
probability = probability * feature_prob
|
55
|
-
end
|
56
|
-
|
57
|
-
probability
|
58
|
-
end
|
59
|
-
|
60
|
-
def anomaly?(example)
|
61
|
-
probability(example) < threshold
|
62
|
-
end
|
63
|
-
|
64
|
-
private
|
65
|
-
|
66
|
-
def compute_f1_score(predictions, base_truths)
|
67
|
-
true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
|
68
|
-
false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
|
69
|
-
false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
|
70
|
-
|
71
|
-
return 0 if true_positives == 0
|
72
|
-
|
73
|
-
precision = true_positives.to_f / (true_positives + false_positives);
|
74
|
-
recall = true_positives.to_f / (true_positives + false_negatives);
|
75
|
-
|
76
|
-
(2 * precision * recall) / (precision + recall) rescue 0;
|
77
|
-
end
|
78
|
-
end
|
1
|
+
require_relative "magnifier/magnifier"
|
2
|
+
require_relative "magnifier/exporter"
|
3
|
+
require_relative "magnifier/importer"
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Magnifier::Exporter
|
4
|
+
|
5
|
+
attr_reader :magnifier_object, :path_object
|
6
|
+
|
7
|
+
def self.export(path_object, magnifier_object)
|
8
|
+
new(path_object, magnifier_object).export
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(path_object, magnifier_object)
|
12
|
+
@path_object = path_object
|
13
|
+
@magnifier_object = magnifier_object
|
14
|
+
end
|
15
|
+
|
16
|
+
def export
|
17
|
+
file = File.open(@path_object, 'w')
|
18
|
+
file.write(compose_yaml)
|
19
|
+
file.close
|
20
|
+
|
21
|
+
file
|
22
|
+
end
|
23
|
+
|
24
|
+
private
|
25
|
+
|
26
|
+
def compose_yaml
|
27
|
+
result = {}
|
28
|
+
@magnifier_object.instance_variables.each do |var_name|
|
29
|
+
value = @magnifier_object.instance_variable_get(var_name)
|
30
|
+
value = value.to_a if value.respond_to?(:to_a) # convert matrices to arrays
|
31
|
+
|
32
|
+
result[var_name.to_s.slice(1..-1)] = value
|
33
|
+
end
|
34
|
+
|
35
|
+
result.to_yaml
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
class Magnifier::Importer
|
4
|
+
|
5
|
+
attr_reader :magnifier_object, :path_object
|
6
|
+
|
7
|
+
def self.import(path_object, magnifier_object = Magnifier.new([[0],[0]]))
|
8
|
+
new(path_object, magnifier_object).import
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(path_object, magnifier_object)
|
12
|
+
@path_object = path_object
|
13
|
+
@magnifier_object = magnifier_object
|
14
|
+
end
|
15
|
+
|
16
|
+
def import
|
17
|
+
yaml_content = {}
|
18
|
+
File.open(@path_object, 'r') do |file|
|
19
|
+
yaml_content = YAML.load(file.read) # NOTE(review): on Psych < 4 YAML.load can instantiate arbitrary objects; consider YAML.safe_load if the file may be untrusted
|
20
|
+
end
|
21
|
+
|
22
|
+
yaml_content.each_pair do |key, value|
|
23
|
+
value = Numo::DFloat[*value] if value.is_a?(Array)
|
24
|
+
@magnifier_object.instance_variable_set("@#{key}", value)
|
25
|
+
end
|
26
|
+
|
27
|
+
@magnifier_object
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
require 'numo/narray'
|
2
|
+
|
3
|
+
class Magnifier
|
4
|
+
# make configurable
|
5
|
+
# or check for convergence
|
6
|
+
LEARNING_STEPS = 1000
|
7
|
+
|
8
|
+
attr_reader :training_set, :training_set_size,
|
9
|
+
:features_count, :f1_score,
|
10
|
+
:mu_vector, :sigma_squared_vector
|
11
|
+
attr_accessor :threshold
|
12
|
+
|
13
|
+
# examples is expected to be a 2-D array of real values
|
14
|
+
def initialize(examples, threshold = 0.01)
|
15
|
+
@training_set = Numo::DFloat[*examples]
|
16
|
+
@training_set_size, @features_count = training_set.shape
|
17
|
+
@features_count ||= 1
|
18
|
+
@threshold = threshold
|
19
|
+
@mu_vector = Numo::DFloat.zeros(@features_count)
|
20
|
+
@sigma_squared_vector = Numo::DFloat.zeros(@features_count)
|
21
|
+
@f1_score = 0
|
22
|
+
end
|
23
|
+
|
24
|
+
def train
|
25
|
+
@mu_vector = @training_set.mean(0)
|
26
|
+
@sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
|
27
|
+
end
|
28
|
+
|
29
|
+
# optimize using F1 score
|
30
|
+
# requires cross-validation set (should differ from train set!)
|
31
|
+
# todo: convert base truth to boolean
|
32
|
+
def optimize_threshold(examples, base_truths)
|
33
|
+
boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
|
34
|
+
examples_prob = examples.map { |example| probability(example) }
|
35
|
+
|
36
|
+
threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
|
37
|
+
@threshold = 0
|
38
|
+
|
39
|
+
(examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
|
40
|
+
predictions = examples_prob.map { |probability| probability < new_threshold }
|
41
|
+
current_f1 = compute_f1_score(predictions, boolean_base_thruths)
|
42
|
+
|
43
|
+
if current_f1 > @f1_score
|
44
|
+
@f1_score = current_f1
|
45
|
+
@threshold = new_threshold
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
[threshold, f1_score]
|
50
|
+
end
|
51
|
+
|
52
|
+
def probability(example)
|
53
|
+
probability = 1
|
54
|
+
example.each_with_index do |feature, i|
|
55
|
+
feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
|
56
|
+
|
57
|
+
probability = probability * feature_prob
|
58
|
+
end
|
59
|
+
|
60
|
+
probability
|
61
|
+
end
|
62
|
+
|
63
|
+
def anomaly?(example)
|
64
|
+
probability(example) < threshold
|
65
|
+
end
|
66
|
+
|
67
|
+
def import(path_or_file)
|
68
|
+
Magnifier::Importer.import(path_or_file, self) # Importer exposes .import (it has no .export); loads saved state into self
|
69
|
+
end
|
70
|
+
|
71
|
+
def export(path_or_file)
|
72
|
+
Magnifier::Exporter.export(path_or_file, self)
|
73
|
+
end
|
74
|
+
|
75
|
+
private
|
76
|
+
|
77
|
+
def compute_f1_score(predictions, base_truths)
|
78
|
+
true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
|
79
|
+
false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
|
80
|
+
false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
|
81
|
+
|
82
|
+
return 0 if true_positives == 0
|
83
|
+
|
84
|
+
precision = true_positives.to_f / (true_positives + false_positives);
|
85
|
+
recall = true_positives.to_f / (true_positives + false_negatives);
|
86
|
+
|
87
|
+
(2 * precision * recall) / (precision + recall) rescue 0;
|
88
|
+
end
|
89
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: magnifier-ruby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Grysimov
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-01-
|
11
|
+
date: 2018-01-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -59,6 +59,9 @@ extensions: []
|
|
59
59
|
extra_rdoc_files: []
|
60
60
|
files:
|
61
61
|
- lib/magnifier.rb
|
62
|
+
- lib/magnifier/exporter.rb
|
63
|
+
- lib/magnifier/importer.rb
|
64
|
+
- lib/magnifier/magnifier.rb
|
62
65
|
homepage: https://github.com/tuned-up/magnifier-ruby
|
63
66
|
licenses:
|
64
67
|
- MIT
|