magnifier-ruby 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 10ef01bc786e90cbc1f33214573a27b28ef4f00e
4
- data.tar.gz: 3144c715c7f118d5b7462be5741f6bad4a948948
3
+ metadata.gz: 91fa371b828c434f81d74f542a76e905f15382c6
4
+ data.tar.gz: e02c99be650c6d4e133caa5080ad79859d153737
5
5
  SHA512:
6
- metadata.gz: fa8f6619518a249fbd8d15f5aa504fd2a0726d451b58385dbe4741a4cfa8893d76e7a71219d4a5e3f870e5ecac48a6f3edc728eeb6ea27cb893ba6ca1627b7fd
7
- data.tar.gz: e34d47c3359525e70066818f640df6e53017c36921219a753b71a2c6248b1f99c2b6f35fb534d20986aecf47abcbcdf4c8a1380a96eae4a526679fbc5be32cf5
6
+ metadata.gz: b06e954d69d67474975819b3fca01a8d0eb1eb14c658a4f7c7a3955f75b01a97f42992ea36c1a2cb58ed5c3f01a36212328b9ea85955492fe308f365f4ce5611
7
+ data.tar.gz: 27dc5211d0fc5ad0bc057dc139d454cc724a501760d25c28bf58889a37e5c88bc8221ea801d9c0d386e17f1fb6ffa10a478c9953066f78b1f20c460d1c40c1a8
@@ -1,78 +1,3 @@
1
- require 'numo/narray'
2
-
3
- class Magnifier
4
- # make configurable
5
- # or check for convergence
6
- LEARNING_STEPS = 1000
7
-
8
- attr_reader :training_set, :training_set_size, :features_count, :f1_score
9
- attr_accessor :mu_vector, :sigma_squared_vector, :threshold
10
-
11
- # examples is exepcted to be 2-D array of real values
12
- def initialize(examples, threshold = 0.01)
13
- @training_set = Numo::DFloat[*examples].freeze
14
- @training_set_size, @features_count = training_set.shape
15
- @threshold = threshold
16
- @mu_vector = Numo::DFloat.zeros(@features_count)
17
- @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
18
- @f1_score = 0
19
- end
20
-
21
- def train
22
- @mu_vector = @training_set.mean(0)
23
- @sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
24
- end
25
-
26
- # optimize using F1 score
27
- # requires cross-validation set (should differ from train set!)
28
- # todo: convert base truth to boolean
29
- def optimize_threshold(examples, base_truths)
30
- boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
31
- examples_prob = examples.map { |example| probability(example) }
32
-
33
- threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
34
- @threshold = 0
35
-
36
- (examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
37
- predictions = examples_prob.map { |probability| probability < new_threshold }
38
- current_f1 = compute_f1_score(predictions, boolean_base_thruths)
39
-
40
- if current_f1 > @f1_score
41
- @f1_score = current_f1
42
- @threshold = new_threshold
43
- end
44
- end
45
-
46
- [threshold, f1_score]
47
- end
48
-
49
- def probability(example)
50
- probability = 1
51
- example.each_with_index do |feature, i|
52
- feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
53
-
54
- probability = probability * feature_prob
55
- end
56
-
57
- probability
58
- end
59
-
60
- def anomaly?(example)
61
- probability(example) < threshold
62
- end
63
-
64
- private
65
-
66
- def compute_f1_score(predictions, base_truths)
67
- true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
68
- false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
69
- false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
70
-
71
- return 0 if true_positives == 0
72
-
73
- precision = true_positives.to_f / (true_positives + false_positives);
74
- recall = true_positives.to_f / (true_positives + false_negatives);
75
-
76
- (2 * precision * recall) / (precision + recall) rescue 0;
77
- end
78
- end
1
+ require_relative "magnifier/magnifier"
2
+ require_relative "magnifier/exporter"
3
+ require_relative "magnifier/importer"
@@ -0,0 +1,37 @@
1
+ require 'yaml'
2
+
3
+ class Magnifier::Exporter
4
+
5
+ attr_reader :magnifier_object, :path_object
6
+
7
+ def self.export(path_object, magnifier_object)
8
+ new(path_object, magnifier_object).export
9
+ end
10
+
11
+ def initialize(path_object, magnifier_object)
12
+ @path_object = path_object
13
+ @magnifier_object = magnifier_object
14
+ end
15
+
16
+ def export
17
+ file = File.open(@path_object, 'w')
18
+ file.write(compose_yaml)
19
+ file.close
20
+
21
+ file
22
+ end
23
+
24
+ private
25
+
26
+ def compose_yaml
27
+ result = {}
28
+ @magnifier_object.instance_variables.each do |var_name|
29
+ value = @magnifier_object.instance_variable_get(var_name)
30
+ value = value.to_a if value.respond_to?(:to_a) # convert matrices to arrays
31
+
32
+ result[var_name.to_s.slice(1..-1)] = value
33
+ end
34
+
35
+ result.to_yaml
36
+ end
37
+ end
@@ -0,0 +1,29 @@
1
+ require 'yaml'
2
+
3
+ class Magnifier::Importer
4
+
5
+ attr_reader :magnifier_object, :path_object
6
+
7
+ def self.import(path_object, magnifier_object = Magnifier.new([[0],[0]]))
8
+ new(path_object, magnifier_object).import
9
+ end
10
+
11
+ def initialize(path_object, magnifier_object)
12
+ @path_object = path_object
13
+ @magnifier_object = magnifier_object
14
+ end
15
+
16
+ def import
17
+ yaml_content = {}
18
+ File.open(@path_object, 'r') do |file|
19
+ yaml_content = YAML.load(file.read)
20
+ end
21
+
22
+ yaml_content.each_pair do |key, value|
23
+ value = Numo::DFloat[*value] if value.is_a?(Array)
24
+ @magnifier_object.instance_variable_set("@#{key}", value)
25
+ end
26
+
27
+ @magnifier_object
28
+ end
29
+ end
@@ -0,0 +1,89 @@
1
+ require 'numo/narray'
2
+
3
+ class Magnifier
4
+ # make configurable
5
+ # or check for convergence
6
+ LEARNING_STEPS = 1000
7
+
8
+ attr_reader :training_set, :training_set_size,
9
+ :features_count, :f1_score,
10
+ :mu_vector, :sigma_squared_vector
11
+ attr_accessor :threshold
12
+
13
+ # examples is expected to be a 2-D array of real values
14
+ def initialize(examples, threshold = 0.01)
15
+ @training_set = Numo::DFloat[*examples]
16
+ @training_set_size, @features_count = training_set.shape
17
+ @features_count ||= 1
18
+ @threshold = threshold
19
+ @mu_vector = Numo::DFloat.zeros(@features_count)
20
+ @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
21
+ @f1_score = 0
22
+ end
23
+
24
+ def train
25
+ @mu_vector = @training_set.mean(0)
26
+ @sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
27
+ end
28
+
29
+ # optimize using F1 score
30
+ # requires cross-validation set (should differ from train set!)
31
+ # todo: convert base truth to boolean
32
+ def optimize_threshold(examples, base_truths)
33
+ boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
34
+ examples_prob = examples.map { |example| probability(example) }
35
+
36
+ threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
37
+ @threshold = 0
38
+
39
+ (examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
40
+ predictions = examples_prob.map { |probability| probability < new_threshold }
41
+ current_f1 = compute_f1_score(predictions, boolean_base_thruths)
42
+
43
+ if current_f1 > @f1_score
44
+ @f1_score = current_f1
45
+ @threshold = new_threshold
46
+ end
47
+ end
48
+
49
+ [threshold, f1_score]
50
+ end
51
+
52
+ def probability(example)
53
+ probability = 1
54
+ example.each_with_index do |feature, i|
55
+ feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
56
+
57
+ probability = probability * feature_prob
58
+ end
59
+
60
+ probability
61
+ end
62
+
63
+ def anomaly?(example)
64
+ probability(example) < threshold
65
+ end
66
+
67
+ def import(path_or_file)
68
+ Magnifier::Importer.export(path_or_file, self)
69
+ end
70
+
71
+ def export(path_or_file)
72
+ Magnifier::Exporter.export(path_or_file, self)
73
+ end
74
+
75
+ private
76
+
77
+ def compute_f1_score(predictions, base_truths)
78
+ true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
79
+ false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
80
+ false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
81
+
82
+ return 0 if true_positives == 0
83
+
84
+ precision = true_positives.to_f / (true_positives + false_positives);
85
+ recall = true_positives.to_f / (true_positives + false_negatives);
86
+
87
+ (2 * precision * recall) / (precision + recall) rescue 0;
88
+ end
89
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: magnifier-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Grysimov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-01-06 00:00:00.000000000 Z
11
+ date: 2018-01-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -59,6 +59,9 @@ extensions: []
59
59
  extra_rdoc_files: []
60
60
  files:
61
61
  - lib/magnifier.rb
62
+ - lib/magnifier/exporter.rb
63
+ - lib/magnifier/importer.rb
64
+ - lib/magnifier/magnifier.rb
62
65
  homepage: https://github.com/tuned-up/magnifier-ruby
63
66
  licenses:
64
67
  - MIT