magnifier-ruby 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 10ef01bc786e90cbc1f33214573a27b28ef4f00e
- data.tar.gz: 3144c715c7f118d5b7462be5741f6bad4a948948
+ metadata.gz: 91fa371b828c434f81d74f542a76e905f15382c6
+ data.tar.gz: e02c99be650c6d4e133caa5080ad79859d153737
  SHA512:
- metadata.gz: fa8f6619518a249fbd8d15f5aa504fd2a0726d451b58385dbe4741a4cfa8893d76e7a71219d4a5e3f870e5ecac48a6f3edc728eeb6ea27cb893ba6ca1627b7fd
- data.tar.gz: e34d47c3359525e70066818f640df6e53017c36921219a753b71a2c6248b1f99c2b6f35fb534d20986aecf47abcbcdf4c8a1380a96eae4a526679fbc5be32cf5
+ metadata.gz: b06e954d69d67474975819b3fca01a8d0eb1eb14c658a4f7c7a3955f75b01a97f42992ea36c1a2cb58ed5c3f01a36212328b9ea85955492fe308f365f4ce5611
+ data.tar.gz: 27dc5211d0fc5ad0bc057dc139d454cc724a501760d25c28bf58889a37e5c88bc8221ea801d9c0d386e17f1fb6ffa10a478c9953066f78b1f20c460d1c40c1a8
lib/magnifier.rb CHANGED
@@ -1,78 +1,3 @@
- require 'numo/narray'
-
- class Magnifier
-   # make configurable
-   # or check for convergence
-   LEARNING_STEPS = 1000
-
-   attr_reader :training_set, :training_set_size, :features_count, :f1_score
-   attr_accessor :mu_vector, :sigma_squared_vector, :threshold
-
-   # examples is exepcted to be 2-D array of real values
-   def initialize(examples, threshold = 0.01)
-     @training_set = Numo::DFloat[*examples].freeze
-     @training_set_size, @features_count = training_set.shape
-     @threshold = threshold
-     @mu_vector = Numo::DFloat.zeros(@features_count)
-     @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
-     @f1_score = 0
-   end
-
-   def train
-     @mu_vector = @training_set.mean(0)
-     @sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
-   end
-
-   # optimize using F1 score
-   # requires cross-validation set (should differ from train set!)
-   # todo: convert base truth to boolean
-   def optimize_threshold(examples, base_truths)
-     boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
-     examples_prob = examples.map { |example| probability(example) }
-
-     threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
-     @threshold = 0
-
-     (examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
-       predictions = examples_prob.map { |probability| probability < new_threshold }
-       current_f1 = compute_f1_score(predictions, boolean_base_thruths)
-
-       if current_f1 > @f1_score
-         @f1_score = current_f1
-         @threshold = new_threshold
-       end
-     end
-
-     [threshold, f1_score]
-   end
-
-   def probability(example)
-     probability = 1
-     example.each_with_index do |feature, i|
-       feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
-
-       probability = probability * feature_prob
-     end
-
-     probability
-   end
-
-   def anomaly?(example)
-     probability(example) < threshold
-   end
-
-   private
-
-   def compute_f1_score(predictions, base_truths)
-     true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
-     false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
-     false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
-
-     return 0 if true_positives == 0
-
-     precision = true_positives.to_f / (true_positives + false_positives);
-     recall = true_positives.to_f / (true_positives + false_negatives);
-
-     (2 * precision * recall) / (precision + recall) rescue 0;
-   end
- end
+ require_relative "magnifier/magnifier"
+ require_relative "magnifier/exporter"
+ require_relative "magnifier/importer"
lib/magnifier/exporter.rb ADDED
@@ -0,0 +1,37 @@
+ require 'yaml'
+
+ class Magnifier::Exporter
+
+   attr_reader :magnifier_object, :path_object
+
+   def self.export(path_object, magnifier_object)
+     new(path_object, magnifier_object).export
+   end
+
+   def initialize(path_object, magnifier_object)
+     @path_object = path_object
+     @magnifier_object = magnifier_object
+   end
+
+   def export
+     file = File.open(@path_object, 'w')
+     file.write(compose_yaml)
+     file.close
+
+     file
+   end
+
+   private
+
+   def compose_yaml
+     result = {}
+     @magnifier_object.instance_variables.each do |var_name|
+       value = @magnifier_object.instance_variable_get(var_name)
+       value = value.to_a if value.respond_to?(:to_a) # convert martixes to arrays
+
+       result[var_name.to_s.slice(1..-1)] = value
+     end
+
+     result.to_yaml
+   end
+ end
lib/magnifier/importer.rb ADDED
@@ -0,0 +1,29 @@
+ require 'yaml'
+
+ class Magnifier::Importer
+
+   attr_reader :magnifier_object, :path_object
+
+   def self.import(path_object, magnifier_object = Magnifier.new([[0],[0]]))
+     new(path_object, magnifier_object).import
+   end
+
+   def initialize(path_object, magnifier_object)
+     @path_object = path_object
+     @magnifier_object = magnifier_object
+   end
+
+   def import
+     yaml_content = {}
+     File.open(@path_object, 'r') do |file|
+       yaml_content = YAML.load(file.read)
+     end
+
+     yaml_content.each_pair do |key, value|
+       value = Numo::DFloat[*value] if value.is_a?(Array)
+       @magnifier_object.instance_variable_set("@#{key}", value)
+     end
+
+     @magnifier_object
+   end
+ end
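
Taken together, the new Exporter and Importer give a trained model a YAML round-trip: Exporter.export dumps every instance variable of a Magnifier (Numo vectors converted to plain arrays) into a file, and Importer.import reads such a file and writes the values back onto a Magnifier instance, re-wrapping arrays as Numo::DFloat; when no target instance is given it builds a throwaway Magnifier.new([[0],[0]]) and overwrites its state. A minimal round-trip sketch based on the classes in this diff; the training data and the model.yml path are illustrative only:

    require 'magnifier'

    # fit the per-feature Gaussians on a tiny, made-up data set
    magnifier = Magnifier.new([[1.0, 2.0], [1.1, 1.9], [0.9, 2.1]])
    magnifier.train

    # dump all instance variables (mu/sigma vectors become plain arrays) to YAML
    Magnifier::Exporter.export('model.yml', magnifier)

    # load them back into a fresh instance; array values are re-wrapped as Numo::DFloat
    restored = Magnifier::Importer.import('model.yml')
    restored.mu_vector.to_a == magnifier.mu_vector.to_a # => true
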
lib/magnifier/magnifier.rb ADDED
@@ -0,0 +1,89 @@
+ require 'numo/narray'
+
+ class Magnifier
+   # make configurable
+   # or check for convergence
+   LEARNING_STEPS = 1000
+
+   attr_reader :training_set, :training_set_size,
+               :features_count, :f1_score,
+               :mu_vector, :sigma_squared_vector
+   attr_accessor :threshold
+
+   # examples is exepcted to be 2-D array of real values
+   def initialize(examples, threshold = 0.01)
+     @training_set = Numo::DFloat[*examples]
+     @training_set_size, @features_count = training_set.shape
+     @features_count ||= 1
+     @threshold = threshold
+     @mu_vector = Numo::DFloat.zeros(@features_count)
+     @sigma_squared_vector = Numo::DFloat.zeros(@features_count)
+     @f1_score = 0
+   end
+
+   def train
+     @mu_vector = @training_set.mean(0)
+     @sigma_squared_vector = (((training_set - mu_vector) ** 2).sum(0) / training_set_size).to_a
+   end
+
+   # optimize using F1 score
+   # requires cross-validation set (should differ from train set!)
+   # todo: convert base truth to boolean
+   def optimize_threshold(examples, base_truths)
+     boolean_base_thruths = base_truths.map{ |value| value == 1 || value == true }
+     examples_prob = examples.map { |example| probability(example) }
+
+     threshold_step = (examples_prob.max - examples_prob.min) / LEARNING_STEPS
+     @threshold = 0
+
+     (examples_prob.min..examples_prob.max).step(threshold_step) do |new_threshold|
+       predictions = examples_prob.map { |probability| probability < new_threshold }
+       current_f1 = compute_f1_score(predictions, boolean_base_thruths)
+
+       if current_f1 > @f1_score
+         @f1_score = current_f1
+         @threshold = new_threshold
+       end
+     end
+
+     [threshold, f1_score]
+   end
+
+   def probability(example)
+     probability = 1
+     example.each_with_index do |feature, i|
+       feature_prob = Math.exp(-((feature - mu_vector[i])**2 / (2 * sigma_squared_vector[i]))) / ((2 * Math::PI * sigma_squared_vector[i])**(0.5))
+
+       probability = probability * feature_prob
+     end
+
+     probability
+   end
+
+   def anomaly?(example)
+     probability(example) < threshold
+   end
+
+   def import(path_or_file)
+     Magnifier::Importer.export(path_or_file, self)
+   end
+
+   def export(path_or_file)
+     Magnifier::Exporter.export(path_or_file, self)
+   end
+
+   private
+
+   def compute_f1_score(predictions, base_truths)
+     true_positives = predictions.map.with_index { |val, i| val && base_truths[i] }.count(true)
+     false_positives = predictions.map.with_index { |val, i| val && !base_truths[i] }.count(true)
+     false_negatives = predictions.map.with_index { |val, i| !val && base_truths[i] }.count(true)
+
+     return 0 if true_positives == 0
+
+     precision = true_positives.to_f / (true_positives + false_positives);
+     recall = true_positives.to_f / (true_positives + false_negatives);
+
+     (2 * precision * recall) / (precision + recall) rescue 0;
+   end
+ end
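
lib/magnifier/magnifier.rb carries the detection model itself: train fits an independent Gaussian to every feature, optimize_threshold tunes the decision threshold against a labelled cross-validation set by maximizing F1, and anomaly? flags an example whose joint density falls below that threshold. Version 1.1.0 also adds the #export/#import convenience wrappers (note that #import, as shipped, delegates to Importer.export, which is why the earlier round-trip sketch goes through Magnifier::Importer directly). A short end-to-end sketch with made-up numbers:

    require 'magnifier'

    # fit per-feature Gaussians on examples of normal behaviour
    model = Magnifier.new([[10.0, 0.5], [11.0, 0.4], [9.5, 0.6], [10.5, 0.5]])
    model.train

    # tune the threshold on a labelled cross-validation set (1 marks an anomaly)
    cv_examples    = [[10.2, 0.5], [30.0, 5.0]]
    cv_base_truths = [0, 1]
    threshold, f1 = model.optimize_threshold(cv_examples, cv_base_truths)

    model.anomaly?([10.1, 0.45]) # => false, close to the training data
    model.anomaly?([25.0, 3.0])  # => true, far from every feature's mean
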
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: magnifier-ruby
  version: !ruby/object:Gem::Version
- version: 1.0.0
+ version: 1.1.0
  platform: ruby
  authors:
  - Nick Grysimov
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2018-01-06 00:00:00.000000000 Z
+ date: 2018-01-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -59,6 +59,9 @@ extensions: []
  extra_rdoc_files: []
  files:
  - lib/magnifier.rb
+ - lib/magnifier/exporter.rb
+ - lib/magnifier/importer.rb
+ - lib/magnifier/magnifier.rb
  homepage: https://github.com/tuned-up/magnifier-ruby
  licenses:
  - MIT