adaboost 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/adaboost/adaboost.rb +18 -22
- data/lib/adaboost/contingency_table.rb +3 -3
- data/lib/adaboost/evaluator.rb +9 -9
- data/lib/adaboost/features_analyzer.rb +24 -24
- data/lib/adaboost/resampler.rb +5 -5
- data/lib/adaboost/weak_classifier.rb +4 -4
- data/lib/adaboost/weak_learner.rb +20 -20
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: dbef461fb8ab7809de5e99ec234e85c002427bd5
+  data.tar.gz: 446d84856754769aeb49e4526de18f4c547d85de
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: de5b31c2367c9459d09d5456684b595eeb8e2fa2b8846ee80911027e66b7a45a779762b464f2bd06365315ba298365727f8cbe1a81f8ebbdb3be31ef98686725
+  data.tar.gz: 8b6157db326c5510329fd3b0b3531ffe7e4bb8fee5fbcd3e5b1b73e36ea17081a0fe809c71e8fa6f2f871fdb238b3edb060c3559c6e196e8194fd18ecf604861
data/lib/adaboost/adaboost.rb
CHANGED
@@ -4,40 +4,40 @@ module AdaBoost
 
     attr_reader :weak_classifiers, :y_index
 
-    def initialize
+    def initialize(number_of_classifiers, y_index)
       @weak_classifiers = []
-      @weak_learner = WeakLearner.new
+      @weak_learner = WeakLearner.new(y_index)
       @number_of_classifiers = number_of_classifiers
       @weights = []
       @y_index = y_index
     end
 
-    def train
+    def train(samples)
       if Config::OVER_SAMPLING_TRAINING_SET
-        resampler = Resampler.new
-        resampler.over_sample
+        resampler = Resampler.new(@y_index)
+        resampler.over_sample(samples)
       end
-      initialize_weights
-      0.upto
-      weak_classifier = @weak_learner.generate_weak_classifier
+      initialize_weights(samples)
+      0.upto(@number_of_classifiers - 1) do |i|
+        weak_classifier = @weak_learner.generate_weak_classifier(samples, @weights)
         weak_classifier.compute_alpha
-      update_weights
+        update_weights(weak_classifier, samples)
         @weak_classifiers << weak_classifier
         yield i, weak_classifier if block_given?
       end
     end
 
-    def classify
+    def classify(sample)
       score = 0.0
       @weak_classifiers.each do |weak_classifier|
-        score += weak_classifier.classify_with_alpha
+        score += weak_classifier.classify_with_alpha(sample)
       end
       score
     end
 
-    def self.build_from_model
+    def self.build_from_model(model, y_index = 0)
       classifiers = model.weak_classifiers
-      adaboost = AdaBoost.new
+      adaboost = AdaBoost.new(classifiers.size, y_index)
       classifiers.each do |classifier|
         adaboost.weak_classifiers << WeakClassifier.new(classifier.feature_number, classifier.split, classifier.alpha)
       end
@@ -46,12 +46,12 @@ module AdaBoost
 
     private
 
-    def initialize_weights
+    def initialize_weights(samples)
       samples_size = samples.size.to_f
       negative_weight = 1 / samples_size
       positive_weight = negative_weight
       if Config::INCORPORATE_COST_SENSITIVE_LEARNING
-        analyzer = FeaturesAnalyzer.new
+        analyzer = FeaturesAnalyzer.new(@y_index)
         distribution = analyzer.analyze(samples).distribution
         positive_rate = distribution.positive / samples_size
         negative_rate = distribution.negative / samples_size
@@ -61,19 +61,15 @@ module AdaBoost
       end
       samples.each_with_index do |sample, i|
         y = sample[@y_index]
-        if y == -1
-          @weights[i] = positive_weight
-        else
-          @weights[i] = negative_weight
-        end
+        @weights[i] = (y == -1) ? positive_weight : negative_weight
       end
     end
 
-    def update_weights
+    def update_weights(weak_classifier, samples)
       sum = 0.0
       samples.each_with_index do |sample, i|
         y = sample[@y_index]
-        @weights[i] *= Math.exp
+        @weights[i] *= Math.exp(-(weak_classifier.alpha) * weak_classifier.classify(sample) * y)
         sum += @weights[i]
       end
       @weights.each_with_index do |_, i|
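
The 0.0.3 release shipped these methods with their argument lists missing; 0.0.4 restores them. For orientation, a minimal usage sketch of the restored API, assuming the sample layout the diff implies (one array per sample, with the -1/+1 label stored at y_index) and illustrative values throughout:

    # Label lives at index 2, so each sample carries two features plus its label.
    samples = [
      [0.2, 1.5, -1],
      [0.9, 3.1,  1],
      [0.4, 2.7,  1],
      [0.1, 0.8, -1]
    ]

    booster = AdaBoost::AdaBoost.new(10, 2)   # 10 boosting rounds, y_index = 2
    booster.train(samples) do |i, weak_classifier|
      # optional per-round callback, as yielded in the hunk above
    end
    score = booster.classify([0.3, 2.0])      # sign of the score is the predicted class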
data/lib/adaboost/contingency_table.rb
CHANGED

@@ -22,7 +22,7 @@ module AdaBoost
       @table[1][0]
     end
 
-    def add_prediction
+    def add_prediction(y, h)
       @table[class_to_index(y)][class_to_index(h)] += 1
     end
 
@@ -175,8 +175,8 @@ module AdaBoost
       ]
     end
 
-    def class_to_index
-      k > 0 ? 1 : 0
+    def class_to_index(k)
+      (k > 0) ? 1 : 0
     end
   end
 end
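
The restored signatures make the table addressable by actual class y and predicted class h; class_to_index maps a positive label to index 1 and anything else to 0. An illustrative sketch:

    table = AdaBoost::ContingencyTable.new
    table.add_prediction(1, 1)    # true positive:  increments @table[1][1]
    table.add_prediction(-1, 1)   # false positive: increments @table[0][1]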
data/lib/adaboost/evaluator.rb
CHANGED
@@ -2,26 +2,26 @@ module AdaBoost
 
   class Evaluator
 
-    def initialize
+    def initialize(classifier)
       @classifier = classifier
       @threshold = Float::MAX
     end
 
-    def evaluate
+    def evaluate(test_set)
       contingency_table = ContingencyTable.new
       test_set.each do |sample|
         y = sample[@classifier.y_index]
-        if Config::USE_THRESHOLD_CLASSIFICATION
-          h = classify_using_threshold
+        h = if Config::USE_THRESHOLD_CLASSIFICATION
+          classify_using_threshold(sample)
         else
-          h = classify_normally
+          classify_normally(sample)
         end
-        contingency_table.add_prediction
+        contingency_table.add_prediction(y, h)
       end
       contingency_table
     end
 
-    def used_feature_numbers
+    def used_feature_numbers(unique = false)
       used_feature_numbers = []
       @classifier.weak_classifiers.each do |weak_classifier|
         used_feature_numbers << weak_classifier.feature_number
@@ -51,11 +51,11 @@ module AdaBoost
       @threshold
     end
 
-    def classify_normally
+    def classify_normally(sample)
       (@classifier.classify(sample) > 0) ? 1 : -1
     end
 
-    def classify_using_threshold
+    def classify_using_threshold(sample)
       score = 0.0
       @classifier.weak_classifiers.each do |weak_classifier|
         if sample[weak_classifier.feature_number] > weak_classifier.split
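
With the parameters back, the evaluator runs end to end. A sketch, assuming a trained classifier such as booster from the earlier sketch and a hypothetical test_set in the same sample layout; the effect of the new unique flag lies outside the shown hunks:

    evaluator = AdaBoost::Evaluator.new(booster)
    table = evaluator.evaluate(test_set)             # returns a ContingencyTable
    features = evaluator.used_feature_numbers(true)  # flag presumably de-duplicates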
data/lib/adaboost/features_analyzer.rb
CHANGED

@@ -7,45 +7,45 @@ module AdaBoost
 
   class FeaturesAnalyzer
 
-    def initialize
+    def initialize(y_index)
       @y_index = y_index
     end
 
-    def analyze
+    def analyze(samples)
 
       statistics = []
-      distribution = Distribution.new
+      distribution = Distribution.new(0, 0)
       number_of_samples = samples.size
 
       if number_of_samples < 1
-        raise ArgumentError.new
+        raise ArgumentError.new('At least one sample is needed to analyze.')
       end
       number_of_features = @y_index
       sample_size = samples[0].size
       if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
-        raise ArgumentError.new
+        raise ArgumentError.new('At least 1 feature is needed to analyze.')
       end
-      0.upto
-      statistics << FeatureStatistic.new
+      0.upto(number_of_features - 1) do
+        statistics << FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
       end
       samples.each do |sample|
-        y = sample[@y_index]
-        if y == -1
-          distribution.negative += 1
-        else
-          distribution.positive += 1
-        end
-        0.upto
-        statistic = statistics[i]
-        feature_value = sample[i]
-        if feature_value < statistic.min
-          statistic.min = feature_value
-        end
-        if feature_value > statistic.max
-          statistic.max = feature_value
-        end
-        statistic.sum += feature_value
+        y = sample[@y_index]
+        if y == -1
+          distribution.negative += 1
+        else
+          distribution.positive += 1
+        end
+        0.upto(number_of_features - 1) do |i|
+          statistic = statistics[i]
+          feature_value = sample[i]
+          if feature_value < statistic.min
+            statistic.min = feature_value
+          end
+          if feature_value > statistic.max
+            statistic.max = feature_value
+          end
+          statistic.sum += feature_value
+        end
       end
       statistics.each do |statistic|
         statistic.avg = statistic.sum / number_of_samples.to_f
@@ -67,7 +67,7 @@ module AdaBoost
       analyze
     end
 
-    def relations
+    def relations(x, y, samples, statistics)
       sum = 0.0
       samples.each do |sample|
         x_value = sample[x].to_f
@@ -76,7 +76,7 @@ module AdaBoost
       end
       cov = sum / (samples.size - 1).to_f
       cor = cov / (statistics[x].std * statistics[y].std).to_f
-      VariableRelations.new
+      VariableRelations.new(x, y, cov, cor)
     end
   end
 end
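
analyze(samples) now takes the data explicitly: it counts the -1/+1 label distribution and tracks per-feature min, max, and sum in a single pass. A sketch reusing the sample layout from the earlier example, with accessors taken from the hunks above:

    analysis = AdaBoost::FeaturesAnalyzer.new(2).analyze(samples)
    analysis.distribution.positive   # count of +1 labels
    analysis.statistics[0].min       # per-feature statistics (min/max/sum/avg)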
data/lib/adaboost/resampler.rb
CHANGED
@@ -2,12 +2,12 @@ module AdaBoost
 
   class Resampler
 
-    def initialize
+    def initialize(y_index)
       @y_index = y_index
     end
 
-    def over_sample
-      distribution = distribution
+    def over_sample(samples)
+      distribution = distribution(samples)
       y0 = distribution.negative
       y1 = distribution.positive
       majority = y0 < y1 ? 1.0 : -1.0
@@ -25,8 +25,8 @@ module AdaBoost
 
     private
 
-    def distribution
-      analyzer = FeaturesAnalyzer.new
+    def distribution(instances)
+      analyzer = FeaturesAnalyzer.new(@y_index)
       analyzer.analyze(instances).distribution
     end
   end
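
over_sample(samples) receives the training set explicitly and works out the majority class from the label distribution. Since train in adaboost.rb calls it without using a return value, it presumably balances the array in place; a sketch:

    AdaBoost::Resampler.new(2).over_sample(samples)  # y_index = 2, mutates samples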
data/lib/adaboost/weak_classifier.rb
CHANGED

@@ -5,7 +5,7 @@ module AdaBoost
     attr_accessor :error
     attr_reader :feature_number, :split, :alpha
 
-    def initialize
+    def initialize(feature_number, split, alpha = 0.0, error = 0.0)
       @feature_number = feature_number
       @split = split
       @error = error
@@ -16,15 +16,15 @@ module AdaBoost
       @alpha = 0.5 * Math.log((1.0 - @error) / @error)
     end
 
-    def classify
+    def classify(sample)
       sample[@feature_number] > @split ? 1 : -1
     end
 
-    def classify_with_alpha
+    def classify_with_alpha(sample)
       return classify(sample) * @alpha
     end
 
-    def increase_error
+    def increase_error(amount)
       @error += amount
     end
   end
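
WeakClassifier is a decision stump: classify(sample) votes +1 when the selected feature exceeds the split and -1 otherwise, and compute_alpha sets alpha = 0.5 * ln((1 - error) / error), so lower-error stumps vote with more weight. A sketch with arbitrary numbers:

    stump = AdaBoost::WeakClassifier.new(1, 2.5)  # watch feature 1, split at 2.5
    stump.classify([0.0, 3.0])                    # => 1, since 3.0 > 2.5
    stump.increase_error(0.1)
    stump.compute_alpha
    stump.alpha                                   # 0.5 * Math.log(0.9 / 0.1), about 1.0986
    stump.classify_with_alpha([0.0, 3.0])         # => alpha * 1, about 1.0986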
data/lib/adaboost/weak_learner.rb
CHANGED

@@ -2,31 +2,31 @@ module AdaBoost
 
   class WeakLearner
 
-    def initialize
+    def initialize(y_index)
       @y_index = y_index
-      @analyzer = FeaturesAnalyzer.new
+      @analyzer = FeaturesAnalyzer.new(y_index)
       @classifiers_cache = []
     end
 
-    def features_satistics
+    def features_satistics(samples)
       @analyzer.analyze(samples).statistics
     end
 
-    def generate_weak_classifier
+    def generate_weak_classifier(samples, weights)
       number_of_samples = samples.size
       if number_of_samples < 1
-        raise ArgumentError.new
+        raise ArgumentError.new('At least one sample is needed to generate.')
       end
       number_of_features = @y_index
       sample_size = samples[0].size
       if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
-        raise ArgumentError.new
+        raise ArgumentError.new('At least 1 feature is needed to generate.')
       end
       classifiers = []
       if Config::USE_RANDOM_WEAK_CLASSIFIERS
-        classifiers = generate_random_classifiers
+        classifiers = generate_random_classifiers(samples, number_of_features)
       else
-        classifiers = generate_all_possible_classifiers
+        classifiers = generate_all_possible_classifiers(samples, number_of_features)
       end
       best_index = -1
       best_error = Float::MAX
@@ -35,7 +35,7 @@ module AdaBoost
       samples.each_with_index do |sample, j|
         y = sample[@y_index]
         if classifier.classify(sample).to_f != y
-          classifier.increase_error
+          classifier.increase_error(weights[j])
         end
       end
       if classifier.error < best_error
@@ -45,33 +45,33 @@ module AdaBoost
       end
       best = classifiers[best_index]
       if !Config::USE_RANDOM_WEAK_CLASSIFIERS
-        classifiers.delete_at
+        classifiers.delete_at(best_index)
       end
       best
     end
 
     private
 
-    def generate_random_classifiers
+    def generate_random_classifiers(samples, number_of_features)
       classifiers = []
-      statistics = features_satistics
-      0.upto
-      feature_number = rand
-      info = statistics[feature_number]
-      split = rand * info.rng + info.min
-      classifiers << WeakClassifier.new
+      statistics = features_satistics(samples)
+      0.upto(Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1) do
+        feature_number = rand(number_of_features)
+        info = statistics[feature_number]
+        split = rand * info.rng + info.min
+        classifiers << WeakClassifier.new(feature_number, split)
       end
       classifiers
     end
 
-    def generate_all_possible_classifiers
+    def generate_all_possible_classifiers(samples, number_of_features)
      if @classifiers_cache.size == 0
        matrix = []
-        0.upto
+        0.upto(number_of_features - 1) do
          matrix << []
        end
        samples.each do |sample|
-          0.upto
+          0.upto(number_of_features - 1) do |i|
            sample_value = sample[i]
            matrix[i] << sample_value
          end