adaboost 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2144b2d6abb1c701c8dbf955673a9a5ffd96e00e
4
+ data.tar.gz: 74dbd52e82039ca79e2dcfa3ef8c2b2ac20bd3e7
5
+ SHA512:
6
+ metadata.gz: 035ca3856d5343afde2f968f43cc5de165f0f69fb1f8284c87fe2ba4525599221eb711ffeced58346ddbfbf2b89f640a240fbf336c3c2e367d6e6f99826a932b
7
+ data.tar.gz: d6f6adc39fa2327ea37e986f759976d83c9c0efda519f429af0d9325927fffe9dc8c540b84556ba890434fd59674b550191dd7d1f8df8ad9e39759795e697be8
@@ -0,0 +1,74 @@
1
module AdaBoost

  # AdaBoost ensemble classifier: trains a sequence of weak classifiers,
  # re-weighting the training samples after each round so later learners
  # focus on previously misclassified samples.
  class AdaBoost

    attr_reader :weak_classifiers, :y_index

    # number_of_classifiers: number of boosting rounds to run.
    # y_index: index of the label (-1/+1) inside each sample array.
    def initialize number_of_classifiers, y_index
      @weak_classifiers = []
      @weak_learner = WeakLearner.new y_index
      @number_of_classifiers = number_of_classifiers
      @weights = []
      @y_index = y_index
    end

    # Assigns the initial sample weights. By default every sample gets 1/N;
    # with cost-sensitive learning enabled, each class's total weight is
    # balanced by the other class's rate so rare classes are not swamped.
    def initialize_weights samples
      samples_size = samples.size.to_f
      negative_weight = 1 / samples_size
      positive_weight = negative_weight
      if Config::INCORPORATE_COST_SENSITIVE_LEARNING
        analyzer = FeaturesAnalyzer.new @y_index
        distribution = analyzer.analyze(samples).distribution
        positive_rate = distribution.positive / samples_size
        negative_rate = distribution.negative / samples_size
        normalizing_constant = distribution.negative * positive_rate + distribution.positive * negative_rate
        positive_weight = positive_rate / normalizing_constant.to_f
        negative_weight = negative_rate / normalizing_constant.to_f
      end
      samples.each_with_index do |sample, i|
        y = sample[@y_index]
        # NOTE: negative samples intentionally receive positive_weight (and
        # vice versa): N_neg * positive_rate == N_pos * negative_rate, which
        # gives both classes equal total weight after normalization.
        if y == -1
          @weights[i] = positive_weight
        else
          @weights[i] = negative_weight
        end
      end
    end

    # Standard AdaBoost re-weighting: misclassified samples gain weight,
    # correctly classified ones lose it; weights are renormalized to sum 1.
    def update_weights weak_classifier, samples
      sum = 0.0
      samples.each_with_index do |sample, i|
        y = sample[@y_index]
        @weights[i] *= Math.exp -(weak_classifier.alpha) * weak_classifier.classify(sample) * y
        sum += @weights[i]
      end
      @weights.each_with_index do |_, i|
        @weights[i] /= sum
      end
    end

    # Trains the configured number of weak classifiers on +samples+.
    # (Stray debug `puts` statements removed.)
    def train samples
      if Config::OVER_SAMPLING_TRAINING_SET
        resampler = Resampler.new @y_index
        resampler.over_sample samples
      end
      initialize_weights samples
      0.upto @number_of_classifiers - 1 do
        weak_classifier = @weak_learner.generate_weak_classifier samples, @weights
        weak_classifier.compute_alpha
        update_weights weak_classifier, samples
        @weak_classifiers << weak_classifier
      end
    end

    # Weighted vote of all weak classifiers; the sign of the returned score
    # is the predicted class.
    def classify sample
      score = 0.0
      @weak_classifiers.each do |weak_classifier|
        score += weak_classifier.classify_with_alpha sample
      end
      score
    end
  end
end
@@ -0,0 +1,10 @@
1
module AdaBoost

  # Global tuning flags for the AdaBoost implementation.
  module Config
    # Number of candidate stumps sampled per round when random weak-classifier
    # generation is enabled (see USE_RANDOM_WEAK_CLASSIFIERS).
    NUMBER_OF_RANDOM_CLASSIFIERS = 100
    # When true, initial sample weights are balanced by class distribution
    # (see AdaBoost#initialize_weights).
    INCORPORATE_COST_SENSITIVE_LEARNING = true
    # When true, the minority class is over-sampled before training
    # (see Resampler#over_sample).
    OVER_SAMPLING_TRAINING_SET = false
    # Choose random split candidates instead of exhaustively enumerating
    # every observed feature value (see WeakLearner).
    USE_RANDOM_WEAK_CLASSIFIERS = false
    # Evaluator classifies against a threshold of half the alpha sum
    # instead of the sign of the raw ensemble score.
    USE_THRESHOLD_CLASSIFICATION = true
  end
end
@@ -0,0 +1,182 @@
1
module AdaBoost

  # 2x2 contingency (confusion) table for binary predictions, with the
  # standard derived rates. Rows index the true class, columns the predicted
  # class (0 = negative, 1 = positive).
  class ContingencyTable

    def initialize
      @table = [[0, 0], [0, 0]]
    end

    def true_positive
      @table[1][1]
    end

    def false_positive
      @table[0][1]
    end

    def true_negative
      @table[0][0]
    end

    def false_negative
      @table[1][0]
    end

    # Records one prediction: y is the true label, h the predicted label.
    def add_prediction y, h
      @table[class_to_index(y)][class_to_index(h)] += 1
    end

    def outcome_positive
      true_positive + false_positive
    end

    def outcome_negative
      true_negative + false_negative
    end

    def total_population
      @table[0][0] + @table[0][1] + @table[1][0] + @table[1][1]
    end

    def predicted_condition_positive
      true_positive + false_positive
    end

    def predicted_condition_negative
      false_negative + true_negative
    end

    def condition_positive
      true_positive + false_negative
    end

    def condition_negative
      false_positive + true_negative
    end

    def prevalence
      condition_positive / total_population.to_f
    end

    def true_positive_rate
      true_positive / condition_positive.to_f
    end

    def recall
      true_positive_rate
    end

    def sensitivity
      true_positive_rate
    end

    def false_positive_rate
      false_positive / condition_negative.to_f
    end

    def fall_out
      false_positive_rate
    end

    def false_negative_rate
      false_negative / condition_positive.to_f
    end

    def true_negative_rate
      true_negative / condition_negative.to_f
    end

    def specificity
      true_negative_rate
    end

    def accuracy
      (true_positive + true_negative) / total_population.to_f
    end

    def positive_predictive_value
      true_positive / outcome_positive.to_f
    end

    def precision
      positive_predictive_value
    end

    def false_discovery_rate
      false_positive / outcome_positive.to_f
    end

    def false_omission_rate
      false_negative / outcome_negative.to_f
    end

    def negative_predictive_value
      true_negative / outcome_negative.to_f
    end

    def positive_likelihood_ratio
      true_positive_rate / false_positive_rate.to_f
    end

    def negative_likelihood_ratio
      false_negative_rate / true_negative_rate.to_f
    end

    def diagnostic_odds_ratio
      positive_likelihood_ratio / negative_likelihood_ratio.to_f
    end

    # Human-readable report of all counts and derived rates.
    # BUG FIX: the accuracy line used '\' where '+' was intended, and the
    # positive likelihood ratio label escaped to '(LR)' instead of '(LR+)'.
    def to_s
      format = "\nTotal population: %d\t" \
        "\nCondition positive: %d\t" \
        "\nCondition negative: %d\t" \
        "\nPredicted Condition positive: %d\t" \
        "\nPredicted Condition negative: %d\t" \
        "\nTrue positive: %d\t" \
        "\nTrue negative: %d\t" \
        "\nFalse Negative: %d\t" \
        "\nFalse Positive: %d\t" \
        "\nPrevalence = Σ Condition positive / Σ Total population: %f\t" \
        "\nTrue positive rate (TPR) = Σ True positive / Σ Condition positive: %f\t" \
        "\nFalse positive rate (FPR) = Σ False positive / Σ Condition negative: %f\t" \
        "\nFalse negative rate (FNR) = Σ False negative / Σ Condition positive: %f\t" \
        "\nTrue negative rate (TNR) = Σ True negative / Σ Condition negative: %f\t" \
        "\nAccuracy (ACC) = (Σ True positive + Σ True negative) / Σ Total population: %f\t" \
        "\nPositive predictive value (PPV) = Σ True positive / Σ Test outcome positive: %f\t" \
        "\nFalse discovery rate (FDR) = Σ False positive / Σ Test outcome positive: %f\t" \
        "\nFalse omission rate (FOR) = Σ False negative / Σ Test outcome negative: %f\t" \
        "\nNegative predictive value (NPV) = Σ True negative / Σ Test outcome negative: %f\t" \
        "\nPositive likelihood ratio (LR+) = TPR / FPR: %f\t" \
        "\nNegative likelihood ratio (LR−) = FNR / TNR: %f\t" \
        "\nDiagnostic odds ratio (DOR) = LR+ / LR−: %f\t"
      format %
        [
          total_population,
          condition_positive,
          condition_negative,
          predicted_condition_positive,
          predicted_condition_negative,
          true_positive,
          true_negative,
          false_negative,
          false_positive,
          prevalence,
          true_positive_rate,
          false_positive_rate,
          false_negative_rate,
          true_negative_rate,
          accuracy,
          positive_predictive_value,
          false_discovery_rate,
          false_omission_rate,
          negative_predictive_value,
          positive_likelihood_ratio,
          negative_likelihood_ratio,
          diagnostic_odds_ratio
        ]
    end

    # Maps a label (any positive number) to index 1, everything else to 0.
    def class_to_index k
      k > 0 ? 1 : 0
    end
  end
end
@@ -0,0 +1,66 @@
1
module AdaBoost

  # Evaluates a trained AdaBoost classifier on a test set, producing a
  # ContingencyTable of predictions versus true labels.
  class Evaluator

    def initialize classifier
      @classifier = classifier
      # Float::MAX marks the threshold as not yet computed (lazy init).
      @threshold = Float::MAX
    end

    # Classifies every sample in +test_set+ and tallies the outcomes.
    # Returns the populated ContingencyTable.
    def evaluate test_set
      contingency_table = ContingencyTable.new
      test_set.each do |sample|
        y = sample[@classifier.y_index]
        if Config::USE_THRESHOLD_CLASSIFICATION
          h = classify_using_threshold sample
        else
          # BUG FIX: was `e.classify_normally sample` — `e` was undefined
          # and this branch raised NameError whenever threshold
          # classification was disabled.
          h = classify_normally sample
        end
        contingency_table.add_prediction y, h
      end
      contingency_table
    end

    # Lazily computed decision threshold: half the sum of all alphas,
    # i.e. the midpoint of the ensemble's maximum positive vote mass.
    def threshold
      if @threshold == Float::MAX
        @threshold = 0
        @classifier.weak_classifiers.each do |weak_classifier|
          @threshold += weak_classifier.alpha / 2.0
        end
      end
      @threshold
    end

    # Sums the alphas of stumps voting positive and compares the total
    # against #threshold instead of zero.
    def classify_using_threshold sample
      score = 0.0
      @classifier.weak_classifiers.each do |weak_classifier|
        if sample[weak_classifier.feature_number] > weak_classifier.split
          score += weak_classifier.alpha
        end
      end
      score > threshold ? 1 : -1
    end

    # Sign of the raw ensemble score.
    # BUG FIX: was `@classifier.classify(sample > 0) ? 1 : -1` — the
    # parenthesis was misplaced, so the comparison applied to the sample
    # Array (raising NoMethodError) instead of to the returned score.
    def classify_normally sample
      @classifier.classify(sample) > 0 ? 1 : -1
    end

    # Feature numbers used by the ensemble's stumps, optionally de-duplicated.
    def used_feature_numbers unique = false
      used_feature_numbers = []
      @classifier.weak_classifiers.each do |weak_classifier|
        used_feature_numbers << weak_classifier.feature_number
      end
      unique ? used_feature_numbers.uniq : used_feature_numbers
    end

    # Map of feature number => how many stumps use that feature.
    def feature_occurrences
      used_numbers = used_feature_numbers
      occurrences = {}
      used_numbers.each do |number|
        occurrences[number] = 0 if occurrences[number].nil?
        occurrences[number] += 1
      end
      occurrences
    end
  end
end
@@ -0,0 +1,82 @@
1
module AdaBoost

  # Lightweight value containers shared across the library.
  Analyze = Struct.new(:statistics, :distribution)
  Distribution = Struct.new(:negative, :positive)
  FeatureStatistic = Struct.new(:min, :max, :sum, :avg, :vrn, :std, :rng)
  VariableRelations = Struct.new(:x, :y, :cov, :cor)

  # Computes per-feature descriptive statistics and the class distribution
  # of a labeled sample set. Features occupy indexes 0...y_index; the
  # label (-1/+1) sits at y_index.
  class FeaturesAnalyzer

    def initialize y_index
      @y_index = y_index
    end

    # Returns an Analyze struct holding one FeatureStatistic per feature
    # (min, max, sum, mean, sample variance, standard deviation, range)
    # plus the negative/positive sample counts.
    # Raises ArgumentError for an empty sample set or too-narrow samples.
    def analyze samples
      total = samples.size
      raise ArgumentError.new 'At least one sample is needed to analyze.' if total < 1
      feature_count = @y_index
      width = samples[0].size
      if feature_count < 1 || width < 2 || width <= @y_index
        raise ArgumentError.new 'At least 1 feature is needed to analyze.'
      end
      stats = Array.new(feature_count) do
        FeatureStatistic.new(Float::MAX, -Float::MAX, 0, 0, 0, 0)
      end
      tally = Distribution.new 0, 0
      # First pass: class counts plus min / max / sum per feature.
      samples.each do |sample|
        if sample[@y_index] == -1
          tally.negative += 1
        else
          tally.positive += 1
        end
        stats.each_with_index do |stat, i|
          value = sample[i]
          stat.min = value if value < stat.min
          stat.max = value if value > stat.max
          stat.sum += value
        end
      end
      stats.each do |stat|
        stat.avg = stat.sum / total.to_f
        stat.rng = (stat.max - stat.min).abs
      end
      # Second pass: accumulate squared deviations for the variance.
      samples.each do |sample|
        stats.each_with_index do |stat, i|
          stat.vrn += (stat.avg - sample[i]) ** 2
        end
      end
      stats.each do |stat|
        stat.vrn /= (total - 1).to_f
        stat.std = Math.sqrt stat.vrn
      end
      Analyze.new stats, tally
    end

    # Covariance and Pearson correlation between feature columns x and y,
    # given the per-feature statistics from #analyze.
    def relations x, y, samples, statistics
      acc = 0.0
      samples.each do |sample|
        acc += (sample[x].to_f - statistics[x].avg) * (sample[y].to_f - statistics[y].avg)
      end
      cov = acc / (samples.size - 1).to_f
      cor = cov / (statistics[x].std * statistics[y].std).to_f
      VariableRelations.new x, y, cov, cor
    end
  end
end
@@ -0,0 +1,31 @@
1
module AdaBoost

  # Balances a training set by duplicating minority-class samples in place.
  class Resampler

    def initialize y_index
      @y_index = y_index
    end

    # Appends references to minority-class samples onto +samples+ until both
    # classes are equally represented.
    # BUG FIX: the original appended to +samples+ while iterating it with
    # #each, so the freshly appended copies were themselves visited and
    # re-duplicated — early minority samples could be copied many times while
    # later ones were never reached. We now snapshot the minority samples
    # first and cycle through them, spreading duplicates evenly.
    def over_sample samples
      dist = distribution samples
      difference = (dist.negative - dist.positive).abs
      return if difference <= 0
      # Majority label: positive when positives outnumber negatives.
      majority = dist.negative < dist.positive ? 1.0 : -1.0
      minority_samples = samples.reject { |sample| sample[@y_index] == majority }
      return if minority_samples.empty?
      difference.times do |i|
        samples << minority_samples[i % minority_samples.size]
      end
    end

    # Class distribution of +instances+, computed via FeaturesAnalyzer.
    def distribution instances
      analyzer = FeaturesAnalyzer.new @y_index
      analyzer.analyze(instances).distribution
    end
  end
end
@@ -0,0 +1,31 @@
1
module AdaBoost

  # A decision stump: predicts +1 when a single feature exceeds its split
  # value and -1 otherwise. Carries the weighted training error and the
  # alpha (vote weight) assigned during boosting.
  class WeakClassifier

    attr_accessor :error
    attr_reader :feature_number, :split, :alpha

    def initialize feature_number, split
      @feature_number = feature_number
      @split = split
      @error = 0.0
      @alpha = 0.0
    end

    # Vote weight derived from the weighted error: 0.5 * ln((1 - e) / e).
    def compute_alpha
      @alpha = 0.5 * Math.log((1.0 - @error) / @error)
    end

    # Stump decision: +1 strictly above the split, -1 at or below it.
    def classify sample
      if sample[@feature_number] > @split
        1
      else
        -1
      end
    end

    # Stump decision scaled by this stump's vote weight.
    def classify_with_alpha sample
      classify(sample) * @alpha
    end

    # Accumulates weighted error mass from one misclassified sample.
    def increase_error amount
      @error += amount
    end
  end
end
@@ -0,0 +1,87 @@
1
module AdaBoost

  # Produces candidate decision stumps (WeakClassifier) and selects, per
  # boosting round, the stump with the lowest weighted error.
  class WeakLearner

    def initialize y_index
      @y_index = y_index
      @analyzer = FeaturesAnalyzer.new y_index
      # Exhaustively enumerated candidate stumps, built once on first use
      # and reused across rounds (chosen stumps are removed as picked).
      @classifiers_cache = []
    end

    # Per-feature statistics for +samples+.
    # NOTE: the misspelled name is kept for backward compatibility; prefer
    # the #features_statistics alias in new code.
    def features_satistics samples
      @analyzer.analyze(samples).statistics
    end
    alias_method :features_statistics, :features_satistics

    # Returns the candidate stump with the smallest weighted error on
    # +samples+ under the current +weights+.
    # Raises ArgumentError when there are no samples or no features.
    def generate_weak_classifier samples, weights
      number_of_samples = samples.size
      if number_of_samples < 1
        raise ArgumentError.new 'At least one sample is needed to generate.'
      end
      number_of_features = @y_index
      sample_size = samples[0].size
      if number_of_features < 1 or sample_size < 2 or sample_size <= @y_index
        raise ArgumentError.new 'At least 1 feature is needed to generate.'
      end
      # (Dead `classifiers = []` initializer removed — the branch always
      # assigns the list.)
      classifiers =
        if Config::USE_RANDOM_WEAK_CLASSIFIERS
          generate_random_classifiers samples, number_of_features
        else
          generate_all_possible_classifiers samples, number_of_features
        end
      best_index = -1
      best_error = Float::MAX
      classifiers.each_with_index do |classifier, i|
        classifier.error = 0.0
        samples.each_with_index do |sample, j|
          y = sample[@y_index]
          if classifier.classify(sample) != y
            classifier.increase_error weights[j]
          end
        end
        if classifier.error < best_error
          best_error = classifier.error
          best_index = i
        end
      end
      best = classifiers[best_index]
      # Exhaustive mode reuses the cache, so remove the chosen stump to
      # keep it from being selected again in a later round.
      if !Config::USE_RANDOM_WEAK_CLASSIFIERS
        classifiers.delete_at best_index
      end
      best
    end

    # Random candidate stumps: a random feature with a split drawn
    # uniformly from that feature's observed value range.
    def generate_random_classifiers samples, number_of_features
      classifiers = []
      statistics = features_satistics samples
      0.upto Config::NUMBER_OF_RANDOM_CLASSIFIERS - 1 do
        feature_number = rand number_of_features
        info = statistics[feature_number]
        split = rand * info.rng + info.min
        classifiers << WeakClassifier.new(feature_number, split)
      end
      classifiers
    end

    # One candidate stump per unique observed value of every feature.
    # Built lazily on first call and cached for subsequent rounds.
    def generate_all_possible_classifiers samples, number_of_features
      if @classifiers_cache.empty?
        columns = Array.new(number_of_features) { [] }
        samples.each do |sample|
          0.upto number_of_features - 1 do |i|
            columns[i] << sample[i]
          end
        end
        columns.each_with_index do |column, i|
          column.uniq.each do |uniq_value|
            @classifiers_cache << WeakClassifier.new(i, uniq_value)
          end
        end
      end
      @classifiers_cache
    end
  end
end
data/lib/adaboost.rb ADDED
@@ -0,0 +1,11 @@
1
# Entry point for the adaboost gem: loads every component and declares the
# top-level namespace. The '.rb' extensions are omitted per convention;
# Kernel#require resolves and de-duplicates feature names identically
# with or without the suffix.
require 'adaboost/adaboost'
require 'adaboost/config'
require 'adaboost/contingency_table'
require 'adaboost/evaluator'
require 'adaboost/features_analyzer'
require 'adaboost/resampler'
require 'adaboost/weak_classifier'
require 'adaboost/weak_learner'

module AdaBoost
end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: adaboost
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Dalmir da Silva
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: AdaBoost classifier!
14
+ email: dalmirdasilva@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/adaboost.rb
20
+ - lib/adaboost/adaboost.rb
21
+ - lib/adaboost/config.rb
22
+ - lib/adaboost/contingency_table.rb
23
+ - lib/adaboost/evaluator.rb
24
+ - lib/adaboost/features_analyzer.rb
25
+ - lib/adaboost/resampler.rb
26
+ - lib/adaboost/weak_classifier.rb
27
+ - lib/adaboost/weak_learner.rb
28
+ homepage: http://dalmirdasilva.com/adaboost-classifier
29
+ licenses:
30
+ - MIT
31
+ metadata: {}
32
+ post_install_message:
33
+ rdoc_options: []
34
+ require_paths:
35
+ - lib
36
+ required_ruby_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ requirements: []
47
+ rubyforge_project:
48
+ rubygems_version: 2.5.1
49
+ signing_key:
50
+ specification_version: 4
51
+ summary: AdaBoost classifier!
52
+ test_files: []