RubyGems - rumale - Versions diffs - 0.18.1 → 0.18.2 - Mend

rumale 0.18.1 → 0.18.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +4 -4
data/.travis.yml +0 -1
data/CHANGELOG.md +16 -4
data/lib/rumale.rb +6 -1
data/lib/rumale/clustering/dbscan.rb +0 -17
data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
data/lib/rumale/clustering/hdbscan.rb +0 -15
data/lib/rumale/clustering/k_means.rb +0 -17
data/lib/rumale/clustering/k_medoids.rb +0 -19
data/lib/rumale/clustering/power_iteration.rb +0 -19
data/lib/rumale/clustering/single_linkage.rb +0 -17
data/lib/rumale/clustering/spectral_clustering.rb +0 -17
data/lib/rumale/evaluation_measure/function.rb +34 -0
data/lib/rumale/kernel_approximation/rbf.rb +0 -19
data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
data/lib/rumale/neural_network/adam.rb +0 -19
data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
data/lib/rumale/preprocessing/label_encoder.rb +0 -15
data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
data/lib/rumale/version.rb +1 -1
metadata +8 -3
data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250

data/lib/rumale/naive_bayes/bernoulli_nb.rb ADDED

@@ -0,0 +1,82 @@
+# frozen_string_literal: true
+require 'rumale/naive_bayes/base_naive_bayes'
+module Rumale
+  module NaiveBayes
+    # BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
+    class BernoulliNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+      # Create a new classifier with Bernoulli Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      # @param bin_threshold [Float] The threshold for binarizing features.
+      def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+        check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:bin_threshold] = bin_threshold
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [BernoulliNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
+        @class_priors = n_samples_each_class / n_samples
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_samples_each_class += 2.0 * @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
+        not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (
+            (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
+            (Numo::DFloat[*not_bin_x] * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end

data/lib/rumale/naive_bayes/complement_nb.rb ADDED

@@ -0,0 +1,85 @@
+# frozen_string_literal: true
+require 'rumale/naive_bayes/base_naive_bayes'
+module Rumale
+  module NaiveBayes
+    # ComplementNB is a class that implements Complement Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
+    class ComplementNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+      # Create a new classifier with Complement Naive Bayes.
+      #
+      # @param smoothing_param [Float] The smoothing parameter.
+      # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
+      def initialize(smoothing_param: 1.0, norm: false)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        check_params_boolean(norm: norm)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+        @params[:norm] = norm
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [ComplementNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
+        @class_log_probs = Numo::NMath.log(@class_priors)
+        compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
+        compl_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
+        feature_log_probs = Numo::NMath.log(@feature_probs)
+        @weights = if normalize?
+                     feature_log_probs / feature_log_probs.sum(1).reshape(n_classes, 1)
+                   else
+                     -feature_log_probs
+                   end
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        @class_log_probs + x.dot(@weights.transpose)
+      end
+      private
+      def normalize?
+        @params[:norm] == true
+      end
+    end
+  end
+end

data/lib/rumale/naive_bayes/gaussian_nb.rb ADDED

@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+require 'rumale/naive_bayes/base_naive_bayes'
+module Rumale
+  module NaiveBayes
+    # GaussianNB is a class that implements Gaussian Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::GaussianNB.new
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    class GaussianNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+      # Return the mean vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :means
+      # Return the variance vectors of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :variances
+      # Create a new classifier with Gaussian Naive Bayes.
+      def initialize
+        @params = {} # the constructor takes no hyperparameters, so the parameter hash starts empty
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [GaussianNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
+        @variances = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].var(0) }]
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) - 0.5 * (
+            Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
+            ((x - @means[l, true])**2 / @variances[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end

data/lib/rumale/naive_bayes/multinomial_nb.rb ADDED

@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+require 'rumale/naive_bayes/base_naive_bayes'
+module Rumale
+  module NaiveBayes
+    # MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - C. D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press, 2008.
+    class MultinomialNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+      # Create a new classifier with Multinomial Naive Bayes.
+      #
+      # @param smoothing_param [Float] The Laplace smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [MultinomialNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
+        count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
+        count_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = count_features / count_features.sum(1).reshape(n_classes, 1)
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        n_classes = @classes.size
+        bin_x = x.gt(0)
+        log_likelihoods = Array.new(n_classes) do |l|
+          Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
+        end
+        Numo::DFloat[*log_likelihoods].transpose
+      end
+    end
+  end
+end

data/lib/rumale/naive_bayes/negation_nb.rb ADDED

@@ -0,0 +1,71 @@
+# frozen_string_literal: true
+require 'rumale/naive_bayes/base_naive_bayes'
+module Rumale
+  module NaiveBayes
+    # NegationNB is a class that implements Negation Naive Bayes classifier.
+    #
+    # @example
+    #   estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
+    #   estimator.fit(training_samples, training_labels)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
+    class NegationNB < BaseNaiveBayes
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+      # Return the prior probabilities of the classes.
+      # @return [Numo::DFloat] (shape: [n_classes])
+      attr_reader :class_priors
+      # Return the conditional probabilities for features of each class.
+      # @return [Numo::DFloat] (shape: [n_classes, n_features])
+      attr_reader :feature_probs
+      # Create a new classifier with Negation Naive Bayes.
+      #
+      # @param smoothing_param [Float] The smoothing parameter.
+      def initialize(smoothing_param: 1.0)
+        check_params_numeric(smoothing_param: smoothing_param)
+        check_params_positive(smoothing_param: smoothing_param)
+        @params = {}
+        @params[:smoothing_param] = smoothing_param
+      end
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
+      #   to be used for fitting the model.
+      # @return [NegationNB] The learned classifier itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_label_array(y)
+        check_sample_label_size(x, y)
+        n_samples, = x.shape
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.fdiv(n_samples) }]
+        @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))
+        compl_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.ne(l).where, true].sum(0) }]
+        compl_features += @params[:smoothing_param]
+        n_classes = @classes.size
+        @feature_probs = compl_features / compl_features.sum(1).reshape(n_classes, 1)
+        @weights = Numo::NMath.log(@feature_probs)
+        self
+      end
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
+      def decision_function(x)
+        x = check_convert_sample_array(x)
+        @class_log_probs - x.dot(@weights.transpose)
+      end
+    end
+  end
+end

data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb CHANGED

@@ -127,25 +127,6 @@ module Rumale
         n_samples = x.shape[0]
         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data about KNeighborsClassifier.
-      def marshal_dump
-        { params: @params,
-          prototypes: @prototypes,
-          labels: @labels,
-          classes: @classes }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @prototypes = obj[:prototypes]
-        @labels = obj[:labels]
-        @classes = obj[:classes]
-        nil
-      end
     end
   end
 end

data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb CHANGED

@@ -101,23 +101,6 @@ module Rumale
         end
         Numo::DFloat[*predicted_values]
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data about KNeighborsRegressor.
-      def marshal_dump
-        { params: @params,
-          prototypes: @prototypes,
-          values: @values }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @prototypes = obj[:prototypes]
-        @values = obj[:values]
-        nil
-      end
     end
   end
 end

data/lib/rumale/neural_network/adam.rb CHANGED

@@ -50,25 +50,6 @@ module Rumale
           weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
         end
-        # Dump marshal data.
-        # @return [Hash] The marshal data.
-        # def marshal_dump
-        #  { params: @params,
-        #    fst_moment: @fst_moment,
-        #    sec_moment: @sec_moment,
-        #    iter: @iter }
-        # end
-        # Load marshal data.
-        # @return [nil]
-        # def marshal_load(obj)
-        #  @params = obj[:params]
-        #  @fst_moment = obj[:fst_moment]
-        #  @sec_moment = obj[:sec_moment]
-        #  @iter = obj[:iter]
-        #  nil
-        # end
       end
     end
   end