RubyGems - rumale - Versions diffs - 0.18.1 → 0.18.2 - Mend

rumale 0.18.1 → 0.18.2

Files changed (37) hide show

checksums.yaml +4 -4
data/.travis.yml +0 -1
data/CHANGELOG.md +16 -4
data/lib/rumale.rb +6 -1
data/lib/rumale/clustering/dbscan.rb +0 -17
data/lib/rumale/clustering/gaussian_mixture.rb +0 -21
data/lib/rumale/clustering/hdbscan.rb +0 -15
data/lib/rumale/clustering/k_means.rb +0 -17
data/lib/rumale/clustering/k_medoids.rb +0 -19
data/lib/rumale/clustering/power_iteration.rb +0 -19
data/lib/rumale/clustering/single_linkage.rb +0 -17
data/lib/rumale/clustering/spectral_clustering.rb +0 -17
data/lib/rumale/evaluation_measure/function.rb +34 -0
data/lib/rumale/kernel_approximation/rbf.rb +0 -19
data/lib/rumale/kernel_machine/kernel_pca.rb +0 -21
data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -15
data/lib/rumale/kernel_machine/kernel_svc.rb +0 -21
data/lib/rumale/naive_bayes/base_naive_bayes.rb +47 -0
data/lib/rumale/naive_bayes/bernoulli_nb.rb +82 -0
data/lib/rumale/naive_bayes/complement_nb.rb +85 -0
data/lib/rumale/naive_bayes/gaussian_nb.rb +69 -0
data/lib/rumale/naive_bayes/multinomial_nb.rb +74 -0
data/lib/rumale/naive_bayes/negation_nb.rb +71 -0
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -19
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -17
data/lib/rumale/neural_network/adam.rb +0 -19
data/lib/rumale/preprocessing/bin_discretizer.rb +0 -15
data/lib/rumale/preprocessing/label_binarizer.rb +0 -15
data/lib/rumale/preprocessing/label_encoder.rb +0 -15
data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -15
data/lib/rumale/preprocessing/min_max_scaler.rb +0 -17
data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -19
data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -13
data/lib/rumale/preprocessing/standard_scaler.rb +0 -15
data/lib/rumale/version.rb +1 -1
metadata +8 -3
data/lib/rumale/naive_bayes/naive_bayes.rb +0 -250

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1b344bc829bda6e2b5f60baba450f3c38d5f90ebf0c0cfccd02d2894189d540e
-  data.tar.gz: 91fcce138ced31e94363b6f137bd66b6e1637d1fb5d03a1b1b531a3e1d2a3502
+  metadata.gz: 4028734b509ce8a05301fe152bdcc2a26a0e318e692a866d3bc1669ca7e5f859
+  data.tar.gz: c2dfc3614f786b59ce72a96adc2ca16b140a72e42f4851cb2cba8cebb3cab8dc
 SHA512:
-  metadata.gz: 2d2176c440222ba9b1265ce97b575649670c4067da4ddd51529b219ba63d8e15852227843474ae40c8af328d7c366b5ee49f63dac5982c0e1db96157315dd256
-  data.tar.gz: 2dc2d4d16ed22e837e603cd2b7c50a06e3c57d3c5882f47df671ea096aadbab6cae8c0ca50d940f9545d59163fa3034a667d80df84f600900b2c29c7b519dded
+  metadata.gz: 53b09b0eb4f783e5c9980023ad90749cee0ad7b77de590b0622a97bbc4899d96fdd6b3181a287026c547925d0938af925754c81a6eca5e9aca5746fb65699632
+  data.tar.gz: 5f45554a4d2fb0486a2c84a317c8faf1ca98a001c175380e12efcaba9ea1a909c337b9674f6d094ca25df2f9fdb372b7e460631a4fa99d19bce68e10200dc213

data/.travis.yml CHANGED

@@ -1,4 +1,3 @@
-sudo: false
 os: linux
 dist: xenial
 language: ruby

data/CHANGELOG.md CHANGED

@@ -1,12 +1,24 @@
+# 0.18.2
+- Change file composition of naive bayes classifiers.
+- Add classifier class for [ComplementNaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes/ComplementNB.html).
+- Add classifier class for [NegationNaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes/NegationNB.html).
+- Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html#confusion_matrix-class_method) for calculating confusion matrix.
+- Delete unneeded marshal dump and load methods.
+  - [Clustering](https://yoshoku.github.io/rumale/doc/Rumale/Clustering.html),
+  [KernelApproximation](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation.html),
+  [KernelMachine](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine.html),
+  [NearestNeighbors](https://yoshoku.github.io/rumale/doc/Rumale/NearestNeighbors.html),
+  [Preprocessing](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing.html).
 # 0.18.1
 - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html#classification_report-class_method) for generating summary of classification performance.
 - Delete marshal dump and load methods that existed only for documentation.
   These methods were written in estimator classes to indicate in the API documentation that a learned model can be saved with Marshal.
   Marshal can save the learned model even without these methods, so they are being deleted incrementally.
-  - [Manifold](https://yoshoku.github.io/rumale/doc/Rumale/Manifold.html)
-  - [NaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html)
-  - [PolynomialModel](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel.html)
-  - [Decomposition](https://yoshoku.github.io/doc/Rumale/Decomposition.html)
+  - [Manifold](https://yoshoku.github.io/rumale/doc/Rumale/Manifold.html),
+  [NaiveBayes](https://yoshoku.github.io/rumale/doc/Rumale/NaiveBayes.html),
+  [PolynomialModel](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel.html),
+  [Decomposition](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition.html).
 # 0.18.0
 - Add transformer class for [FisherDiscriminantAnalysis](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/FisherDiscriminantAnalysis.html).

data/lib/rumale.rb CHANGED

@@ -47,7 +47,12 @@ require 'rumale/multiclass/one_vs_rest_classifier'
 require 'rumale/nearest_neighbors/vp_tree'
 require 'rumale/nearest_neighbors/k_neighbors_classifier'
 require 'rumale/nearest_neighbors/k_neighbors_regressor'
-require 'rumale/naive_bayes/naive_bayes'
+require 'rumale/naive_bayes/base_naive_bayes'
+require 'rumale/naive_bayes/bernoulli_nb'
+require 'rumale/naive_bayes/complement_nb'
+require 'rumale/naive_bayes/gaussian_nb'
+require 'rumale/naive_bayes/multinomial_nb'
+require 'rumale/naive_bayes/negation_nb'
 require 'rumale/tree/node'
 require 'rumale/tree/base_decision_tree'
 require 'rumale/tree/decision_tree_classifier'

data/lib/rumale/clustering/dbscan.rb CHANGED

@@ -70,23 +70,6 @@ module Rumale
         labels
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          core_sample_ids: @core_sample_ids,
-          labels: @labels }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @core_sample_ids = obj[:core_sample_ids]
-        @labels = obj[:labels]
-        nil
-      end
       private
       def partial_fit(x)

data/lib/rumale/clustering/gaussian_mixture.rb CHANGED

@@ -114,27 +114,6 @@ module Rumale
         fit(x).predict(x)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          n_iter: @n_iter,
-          weights: @weights,
-          means: @means,
-          covariances: @covariances }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @n_iter = obj[:n_iter]
-        @weights = obj[:weights]
-        @means = obj[:means]
-        @covariances = obj[:covariances]
-        nil
-      end
       private
       def assign_cluster(memberships)

data/lib/rumale/clustering/hdbscan.rb CHANGED

@@ -71,21 +71,6 @@ module Rumale
         @labels = partial_fit(distance_mat)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          labels: @labels }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @labels = obj[:labels]
-        nil
-      end
       private
       # @!visibility private

data/lib/rumale/clustering/k_means.rb CHANGED

@@ -92,23 +92,6 @@ module Rumale
         predict(x)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          cluster_centers: @cluster_centers,
-          rng: @rng }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @cluster_centers = obj[:cluster_centers]
-        @rng = obj[:rng]
-        nil
-      end
       private
       def assign_cluster(x)

data/lib/rumale/clustering/k_medoids.rb CHANGED

@@ -111,25 +111,6 @@ module Rumale
         end
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          medoid_ids: @medoid_ids,
-          cluster_centers: @cluster_centers,
-          rng: @rng }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @medoid_ids = obj[:medoid_ids]
-        @cluster_centers = obj[:cluster_centers]
-        @rng = obj[:rng]
-        nil
-      end
       private
       def assign_cluster(distances_to_medoids)

data/lib/rumale/clustering/power_iteration.rb CHANGED

@@ -89,25 +89,6 @@ module Rumale
         @labels = line_kmeans_clustering(@embedding)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          embedding: @embedding,
-          labels: @labels,
-          n_iter: @n_iter }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @embedding = obj[:embedding]
-        @labels = obj[:labels]
-        @n_iter = obj[:n_iter]
-        nil
-      end
       private
       def embedded_space(affinity_mat, max_iter, tol)

data/lib/rumale/clustering/single_linkage.rb CHANGED

@@ -70,23 +70,6 @@ module Rumale
         @labels = partial_fit(distance_mat)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          labels: @labels,
-          hierarchy: @hierarchy }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @labels = obj[:labels]
-        @hierarchy = obj[:hierarchy]
-        nil
-      end
       private
       # @!visibility private

data/lib/rumale/clustering/spectral_clustering.rb CHANGED

@@ -92,23 +92,6 @@ module Rumale
         @labels = kmeans_clustering(normalized_embedding)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          embedding: @embedding,
-          labels: @labels }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @embedding = obj[:embedding]
-        @labels = obj[:labels]
-        nil
-      end
       private
       def embedded_space(affinity_mat, n_clusters)

data/lib/rumale/evaluation_measure/function.rb CHANGED

@@ -8,6 +8,40 @@ module Rumale
   module EvaluationMeasure
     module_function
+    # Calculate confusion matrix for evaluating classification performance.
+    #
+    # Element [i, j] of the result is the number of samples whose true label is the i-th label
+    # and whose predicted label is the j-th label, where the labels are the sorted unique values of y_true.
+    # The label set is built from y_true only, so a predicted label that never occurs in y_true
+    # has no column and is not counted anywhere in the matrix.
+    #
+    # @example
+    #   y_true = Numo::Int32[2, 0, 2, 2, 0, 1]
+    #   y_pred = Numo::Int32[0, 0, 2, 2, 0, 2]
+    #   p confusion_matrix(y_true, y_pred)
+    #
+    #   # Numo::Int32#shape=[3,3]
+    #   # [[2, 0, 0],
+    #   #  [0, 0, 1],
+    #   #  [1, 0, 2]]
+    #
+    # @param y_true [Numo::Int32] (shape: [n_samples]) The ground truth labels.
+    # @param y_pred [Numo::Int32] (shape: [n_samples]) The predicted labels.
+    # @return [Numo::Int32] (shape: [n_classes, n_classes]) The confusion matrix
+    #   (rows: true labels, columns: predicted labels, both in ascending label order).
+    def confusion_matrix(y_true, y_pred)
+      y_true = Rumale::Validation.check_convert_label_array(y_true)
+      y_pred = Rumale::Validation.check_convert_label_array(y_pred)
+      # Row and column order: ascending unique labels observed in y_true.
+      labels = y_true.to_a.uniq.sort
+      n_labels = labels.size
+      conf_mat = Numo::Int32.zeros(n_labels, n_labels)
+      labels.each_with_index do |lbl_a, i|
+        # Predictions for the samples whose true label is lbl_a.
+        y_p = y_pred[y_true.eq(lbl_a)]
+        labels.each_with_index do |lbl_b, j|
+          # How many of those samples were predicted as lbl_b.
+          conf_mat[i, j] = y_p.eq(lbl_b).count
+        end
+      end
+      conf_mat
+    end
     # Output a summary of classification performance for each class.
     #
     # @example

data/lib/rumale/kernel_approximation/rbf.rb CHANGED

@@ -97,25 +97,6 @@ module Rumale
         projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
         Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data about RBF.
-      def marshal_dump
-        { params: @params,
-          random_mat: @random_mat,
-          random_vec: @random_vec,
-          rng: @rng }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @random_mat = obj[:random_mat]
-        @random_vec = obj[:random_vec]
-        @rng = obj[:rng]
-        nil
-      end
     end
   end
 end

data/lib/rumale/kernel_machine/kernel_pca.rb CHANGED

@@ -91,27 +91,6 @@ module Rumale
         transformed = centered_kernel_mat.dot(transform_mat)
         @params[:n_components] == 1 ? transformed[true, 0].dup : transformed
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          row_mean: @row_mean,
-          all_mean: @all_mean,
-          alphas: @alphas,
-          lambdas: @lambdas }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @row_mean = obj[:row_mean]
-        @all_mean = obj[:all_mean]
-        @alphas = obj[:alphas]
-        @lambdas = obj[:lambdas]
-        nil
-      end
     end
   end
 end

data/lib/rumale/kernel_machine/kernel_ridge.rb CHANGED

@@ -75,21 +75,6 @@ module Rumale
         x = check_convert_sample_array(x)
         x.dot(@weight_vec)
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        nil
-      end
     end
   end
 end

data/lib/rumale/kernel_machine/kernel_svc.rb CHANGED

@@ -161,27 +161,6 @@ module Rumale
         probs
       end
-      # Dump marshal data.
-      # @return [Hash] The marshal data about KernelSVC.
-      def marshal_dump
-        { params: @params,
-          weight_vec: @weight_vec,
-          prob_param: @prob_param,
-          classes: @classes,
-          rng: @rng }
-      end
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @params = obj[:params]
-        @weight_vec = obj[:weight_vec]
-        @prob_param = obj[:prob_param]
-        @classes = obj[:classes]
-        @rng = obj[:rng]
-        nil
-      end
       private
       def partial_fit(x, bin_y)

data/lib/rumale/naive_bayes/base_naive_bayes.rb ADDED

@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/classifier'
+module Rumale
+  # This module consists of the classes that implement naive bayes models.
+  module NaiveBayes
+    # BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
+    # This class is used internally.
+    #
+    # The methods below call decision_function(x) and read @classes, neither of which is defined
+    # in this class; presumably each concrete naive bayes classifier provides them — confirm against the subclasses.
+    class BaseNaiveBayes
+      include Base::BaseEstimator
+      include Base::Classifier
+      # Predict class labels for samples.
+      #
+      # For each sample, the class whose decision value is the largest is selected.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        x = check_convert_sample_array(x)
+        n_samples = x.shape.first
+        decision_values = decision_function(x)
+        # Map the column index of each row's maximum decision value back to its class label.
+        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
+      end
+      # Predict log-probability for samples.
+      #
+      # Each row of the decision values is normalized by subtracting the log of the sum of its exponentials.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
+      def predict_log_proba(x)
+        x = check_convert_sample_array(x)
+        n_samples, = x.shape
+        log_likelihoods = decision_function(x)
+        # NOTE(review): exp is applied to the raw values without shifting by the row maximum,
+        # so decision values of very large magnitude could underflow/overflow — confirm whether this matters.
+        log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
+      end
+      # Predict probability for samples.
+      #
+      # Computed as the exponential of the result of predict_log_proba.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        # x is validated here and again inside predict_log_proba.
+        x = check_convert_sample_array(x)
+        # NOTE(review): the purpose of .abs on an exponential is not evident from this code — confirm before removing.
+        Numo::NMath.exp(predict_log_proba(x)).abs
+      end
+    end
+  end
+end