RubyGems - rumale - Versions diffs - 0.23.3 → 0.24.0 - Mend

rumale 0.23.3 → 0.24.0

Files changed (142) hide show

checksums.yaml +4 -4
data/LICENSE.txt +5 -1
data/README.md +3 -288
data/lib/rumale/version.rb +1 -1
data/lib/rumale.rb +20 -131
metadata +252 -150
data/CHANGELOG.md +0 -643
data/CODE_OF_CONDUCT.md +0 -74
data/ext/rumale/extconf.rb +0 -37
data/ext/rumale/rumaleext.c +0 -545
data/ext/rumale/rumaleext.h +0 -12
data/lib/rumale/base/base_estimator.rb +0 -49
data/lib/rumale/base/classifier.rb +0 -36
data/lib/rumale/base/cluster_analyzer.rb +0 -31
data/lib/rumale/base/evaluator.rb +0 -17
data/lib/rumale/base/regressor.rb +0 -36
data/lib/rumale/base/splitter.rb +0 -21
data/lib/rumale/base/transformer.rb +0 -22
data/lib/rumale/clustering/dbscan.rb +0 -123
data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
data/lib/rumale/clustering/hdbscan.rb +0 -291
data/lib/rumale/clustering/k_means.rb +0 -122
data/lib/rumale/clustering/k_medoids.rb +0 -141
data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
data/lib/rumale/clustering/power_iteration.rb +0 -127
data/lib/rumale/clustering/single_linkage.rb +0 -203
data/lib/rumale/clustering/snn.rb +0 -76
data/lib/rumale/clustering/spectral_clustering.rb +0 -115
data/lib/rumale/dataset.rb +0 -246
data/lib/rumale/decomposition/factor_analysis.rb +0 -150
data/lib/rumale/decomposition/fast_ica.rb +0 -188
data/lib/rumale/decomposition/nmf.rb +0 -124
data/lib/rumale/decomposition/pca.rb +0 -159
data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
data/lib/rumale/ensemble/voting_classifier.rb +0 -126
data/lib/rumale/ensemble/voting_regressor.rb +0 -82
data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
data/lib/rumale/evaluation_measure/f_score.rb +0 -50
data/lib/rumale/evaluation_measure/function.rb +0 -147
data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
data/lib/rumale/evaluation_measure/precision.rb +0 -50
data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
data/lib/rumale/evaluation_measure/purity.rb +0 -40
data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
data/lib/rumale/evaluation_measure/recall.rb +0 -50
data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
data/lib/rumale/kernel_approximation/rbf.rb +0 -102
data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
data/lib/rumale/linear_model/base_sgd.rb +0 -285
data/lib/rumale/linear_model/elastic_net.rb +0 -119
data/lib/rumale/linear_model/lasso.rb +0 -115
data/lib/rumale/linear_model/linear_regression.rb +0 -201
data/lib/rumale/linear_model/logistic_regression.rb +0 -275
data/lib/rumale/linear_model/nnls.rb +0 -137
data/lib/rumale/linear_model/ridge.rb +0 -209
data/lib/rumale/linear_model/svc.rb +0 -213
data/lib/rumale/linear_model/svr.rb +0 -132
data/lib/rumale/manifold/mds.rb +0 -155
data/lib/rumale/manifold/tsne.rb +0 -222
data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
data/lib/rumale/metric_learning/mlkr.rb +0 -161
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
data/lib/rumale/model_selection/cross_validation.rb +0 -125
data/lib/rumale/model_selection/function.rb +0 -42
data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
data/lib/rumale/model_selection/group_k_fold.rb +0 -93
data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
data/lib/rumale/model_selection/k_fold.rb +0 -81
data/lib/rumale/model_selection/shuffle_split.rb +0 -90
data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
data/lib/rumale/model_selection/time_series_split.rb +0 -91
data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
data/lib/rumale/neural_network/adam.rb +0 -56
data/lib/rumale/neural_network/base_mlp.rb +0 -248
data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
data/lib/rumale/pairwise_metric.rb +0 -152
data/lib/rumale/pipeline/feature_union.rb +0 -69
data/lib/rumale/pipeline/pipeline.rb +0 -175
data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
data/lib/rumale/preprocessing/binarizer.rb +0 -60
data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
data/lib/rumale/preprocessing/label_encoder.rb +0 -79
data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
data/lib/rumale/probabilistic_output.rb +0 -114
data/lib/rumale/tree/base_decision_tree.rb +0 -150
data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
data/lib/rumale/tree/node.rb +0 -39
data/lib/rumale/utils.rb +0 -42
data/lib/rumale/validation.rb +0 -128
data/lib/rumale/values.rb +0 -13

data/lib/rumale/decomposition/nmf.rb DELETED Viewed

@@ -1,124 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/utils'
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-module Rumale
-  module Decomposition
-    # NMF is a class that implements Non-negative Matrix Factorization.
-    #
-    # @example
-    #   decomposer = Rumale::Decomposition::NMF.new(n_components: 2)
-    #   representation = decomposer.fit_transform(samples)
-    #
-    # *Reference*
-    # - Xu, W., Liu, X., and Gong, Y., "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
-    class NMF
-      include Base::BaseEstimator
-      include Base::Transformer
-      # The learned factorization (basis) matrix. It is nil until the model has been fitted.
-      # @return [Numo::DFloat] (shape: [n_components, n_features])
-      attr_reader :components
-      # The random generator built from the random_seed parameter.
-      # @return [Random]
-      attr_reader :rng
-      # Build a transformer that performs Non-negative Matrix Factorization.
-      #
-      # @param n_components [Integer] The number of components.
-      # @param max_iter [Integer] The maximum number of multiplicative-update iterations.
-      # @param tol [Float] The tolerance of the termination criterion (mean squared reconstruction error).
-      # @param eps [Float] A small value added to denominators to avoid division by zero.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      #   When nil, a seed is obtained from Kernel#srand.
-      def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
-        check_params_numeric(n_components: n_components, max_iter: max_iter, tol: tol, eps: eps)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol, eps: eps)
-        @params = {
-          n_components: n_components,
-          max_iter: max_iter,
-          tol: tol,
-          eps: eps,
-          # srand is consulted only when the caller did not supply a seed.
-          random_seed: random_seed || srand
-        }
-        @components = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      # Learn the factorization matrix from the training data.
-      #
-      # @overload fit(x) -> NMF
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @return [NMF] The learned transformer itself.
-      def fit(x, _y = nil)
-        partial_fit(check_convert_sample_array(x))
-        self
-      end
-      # Learn the factorization matrix and return the coefficients obtained for the training data.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
-      def fit_transform(x, _y = nil)
-        samples = check_convert_sample_array(x)
-        partial_fit(samples)
-      end
-      # Transform the given data with the learned model.
-      # The optimization is run again with the learned components held fixed.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def transform(x)
-        samples = check_convert_sample_array(x)
-        partial_fit(samples, update_comps: false)
-      end
-      # Map transformed data back into the original feature space.
-      #
-      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
-      def inverse_transform(z)
-        check_convert_sample_array(z).dot(@components)
-      end
-      private
-      # Run the multiplicative update rules and return the coefficient matrix.
-      # When update_comps is false, @components is left untouched and only the coefficients are optimized.
-      def partial_fit(x, update_comps: true)
-        n_samples, n_features = x.shape
-        n_components = @params[:n_components]
-        eps = @params[:eps]
-        scale = Math.sqrt(x.mean / n_components)
-        # Work on a duplicate so that the state of @rng itself is never advanced.
-        generator = @rng.dup
-        # The components are drawn before the coefficients; the order of the draws matters for reproducibility.
-        if update_comps
-          @components = Rumale::Utils.rand_uniform([n_components, n_features], generator) * scale
-        end
-        coefficients = Rumale::Utils.rand_uniform([n_samples, n_components], generator) * scale
-        @params[:max_iter].times do
-          # Update the components (skipped when only transforming).
-          if update_comps
-            numerator = coefficients.transpose.dot(x)
-            denominator = coefficients.transpose.dot(coefficients).dot(@components) + eps
-            @components *= (numerator / denominator)
-          end
-          # Update the coefficients.
-          numerator = x.dot(@components.transpose)
-          denominator = coefficients.dot(@components).dot(@components.transpose) + eps
-          coefficients *= (numerator / denominator)
-          # Rescale: move the row norms of the components into the coefficients.
-          norm = Numo::NMath.sqrt((@components**2).sum(1)) + eps
-          @components /= norm.expand_dims(1) if update_comps
-          coefficients *= norm
-          # Stop once the mean squared reconstruction error falls below the tolerance.
-          reconstruction_error = ((x - coefficients.dot(@components))**2).sum(1).mean
-          break if reconstruction_error < @params[:tol]
-        end
-        coefficients
-      end
-    end
-  end
-end

data/lib/rumale/decomposition/pca.rb DELETED Viewed

@@ -1,159 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-module Rumale
-  # Module for matrix decomposition algorithms.
-  module Decomposition
-    # PCA is a class that implements Principal Component Analysis.
-    #
-    # @example
-    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'fpt')
-    #   representation = decomposer.fit_transform(samples)
-    #
-    #   # If Numo::Linalg is installed, you can specify 'evd' for the solver option.
-    #   require 'numo/linalg/autoloader'
-    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2, solver: 'evd')
-    #   representation = decomposer.fit_transform(samples)
-    #
-    #   # If Numo::Linalg is loaded and the solver option is not given,
-    #   # the 'evd' solver is chosen automatically.
-    #   decomposer = Rumale::Decomposition::PCA.new(n_components: 2)
-    #   representation = decomposer.fit_transform(samples)
-    #
-    # *Reference*
-    # - Sharma, A., and Paliwal, K K., "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
-    class PCA
-      include Base::BaseEstimator
-      include Base::Transformer
-      # Returns the principal components.
-      # @return [Numo::DFloat] (shape: [n_components, n_features], or [n_features] when n_components is 1)
-      attr_reader :components
-      # Returns the mean vector.
-      # @return [Numo::DFloat] (shape: [n_features])
-      attr_reader :mean
-      # Return the random generator.
-      # @return [Random]
-      attr_reader :rng
-      # Create a new transformer with PCA.
-      #
-      # @param n_components [Integer] The number of principal components.
-      # @param solver [String] The algorithm for the optimization ('auto', 'fpt' or 'evd').
-      #   'auto' chooses the 'evd' solver if Numo::Linalg is loaded. Otherwise, it chooses the 'fpt' solver.
-      #   'fpt' uses the fixed-point algorithm.
-      #   'evd' performs eigen value decomposition of the covariance matrix of samples.
-      #   Any other value falls back to 'fpt'.
-      # @param max_iter [Integer] The maximum number of iterations. If solver = 'evd', this parameter is ignored.
-      # @param tol [Float] The tolerance of termination criterion. If solver = 'evd', this parameter is ignored.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      #   When nil, a seed is obtained from Kernel#srand.
-      def initialize(n_components: 2, solver: 'auto', max_iter: 100, tol: 1.0e-4, random_seed: nil)
-        check_params_numeric(n_components: n_components, max_iter: max_iter, tol: tol)
-        check_params_string(solver: solver)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
-        @params = {}
-        @params[:solver] = if solver == 'auto'
-                             load_linalg? ? 'evd' : 'fpt'
-                           else
-                             solver == 'evd' ? 'evd' : 'fpt'
-                           end
-        @params[:n_components] = n_components
-        @params[:max_iter] = max_iter
-        @params[:tol] = tol
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @components = nil
-        @mean = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      # Fit the model with given training data.
-      #
-      # @overload fit(x) -> PCA
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @return [PCA] The learned transformer itself.
-      def fit(x, _y = nil)
-        x = check_convert_sample_array(x)
-        # initialize some variables.
-        @components = nil
-        n_samples, n_features = x.shape
-        # Work on a duplicate so that the state of @rng itself is never advanced.
-        sub_rng = @rng.dup
-        # centering.
-        @mean = x.mean(0)
-        centered_x = x - @mean
-        # optimization.
-        covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
-        # NOTE(review): enable_linalg? is not defined in this class; presumably it comes from
-        # Base::BaseEstimator -- confirm.
-        if @params[:solver] == 'evd' && enable_linalg?
-          # Request only the n_components largest eigenvalues; reverse puts the largest first.
-          _, evecs = Numo::Linalg.eigh(covariance_mat, vals_range: (n_features - @params[:n_components])...n_features)
-          comps = evecs.reverse(1).transpose
-          @components = @params[:n_components] == 1 ? comps[0, true].dup : comps.dup
-        else
-          # Fixed-point algorithm: components are found one at a time, each kept orthogonal
-          # to the components found so far.
-          @params[:n_components].times do
-            comp_vec = Rumale::Utils.rand_uniform(n_features, sub_rng)
-            @params[:max_iter].times do
-              updated = orthogonalize(covariance_mat.dot(comp_vec))
-              # Converged when successive unit vectors point in (almost) the same direction.
-              break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
-              comp_vec = updated
-            end
-            @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
-          end
-        end
-        self
-      end
-      # Fit the model with training data, and then transform them with the learned model.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
-      def fit_transform(x, _y = nil)
-        x = check_convert_sample_array(x)
-        fit(x).transform(x)
-      end
-      # Transform the given data with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        (x - @mean).dot(@components.transpose)
-      end
-      # Inverse transform the given transformed data with the learned model.
-      #
-      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
-      def inverse_transform(z)
-        z = check_convert_sample_array(z)
-        # A single component is stored as a 1-D vector; promote it to a one-row matrix for the product.
-        c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
-        z.dot(c) + @mean
-      end
-      private
-      # Tell whether a sufficiently recent Numo::Linalg (0.1.4 or later) has been loaded.
-      def load_linalg?
-        return false if defined?(Numo::Linalg).nil?
-        # Compare as versions rather than as strings: '0.1.10' < '0.1.4' holds lexicographically.
-        return false if Gem::Version.new(Numo::Linalg::VERSION) < Gem::Version.new('0.1.4')
-        true
-      end
-      # Remove from pcvec its projection onto the components found so far, then scale it to unit length.
-      def orthogonalize(pcvec)
-        unless @components.nil?
-          delta = @components.dot(pcvec) * @components.transpose
-          # With two or more stored components delta is a matrix; sum the per-component projections.
-          delta = delta.sum(1) unless delta.shape[1].nil?
-          pcvec -= delta
-        end
-        # The small constant belongs to the denominator: it guards against a zero norm.
-        # Without the parentheses it was added to every element of the normalized vector.
-        pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
-      end
-    end
-  end
-end

data/lib/rumale/ensemble/ada_boost_classifier.rb DELETED Viewed

@@ -1,179 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/values'
-require 'rumale/utils'
-require 'rumale/base/base_estimator'
-require 'rumale/base/classifier'
-require 'rumale/tree/decision_tree_classifier'
-module Rumale
-  module Ensemble
-    # AdaBoostClassifier is a class that implements AdaBoost (SAMME.R) for classification.
-    # This class uses decision tree for a weak learner.
-    #
-    # @example
-    #   estimator =
-    #     Rumale::Ensemble::AdaBoostClassifier.new(
-    #       n_estimators: 10, criterion: 'gini', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
-    #   estimator.fit(training_samples, training_labels)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Zhu, J., Rosset, S., Zou, H., and Hastie, T., "Multi-class AdaBoost," Technical Report No. 430, Department of Statistics, University of Michigan, 2005.
-    class AdaBoostClassifier
-      include Base::BaseEstimator
-      include Base::Classifier
-      # Return the set of estimators.
-      # @return [Array<DecisionTreeClassifier>]
-      attr_reader :estimators
-      # Return the class labels.
-      # @return [Numo::Int32] (size: n_classes)
-      attr_reader :classes
-      # Return the importance for each feature.
-      # @return [Numo::DFloat] (size: n_features)
-      attr_reader :feature_importances
-      # Return the random generator for random selection of feature index.
-      # @return [Random]
-      attr_reader :rng
-      # Create a new classifier with AdaBoost.
-      #
-      # @param n_estimators [Integer] The number of decision trees for constructing AdaBoost classifier.
-      # @param criterion [String] The function to evaluate splitting point. Supported criteria are 'gini' and 'entropy'.
-      # @param max_depth [Integer] The maximum depth of the tree.
-      #   If nil is given, decision tree grows without concern for depth.
-      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
-      #   If nil is given, number of leaves is not limited.
-      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
-      # @param max_features [Integer] The number of features to consider when searching optimal split point.
-      #   If nil is given, split process considers all features.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      #   It is used to randomly determine the order of features when deciding splitting point.
-      def initialize(n_estimators: 50,
-                     criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
-        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                    max_features: max_features, random_seed: random_seed)
-        check_params_numeric(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
-        check_params_string(criterion: criterion)
-        check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
-                              max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
-                              max_features: max_features)
-        @params = {}
-        @params[:n_estimators] = n_estimators
-        @params[:criterion] = criterion
-        @params[:max_depth] = max_depth
-        @params[:max_leaf_nodes] = max_leaf_nodes
-        @params[:min_samples_leaf] = min_samples_leaf
-        @params[:max_features] = max_features
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @estimators = nil
-        @classes = nil
-        @feature_importances = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [AdaBoostClassifier] The learned classifier itself.
-      def fit(x, y) # rubocop:disable Metrics/AbcSize
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-        ## Initialize some variables.
-        n_samples, n_features = x.shape
-        @estimators = []
-        @feature_importances = Numo::DFloat.zeros(n_features)
-        # Clamp max_features into [1, n_features]; a non-Integer value (e.g. nil) means "use all features".
-        @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
-        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
-        n_classes = @classes.shape[0]
-        # Work on a duplicate so that the state of @rng itself is never advanced.
-        sub_rng = @rng.dup
-        ## Boosting.
-        classes_arr = @classes.to_a
-        # Label coding: 1 for the true class and -1 / (n_classes - 1) for every other class.
-        y_codes = Numo::DFloat.zeros(n_samples, n_classes) - 1.fdiv(n_classes - 1)
-        n_samples.times { |n| y_codes[n, classes_arr.index(y[n])] = 1.0 }
-        observation_weights = Numo::DFloat.zeros(n_samples) + 1.fdiv(n_samples)
-        @params[:n_estimators].times do |_t|
-          # Fit classifier on a sample drawn according to the observation weights.
-          ids = Rumale::Utils.choice_ids(n_samples, observation_weights, sub_rng)
-          # Boosting stops as soon as a resample does not contain every class.
-          break if y[ids].to_a.uniq.size != n_classes
-          tree = Tree::DecisionTreeClassifier.new(
-            criterion: @params[:criterion], max_depth: @params[:max_depth],
-            max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
-            max_features: @params[:max_features], random_seed: sub_rng.rand(Rumale::Values.int_max)
-          )
-          tree.fit(x[ids, true], y[ids])
-          # Calculate estimator error. Probabilities are clipped so that their logarithm stays finite.
-          proba = tree.predict_proba(x).clip(1.0e-15, nil)
-          predicted = Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[proba[n, true].max_index] })
-          inds = predicted.ne(y)
-          error = (observation_weights * inds).sum / observation_weights.sum
-          # Store model.
-          @estimators.push(tree)
-          @feature_importances += tree.feature_importances
-          break if error.zero?
-          # Update observation weights.
-          log_proba = Numo::NMath.log(proba)
-          observation_weights *= Numo::NMath.exp(-1.0 * (n_classes - 1).fdiv(n_classes) * (y_codes * log_proba).sum(1))
-          observation_weights = observation_weights.clip(1.0e-15, nil)
-          sum_observation_weights = observation_weights.sum
-          break if sum_observation_weights.zero?
-          observation_weights /= sum_observation_weights
-        end
-        # Normalize only when there is something to normalize: when boosting stopped before any
-        # tree was stored the sum is zero, and dividing would turn every importance into NaN.
-        sum_importances = @feature_importances.sum
-        @feature_importances /= sum_importances unless sum_importances.zero?
-        self
-      end
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
-      #   All scores are zero when the ensemble holds no estimator.
-      def decision_function(x)
-        x = check_convert_sample_array(x)
-        n_samples, = x.shape
-        n_classes = @classes.size
-        sum_probs = Numo::DFloat.zeros(n_samples, n_classes)
-        @estimators.each do |tree|
-          log_proba = Numo::NMath.log(tree.predict_proba(x).clip(1.0e-15, nil))
-          sum_probs += (n_classes - 1) * (log_proba - 1.fdiv(n_classes) * Numo::DFloat[log_proba.sum(1)].transpose)
-        end
-        # Average over the estimators; an empty ensemble would otherwise produce 0 / 0 = NaN.
-        sum_probs /= @estimators.size unless @estimators.empty?
-        sum_probs
-      end
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        n_samples, = x.shape
-        probs = decision_function(x)
-        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[probs[n, true].max_index] })
-      end
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-      def predict_proba(x)
-        x = check_convert_sample_array(x)
-        n_classes = @classes.size
-        probs = Numo::NMath.exp(1.fdiv(n_classes - 1) * decision_function(x))
-        # Normalize each row so that the class probabilities of a sample sum to one.
-        sum_probs = probs.sum(1)
-        probs /= Numo::DFloat[sum_probs].transpose
-        probs
-      end
-    end
-  end
-end

data/lib/rumale/ensemble/ada_boost_regressor.rb DELETED Viewed

@@ -1,160 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/values'
-require 'rumale/base/base_estimator'
-require 'rumale/base/regressor'
-require 'rumale/tree/decision_tree_regressor'
-module Rumale
-  module Ensemble
-    # AdaBoostRegressor is a class that implements AdaBoost.RT for regression.
-    # This class uses decision tree for a weak learner.
-    #
-    # @example
-    #   estimator =
-    #     Rumale::Ensemble::AdaBoostRegressor.new(
-    #       n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
-    #   estimator.fit(training_samples, training_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Shrestha, D. L., and Solomatine, D. P., "Experiments with AdaBoost.RT, an Improved Boosting Scheme for Regression," Neural Computation 18 (7), pp. 1678--1710, 2006.
-    class AdaBoostRegressor
-      include Base::BaseEstimator
-      include Base::Regressor
-      # Return the set of estimators.
-      # @return [Array<DecisionTreeRegressor>]
-      attr_reader :estimators
-      # Return the weight for each weak learner.
-      # @return [Numo::DFloat] (size: n_estimators)
-      attr_reader :estimator_weights
-      # Return the importance for each feature.
-      # @return [Numo::DFloat] (size: n_features)
-      attr_reader :feature_importances
-      # Return the random generator for random selection of feature index.
-      # @return [Random]
-      attr_reader :rng
-      # Create a new regressor with AdaBoost.
-      #
-      # @param n_estimators [Integer] The number of decision trees for constructing AdaBoost regressor.
-      # @param threshold [Float] The threshold for delimiting correct and incorrect predictions. That is constrained to [0, 1]
-      #   It is compared with the absolute relative error |(prediction - target) / target| of each sample.
-      # @param exponent [Float] The exponent for the weight of each weak learner.
-      # @param criterion [String] The function to evaluate splitting point.
-      #   The value is handed to DecisionTreeRegressor as is (default: 'mse').
-      # @param max_depth [Integer] The maximum depth of the tree.
-      #   If nil is given, decision tree grows without concern for depth.
-      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
-      #   If nil is given, number of leaves is not limited.
-      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
-      # @param max_features [Integer] The number of features to consider when searching optimal split point.
-      #   If nil is given, split process considers all features.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      #   It is used to randomly determine the order of features when deciding splitting point.
-      def initialize(n_estimators: 10, threshold: 0.2, exponent: 1.0,
-                     criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
-        check_params_numeric_or_nil(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                    max_features: max_features, random_seed: random_seed)
-        check_params_numeric(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf,
-                             threshold: threshold, exponent: exponent)
-        check_params_string(criterion: criterion)
-        check_params_positive(n_estimators: n_estimators, threshold: threshold, exponent: exponent,
-                              max_depth: max_depth,
-                              max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
-                              max_features: max_features)
-        @params = {}
-        @params[:n_estimators] = n_estimators
-        @params[:threshold] = threshold
-        @params[:exponent] = exponent
-        @params[:criterion] = criterion
-        @params[:max_depth] = max_depth
-        @params[:max_leaf_nodes] = max_leaf_nodes
-        @params[:min_samples_leaf] = min_samples_leaf
-        @params[:max_features] = max_features
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @estimators = nil
-        # Initialized like the other learned attributes so that every reader has a defined value before fit.
-        @estimator_weights = nil
-        @feature_importances = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
-      #   The error measure divides by the target values, so targets equal to zero yield infinite or NaN errors.
-      # @raise [ArgumentError] If y is not a 1-D array.
-      # @return [AdaBoostRegressor] The learned regressor itself.
-      def fit(x, y) # rubocop:disable Metrics/AbcSize
-        x = check_convert_sample_array(x)
-        y = check_convert_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-        # Check target values
-        raise ArgumentError, 'Expect target value vector to be 1-D arrray' unless y.shape.size == 1
-        # Initialize some variables.
-        n_samples, n_features = x.shape
-        # Clamp max_features into [1, n_features]; a non-Integer value (e.g. nil) means "use all features".
-        @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
-        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        observation_weights = Numo::DFloat.zeros(n_samples) + 1.fdiv(n_samples)
-        @estimators = []
-        @estimator_weights = []
-        @feature_importances = Numo::DFloat.zeros(n_features)
-        # Work on a duplicate so that the state of @rng itself is never advanced.
-        sub_rng = @rng.dup
-        # Construct the ensemble.
-        @params[:n_estimators].times do |_t|
-          # Fit weak learner on a sample drawn according to the observation weights.
-          ids = Rumale::Utils.choice_ids(n_samples, observation_weights, sub_rng)
-          tree = Tree::DecisionTreeRegressor.new(
-            criterion: @params[:criterion], max_depth: @params[:max_depth],
-            max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
-            max_features: @params[:max_features], random_seed: sub_rng.rand(Rumale::Values.int_max)
-          )
-          tree.fit(x[ids, true], y[ids])
-          predicted = tree.predict(x)
-          # Calculate errors: the total weight of the samples whose relative error exceeds the threshold.
-          abs_err = ((predicted - y) / y).abs
-          err = observation_weights[abs_err.gt(@params[:threshold])].sum
-          if err <= 0.0
-            # A learner without errors has no finite weight (log(1 / 0)), so boosting stops here.
-            # When it is the very first learner it is kept with unit weight; discarding it would
-            # leave the ensemble empty and make predict and feature_importances NaN.
-            if @estimators.empty?
-              @estimators.push(tree)
-              @estimator_weights.push(1.0)
-              @feature_importances += tree.feature_importances
-            end
-            break
-          end
-          # Calculate weight.
-          beta = err**@params[:exponent]
-          weight = Math.log(1.fdiv(beta))
-          # Store model.
-          @estimators.push(tree)
-          @estimator_weights.push(weight)
-          @feature_importances += weight * tree.feature_importances
-          # Update observation weights: correctly predicted samples are down-weighted by beta.
-          update = Numo::DFloat.ones(n_samples)
-          update[abs_err.le(@params[:threshold])] = beta
-          observation_weights *= update
-          observation_weights = observation_weights.clip(1.0e-15, nil)
-          sum_observation_weights = observation_weights.sum
-          break if sum_observation_weights.zero?
-          observation_weights /= sum_observation_weights
-        end
-        @estimator_weights = Numo::DFloat.asarray(@estimator_weights)
-        # Skip the normalization when the weights sum to zero; dividing would produce NaN importances.
-        sum_weights = @estimator_weights.sum
-        @feature_importances /= sum_weights unless sum_weights.zero?
-        self
-      end
-      # Predict values for samples.
-      # The prediction is the average of the weak learners' predictions weighted by estimator_weights.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples]) Predicted value per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        n_samples, = x.shape
-        predictions = Numo::DFloat.zeros(n_samples)
-        @estimators.each_with_index do |tree, t|
-          predictions += @estimator_weights[t] * tree.predict(x)
-        end
-        sum_weight = @estimator_weights.sum
-        predictions / sum_weight
-      end
-    end
-  end
-end