RubyGems - rumale - Versions diffs - 0.23.3 → 0.24.0 - Mend

rumale 0.23.3 → 0.24.0

Files changed (142) hide show

checksums.yaml +4 -4
data/LICENSE.txt +5 -1
data/README.md +3 -288
data/lib/rumale/version.rb +1 -1
data/lib/rumale.rb +20 -131
metadata +252 -150
data/CHANGELOG.md +0 -643
data/CODE_OF_CONDUCT.md +0 -74
data/ext/rumale/extconf.rb +0 -37
data/ext/rumale/rumaleext.c +0 -545
data/ext/rumale/rumaleext.h +0 -12
data/lib/rumale/base/base_estimator.rb +0 -49
data/lib/rumale/base/classifier.rb +0 -36
data/lib/rumale/base/cluster_analyzer.rb +0 -31
data/lib/rumale/base/evaluator.rb +0 -17
data/lib/rumale/base/regressor.rb +0 -36
data/lib/rumale/base/splitter.rb +0 -21
data/lib/rumale/base/transformer.rb +0 -22
data/lib/rumale/clustering/dbscan.rb +0 -123
data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
data/lib/rumale/clustering/hdbscan.rb +0 -291
data/lib/rumale/clustering/k_means.rb +0 -122
data/lib/rumale/clustering/k_medoids.rb +0 -141
data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
data/lib/rumale/clustering/power_iteration.rb +0 -127
data/lib/rumale/clustering/single_linkage.rb +0 -203
data/lib/rumale/clustering/snn.rb +0 -76
data/lib/rumale/clustering/spectral_clustering.rb +0 -115
data/lib/rumale/dataset.rb +0 -246
data/lib/rumale/decomposition/factor_analysis.rb +0 -150
data/lib/rumale/decomposition/fast_ica.rb +0 -188
data/lib/rumale/decomposition/nmf.rb +0 -124
data/lib/rumale/decomposition/pca.rb +0 -159
data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
data/lib/rumale/ensemble/voting_classifier.rb +0 -126
data/lib/rumale/ensemble/voting_regressor.rb +0 -82
data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
data/lib/rumale/evaluation_measure/f_score.rb +0 -50
data/lib/rumale/evaluation_measure/function.rb +0 -147
data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
data/lib/rumale/evaluation_measure/precision.rb +0 -50
data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
data/lib/rumale/evaluation_measure/purity.rb +0 -40
data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
data/lib/rumale/evaluation_measure/recall.rb +0 -50
data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
data/lib/rumale/kernel_approximation/rbf.rb +0 -102
data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
data/lib/rumale/linear_model/base_sgd.rb +0 -285
data/lib/rumale/linear_model/elastic_net.rb +0 -119
data/lib/rumale/linear_model/lasso.rb +0 -115
data/lib/rumale/linear_model/linear_regression.rb +0 -201
data/lib/rumale/linear_model/logistic_regression.rb +0 -275
data/lib/rumale/linear_model/nnls.rb +0 -137
data/lib/rumale/linear_model/ridge.rb +0 -209
data/lib/rumale/linear_model/svc.rb +0 -213
data/lib/rumale/linear_model/svr.rb +0 -132
data/lib/rumale/manifold/mds.rb +0 -155
data/lib/rumale/manifold/tsne.rb +0 -222
data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
data/lib/rumale/metric_learning/mlkr.rb +0 -161
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
data/lib/rumale/model_selection/cross_validation.rb +0 -125
data/lib/rumale/model_selection/function.rb +0 -42
data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
data/lib/rumale/model_selection/group_k_fold.rb +0 -93
data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
data/lib/rumale/model_selection/k_fold.rb +0 -81
data/lib/rumale/model_selection/shuffle_split.rb +0 -90
data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
data/lib/rumale/model_selection/time_series_split.rb +0 -91
data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
data/lib/rumale/neural_network/adam.rb +0 -56
data/lib/rumale/neural_network/base_mlp.rb +0 -248
data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
data/lib/rumale/pairwise_metric.rb +0 -152
data/lib/rumale/pipeline/feature_union.rb +0 -69
data/lib/rumale/pipeline/pipeline.rb +0 -175
data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
data/lib/rumale/preprocessing/binarizer.rb +0 -60
data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
data/lib/rumale/preprocessing/label_encoder.rb +0 -79
data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
data/lib/rumale/probabilistic_output.rb +0 -114
data/lib/rumale/tree/base_decision_tree.rb +0 -150
data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
data/lib/rumale/tree/node.rb +0 -39
data/lib/rumale/utils.rb +0 -42
data/lib/rumale/validation.rb +0 -128
data/lib/rumale/values.rb +0 -13

data/lib/rumale/preprocessing/ordinal_encoder.rb DELETED Viewed

@@ -1,109 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-module Rumale
-  module Preprocessing
-    # Transform categorical features to integer values.
-    #
-    # @example
-    #   encoder = Rumale::Preprocessing::OrdinalEncoder.new
-    #   training_samples = [['left', 10], ['right', 15], ['right', 20]]
-    #   training_samples = Numo::NArray.asarray(training_samples)
-    #   encoder.fit(training_samples)
-    #   p encoder.categories
-    #   # [["left", "right"], [10, 15, 20]]
-    #   testing_samples = [['left', 20], ['right', 10]]
-    #   testing_samples = Numo::NArray.asarray(testing_samples)
-    #   encoded = encoder.transform(testing_samples)
-    #   p encoded
-    #   # Numo::DFloat#shape=[2,2]
-    #   # [[0, 2],
-    #   #  [1, 0]]
-    #   p encoder.inverse_transform(encoded)
-    #   # Numo::RObject#shape=[2,2]
-    #   # [["left", 20],
-    #   #  ["right", 10]]
-    class OrdinalEncoder
-      include Base::BaseEstimator
-      include Base::Transformer
-      # Return the array consisting of the categorical values for each feature.
-      # @return [Array] (size: n_features)
-      attr_reader :categories
-      # Create a new encoder that transforms categorical features to integer values.
-      #
-      # @param categories [Nil/Array] The category list for each feature.
-      #   If nil is given, extracted categories from the training data by calling the fit method are used.
-      def initialize(categories: nil)
-        check_params_type_or_nil(Array, categories: categories)
-        @categories = categories
-      end
-      # Fit encoder by extracting the category for each feature.
-      #
-      # @overload fit(x) -> OrdinalEncoder
-      #
-      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
-      # @return [OrdinalEncoder]
-      def fit(x, _y = nil)
-        raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
-        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
-        n_features = x.shape[1]
-        @categories = Array.new(n_features) { |n| x[true, n].to_a.uniq.sort }
-        self
-      end
-      # Fit encoder, then return encoded categorical features to integer values.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
-      # @return [Numo::DFloat] The encoded categorical features to integer values.
-      def fit_transform(x, _y = nil)
-        raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
-        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
-        fit(x).transform(x)
-      end
-      # Encode categorical features.
-      #
-      # @param x [Numo::NArray] (shape: [n_samples, n_features]) The samples consisting of categorical features.
-      # @return [Numo::DFloat] The encoded categorical features to integer values.
-      def transform(x)
-        raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
-        raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
-        n_features = x.shape[1]
-        raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size
-        transformed = Array.new(n_features) do |n|
-          x[true, n].to_a.map { |v| @categories[n].index(v) }
-        end
-        Numo::DFloat.asarray(transformed.transpose)
-      end
-      # Decode values to categorical features.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples consisting of values transformed from categorical features.
-      # @return [Numo::NArray] The decoded features.
-      def inverse_transform(x)
-        x = check_convert_sample_array(x)
-        n_features = x.shape[1]
-        raise ArgumentError, 'Expect the number of features and the number of categories to be equal' if n_features != @categories.size
-        inv_transformed = Array.new(n_features) do |n|
-          x[true, n].to_a.map { |i| @categories[n][i.to_i] }
-        end
-        Numo::NArray.asarray(inv_transformed.transpose)
-      end
-    end
-  end
-end

data/lib/rumale/preprocessing/polynomial_features.rb DELETED Viewed

@@ -1,109 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-module Rumale
-  module Preprocessing
-    # Generating polynomial features from the given samples.
-    #
-    # @example
-    #   require 'rumale'
-    #
-    #   transformer = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
-    #   x = Numo::DFloat[[0, 1], [2, 3], [4, 5]]
-    #   z = transformer.fit_transform(x)
-    #   p z
-    #
-    #   # Numo::DFloat#shape=[3,6]
-    #   # [[1, 0, 1, 0, 0, 1],
-    #   #  [1, 2, 3, 4, 6, 9],
-    #   #  [1, 4, 5, 16, 20, 25]]
-    #
-    #   # If you want to perform polynomial regression, combine it with LinearRegression as follows:
-    #   ply = Rumale::Preprocessing::PolynomialFeatures.new(degree: 2)
-    #   reg = Rumale::LinearModel::LinearRegression.new(fit_bias: false, random_seed: 1)
-    #   pipeline = Rumale::Pipeline::Pipeline.new(steps: { trs: ply, est: reg })
-    #   pipeline.fit(training_samples, training_values)
-    #   results = pipeline.predict(testing_samples)
-    #
-    class PolynomialFeatures
-      include Base::BaseEstimator
-      include Base::Transformer
-      # Return the number of polynomial features.
-      # @return [Integer]
-      attr_reader :n_output_features
-      # Create a transformer for generating polynomial features.
-      #
-      # @param degree [Integer] The degree of polynomial features.
-      def initialize(degree: 2)
-        check_params_numeric(degree: degree)
-        raise ArgumentError, 'Expect the value of degree parameter greater than or eqaul to 1.' if degree < 1
-        @params = {}
-        @params[:degree] = degree
-        @n_output_features = nil
-      end
-      # Calculate the number of output polynomial features.
-      #
-      # @overload fit(x) -> PolynomialFeatures
-      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of output polynomial features.
-      # @return [PolynomialFeatures]
-      def fit(x, _y = nil)
-        x = check_convert_sample_array(x)
-        n_features = x.shape[1]
-        @n_output_features = 1
-        @params[:degree].times do |t|
-          @n_output_features += Array.new(n_features) { |n| n }.repeated_combination(t + 1).size
-        end
-        self
-      end
-      # Calculate the number of polynomial features, and then transform samples to polynomial features.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the number of polynomial features
-      #     and be transformed.
-      # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
-      def fit_transform(x, _y = nil)
-        x = check_convert_sample_array(x)
-        fit(x).transform(x)
-      end
-      # Transform the given samples to polynomial features.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
-      # @return [Numo::DFloat] (shape: [n_samples, n_output_features]) The transformed samples.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        # initialize transformed features
-        n_samples, n_features = x.shape
-        z = Numo::DFloat.zeros(n_samples, n_output_features)
-        # bias
-        z[true, 0] = 1
-        curr_col = 1
-        # itself
-        z[true, 1..n_features] = x
-        curr_col += n_features
-        # high degree features
-        curr_feat_ids = Array.new(n_features + 1) { |n| n + 1 }
-        (1...@params[:degree]).each do
-          next_feat_ids = []
-          n_features.times do |d|
-            f_range = curr_feat_ids[d]...curr_feat_ids.last
-            next_col = curr_col + f_range.size
-            z[true, curr_col...next_col] = z[true, f_range] * x[true, d..d]
-            next_feat_ids.push(curr_col)
-            curr_col = next_col
-          end
-          next_feat_ids.push(curr_col)
-          curr_feat_ids = next_feat_ids
-        end
-        z
-      end
-    end
-  end
-end

data/lib/rumale/preprocessing/standard_scaler.rb DELETED Viewed

@@ -1,71 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/base/transformer'
-module Rumale
-  # This module consists of the classes that perform preprocessings.
-  module Preprocessing
-    # Normalize samples by centering and scaling to unit variance.
-    #
-    # @example
-    #   normalizer = Rumale::Preprocessing::StandardScaler.new
-    #   new_training_samples = normalizer.fit_transform(training_samples)
-    #   new_testing_samples = normalizer.transform(testing_samples)
-    class StandardScaler
-      include Base::BaseEstimator
-      include Base::Transformer
-      # Return the vector consisting of the mean value for each feature.
-      # @return [Numo::DFloat] (shape: [n_features])
-      attr_reader :mean_vec
-      # Return the vector consisting of the standard deviation for each feature.
-      # @return [Numo::DFloat] (shape: [n_features])
-      attr_reader :std_vec
-      # Create a new normalizer for centering and scaling to unit variance.
-      def initialize
-        @params = {}
-        @mean_vec = nil
-        @std_vec = nil
-      end
-      # Calculate the mean value and standard deviation of each feature for scaling.
-      #
-      # @overload fit(x) -> StandardScaler
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
-      #   The samples to calculate the mean values and standard deviations.
-      # @return [StandardScaler]
-      def fit(x, _y = nil)
-        x = check_convert_sample_array(x)
-        @mean_vec = x.mean(0)
-        @std_vec = x.stddev(0)
-        self
-      end
-      # Calculate the mean values and standard deviations, and then normalize samples using them.
-      #
-      # @overload fit_transform(x) -> Numo::DFloat
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
-      #   The samples to calculate the mean values and standard deviations.
-      # @return [Numo::DFloat] The scaled samples.
-      def fit_transform(x, _y = nil)
-        x = check_convert_sample_array(x)
-        fit(x).transform(x)
-      end
-      # Perform standardization of the given samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
-      # @return [Numo::DFloat] The scaled samples.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        n_samples, = x.shape
-        (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
-      end
-    end
-  end
-end

data/lib/rumale/probabilistic_output.rb DELETED Viewed

@@ -1,114 +0,0 @@
-# frozen_string_literal: true
-module Rumale
-  # Module for calculating posterior class probabilities with SVM outputs.
-  # This module is used for internal processes.
-  #
-  # @example
-  #   estimator = Rumale::LinearModel::SVC.new
-  #   estimator.fit(x, bin_y)
-  #   df = estimator.decision_function(x)
-  #   params = Rumale::ProbabilisticOutput.fit_sigmoid(df, bin_y)
-  #   probs = 1 / (Numo::NMath.exp(params[0] * df + params[1]) + 1)
-  #
-  # *Reference*
-  # - Platt, J C., "Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods," Adv. Large Margin Classifiers, pp. 61--74, 2000.
-  # - Lin, H-T., Lin, C-J., and Weng, R C., "A Note on Platt's Probabilistic Outputs for Support Vector Machines," J. Machine Learning, Vol. 63 (3), pp. 267--276, 2007.
-  module ProbabilisticOutput
-    class << self
-      # Fit the probabilistic model for binary SVM outputs.
-      #
-      # @param df [Numo::DFloat] (shape: [n_samples]) The outputs of decision function to be used for fitting the model.
-      # @param bin_y [Numo::Int32] (shape: [n_samples]) The binary labels to be used for fitting the model.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param min_step [Float] The minimum step of Newton's method.
-      # @param sigma [Float] The parameter to prevent the Hessian matrix from becoming singular.
-      # @return [Numo::DFloat] (shape: 2) The parameters of the model.
-      def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
-        # Initialize some variables.
-        n_samples = bin_y.size
-        negative_label = bin_y.to_a.uniq.min
-        pos = bin_y.ne(negative_label)
-        neg = bin_y.eq(negative_label)
-        n_pos_samples = pos.count
-        n_neg_samples = neg.count
-        target_probs = Numo::DFloat.zeros(n_samples)
-        target_probs[pos] = (n_pos_samples + 1) / (n_pos_samples + 2.0)
-        target_probs[neg] = 1 / (n_neg_samples + 2.0)
-        alpha = 0.0
-        beta = Math.log((n_neg_samples + 1) / (n_pos_samples + 1.0))
-        err = error_function(target_probs, df, alpha, beta)
-        # Optimize parameters for class probability calculation.
-        old_grad_vec = Numo::DFloat.zeros(2)
-        max_iter.times do
-          # Calculate gradient and hessian matrix.
-          probs = predicted_probs(df, alpha, beta)
-          grad_vec = gradient(target_probs, probs, df)
-          hess_mat = hessian_matrix(probs, df, sigma)
-          break if grad_vec.abs.lt(1e-5).count == 2
-          break if (old_grad_vec - grad_vec).abs.sum < 1e-5
-          old_grad_vec = grad_vec
-          # Calculate Newton directions.
-          dirs_vec = directions(grad_vec, hess_mat)
-          grad_dir = grad_vec.dot(dirs_vec)
-          stepsize = 2.0
-          while stepsize >= min_step
-            stepsize *= 0.5
-            new_alpha = alpha + stepsize * dirs_vec[0]
-            new_beta = beta + stepsize * dirs_vec[1]
-            new_err = error_function(target_probs, df, new_alpha, new_beta)
-            next unless new_err < err + 0.0001 * stepsize * grad_dir
-            alpha = new_alpha
-            beta = new_beta
-            err = new_err
-            break
-          end
-        end
-        Numo::DFloat[alpha, beta]
-      end
-      private
-      def error_function(target_probs, df, alpha, beta)
-        fn = alpha * df + beta
-        pos = fn.ge(0.0)
-        neg = fn.lt(0.0)
-        err = 0.0
-        err += (target_probs[pos] * fn[pos] + Numo::NMath.log(1 + Numo::NMath.exp(-fn[pos]))).sum if pos.count.positive?
-        err += ((target_probs[neg] - 1) * fn[neg] + Numo::NMath.log(1 + Numo::NMath.exp(fn[neg]))).sum if neg.count.positive?
-        err
-      end
-      def predicted_probs(df, alpha, beta)
-        fn = alpha * df + beta
-        pos = fn.ge(0.0)
-        neg = fn.lt(0.0)
-        probs = Numo::DFloat.zeros(df.shape[0])
-        probs[pos] = Numo::NMath.exp(-fn[pos]) / (1 + Numo::NMath.exp(-fn[pos])) if pos.count.positive?
-        probs[neg] = 1 / (1 + Numo::NMath.exp(fn[neg])) if neg.count.positive?
-        probs
-      end
-      def gradient(target_probs, probs, df)
-        sub = target_probs - probs
-        Numo::DFloat[(df * sub).sum, sub.sum]
-      end
-      def hessian_matrix(probs, df, sigma)
-        sub = probs * (1 - probs)
-        h11 = (df**2 * sub).sum + sigma
-        h22 = sub.sum + sigma
-        h21 = (df * sub).sum
-        Numo::DFloat[[h11, h21], [h21, h22]]
-      end
-      def directions(grad_vec, hess_mat)
-        det = hess_mat[0, 0] * hess_mat[1, 1] - hess_mat[0, 1] * hess_mat[1, 0]
-        inv_hess_mat = Numo::DFloat[[hess_mat[1, 1], -hess_mat[0, 1]], [-hess_mat[1, 0], hess_mat[0, 0]]] / det
-        -inv_hess_mat.dot(grad_vec)
-      end
-    end
-  end
-end

data/lib/rumale/tree/base_decision_tree.rb DELETED Viewed

@@ -1,150 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/tree/node'
-require 'rumale/rumaleext'
-module Rumale
-  # This module consists of the classes that implement tree models.
-  module Tree
-    # BaseDecisionTree is an abstract class for implementation of decision tree-based estimator.
-    # This class is used internally.
-    class BaseDecisionTree
-      include Base::BaseEstimator
-      # Initialize a decision tree-based estimator.
-      #
-      # @param criterion [String] The function to evaluate the splitting point.
-      # @param max_depth [Integer] The maximum depth of the tree.
-      #   If nil is given, decision tree grows without concern for depth.
-      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
-      #   If nil is given, number of leaves is not limited.
-      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
-      # @param max_features [Integer] The number of features to consider when searching optimal split point.
-      #   If nil is given, split process considers all features.
-      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      #   It is used to randomly determine the order of features when deciding the splitting point.
-      def initialize(criterion: nil, max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil, random_seed: nil)
-        @params = {}
-        @params[:criterion] = criterion
-        @params[:max_depth] = max_depth
-        @params[:max_leaf_nodes] = max_leaf_nodes
-        @params[:min_samples_leaf] = min_samples_leaf
-        @params[:max_features] = max_features
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @tree = nil
-        @feature_importances = nil
-        @n_leaves = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      # Return the index of the leaf that each sample reached.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
-      def apply(x)
-        x = check_convert_sample_array(x)
-        Numo::Int32[*(Array.new(x.shape[0]) { |n| partial_apply(@tree, x[n, true]) })]
-      end
-      private
-      def partial_apply(tree, sample)
-        node = tree
-        until node.leaf
-          # :nocov:
-          node = if node.right.nil?
-                   node.left
-                 elsif node.left.nil?
-                   node.right
-                   # :nocov:
-                 else
-                   sample[node.feature_id] <= node.threshold ? node.left : node.right
-                 end
-        end
-        node.leaf_id
-      end
-      def build_tree(x, y)
-        y = y.expand_dims(1).dup if y.shape[1].nil?
-        @feature_ids = Array.new(x.shape[1]) { |v| v }
-        @tree = grow_node(0, x, y, impurity(y))
-        @feature_ids = nil
-        nil
-      end
-      def grow_node(depth, x, y, impurity)
-        # initialize node.
-        n_samples = x.shape[0]
-        node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
-        # terminate growing.
-        return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
-        return nil if n_samples < @params[:min_samples_leaf]
-        return put_leaf(node, y) if n_samples == @params[:min_samples_leaf]
-        return put_leaf(node, y) if !@params[:max_depth].nil? && depth == @params[:max_depth]
-        return put_leaf(node, y) if stop_growing?(y)
-        # calculate optimal parameters.
-        feature_id, left_imp, right_imp, threshold, gain =
-          rand_ids.map { |n| [n, *best_split(x[true, n], y, impurity)] }.max_by(&:last)
-        return put_leaf(node, y) if gain.nil? || gain.zero?
-        left_ids = x[true, feature_id].le(threshold).where
-        right_ids = x[true, feature_id].gt(threshold).where
-        node.left = grow_node(depth + 1, x[left_ids, true], y[left_ids, true], left_imp)
-        node.right = grow_node(depth + 1, x[right_ids, true], y[right_ids, true], right_imp)
-        return put_leaf(node, y) if node.left.nil? && node.right.nil?
-        node.feature_id = feature_id
-        node.threshold = threshold
-        node.leaf = false
-        node
-      end
-      def stop_growing?(_y)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-      def put_leaf(_node, _y)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-      def rand_ids
-        @feature_ids.sample(@params[:max_features], random: @sub_rng)
-      end
-      def best_split(_features, _y, _impurity)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-      def impurity(_y)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-      def eval_importance(n_samples, n_features)
-        @feature_importances = Numo::DFloat.zeros(n_features)
-        eval_importance_at_node(@tree)
-        @feature_importances /= n_samples
-        normalizer = @feature_importances.sum
-        @feature_importances /= normalizer if normalizer > 0.0
-        nil
-      end
-      def eval_importance_at_node(node)
-        return nil if node.leaf
-        return nil if node.left.nil? || node.right.nil?
-        gain = node.n_samples * node.impurity -
-               node.left.n_samples * node.left.impurity -
-               node.right.n_samples * node.right.impurity
-        @feature_importances[node.feature_id] += gain
-        eval_importance_at_node(node.left)
-        eval_importance_at_node(node.right)
-      end
-    end
-  end
-end