rumale 0.9.1 → 0.9.2
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/CHANGELOG.md +8 -0
- data/README.md +1 -1
- data/ext/rumale/rumale.c +74 -0
- data/lib/rumale.rb +4 -0
- data/lib/rumale/ensemble/ada_boost_classifier.rb +1 -1
- data/lib/rumale/ensemble/ada_boost_regressor.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +278 -0
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +230 -0
- data/lib/rumale/preprocessing/bin_discretizer.rb +108 -0
- data/lib/rumale/preprocessing/l2_normalizer.rb +1 -1
- data/lib/rumale/tree/base_decision_tree.rb +4 -5
- data/lib/rumale/tree/extra_tree_classifier.rb +2 -2
- data/lib/rumale/tree/extra_tree_regressor.rb +2 -2
- data/lib/rumale/tree/gradient_tree_regressor.rb +228 -0
- data/lib/rumale/tree/node.rb +1 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -1
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 1d73f16bcd1d149babe18c1db66d3f72bb9a1206
+  data.tar.gz: 247fd7d548563ef27622c293073236468f634b7d
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 6a4a92d08ee0a8295d96a930a46fb67a9299a9e0beb717d52186347fef3b70727e35a2375e6c50f5da37ab699132fe0d3c3beeeb0a9730a158e3a5864f6b8364
+  data.tar.gz: a614c5002c750f9091a0b7b80b678115ea6b65a1a7d0de621431ee942f8f1678d36c64a271cdb1cc0c4a68c49d20bacfe934d844381ad78361c09e762e02e872
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
+# 0.9.2
+- Add class for Gradient tree boosting classifier.
+- Add class for Gradient tree boosting regressor.
+- Add class for discretizing feature values.
+- Refactor extra-trees estimators.
+- Refactor decision tree base class.
+- Fix some typos on document ([#6](https://github.com/yoshoku/rumale/pull/6)).
+
 # 0.9.1
 - Add class for Extra-Trees classifier.
 - Add class for Extra-Trees regressor.
data/README.md
CHANGED
@@ -12,7 +12,7 @@ Rumale (**Ru**by **ma**chine **le**arning) is a machine learninig library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
 Rumale supports Linear / Kernel Support Vector Machine,
 Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
-Naive Bayes, Decision Tree, AdaBoost, Random Forest, Extra-Trees, K-nearest neighbor classifier,
+Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
 K-Means, DBSCAN, Principal Component Analysis, and Non-negative Matrix Factorization.
 
 This project was formerly known as "SVMKit".
data/ext/rumale/rumale.c
CHANGED
@@ -334,6 +334,72 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE whole_impurity, VALUE s
   return opt_params;
 }
 
+/**
+ * @!visibility private
+ * Find for split point with maximum information gain.
+ *
+ * @overload find_split_params(sorted_features, sorted_gradient, sorted_hessian, sum_gradient, sum_hessian) -> Array<Float>
+ *
+ * @param sorted_features [Array<Float>] (size: n_samples) The feature values sorted in ascending order.
+ * @param sorted_targets [Array<Float>] (size: n_samples) The target values sorted according to feature values.
+ * @param sorted_gradient [Array<Float>] (size: n_samples) The gradient values of loss function sorted according to feature values.
+ * @param sorted_hessian [Array<Float>] (size: n_samples) The hessian values of loss function sorted according to feature values.
+ * @param sum_gradient [Float] The sum of gradient values.
+ * @param sum_hessian [Float] The sum of hessian values.
+ * @param reg_lambda [Float] The L2 regularization term on weight.
+ * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
+ */
+static VALUE
+find_split_params_grad_reg
+(VALUE self, VALUE sorted_f, VALUE sorted_g, VALUE sorted_h, VALUE sum_g, VALUE sum_h, VALUE reg_l)
+{
+  const long n_elements = RARRAY_LEN(sorted_f);
+  const double s_grad = NUM2DBL(sum_g);
+  const double s_hess = NUM2DBL(sum_h);
+  const double reg_lambda = NUM2DBL(reg_l);
+  long curr_pos = 0;
+  long next_pos = 0;
+  double last_el = NUM2DBL(rb_ary_entry(sorted_f, n_elements - 1));
+  double curr_el = NUM2DBL(rb_ary_entry(sorted_f, 0));
+  double next_el;
+  double l_grad = 0.0;
+  double l_hess = 0.0;
+  double r_grad;
+  double r_hess;
+  double gain;
+  VALUE opt_params = rb_ary_new2(2);
+
+  /* Initialize optimal parameters. */
+  rb_ary_store(opt_params, 0, rb_ary_entry(sorted_f, 0)); /* threshold */
+  rb_ary_store(opt_params, 1, DBL2NUM(0)); /* gain */
+
+  /* Find optimal parameters. */
+  while (curr_pos < n_elements && curr_el != last_el) {
+    next_el = NUM2DBL(rb_ary_entry(sorted_f, next_pos));
+    while (next_pos < n_elements && next_el == curr_el) {
+      l_grad += NUM2DBL(rb_ary_entry(sorted_g, next_pos));
+      l_hess += NUM2DBL(rb_ary_entry(sorted_h, next_pos));
+      next_el = NUM2DBL(rb_ary_entry(sorted_f, ++next_pos));
+    }
+    /* Calculate gain of new split. */
+    r_grad = s_grad - l_grad;
+    r_hess = s_hess - l_hess;
+    gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
+           (r_grad * r_grad) / (r_hess + reg_lambda) -
+           (s_grad * s_grad) / (s_hess + reg_lambda);
+    /* Update optimal parameters. */
+    if (gain > NUM2DBL(rb_ary_entry(opt_params, 1))) {
+      rb_ary_store(opt_params, 0, DBL2NUM(0.5 * (curr_el + next_el)));
+      rb_ary_store(opt_params, 1, DBL2NUM(gain));
+    }
+    if (next_pos == n_elements) break;
+    curr_pos = next_pos;
+    curr_el = NUM2DBL(rb_ary_entry(sorted_f, curr_pos));
+  }
+
+  return opt_params;
+}
+
 /**
  * @!visibility private
  * Calculate impurity based on criterion.
@@ -406,9 +472,17 @@ void Init_rumale(void)
   * This module is used internally.
   */
  VALUE mExtDTreeReg = rb_define_module_under(mTree, "ExtDecisionTreeRegressor");
+  /**
+   * Document-module: Rumale::Tree::ExtGradientTreeRegressor
+   * @!visibility private
+   * The mixin module consisting of extension method for GradientTreeRegressor class.
+   * This module is used internally.
+   */
+  VALUE mExtGTreeReg = rb_define_module_under(mTree, "ExtGradientTreeRegressor");
 
  rb_define_private_method(mExtDTreeCls, "find_split_params", find_split_params_cls, 5);
  rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 4);
+ rb_define_private_method(mExtGTreeReg, "find_split_params", find_split_params_grad_reg, 6);
 rb_define_private_method(mExtDTreeCls, "node_impurity", node_impurity_cls, 3);
 rb_define_private_method(mExtDTreeReg, "node_impurity", node_impurity_reg, 2);
 }
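The gain accumulated in the loop of find_split_params_grad_reg above corresponds to the standard second-order split criterion of gradient tree boosting (cf. the XGBoost paper cited in the new Ruby classes). As a sketch in LaTeX, where G_L, H_L (resp. G_R, H_R) denote the sums of gradients and hessians of the samples falling left (resp. right) of a candidate threshold and \lambda is reg_lambda (this notation is introduced here, not taken from the source):

\[ \mathrm{gain} = \frac{G_L^2}{H_L + \lambda} + \frac{G_R^2}{H_R + \lambda} - \frac{(G_L + G_R)^2}{H_L + H_R + \lambda} \]

The reported threshold is the midpoint 0.5 * (curr_el + next_el) between two consecutive distinct feature values, and the second hunk registers the function on the new Rumale::Tree::ExtGradientTreeRegressor mixin with arity 6.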
data/lib/rumale.rb
CHANGED
@@ -47,8 +47,11 @@ require 'rumale/tree/decision_tree_classifier'
 require 'rumale/tree/decision_tree_regressor'
 require 'rumale/tree/extra_tree_classifier'
 require 'rumale/tree/extra_tree_regressor'
+require 'rumale/tree/gradient_tree_regressor'
 require 'rumale/ensemble/ada_boost_classifier'
 require 'rumale/ensemble/ada_boost_regressor'
+require 'rumale/ensemble/gradient_boosting_classifier'
+require 'rumale/ensemble/gradient_boosting_regressor'
 require 'rumale/ensemble/random_forest_classifier'
 require 'rumale/ensemble/random_forest_regressor'
 require 'rumale/ensemble/extra_trees_classifier'
@@ -61,6 +64,7 @@ require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
 require 'rumale/preprocessing/standard_scaler'
+require 'rumale/preprocessing/bin_discretizer'
 require 'rumale/preprocessing/label_encoder'
 require 'rumale/preprocessing/one_hot_encoder'
 require 'rumale/model_selection/k_fold'
data/lib/rumale/ensemble/ada_boost_classifier.rb
CHANGED
@@ -42,7 +42,7 @@ module Rumale
 
       # Create a new classifier with AdaBoost.
       #
-      # @param n_estimators [Integer] The numeber of decision trees for contructing
+      # @param n_estimators [Integer] The numeber of decision trees for contructing AdaBoost classifier.
       # @param criterion [String] The function to evalue spliting point. Supported criteria are 'gini' and 'entropy'.
       # @param max_depth [Integer] The maximum depth of the tree.
       #   If nil is given, decision tree grows without concern for depth.
data/lib/rumale/ensemble/ada_boost_regressor.rb
CHANGED
@@ -42,7 +42,7 @@ module Rumale
 
       # Create a new regressor with random forest.
       #
-      # @param n_estimators [Integer] The numeber of decision trees for contructing
+      # @param n_estimators [Integer] The numeber of decision trees for contructing AdaBoost regressor.
       # @param threshold [Float] The threshold for delimiting correct and incorrect predictions. That is constrained to [0, 1]
       # @param exponent [Float] The exponent for the weight of each weak learner.
       # @param criterion [String] The function to evalue spliting point. Supported criteria are 'gini' and 'entropy'.
data/lib/rumale/ensemble/gradient_boosting_classifier.rb
ADDED
@@ -0,0 +1,278 @@
+# frozen_string_literal: true
+
+require 'rumale/values'
+require 'rumale/base/base_estimator'
+require 'rumale/base/classifier'
+require 'rumale/tree/gradient_tree_regressor'
+
+module Rumale
+  module Ensemble
+    # GradientBoostingClassifier is a class that implements gradient tree boosting for classification.
+    # The class use negative binomial log-likelihood for the loss function.
+    # For multiclass classification problem, it uses one-vs-the-rest strategy.
+    #
+    # @example
+    #   estimator =
+    #     Rumale::Ensemble::GradientBoostingClassifier.new(
+    #       n_estimators: 100, learning_rate: 0.3, reg_lambda: 0.001, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *reference*
+    # - J H. Friedman, "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics, 29 (5), pp. 1189--1232, 2001.
+    # - J H. Friedman, "Stochastic Gradient Boosting," Computational Statistics and Data Analysis, 38 (4), pp. 367--378, 2002.
+    # - T. Chen and C. Guestrin, "XGBoost: A Scalable Tree Boosting System," Proc. KDD'16, pp. 785--794, 2016.
+    #
+    class GradientBoostingClassifier
+      include Base::BaseEstimator
+      include Base::Classifier
+
+      # Return the set of estimators.
+      # @return [Array<GradientTreeRegressor>] or [Array<Array<GradientTreeRegressor>>]
+      attr_reader :estimators
+
+      # Return the class labels.
+      # @return [Numo::Int32] (size: n_classes)
+      attr_reader :classes
+
+      # Return the importance for each feature.
+      # The feature importances are calculated based on the numbers of times the feature is used for splitting.
+      # @return [Numo::DFloat] (size: n_features)
+      attr_reader :feature_importances
+
+      # Return the random generator for random selection of feature index.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new classifier with gradient tree boosting.
+      #
+      # @param n_estimators [Integer] The numeber of trees for contructing classifier.
+      # @param learning_rate [Float] The boosting learining rate
+      # @param reg_lambda [Float] The L2 regularization term on weight.
+      # @param max_depth [Integer] The maximum depth of the tree.
+      #   If nil is given, decision tree grows without concern for depth.
+      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
+      #   If nil is given, number of leaves is not limited.
+      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+      # @param max_features [Integer] The number of features to consider when searching optimal split point.
+      #   If nil is given, split process considers all features.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      #   It is used to randomly determine the order of features when deciding spliting point.
+      def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
+                     max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                     max_features: nil, random_seed: nil)
+        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                 max_features: max_features, random_seed: random_seed)
+        check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+        check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
+        check_params_positive(n_estimators: n_estimators,
+                              learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
+                              max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
+                              max_features: max_features)
+        @params = {}
+        @params[:n_estimators] = n_estimators
+        @params[:learning_rate] = learning_rate
+        @params[:reg_lambda] = reg_lambda
+        @params[:subsample] = subsample
+        @params[:max_depth] = max_depth
+        @params[:max_leaf_nodes] = max_leaf_nodes
+        @params[:min_samples_leaf] = min_samples_leaf
+        @params[:max_features] = max_features
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @estimators = nil
+        @classes = nil
+        @base_predictions = nil
+        @feature_importances = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+      # @return [GradientBoostingClassifier] The learned classifier itself.
+      def fit(x, y)
+        check_sample_array(x)
+        check_label_array(y)
+        check_sample_label_size(x, y)
+
+        n_features = x.shape[1]
+        @params[:max_features] = n_features if @params[:max_features].nil?
+        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+
+        # train estimator.
+        @classes = Numo::Int32[*y.to_a.uniq.sort]
+        n_classes = @classes.size
+        if n_classes > 2
+          @base_predictions = Numo::DFloat.zeros(n_classes)
+          @estimators = Array.new(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            y_mean = bin_y.mean
+            @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+        else
+          negative_label = y.to_a.uniq.min
+          bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1
+          y_mean = bin_y.mean
+          @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
+          @estimators = partial_fit(x, bin_y, @base_predictions)
+        end
+
+        # calculate feature importances.
+        @feature_importances = Numo::DFloat.zeros(n_features)
+        if n_classes > 2
+          n_classes.times do |n|
+            @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
+          end
+        else
+          @estimators.each { |tree| @feature_importances += tree.feature_importances }
+        end
+
+        self
+      end
+
+      # Calculate confidence scores for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+      def decision_function(x)
+        check_sample_array(x)
+        n_samples = x.shape[0]
+        n_classes = @classes.size
+        if n_classes > 2
+          scores = Numo::DFloat.ones(n_samples, n_classes) * @base_predictions
+          n_classes.times do |n|
+            @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
+          end
+        else
+          scores = Numo::DFloat.ones(n_samples) * @base_predictions
+          @estimators.each { |tree| scores += tree.predict(x) }
+        end
+        scores
+      end
+
+      # Predict class labels for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+      def predict(x)
+        check_sample_array(x)
+        n_samples = x.shape[0]
+        probs = predict_proba(x)
+        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[probs[n, true].max_index] })
+      end
+
+      # Predict probability for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
+      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+      def predict_proba(x)
+        check_sample_array(x)
+
+        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
+
+        return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+
+        n_samples, = x.shape
+        probs = Numo::DFloat.zeros(n_samples, 2)
+        probs[true, 1] = proba
+        probs[true, 0] = 1.0 - proba
+        probs
+      end
+
+      # Return the index of the leaf that each sample reached.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples, n_estimators, n_classes]) Leaf index for sample.
+      def apply(x)
+        check_sample_array(x)
+        n_classes = @classes.size
+        leaf_ids = if n_classes > 2
+                     Array.new(n_classes) { |n| @estimators[n].map { |tree| tree.apply(x) } }
+                   else
+                     @estimators.map { |tree| tree.apply(x) }
+                   end
+        Numo::Int32[*leaf_ids].transpose
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about GradientBoostingClassifier.
+      def marshal_dump
+        { params: @params,
+          estimators: @estimators,
+          classes: @classes,
+          base_predictions: @base_predictions,
+          feature_importances: @feature_importances,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @estimators = obj[:estimators]
+        @classes = obj[:classes]
+        @base_predictions = obj[:base_predictions]
+        @feature_importances = obj[:feature_importances]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def partial_fit(x, y, init_pred)
+        # initialize some variables.
+        estimators = []
+        n_samples = x.shape[0]
+        n_sub_samples = [n_samples, [(n_samples * @params[:subsample]).to_i, 1].max].min
+        whole_ids = Array.new(n_samples) { |v| v }
+        y_pred = Numo::DFloat.ones(n_samples) * init_pred
+        # grow trees.
+        @params[:n_estimators].times do |_t|
+          # subsampling
+          ids = whole_ids.sample(n_sub_samples, random: @rng)
+          x_sub = x[ids, true]
+          y_sub = y[ids]
+          y_pred_sub = y_pred[ids]
+          # train tree
+          g = gradient(y_sub, y_pred_sub)
+          h = hessian(y_sub, y_pred_sub)
+          tree = plant_tree
+          tree.fit(x_sub, y_sub, g, h)
+          estimators.push(tree)
+          # update
+          y_pred += tree.predict(x)
+        end
+        estimators
+      end
+
+      # for debug
+      #
+      # def loss(y_true, y_pred)
+      #   # y_true in {-1, 1}
+      #   Numo::NMath.log(1.0 + Numo::NMath.exp(-2.0 * y_true * y_pred)).mean
+      # end
+
+      def gradient(y_true, y_pred)
+        # y in {-1, 1}
+        -2.0 * y_true / (1.0 + Numo::NMath.exp(2.0 * y_true * y_pred))
+      end
+
+      def hessian(y_true, y_pred)
+        abs_response = gradient(y_true, y_pred).abs
+        abs_response * (2.0 - abs_response)
+      end
+
+      def plant_tree
+        Rumale::Tree::GradientTreeRegressor.new(
+          reg_lambda: @params[:reg_lambda], shrinkage_rate: @params[:learning_rate],
+          max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
+        )
+      end
+    end
+  end
+end
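For reference, the private gradient and hessian methods above are the first and second derivatives of the negative binomial log-likelihood computed by the commented-out loss method, with labels encoded as y ∈ {−1, +1}; a sketch of the correspondence (the symbols below are introduced here, not taken from the source):

\[ L(y, F) = \log\bigl(1 + e^{-2yF}\bigr), \qquad g = \frac{\partial L}{\partial F} = \frac{-2y}{1 + e^{2yF}}, \qquad h = \frac{\partial^2 L}{\partial F^2} = |g|\,(2 - |g|) \]

Each base prediction 0.5 * log((1 + ȳ) / (1 − ȳ)) is the constant score that minimizes this loss for the binarized labels, which is why partial_fit starts every boosting run from it.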
data/lib/rumale/ensemble/gradient_boosting_regressor.rb
ADDED
@@ -0,0 +1,230 @@
+# frozen_string_literal: true
+
+require 'rumale/values'
+require 'rumale/base/base_estimator'
+require 'rumale/base/regressor'
+require 'rumale/tree/gradient_tree_regressor'
+
+module Rumale
+  module Ensemble
+    # GradientBoostingRegressor is a class that implements gradient tree boosting for regression.
+    # The class use L2 loss for the loss function.
+    #
+    # @example
+    #   estimator =
+    #     Rumale::Ensemble::GradientBoostingRegressor.new(
+    #       n_estimators: 100, learning_rate: 0.3, reg_lambda: 0.001, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *reference*
+    # - J H. Friedman, "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics, 29 (5), pp. 1189--1232, 2001.
+    # - J H. Friedman, "Stochastic Gradient Boosting," Computational Statistics and Data Analysis, 38 (4), pp. 367--378, 2002.
+    # - T. Chen and C. Guestrin, "XGBoost: A Scalable Tree Boosting System," Proc. KDD'16, pp. 785--794, 2016.
+    #
+    class GradientBoostingRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+
+      # Return the set of estimators.
+      # @return [Array<GradientTreeRegressor>] or [Array<Array<GradientTreeRegressor>>]
+      attr_reader :estimators
+
+      # Return the importance for each feature.
+      # The feature importances are calculated based on the numbers of times the feature is used for splitting.
+      # @return [Numo::DFloat] (size: n_features)
+      attr_reader :feature_importances
+
+      # Return the random generator for random selection of feature index.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new regressor with gradient tree boosting.
+      #
+      # @param n_estimators [Integer] The numeber of trees for contructing regressor.
+      # @param learning_rate [Float] The boosting learining rate
+      # @param reg_lambda [Float] The L2 regularization term on weight.
+      # @param max_depth [Integer] The maximum depth of the tree.
+      #   If nil is given, decision tree grows without concern for depth.
+      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
+      #   If nil is given, number of leaves is not limited.
+      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+      # @param max_features [Integer] The number of features to consider when searching optimal split point.
+      #   If nil is given, split process considers all features.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      #   It is used to randomly determine the order of features when deciding spliting point.
+      def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
+                     max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                     max_features: nil, random_seed: nil)
+        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                 max_features: max_features, random_seed: random_seed)
+        check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+        check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
+        check_params_positive(n_estimators: n_estimators,
+                              learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample,
+                              max_depth: max_depth, max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
+                              max_features: max_features)
+        @params = {}
+        @params[:n_estimators] = n_estimators
+        @params[:learning_rate] = learning_rate
+        @params[:reg_lambda] = reg_lambda
+        @params[:subsample] = subsample
+        @params[:max_depth] = max_depth
+        @params[:max_leaf_nodes] = max_leaf_nodes
+        @params[:min_samples_leaf] = min_samples_leaf
+        @params[:max_features] = max_features
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @estimators = nil
+        @base_predictions = nil
+        @feature_importances = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [GradientBoostingRegressor] The learned regressor itself.
+      def fit(x, y)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        n_features = x.shape[1]
+        @params[:max_features] = n_features if @params[:max_features].nil?
+        @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+
+        # train regressor.
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
+        @estimators = if n_outputs > 1
+                        Array.new(n_outputs) do |n|
+                          partial_fit(x, y[true, n], @base_predictions[n])
+                        end
+                      else
+                        partial_fit(x, y, @base_predictions)
+                      end
+
+        # calculate feature importances.
+        @feature_importances = Numo::DFloat.zeros(n_features)
+        if n_outputs > 1
+          n_outputs.times do |n|
+            @estimators[n].each { |tree| @feature_importances += tree.feature_importances }
+          end
+        else
+          @estimators.each { |tree| @feature_importances += tree.feature_importances }
+        end
+
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
+      def predict(x)
+        check_sample_array(x)
+        n_samples = x.shape[0]
+        n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
+        if n_outputs > 1
+          predicted = Numo::DFloat.ones(n_samples, n_outputs) * @base_predictions
+          n_outputs.times do |n|
+            @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
+          end
+        else
+          predicted = Numo::DFloat.ones(n_samples) * @base_predictions
+          @estimators.each { |tree| predicted += tree.predict(x) }
+        end
+        predicted
+      end
+
+      # Return the index of the leaf that each sample reached.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
+      def apply(x)
+        check_sample_array(x)
+        n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
+        leaf_ids = if n_outputs > 1
+                     Array.new(n_outputs) { |n| @estimators[n].map { |tree| tree.apply(x) } }
+                   else
+                     @estimators.map { |tree| tree.apply(x) }
+                   end
+        Numo::Int32[*leaf_ids].transpose
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about GradientBoostingRegressor.
+      def marshal_dump
+        { params: @params,
+          estimators: @estimators,
+          base_predictions: @base_predictions,
+          feature_importances: @feature_importances,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @estimators = obj[:estimators]
+        @base_predictions = obj[:base_predictions]
+        @feature_importances = obj[:feature_importances]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def partial_fit(x, y, init_pred)
+        # initialize some variables.
+        estimators = []
+        n_samples = x.shape[0]
+        n_sub_samples = [n_samples, [(n_samples * @params[:subsample]).to_i, 1].max].min
+        whole_ids = Array.new(n_samples) { |v| v }
+        y_pred = Numo::DFloat.ones(n_samples) * init_pred
+        # grow trees.
+        @params[:n_estimators].times do |_t|
+          # subsampling
+          ids = whole_ids.sample(n_sub_samples, random: @rng)
+          x_sub = x[ids, true]
+          y_sub = y[ids]
+          y_pred_sub = y_pred[ids]
+          # train tree
+          g = gradient(y_sub, y_pred_sub)
+          h = hessian(n_sub_samples)
+          tree = plant_tree
+          tree.fit(x_sub, y_sub, g, h)
+          estimators.push(tree)
+          # update
+          y_pred += tree.predict(x)
+        end
+        estimators
+      end
+
+      # for debug
+      #
+      # def loss(y_true, y_pred)
+      #   ((y_true - y_pred)**2).mean
+      # end
+
+      def gradient(y_true, y_pred)
+        y_pred - y_true
+      end
+
+      def hessian(n_samples)
+        Numo::DFloat.ones(n_samples)
+      end
+
+      def plant_tree
+        Rumale::Tree::GradientTreeRegressor.new(
+          reg_lambda: @params[:reg_lambda], shrinkage_rate: @params[:learning_rate],
+          max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
+        )
+      end
+    end
+  end
+end
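Similarly, the regressor's gradient and hessian methods above follow from a squared-error loss; as a sketch (the ½ scaling is notation introduced here so the derivatives match the code, while the commented-out loss method tracks the plain mean squared error for debugging):

\[ L(y, F) = \tfrac{1}{2}\,(y - F)^2, \qquad \frac{\partial L}{\partial F} = F - y, \qquad \frac{\partial^2 L}{\partial F^2} = 1 \]

so gradient returns y_pred - y_true, hessian is simply a vector of ones, and the per-output mean of y serves as the loss-minimizing base prediction.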
data/lib/rumale/preprocessing/bin_discretizer.rb
ADDED
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+
+module Rumale
+  module Preprocessing
+    # Discretizes features with a given number of bins.
+    # In some cases, discretizing features may accelerate decision tree training.
+    #
+    # @example
+    #   discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 4)
+    #   samples = Numo::DFloat.new(5, 2).rand - 0.5
+    #   transformed = discretizer.fit_transform(samples)
+    #   # > pp samples
+    #   # Numo::DFloat#shape=[5,2]
+    #   # [[-0.438246, -0.126933],
+    #   #  [ 0.294815, -0.298958],
+    #   #  [-0.383959, -0.155968],
+    #   #  [ 0.039948,  0.237815],
+    #   #  [-0.334911, -0.449117]]
+    #   # > pp transformed
+    #   # Numo::DFloat#shape=[5,2]
+    #   # [[0, 1],
+    #   #  [3, 0],
+    #   #  [0, 1],
+    #   #  [2, 3],
+    #   #  [0, 0]]
+    class BinDiscretizer
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Return the feature steps to be used discretizing.
+      # @return [Array<Numo::DFloat>] (shape: [n_features, n_bins])
+      attr_reader :feature_steps
+
+      # Create a new discretizer for features with given number of bins.
+      #
+      # @param n_bins [Integer] The number of bins to be used disretizing feature values.
+      def initialize(n_bins: 32)
+        @params = {}
+        @params[:n_bins] = n_bins
+        @feature_steps = nil
+      end
+
+      # Fit feature ranges to be discretized.
+      #
+      # @overload fit(x) -> BinDiscretizer
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the feature ranges.
+      # @return [BinDiscretizer]
+      def fit(x, _y = nil)
+        check_sample_array(x)
+        n_features = x.shape[1]
+        max_vals = x.max(0)
+        min_vals = x.min(0)
+        @feature_steps = Array.new(n_features) do |n|
+          Numo::DFloat.linspace(min_vals[n], max_vals[n], @params[:n_bins] + 1)[0...@params[:n_bins]]
+        end
+        self
+      end
+
+      # Fit feature ranges to be discretized, then return discretized samples.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
+      # @return [Numo::DFloat] The discretized samples.
+      def fit_transform(x, _y = nil)
+        check_sample_array(x)
+        fit(x).transform(x)
+      end
+
+      # Peform discretizing the given samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be discretized.
+      # @return [Numo::DFloat] The discretized samples.
+      def transform(x)
+        check_sample_array(x)
+        n_samples, n_features = x.shape
+        transformed = Numo::DFloat.zeros(n_samples, n_features)
+        n_features.times do |n|
+          steps = @feature_steps[n]
+          @params[:n_bins].times do |bin|
+            mask = x[true, n].ge(steps[bin]).where
+            transformed[mask, n] = bin
+          end
+        end
+        transformed
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about BinDiscretizer
+      def marshal_dump
+        { params: @params,
+          feature_steps: @feature_steps }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @feature_steps = obj[:feature_steps]
+        nil
+      end
+    end
+  end
+end
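A minimal usage sketch for the new discretizer (the sample matrices below are hypothetical, not taken from the gem): the bin edges are learned once from training data with fit and then reused on unseen samples with transform.

require 'rumale'

# Hypothetical Numo::DFloat matrices of shape [n_samples, n_features].
training_samples = Numo::DFloat.new(100, 3).rand
testing_samples  = Numo::DFloat.new(20, 3).rand

discretizer = Rumale::Preprocessing::BinDiscretizer.new(n_bins: 8)
discretizer.fit(training_samples)                       # learn per-feature bin edges
binned_train = discretizer.transform(training_samples)  # bin indices 0..7
binned_test  = discretizer.transform(testing_samples)   # reuse the training edges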
data/lib/rumale/preprocessing/l2_normalizer.rb
CHANGED
@@ -9,7 +9,7 @@ module Rumale
     # Normalize samples to unit L2-norm.
     #
     # @example
-    #   normalizer = Rumale::Preprocessing::
+    #   normalizer = Rumale::Preprocessing::L2Normalizer.new
     #   new_samples = normalizer.fit_transform(samples)
     class L2Normalizer
       include Base::BaseEstimator
data/lib/rumale/tree/base_decision_tree.rb
CHANGED
@@ -86,14 +86,13 @@ module Rumale
         return put_leaf(node, y) if stop_growing?(y)
 
         # calculate optimal parameters.
-        feature_id,
-
-          limp, rimp, th, ga = best_split(ft, y, whole_impurity)
-          [fid, ft.le(th).where, ft.gt(th).where, limp, rimp, th, ga]
-        end.max_by(&:last)
+        feature_id, left_imp, right_imp, threshold, gain =
+          rand_ids(n_features).map { |n| [n, *best_split(x[true, n], y, whole_impurity)] }.max_by(&:last)
 
         return put_leaf(node, y) if gain.nil? || gain.zero?
 
+        left_ids = x[true, feature_id].le(threshold).where
+        right_ids = x[true, feature_id].gt(threshold).where
         node.left = grow_node(depth + 1, x[left_ids, true], y[left_ids, true], left_imp)
         node.right = grow_node(depth + 1, x[right_ids, true], y[right_ids, true], right_imp)
 
data/lib/rumale/tree/extra_tree_classifier.rb
CHANGED
@@ -107,8 +107,8 @@ module Rumale
         threshold = @rng.rand(features.min..features.max)
         l_ids = features.le(threshold).where
         r_ids = features.gt(threshold).where
-        l_impurity = l_ids.
-        r_impurity = r_ids.
+        l_impurity = l_ids.empty? ? 0.0 : impurity(y[l_ids, true])
+        r_impurity = r_ids.empty? ? 0.0 : impurity(y[r_ids, true])
         gain = whole_impurity -
                l_impurity * l_ids.size.fdiv(y.shape[0]) -
                r_impurity * r_ids.size.fdiv(y.shape[0])
data/lib/rumale/tree/extra_tree_regressor.rb
CHANGED
@@ -94,8 +94,8 @@ module Rumale
         threshold = @rng.rand(features.min..features.max)
         l_ids = features.le(threshold).where
        r_ids = features.gt(threshold).where
-        l_impurity = l_ids.
-        r_impurity = r_ids.
+        l_impurity = l_ids.empty? ? 0.0 : impurity(y[l_ids, true])
+        r_impurity = r_ids.empty? ? 0.0 : impurity(y[r_ids, true])
         gain = whole_impurity -
               l_impurity * l_ids.size.fdiv(y.shape[0]) -
               r_impurity * r_ids.size.fdiv(y.shape[0])
data/lib/rumale/tree/gradient_tree_regressor.rb
ADDED
@@ -0,0 +1,228 @@
+# frozen_string_literal: true
+
+require 'rumale/rumale'
+require 'rumale/base/base_estimator'
+require 'rumale/base/regressor'
+require 'rumale/tree/node'
+
+module Rumale
+  module Tree
+    # GradientTreeRegressor is a class that implements decision tree for regression with exact gredy algorithm.
+    # This class is used internally for estimators with gradient tree boosting.
+    #
+    # *reference*
+    # - J H. Friedman, "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics, 29 (5), pp. 1189--1232, 2001.
+    # - J H. Friedman, "Stochastic Gradient Boosting," Computational Statistics and Data Analysis, 38 (4), pp. 367--378, 2002.
+    # - T. Chen and C. Guestrin, "XGBoost: A Scalable Tree Boosting System," Proc. KDD'16, pp. 785--794, 2016.
+    #
+    class GradientTreeRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+      include ExtGradientTreeRegressor
+
+      # Return the importance for each feature.
+      # The feature importances are calculated based on the numbers of times the feature is used for splitting.
+      # @return [Numo::DFloat] (shape: [n_features])
+      attr_reader :feature_importances
+
+      # Return the learned tree.
+      # @return [Node]
+      attr_reader :tree
+
+      # Return the random generator for random selection of feature index.
+      # @return [Random]
+      attr_reader :rng
+
+      # Return the values assigned each leaf.
+      # @return [Numo::DFloat] (shape: [n_leaves])
+      attr_reader :leaf_weights
+
+      # Initialize a gradient tree regressor
+      #
+      # @param reg_lambda [Float] The L2 regularization term on weight.
+      # @param shrinkage_rate [Float] The shrinkage rate for weight.
+      # @param max_depth [Integer] The maximum depth of the tree.
+      #   If nil is given, decision tree grows without concern for depth.
+      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
+      #   If nil is given, number of leaves is not limited.
+      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+      # @param max_features [Integer] The number of features to consider when searching optimal split point.
+      #   If nil is given, split process considers all features.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      #   It is used to randomly determine the order of features when deciding spliting point.
+      def initialize(reg_lambda: 0.0, shrinkage_rate: 1.0,
+                     max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil, random_seed: nil)
+        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                 max_features: max_features, random_seed: random_seed)
+        check_params_float(reg_lambda: reg_lambda, shrinkage_rate: shrinkage_rate)
+        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_positive(reg_lambda: reg_lambda, shrinkage_rate: shrinkage_rate,
+                              max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
+        @params = {}
+        @params[:reg_lambda] = reg_lambda
+        @params[:shrinkage_rate] = shrinkage_rate
+        @params[:max_depth] = max_depth
+        @params[:max_leaf_nodes] = max_leaf_nodes
+        @params[:min_samples_leaf] = min_samples_leaf
+        @params[:max_features] = max_features
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @tree = nil
+        @feature_importances = nil
+        @n_leaves = nil
+        @leaf_weights = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The taget values to be used for fitting the model.
+      # @param g [Numo::DFloat] (shape: [n_samples]) The gradient of loss function.
+      # @param h [Numo::DFloat] (shape: [n_samples]) The hessian of loss function.
+      # @return [GradientTreeRegressor] The learned regressor itself.
+      def fit(x, y, g, h)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        check_params_type(Numo::DFloat, g: g, h: g)
+        # Initialize some variables.
+        n_features = x.shape[1]
+        @params[:max_features] ||= n_features
+        @n_leaves = 0
+        @leaf_weights = []
+        @feature_importances = Numo::DFloat.zeros(n_features)
+        # Build tree.
+        build_tree(x, y, g, h)
+        @leaf_weights = Numo::DFloat[*@leaf_weights]
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (size: n_samples) Predicted values per sample.
+      def predict(x)
+        check_sample_array(x)
+        @leaf_weights[apply(x)].dup
+      end
+
+      # Return the index of the leaf that each sample reached.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+      # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
+      def apply(x)
+        check_sample_array(x)
+        Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about DecisionTreeRegressor
+      def marshal_dump
+        { params: @params,
+          tree: @tree,
+          feature_importances: @feature_importances,
+          leaf_weights: @leaf_weights,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @tree = obj[:tree]
+        @feature_importances = obj[:feature_importances]
+        @leaf_weights = obj[:leaf_weights]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def apply_at_node(node, sample)
+        return node.leaf_id if node.leaf
+        return apply_at_node(node.left, sample) if node.right.nil?
+        return apply_at_node(node.right, sample) if node.left.nil?
+        if sample[node.feature_id] <= node.threshold
+          apply_at_node(node.left, sample)
+        else
+          apply_at_node(node.right, sample)
+        end
+      end
+
+      def build_tree(x, y, g, h)
+        @tree = grow_node(0, x, y, g, h)
+        nil
+      end
+
+      def grow_node(depth, x, y, g, h)
+        # intialize some variables.
+        sum_g = g.sum
+        sum_h = h.sum
+        n_samples, n_features = x.shape
+        node = Node.new(depth: depth, n_samples: n_samples)
+
+        # terminate growing.
+        unless @params[:max_leaf_nodes].nil?
+          return nil if @n_leaves >= @params[:max_leaf_nodes]
+        end
+
+        return nil if n_samples < @params[:min_samples_leaf]
+        return put_leaf(node, sum_g, sum_h) if n_samples == @params[:min_samples_leaf]
+
+        unless @params[:max_depth].nil?
+          return put_leaf(node, sum_g, sum_h) if depth == @params[:max_depth]
+        end
+
+        return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
+
+        # calculate optimal parameters.
+        feature_id, threshold, gain =
+          rand_ids(n_features).map { |n| [n, *best_split(x[true, n], g, h, sum_g, sum_h)] }.max_by(&:last)
+
+        return put_leaf(node, sum_g, sum_h) if gain.nil? || gain.zero?
+
+        left_ids = x[true, feature_id].le(threshold).where
+        right_ids = x[true, feature_id].gt(threshold).where
+        node.left = grow_node(depth + 1, x[left_ids, true], y[left_ids], g[left_ids], h[left_ids])
+        node.right = grow_node(depth + 1, x[right_ids, true], y[right_ids], g[right_ids], h[right_ids])
+
+        return put_leaf(node, sum_g, sum_h) if node.left.nil? && node.right.nil?
+
+        @feature_importances[feature_id] += 1.0
+
+        node.feature_id = feature_id
+        node.threshold = threshold
+        node.leaf = false
+        node
+      end
+
+      def stop_growing?(y)
+        y.to_a.uniq.size == 1
+      end
+
+      def put_leaf(node, sum_g, sum_h)
+        node.probs = nil
+        node.leaf = true
+        node.leaf_id = @n_leaves
+        weight = -@params[:shrinkage_rate] * sum_g / (sum_h + @params[:reg_lambda])
+        @leaf_weights.push(weight)
+        @n_leaves += 1
+        node
+      end
+
+      def best_split(features, g, h, sum_g, sum_h)
+        order = features.sort_index
+        sorted_f = features[order].to_a
+        sorted_g = g[order].to_a
+        sorted_h = h[order].to_a
+        find_split_params(sorted_f, sorted_g, sorted_h, sum_g, sum_h, @params[:reg_lambda])
+      end
+
+      def rand_ids(n)
+        [*0...n].sample(@params[:max_features], random: @rng)
+      end
+    end
+  end
+end
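The weight stored for each leaf in put_leaf above is the regularized Newton step, scaled by the shrinkage rate; as a sketch, with G and H the sums of the gradient and hessian values of the samples reaching the leaf, λ = reg_lambda, and η = shrinkage_rate (notation introduced here):

\[ w^{\ast} = -\,\eta\,\frac{G}{H + \lambda} \]

This is why GradientBoostingClassifier and GradientBoostingRegressor pass their learning_rate in as shrinkage_rate and can form the ensemble prediction by simply summing tree.predict(x) over the fitted trees.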
data/lib/rumale/tree/node.rb
CHANGED
@@ -21,7 +21,7 @@ module Rumale
       # @param feature_id [Integer] The feature index used for evaluation.
       # @param threshold [Float] The threshold value of the feature for splitting the node.
       def initialize(depth: 0, impurity: 0.0, n_samples: 0, probs: 0.0,
-                     leaf:
+                     leaf: false, leaf_id: nil,
                      left: nil, right: nil, feature_id: 0, threshold: 0.0)
         @depth = depth
         @impurity = impurity
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -17,7 +17,7 @@ Rumale is a machine learninig library in Ruby.
     Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
     Rumale currently supports Linear / Kernel Support Vector Machine,
     Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
-    Naive Bayes, Decision Tree, AdaBoost, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
+    Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
     K-Means, DBSCAN, Principal Component Analysis, and Non-negative Matrix Factorization.
   MSG
   spec.homepage = 'https://github.com/yoshoku/rumale'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.9.
+  version: 0.9.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-05-
+date: 2019-05-11 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -99,7 +99,7 @@ description: |
   Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
   Rumale currently supports Linear / Kernel Support Vector Machine,
   Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
-  Naive Bayes, Decision Tree, AdaBoost, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
+  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
   K-Means, DBSCAN, Principal Component Analysis, and Non-negative Matrix Factorization.
 email:
 - yoshoku@outlook.com
@@ -142,6 +142,8 @@ files:
 - lib/rumale/ensemble/ada_boost_regressor.rb
 - lib/rumale/ensemble/extra_trees_classifier.rb
 - lib/rumale/ensemble/extra_trees_regressor.rb
+- lib/rumale/ensemble/gradient_boosting_classifier.rb
+- lib/rumale/ensemble/gradient_boosting_regressor.rb
 - lib/rumale/ensemble/random_forest_classifier.rb
 - lib/rumale/ensemble/random_forest_regressor.rb
 - lib/rumale/evaluation_measure/accuracy.rb
@@ -191,6 +193,7 @@ files:
 - lib/rumale/polynomial_model/base_factorization_machine.rb
 - lib/rumale/polynomial_model/factorization_machine_classifier.rb
 - lib/rumale/polynomial_model/factorization_machine_regressor.rb
+- lib/rumale/preprocessing/bin_discretizer.rb
 - lib/rumale/preprocessing/l2_normalizer.rb
 - lib/rumale/preprocessing/label_encoder.rb
 - lib/rumale/preprocessing/max_abs_scaler.rb
@@ -203,6 +206,7 @@ files:
 - lib/rumale/tree/decision_tree_regressor.rb
 - lib/rumale/tree/extra_tree_classifier.rb
 - lib/rumale/tree/extra_tree_regressor.rb
+- lib/rumale/tree/gradient_tree_regressor.rb
 - lib/rumale/tree/node.rb
 - lib/rumale/utils.rb
 - lib/rumale/validation.rb