rumale 0.10.0 → 0.11.0
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +1 -1
- data/lib/rumale/base/base_estimator.rb +16 -0
- data/lib/rumale/ensemble/extra_trees_classifier.rb +28 -13
- data/lib/rumale/ensemble/extra_trees_regressor.rb +28 -13
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +83 -34
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +58 -30
- data/lib/rumale/ensemble/random_forest_classifier.rb +66 -37
- data/lib/rumale/ensemble/random_forest_regressor.rb +45 -15
- data/lib/rumale/kernel_machine/kernel_svc.rb +37 -11
- data/lib/rumale/linear_model/base_linear_model.rb +5 -1
- data/lib/rumale/linear_model/lasso.rb +13 -4
- data/lib/rumale/linear_model/linear_regression.rb +13 -3
- data/lib/rumale/linear_model/logistic_regression.rb +25 -6
- data/lib/rumale/linear_model/ridge.rb +13 -3
- data/lib/rumale/linear_model/svc.rb +40 -18
- data/lib/rumale/linear_model/svr.rb +12 -3
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +6 -1
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +26 -7
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +12 -3
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +16 -2
    
        checksums.yaml
    CHANGED
    
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 50ce110d0d5ad24245b5b52347a7ae72c1a7c673
+  data.tar.gz: 52c1acc4ebe4c8da8120dc431be4e1a953317a63
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: f8774f51f6bde00ea9414de9bfbe2c31b1c3c09c6931bd29ae414117d2648ee8273fa4f8dc32e78573a9e9da96db2cba19ca67372e4ac56adbe2a68c9be5b92a
+  data.tar.gz: 7777ba4d627830877dea89b1c9573340fd03882ccdafac57700e261f1e0b621962cc9744129bdbf26ae1078995e7d16db9c36758ae9a327d93ef3e5c3f572b28
    
        data/CHANGELOG.md
    CHANGED
    
@@ -1,3 +1,15 @@
+# 0.11.0
+- Introduce [Parallel gem](https://github.com/grosser/parallel) to improve execution speed for one-vs-the-rest and bagging methods.
+- Add the n_jobs parameter that specifies the number of jobs for parallel processing in some estimators belong to the Rumale::LinearModel, Rumale::PolynomialModel, and Rumale::Ensemble.
+- The n_jobs parameter is valid only when parallel gem is loaded.
+
+```ruby
+require 'rumale'
+require 'parallel'
+
+svc = Rumale::LinearModel::SVC.new(n_jobs: -1)
+```
+
 # 0.10.0
 - Add class for t-distributed Stochastic Neighborhood Embedding.
 - Fix bug of zero division on min-max scaling class.
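To make the changelog entry above concrete, here is a slightly fuller, hedged usage sketch. The data is synthetic and purely illustrative; the constructor arguments are the ones documented in the diffs below.

```ruby
# Illustrative sketch only: random data, hypothetical workflow.
require 'rumale'
require 'parallel' # without this require, n_jobs is silently ignored

x = Numo::DFloat.new(100, 4).rand  # 100 samples, 4 features
y = Numo::Int32.new(100).rand(2)   # binary labels 0/1

# n_jobs: -1 (zero or less) uses as many worker processes as there are processors;
# n_jobs: nil (the default) keeps the previous single-process behavior.
svc = Rumale::LinearModel::SVC.new(n_jobs: -1, random_seed: 1)
svc.fit(x, y)
puts svc.score(x, y)
```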
    
        data/README.md
    CHANGED
    
@@ -6,7 +6,7 @@
 [](https://coveralls.io/github/yoshoku/rumale?branch=master)
 [](https://badge.fury.io/rb/rumale)
 [](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
-[](https://www.rubydoc.info/gems/rumale/0.
+[](https://www.rubydoc.info/gems/rumale/0.11.0)
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
    data/lib/rumale/base/base_estimator.rb
CHANGED

@@ -8,6 +8,22 @@ module Rumale
       # Return parameters about an estimator.
       # @return [Hash]
       attr_reader :params
+
+      private
+
+      def enable_parallel?
+        return false if @params[:n_jobs].nil? || defined?(Parallel).nil?
+        true
+      end
+
+      def n_processes
+        return 1 unless enable_parallel?
+        @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
+      end
+
+      def parallel_map(n_outputs, &block)
+        Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
+      end
     end
   end
 end
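The three private helpers added above are the entire parallelization mechanism: parallel execution happens only when the caller passed :n_jobs and the Parallel constant is defined; otherwise everything falls back to a plain serial loop. A self-contained sketch of the same pattern outside Rumale (ToyBagging and fit_many are hypothetical names, not part of the gem):

```ruby
require 'parallel' # optional; the class degrades to serial execution without it

# Hypothetical class reproducing the enable_parallel?/n_processes/parallel_map pattern.
class ToyBagging
  def initialize(n_jobs: nil)
    @params = { n_jobs: n_jobs }
  end

  # Train n_models models; the block receives the model index.
  def fit_many(n_models, &train_one)
    if enable_parallel?
      parallel_map(n_models, &train_one)
    else
      Array.new(n_models, &train_one)
    end
  end

  private

  # Parallelism only when n_jobs was given AND the Parallel gem is loaded.
  def enable_parallel?
    !@params[:n_jobs].nil? && !defined?(Parallel).nil?
  end

  # n_jobs <= 0 means "use all processors", mirroring the parameter docs below.
  def n_processes
    return 1 unless enable_parallel?
    @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
  end

  def parallel_map(n_outputs, &block)
    Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
  end
end

p ToyBagging.new(n_jobs: 2).fit_many(4) { |i| "model-#{i}" }
# => ["model-0", "model-1", "model-2", "model-3"]
```

Keeping the serial fallback identical in shape to the parallel path is what lets the estimators below switch between the two with a single enable_parallel? check.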
    data/lib/rumale/ensemble/extra_trees_classifier.rb
CHANGED

@@ -47,13 +47,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
+      #   If nil is given, the method does not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                          max_features: max_features, random_seed: random_seed)
+                                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,

@@ -76,18 +80,19 @@ module Rumale
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
         @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct trees.
-        …
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end

@@ -130,6 +135,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeClassifier.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
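One detail worth noting in the rewritten fit above: the per-tree seeds (rng_seeds) are drawn from the estimator's own RNG before any worker processes are forked, so a given random_seed should yield the same forest whether the trees are built serially or in parallel. A hedged check of that property on synthetic data:

```ruby
# Illustrative only: random data; matching predictions are the expected outcome,
# since tree n always receives rng_seeds[n] regardless of the execution mode.
require 'rumale'
require 'parallel'

x = Numo::DFloat.new(200, 8).rand
y = Numo::Int32.new(200).rand(3) # three-class labels 0/1/2

serial = Rumale::Ensemble::ExtraTreesClassifier.new(n_estimators: 20, random_seed: 42)
forked = Rumale::Ensemble::ExtraTreesClassifier.new(n_estimators: 20, n_jobs: -1, random_seed: 42)

serial.fit(x, y)
forked.fit(x, y)

puts (serial.predict(x) == forked.predict(x)).count # expected: 200
```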
    data/lib/rumale/ensemble/extra_trees_regressor.rb
CHANGED

@@ -43,13 +43,17 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 10,
                      criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                          max_features: max_features, random_seed: random_seed)
+                                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_string(criterion: criterion)
         check_params_positive(n_estimators: n_estimators, max_depth: max_depth,

@@ -71,18 +75,19 @@ module Rumale
         n_features = x.shape[1]
         @params[:max_features] = Math.sqrt(n_features).to_i unless @params[:max_features].is_a?(Integer)
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        @feature_importances = Numo::DFloat.zeros(n_features)
         # Construct forest.
-        …
+        rng_seeds = Array.new(@params[:n_estimators]) { @rng.rand(Rumale::Values.int_max) }
+        @estimators = if enable_parallel?
+                        parallel_map(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      else
+                        Array.new(@params[:n_estimators]) { |n| plant_tree(rng_seeds[n]).fit(x, y) }
+                      end
+        @feature_importances =
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].feature_importances }.reduce(&:+)
+          else
+            @estimators.map(&:feature_importances).reduce(&:+)
+          end
         @feature_importances /= @feature_importances.sum
         self
       end

@@ -116,6 +121,16 @@ module Rumale
       def marshal_load(obj)
         super
       end
+
+      private
+
+      def plant_tree(rnd_seed)
+        Tree::ExtraTreeRegressor.new(
+          criterion: @params[:criterion], max_depth: @params[:max_depth],
+          max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+          max_features: @params[:max_features], random_seed: rnd_seed
+        )
+      end
     end
   end
 end
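For completeness, a brief usage sketch of the regressor with the new option (synthetic data, illustrative only; the sum of 1.0 reflects the normalization of @feature_importances in fit above):

```ruby
require 'rumale'
require 'parallel'

x = Numo::DFloat.new(150, 5).rand
y = x[true, 0] * 2.0 - x[true, 1] + Numo::DFloat.new(150).rand * 0.1

reg = Rumale::Ensemble::ExtraTreesRegressor.new(n_estimators: 30, n_jobs: -1, random_seed: 1)
reg.fit(x, y)

p reg.feature_importances.sum  # ~1.0 after normalization
p reg.predict(x[0...5, true])  # predictions for the first five samples
```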
    data/lib/rumale/ensemble/gradient_boosting_classifier.rb
CHANGED

@@ -56,19 +56,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                          max_features: max_features, random_seed: random_seed)
+                                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-                              …
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate

@@ -78,6 +81,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil

@@ -96,22 +100,16 @@ module Rumale
         check_sample_array(x)
         check_label_array(y)
         check_sample_label_size(x, y)
-        …
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        …
-        # train estimator.
         @classes = Numo::Int32[*y.to_a.uniq.sort]
         n_classes = @classes.size
+        # train estimator.
         if n_classes > 2
-          @base_predictions = …
-          @estimators = …
-            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
-            y_mean = bin_y.mean
-            @base_predictions[n] = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
-            partial_fit(x, bin_y, @base_predictions[n])
-          end
+          @base_predictions = multiclass_base_predictions(y)
+          @estimators = multiclass_estimators(x, y)
         else
           negative_label = y.to_a.uniq.min
           bin_y = Numo::DFloat.cast(y.ne(negative_label)) * 2 - 1

@@ -119,17 +117,12 @@ module Rumale
           @base_predictions = 0.5 * Numo::NMath.log((1.0 + y_mean) / (1.0 - y_mean))
           @estimators = partial_fit(x, bin_y, @base_predictions)
         end
-        …
         # calculate feature importances.
-        @feature_importances = …
-        …
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-        …
+        @feature_importances = if n_classes > 2
+                                 multiclass_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end

@@ -139,18 +132,12 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
       def decision_function(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_classes = @classes.size
         if n_classes > 2
-          …
-          n_classes.times do |n|
-            @estimators[n].each { |tree| scores[true, n] += tree.predict(x) }
-          end
+          multiclass_scores(x)
         else
-          …
-          @estimators.each { |tree| scores += tree.predict(x) }
+          @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
         end
-        scores
       end

       # Predict class labels for samples.

@@ -273,6 +260,68 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multiclass_base_predictions(y)
+        n_classes = @classes.size
+        b = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+                y_mean = bin_y.mean
+                0.5 * Math.log((1.0 + y_mean) / (1.0 - y_mean))
+              end
+            end
+        Numo::DFloat.asarray(b)
+      end
+
+      def multiclass_estimators(x, y)
+        n_classes = @classes.size
+        if enable_parallel?
+          # :nocov:
+          parallel_map(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+          # :nocov:
+        else
+          Array.new(n_classes) do |n|
+            bin_y = Numo::DFloat.cast(y.eq(@classes[n])) * 2 - 1
+            partial_fit(x, bin_y, @base_predictions[n])
+          end
+        end
+      end
+
+      def multiclass_feature_importances
+        n_classes = @classes.size
+        if enable_parallel?
+          parallel_map(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_classes) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multiclass_scores(x)
+        n_classes = @classes.size
+        s = if enable_parallel?
+              # :nocov:
+              parallel_map(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_classes) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(s).transpose + @base_predictions
+      end
     end
   end
 end
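The refactored decision_function delegates the one-vs-the-rest bookkeeping to multiclass_scores, which collects one score vector per class, stacks them, and transposes to the usual [n_samples, n_classes] layout before broadcasting in the per-class base predictions. A Numo-only sketch of that shape handling (all numbers made up):

```ruby
require 'numo/narray'

n_classes = 3
n_samples = 4

# Stand-ins for the summed per-class tree predictions.
per_class = Array.new(n_classes) { |c| Numo::DFloat.new(n_samples).fill(c.to_f) }
base_predictions = Numo::DFloat[0.1, -0.2, 0.3] # shape [n_classes]

# asarray stacks to [n_classes, n_samples]; transpose gives [n_samples, n_classes];
# broadcasting then adds base_predictions to every row, as multiclass_scores does.
scores = Numo::DFloat.asarray(per_class).transpose + base_predictions
p scores.shape     # => [4, 3]
p scores[0, true]  # row 0: [0.1, 0.8, 2.3]
```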
    data/lib/rumale/ensemble/gradient_boosting_regressor.rb
CHANGED

@@ -51,19 +51,22 @@ module Rumale
       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
       # @param max_features [Integer] The number of features to consider when searching optimal split point.
       #   If nil is given, split process considers all features.
+      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
+      #   If nil is given, the methods do not execute in parallel.
+      #   If zero or less is given, it becomes equal to the number of processors.
+      #   This parameter is ignored if the Parallel gem is not loaded.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       #   It is used to randomly determine the order of features when deciding spliting point.
       def initialize(n_estimators: 100, learning_rate: 0.1, reg_lambda: 0.0, subsample: 1.0,
                      max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
-                     max_features: nil, random_seed: nil)
+                     max_features: nil, n_jobs: nil, random_seed: nil)
         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
-                                          max_features: max_features, random_seed: random_seed)
+                                          max_features: max_features, n_jobs: n_jobs, random_seed: random_seed)
         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
         check_params_float(learning_rate: learning_rate, reg_lambda: reg_lambda, subsample: subsample)
-        check_params_positive(n_estimators: n_estimators,
-                              …
-                              max_features: max_features)
+        check_params_positive(n_estimators: n_estimators, learning_rate: learning_rate, reg_lambda: reg_lambda,
+                              subsample: subsample, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
         @params = {}
         @params[:n_estimators] = n_estimators
         @params[:learning_rate] = learning_rate

@@ -73,6 +76,7 @@ module Rumale
         @params[:max_leaf_nodes] = max_leaf_nodes
         @params[:min_samples_leaf] = min_samples_leaf
         @params[:max_features] = max_features
+        @params[:n_jobs] = n_jobs
         @params[:random_seed] = random_seed
         @params[:random_seed] ||= srand
         @estimators = nil

@@ -90,32 +94,24 @@ module Rumale
         check_sample_array(x)
         check_tvalue_array(y)
         check_sample_tvalue_size(x, y)
-        …
+        # initialize some variables.
         n_features = x.shape[1]
         @params[:max_features] = n_features if @params[:max_features].nil?
         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
-        …
-        # train regressor.
         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        # train regressor.
         @base_predictions = n_outputs > 1 ? y.mean(0) : y.mean
         @estimators = if n_outputs > 1
-                        …
-                        partial_fit(x, y[true, n], @base_predictions[n])
-                      end
+                        multivar_estimators(x, y)
                       else
                         partial_fit(x, y, @base_predictions)
                       end
-        …
         # calculate feature importances.
-        @feature_importances = …
-        …
-        else
-          @estimators.each { |tree| @feature_importances += tree.feature_importances }
-        end
-        …
+        @feature_importances = if n_outputs > 1
+                                 multivar_feature_importances
+                               else
+                                 @estimators.map(&:feature_importances).reduce(&:+)
+                               end
         self
       end

@@ -125,18 +121,16 @@ module Rumale
       # @return [Numo::DFloat] (shape: [n_samples]) Predicted values per sample.
       def predict(x)
         check_sample_array(x)
-        n_samples = x.shape[0]
         n_outputs = @estimators.first.is_a?(Array) ? @estimators.size : 1
         if n_outputs > 1
-          …
-          n_outputs.times do |n|
-            @estimators[n].each { |tree| predicted[true, n] += tree.predict(x) }
-          end
+          multivar_predict(x)
         else
-          …
+          if enable_parallel?
+            parallel_map(@params[:n_estimators]) { |n| @estimators[n].predict(x) }.reduce(&:+) + @base_predictions
+          else
+            @estimators.map { |tree| tree.predict(x) }.reduce(&:+) + @base_predictions
+          end
         end
-        predicted
       end

       # Return the index of the leaf that each sample reached.

@@ -225,6 +219,40 @@ module Rumale
           max_features: @params[:max_features], random_seed: @rng.rand(Rumale::Values.int_max)
         )
       end
+
+      def multivar_estimators(x, y)
+        n_outputs = y.shape[1]
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        else
+          Array.new(n_outputs) { |n| partial_fit(x, y[true, n], @base_predictions[n]) }
+        end
+      end
+
+      def multivar_feature_importances
+        n_outputs = @estimators.size
+        if enable_parallel?
+          parallel_map(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        else
+          Array.new(n_outputs) { |n| @estimators[n].map(&:feature_importances).reduce(&:+) }.reduce(&:+)
+        end
+      end
+
+      def multivar_predict(x)
+        n_outputs = @estimators.size
+        p = if enable_parallel?
+              # :nocov:
+              parallel_map(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+              # :nocov:
+            else
+              Array.new(n_outputs) do |n|
+                @estimators[n].map { |tree| tree.predict(x) }.reduce(&:+)
+              end
+            end
+        Numo::DFloat.asarray(p).transpose + @base_predictions
+      end
     end
   end
 end
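Finally, a short usage sketch for the gradient boosting regressor under the new option (single-output synthetic data, illustrative only; multi-output targets go through the multivar_* helpers shown above):

```ruby
require 'rumale'
require 'parallel'

x = Numo::DFloat.new(120, 6).rand
y = x[true, 0] - x[true, 1] + Numo::DFloat.new(120).rand * 0.05

reg = Rumale::Ensemble::GradientBoostingRegressor.new(
  n_estimators: 50, learning_rate: 0.1, n_jobs: -1, random_seed: 7
)
reg.fit(x, y)

# predict sums the trees' outputs (in parallel when enabled) and adds @base_predictions.
p reg.predict(x[0...3, true])
```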