rumale 0.22.2 → 0.23.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.clang-format +149 -0
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +5 -2
- data/.github/workflows/coverage.yml +30 -0
- data/.gitignore +1 -0
- data/CHANGELOG.md +38 -0
- data/Gemfile +3 -2
- data/LICENSE.txt +1 -1
- data/README.md +45 -8
- data/Rakefile +2 -1
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{rumale.c → rumaleext.c} +2 -3
- data/ext/rumale/{rumale.h → rumaleext.h} +1 -1
- data/ext/rumale/tree.c +76 -96
- data/ext/rumale/tree.h +2 -0
- data/lib/rumale.rb +6 -1
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +5 -4
- data/lib/rumale/ensemble/stacking_regressor.rb +3 -3
- data/lib/rumale/ensemble/voting_classifier.rb +126 -0
- data/lib/rumale/ensemble/voting_regressor.rb +82 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +30 -10
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +1 -1
- data/lib/rumale/linear_model/lasso.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +66 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +71 -34
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/validation.rb +12 -0
- data/lib/rumale/version.rb +1 -1
- metadata +13 -6
- data/.travis.yml +0 -17
    
        data/ext/rumale/tree.h
    CHANGED
    
    
    
        data/lib/rumale.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            require 'numo/narray'
         | 
| 4 4 |  | 
| 5 | 
            -
            require 'rumale/ | 
| 5 | 
            +
            require 'rumale/rumaleext'
         | 
| 6 6 |  | 
| 7 7 | 
             
            require 'rumale/version'
         | 
| 8 8 | 
             
            require 'rumale/validation'
         | 
| @@ -30,10 +30,12 @@ require 'rumale/linear_model/linear_regression' | |
| 30 30 | 
             
            require 'rumale/linear_model/ridge'
         | 
| 31 31 | 
             
            require 'rumale/linear_model/lasso'
         | 
| 32 32 | 
             
            require 'rumale/linear_model/elastic_net'
         | 
| 33 | 
            +
            require 'rumale/linear_model/nnls'
         | 
| 33 34 | 
             
            require 'rumale/kernel_machine/kernel_svc'
         | 
| 34 35 | 
             
            require 'rumale/kernel_machine/kernel_pca'
         | 
| 35 36 | 
             
            require 'rumale/kernel_machine/kernel_fda'
         | 
| 36 37 | 
             
            require 'rumale/kernel_machine/kernel_ridge'
         | 
| 38 | 
            +
            require 'rumale/kernel_machine/kernel_ridge_classifier'
         | 
| 37 39 | 
             
            require 'rumale/multiclass/one_vs_rest_classifier'
         | 
| 38 40 | 
             
            require 'rumale/nearest_neighbors/vp_tree'
         | 
| 39 41 | 
             
            require 'rumale/nearest_neighbors/k_neighbors_classifier'
         | 
| @@ -61,6 +63,8 @@ require 'rumale/ensemble/extra_trees_classifier' | |
| 61 63 | 
             
            require 'rumale/ensemble/extra_trees_regressor'
         | 
| 62 64 | 
             
            require 'rumale/ensemble/stacking_classifier'
         | 
| 63 65 | 
             
            require 'rumale/ensemble/stacking_regressor'
         | 
| 66 | 
            +
            require 'rumale/ensemble/voting_classifier'
         | 
| 67 | 
            +
            require 'rumale/ensemble/voting_regressor'
         | 
| 64 68 | 
             
            require 'rumale/clustering/k_means'
         | 
| 65 69 | 
             
            require 'rumale/clustering/mini_batch_k_means'
         | 
| 66 70 | 
             
            require 'rumale/clustering/k_medoids'
         | 
| @@ -100,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder' | |
| 100 104 | 
             
            require 'rumale/preprocessing/ordinal_encoder'
         | 
| 101 105 | 
             
            require 'rumale/preprocessing/binarizer'
         | 
| 102 106 | 
             
            require 'rumale/preprocessing/polynomial_features'
         | 
| 107 | 
            +
            require 'rumale/preprocessing/kernel_calculator'
         | 
| 103 108 | 
             
            require 'rumale/model_selection/k_fold'
         | 
| 104 109 | 
             
            require 'rumale/model_selection/group_k_fold'
         | 
| 105 110 | 
             
            require 'rumale/model_selection/stratified_k_fold'
         | 
| @@ -11,13 +11,15 @@ module Rumale | |
| 11 11 |  | 
| 12 12 | 
             
                  private
         | 
| 13 13 |  | 
| 14 | 
            -
                  def enable_linalg?
         | 
| 14 | 
            +
                  def enable_linalg?(warning: true)
         | 
| 15 15 | 
             
                    if defined?(Numo::Linalg).nil?
         | 
| 16 | 
            -
                      warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.')
         | 
| 16 | 
            +
                      warn('If you want to use features that depend on Numo::Linalg, you should install and load Numo::Linalg in advance.') if warning
         | 
| 17 17 | 
             
                      return false
         | 
| 18 18 | 
             
                    end
         | 
| 19 19 | 
             
                    if Numo::Linalg::VERSION < '0.1.4'
         | 
| 20 | 
            -
                       | 
| 20 | 
            +
                      if warning
         | 
| 21 | 
            +
                        warn('The loaded Numo::Linalg does not implement the methods required by Rumale. Please load Numo::Linalg version 0.1.4 or later.')
         | 
| 22 | 
            +
                      end
         | 
| 21 23 | 
             
                      return false
         | 
| 22 24 | 
             
                    end
         | 
| 23 25 | 
             
                    true
         | 
    
        data/lib/rumale/dataset.rb
    CHANGED
    
    | @@ -12,22 +12,26 @@ module Rumale | |
| 12 12 | 
             
                  # Load a dataset with the libsvm file format into Numo::NArray.
         | 
| 13 13 | 
             
                  #
         | 
| 14 14 | 
             
                  # @param filename [String] A path to a dataset file.
         | 
| 15 | 
            +
                  # @param n_features [Integer/Nil] The number of features of data to load.
         | 
| 16 | 
            +
                  #   If nil is given, it will be detected automatically from given file.
         | 
| 15 17 | 
             
                  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
         | 
| 16 18 | 
             
                  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
         | 
| 17 19 | 
             
                  #
         | 
| 18 20 | 
             
                  # @return [Array<Numo::NArray>]
         | 
| 19 21 | 
             
                  #   Returns array containing the (n_samples x n_features) matrix for feature vectors
         | 
| 20 22 | 
             
                  #   and (n_samples) vector for labels or target values.
         | 
| 21 | 
            -
                  def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
         | 
| 23 | 
            +
                  def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
         | 
| 22 24 | 
             
                    ftvecs = []
         | 
| 23 25 | 
             
                    labels = []
         | 
| 24 | 
            -
                     | 
| 26 | 
            +
                    n_features_detected = 0
         | 
| 25 27 | 
             
                    CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
         | 
| 26 28 | 
             
                      label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
         | 
| 27 29 | 
             
                      labels.push(label)
         | 
| 28 30 | 
             
                      ftvecs.push(ftvec)
         | 
| 29 | 
            -
                       | 
| 31 | 
            +
                      n_features_detected = max_idx if n_features_detected < max_idx
         | 
| 30 32 | 
             
                    end
         | 
| 33 | 
            +
                    n_features ||= n_features_detected
         | 
| 34 | 
            +
                    n_features = [n_features, n_features_detected].max
         | 
| 31 35 | 
             
                    [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
         | 
| 32 36 | 
             
                  end
         | 
| 33 37 |  | 
| @@ -81,7 +81,7 @@ module Rumale | |
| 81 81 | 
             
                    wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
         | 
| 82 82 | 
             
                    unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
         | 
| 83 83 | 
             
                    @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
         | 
| 84 | 
            -
                    @mixing = Numo::Linalg.pinv(@components)
         | 
| 84 | 
            +
                    @mixing = Numo::Linalg.pinv(@components).dup
         | 
| 85 85 | 
             
                    if @params[:n_components] == 1
         | 
| 86 86 | 
             
                      @components = @components.flatten.dup
         | 
| 87 87 | 
             
                      @mixing = @mixing.flatten.dup
         | 
| @@ -161,7 +161,7 @@ module Rumale | |
| 161 161 |  | 
| 162 162 | 
             
                    proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
         | 
| 163 163 |  | 
| 164 | 
            -
                    return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
         | 
| 164 | 
            +
                    return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
         | 
| 165 165 |  | 
| 166 166 | 
             
                    n_samples, = x.shape
         | 
| 167 167 | 
             
                    probs = Numo::DFloat.zeros(n_samples, 2)
         | 
| @@ -182,7 +182,7 @@ module Rumale | |
| 182 182 | 
             
                               else
         | 
| 183 183 | 
             
                                 @estimators.map { |tree| tree.apply(x) }
         | 
| 184 184 | 
             
                               end
         | 
| 185 | 
            -
                    Numo::Int32[*leaf_ids].transpose
         | 
| 185 | 
            +
                    Numo::Int32[*leaf_ids].transpose.dup
         | 
| 186 186 | 
             
                  end
         | 
| 187 187 |  | 
| 188 188 | 
             
                  private
         | 
| @@ -159,7 +159,7 @@ module Rumale | |
| 159 159 | 
             
                  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
         | 
| 160 160 | 
             
                  def apply(x)
         | 
| 161 161 | 
             
                    x = check_convert_sample_array(x)
         | 
| 162 | 
            -
                    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
         | 
| 162 | 
            +
                    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
         | 
| 163 163 | 
             
                  end
         | 
| 164 164 |  | 
| 165 165 | 
             
                  private
         | 
| @@ -136,7 +136,7 @@ module Rumale | |
| 136 136 | 
             
                  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
         | 
| 137 137 | 
             
                  def apply(x)
         | 
| 138 138 | 
             
                    x = check_convert_sample_array(x)
         | 
| 139 | 
            -
                    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
         | 
| 139 | 
            +
                    Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
         | 
| 140 140 | 
             
                  end
         | 
| 141 141 |  | 
| 142 142 | 
             
                  private
         | 
| @@ -2,6 +2,7 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            require 'rumale/base/base_estimator'
         | 
| 4 4 | 
             
            require 'rumale/base/classifier'
         | 
| 5 | 
            +
            require 'rumale/preprocessing/label_encoder'
         | 
| 5 6 |  | 
| 6 7 | 
             
            module Rumale
         | 
| 7 8 | 
             
              module Ensemble
         | 
| @@ -10,18 +11,18 @@ module Rumale | |
| 10 11 | 
             
                # @example
         | 
| 11 12 | 
             
                #   estimators = {
         | 
| 12 13 | 
             
                #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
         | 
| 13 | 
            -
                #     mlp:  | 
| 14 | 
            +
                #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
         | 
| 14 15 | 
             
                #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
         | 
| 15 16 | 
             
                #   }
         | 
| 16 17 | 
             
                #   meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
         | 
| 17 18 | 
             
                #   classifier = Rumale::Ensemble::StackingClassifier.new(
         | 
| 18 19 | 
             
                #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
         | 
| 19 20 | 
             
                #   )
         | 
| 20 | 
            -
                #   classifier.fit(training_samples,  | 
| 21 | 
            +
                #   classifier.fit(training_samples, training_labels)
         | 
| 21 22 | 
             
                #   results = classifier.predict(testing_samples)
         | 
| 22 23 | 
             
                #
         | 
| 23 24 | 
             
                # *Reference*
         | 
| 24 | 
            -
                # - Zhou, Z-H., "Ensemble  | 
| 25 | 
            +
                # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
         | 
| 25 26 | 
             
                class StackingClassifier
         | 
| 26 27 | 
             
                  include Base::BaseEstimator
         | 
| 27 28 | 
             
                  include Base::Classifier
         | 
| @@ -149,7 +150,7 @@ module Rumale | |
| 149 150 |  | 
| 150 151 | 
             
                  # Predict probability for samples.
         | 
| 151 152 | 
             
                  #
         | 
| 152 | 
            -
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the  | 
| 153 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 153 154 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
         | 
| 154 155 | 
             
                  def predict_proba(x)
         | 
| 155 156 | 
             
                    x = check_convert_sample_array(x)
         | 
| @@ -10,18 +10,18 @@ module Rumale | |
| 10 10 | 
             
                # @example
         | 
| 11 11 | 
             
                #   estimators = {
         | 
| 12 12 | 
             
                #     las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
         | 
| 13 | 
            -
                #     mlp:  | 
| 13 | 
            +
                #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
         | 
| 14 14 | 
             
                #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
         | 
| 15 15 | 
             
                #   }
         | 
| 16 16 | 
             
                #   meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
         | 
| 17 17 | 
             
                #   regressor = Rumale::Ensemble::StackingRegressor.new(
         | 
| 18 18 | 
             
                #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
         | 
| 19 19 | 
             
                #   )
         | 
| 20 | 
            -
                #   regressor.fit(training_samples,  | 
| 20 | 
            +
                #   regressor.fit(training_samples, training_values)
         | 
| 21 21 | 
             
                #   results = regressor.predict(testing_samples)
         | 
| 22 22 | 
             
                #
         | 
| 23 23 | 
             
                # *Reference*
         | 
| 24 | 
            -
                # - Zhou, Z-H., "Ensemble  | 
| 24 | 
            +
                # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
         | 
| 25 25 | 
             
                class StackingRegressor
         | 
| 26 26 | 
             
                  include Base::BaseEstimator
         | 
| 27 27 | 
             
                  include Base::Regressor
         | 
| @@ -0,0 +1,126 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rumale/base/base_estimator'
         | 
| 4 | 
            +
            require 'rumale/base/classifier'
         | 
| 5 | 
            +
            require 'rumale/preprocessing/label_encoder'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            module Rumale
         | 
| 8 | 
            +
              module Ensemble
         | 
| 9 | 
            +
                # VotingClassifier is a class that implements classifier with voting ensemble method.
         | 
| 10 | 
            +
                #
         | 
| 11 | 
            +
                # @example
         | 
| 12 | 
            +
                #   estimators = {
         | 
| 13 | 
            +
                #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
         | 
| 14 | 
            +
                #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
         | 
| 15 | 
            +
                #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
         | 
| 16 | 
            +
                #   }
         | 
| 17 | 
            +
                #   weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
         | 
| 18 | 
            +
                #
         | 
| 19 | 
            +
                #   classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
         | 
| 20 | 
            +
                #   classifier.fit(x_train, y_train)
         | 
| 21 | 
            +
                #   results = classifier.predict(x_test)
         | 
| 22 | 
            +
                #
         | 
| 23 | 
            +
                # *Reference*
         | 
| 24 | 
            +
                # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
         | 
| 25 | 
            +
                class VotingClassifier
         | 
| 26 | 
            +
                  include Base::BaseEstimator
         | 
| 27 | 
            +
                  include Base::Classifier
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  # Return the sub-classifiers that voted.
         | 
| 30 | 
            +
                  # @return [Hash<Symbol,Classifier>]
         | 
| 31 | 
            +
                  attr_reader :estimators
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                  # Return the class labels.
         | 
| 34 | 
            +
                  # @return [Numo::Int32] (size: n_classes)
         | 
| 35 | 
            +
                  attr_reader :classes
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  # Create a new ensembled classifier with voting rule.
         | 
| 38 | 
            +
                  #
         | 
| 39 | 
            +
                  # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
         | 
| 40 | 
            +
                  # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
         | 
| 41 | 
            +
                  # @param voting [String] The voting rule for the predicted results of each classifier.
         | 
| 42 | 
            +
                  #   If 'hard' is given, the ensembled classifier predicts the class label by majority vote.
         | 
| 43 | 
            +
                  #   If 'soft' is given, the ensembled classifier uses the weighted average of predicted probabilities for the prediction.
         | 
| 44 | 
            +
                  def initialize(estimators:, weights: nil, voting: 'hard')
         | 
| 45 | 
            +
                    check_params_type(Hash, estimators: estimators)
         | 
| 46 | 
            +
                    check_params_type_or_nil(Hash, weights: weights)
         | 
| 47 | 
            +
                    check_params_string(voting: voting)
         | 
| 48 | 
            +
                    @estimators = estimators
         | 
| 49 | 
            +
                    @classes = nil
         | 
| 50 | 
            +
                    @params = {}
         | 
| 51 | 
            +
                    @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
         | 
| 52 | 
            +
                    @params[:voting] = voting
         | 
| 53 | 
            +
                  end
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                  # Fit the model with given training data.
         | 
| 56 | 
            +
                  #
         | 
| 57 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 58 | 
            +
                  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
         | 
| 59 | 
            +
                  # @return [VotingClassifier] The learned classifier itself.
         | 
| 60 | 
            +
                  def fit(x, y)
         | 
| 61 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 62 | 
            +
                    y = check_convert_label_array(y)
         | 
| 63 | 
            +
                    check_sample_label_size(x, y)
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                    @encoder = Rumale::Preprocessing::LabelEncoder.new
         | 
| 66 | 
            +
                    y_encoded = @encoder.fit_transform(y)
         | 
| 67 | 
            +
                    @classes = Numo::NArray[*@encoder.classes]
         | 
| 68 | 
            +
                    @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                    self
         | 
| 71 | 
            +
                  end
         | 
| 72 | 
            +
             | 
| 73 | 
            +
                  # Calculate confidence scores for samples.
         | 
| 74 | 
            +
                  #
         | 
| 75 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
         | 
| 76 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
         | 
| 77 | 
            +
                  def decision_function(x)
         | 
| 78 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 79 | 
            +
                    return predict_proba(x) if soft_voting?
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                    n_samples = x.shape[0]
         | 
| 82 | 
            +
                    n_classes = @classes.size
         | 
| 83 | 
            +
                    z = Numo::DFloat.zeros(n_samples, n_classes)
         | 
| 84 | 
            +
                    @estimators.each do |name, estimator|
         | 
| 85 | 
            +
                      estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
         | 
| 86 | 
            +
                    end
         | 
| 87 | 
            +
                    z
         | 
| 88 | 
            +
                  end
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                  # Predict class labels for samples.
         | 
| 91 | 
            +
                  #
         | 
| 92 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
         | 
| 93 | 
            +
                  # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
         | 
| 94 | 
            +
                  def predict(x)
         | 
| 95 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 96 | 
            +
                    n_samples = x.shape[0]
         | 
| 97 | 
            +
                    n_classes = @classes.size
         | 
| 98 | 
            +
                    z = decision_function(x)
         | 
| 99 | 
            +
                    predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
         | 
| 100 | 
            +
                    Numo::Int32.cast(@encoder.inverse_transform(predicted))
         | 
| 101 | 
            +
                  end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                  # Predict probability for samples.
         | 
| 104 | 
            +
                  #
         | 
| 105 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 106 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
         | 
| 107 | 
            +
                  def predict_proba(x)
         | 
| 108 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 109 | 
            +
                    n_samples = x.shape[0]
         | 
| 110 | 
            +
                    n_classes = @classes.size
         | 
| 111 | 
            +
                    z = Numo::DFloat.zeros(n_samples, n_classes)
         | 
| 112 | 
            +
                    sum_weight = @params[:weights].each_value.inject(&:+)
         | 
| 113 | 
            +
                    @estimators.each do |name, estimator|
         | 
| 114 | 
            +
                      z += @params[:weights][name] * estimator.predict_proba(x)
         | 
| 115 | 
            +
                    end
         | 
| 116 | 
            +
                    z /= sum_weight
         | 
| 117 | 
            +
                  end
         | 
| 118 | 
            +
             | 
| 119 | 
            +
                  private
         | 
| 120 | 
            +
             | 
| 121 | 
            +
                  def soft_voting?
         | 
| 122 | 
            +
                    @params[:voting] == 'soft'
         | 
| 123 | 
            +
                  end
         | 
| 124 | 
            +
                end
         | 
| 125 | 
            +
              end
         | 
| 126 | 
            +
            end
         | 
| @@ -0,0 +1,82 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'rumale/base/base_estimator'
         | 
| 4 | 
            +
            require 'rumale/base/regressor'
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            module Rumale
         | 
| 7 | 
            +
              module Ensemble
         | 
| 8 | 
            +
                # VotingRegressor is a class that implements regressor with voting ensemble method.
         | 
| 9 | 
            +
                #
         | 
| 10 | 
            +
                # @example
         | 
| 11 | 
            +
                #   estimators = {
         | 
| 12 | 
            +
                #     rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
         | 
| 13 | 
            +
                #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
         | 
| 14 | 
            +
                #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
         | 
| 15 | 
            +
                #   }
         | 
| 16 | 
            +
                #   weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
         | 
| 17 | 
            +
                #
         | 
| 18 | 
            +
                #   regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
         | 
| 19 | 
            +
                #   regressor.fit(x_train, y_train)
         | 
| 20 | 
            +
                #   results = regressor.predict(x_test)
         | 
| 21 | 
            +
                #
         | 
| 22 | 
            +
                # *Reference*
         | 
| 23 | 
            +
                # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
         | 
| 24 | 
            +
                class VotingRegressor
         | 
| 25 | 
            +
                  include Base::BaseEstimator
         | 
| 26 | 
            +
                  include Base::Regressor
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                  # Return the sub-regressors that voted.
         | 
| 29 | 
            +
                  # @return [Hash<Symbol,Regressor>]
         | 
| 30 | 
            +
                  attr_reader :estimators
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                  # Create a new ensembled regressor with voting rule.
         | 
| 33 | 
            +
                  #
         | 
| 34 | 
            +
                  # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
         | 
| 35 | 
            +
                  # @param weights [Hash<Symbol,Float>] The weight value for each regressor. If nil, every regressor is given a weight of 1.0.
         | 
| 36 | 
            +
                  def initialize(estimators:, weights: nil)
         | 
| 37 | 
            +
                    check_params_type(Hash, estimators: estimators)
         | 
| 38 | 
            +
                    check_params_type_or_nil(Hash, weights: weights)
         | 
| 39 | 
            +
                    @estimators = estimators
         | 
| 40 | 
            +
                    @n_outputs = nil
         | 
| 41 | 
            +
                    @params = {}
         | 
| 42 | 
            +
                    @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
         | 
| 43 | 
            +
                  end
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                  # Fit the model with given training data.
         | 
| 46 | 
            +
                  #
         | 
| 47 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 48 | 
            +
                  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
         | 
| 49 | 
            +
                  # @return [VotingRegressor] The learned regressor itself.
         | 
| 50 | 
            +
                  def fit(x, y)
         | 
| 51 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 52 | 
            +
                    y = check_convert_tvalue_array(y)
         | 
| 53 | 
            +
                    check_sample_tvalue_size(x, y)
         | 
| 54 | 
            +
             | 
| 55 | 
            +
                    @n_outputs = y.ndim > 1 ? y.shape[1] : 1
         | 
| 56 | 
            +
                    @estimators.each_key { |name| @estimators[name].fit(x, y) }
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                    self
         | 
| 59 | 
            +
                  end
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                  # Predict values for samples.
         | 
| 62 | 
            +
                  #
         | 
| 63 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
         | 
| 64 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
         | 
| 65 | 
            +
                  def predict(x)
         | 
| 66 | 
            +
                    x = check_convert_sample_array(x)
         | 
| 67 | 
            +
                    z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
         | 
| 68 | 
            +
                    sum_weight = @params[:weights].each_value.inject(&:+)
         | 
| 69 | 
            +
                    @estimators.each do |name, estimator|
         | 
| 70 | 
            +
                      z += @params[:weights][name] * estimator.predict(x)
         | 
| 71 | 
            +
                    end
         | 
| 72 | 
            +
                    z / sum_weight
         | 
| 73 | 
            +
                  end
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                  private
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                  def single_target?
         | 
| 78 | 
            +
                    @n_outputs == 1
         | 
| 79 | 
            +
                  end
         | 
| 80 | 
            +
                end
         | 
| 81 | 
            +
              end
         | 
| 82 | 
            +
            end
         | 
| @@ -11,7 +11,7 @@ module Rumale | |
| 11 11 | 
             
                # @example
         | 
| 12 12 | 
             
                #   require 'numo/linalg/autoloader'
         | 
| 13 13 | 
             
                #
         | 
| 14 | 
            -
                #   transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
         | 
| 14 | 
            +
                #   transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
         | 
| 15 15 | 
             
                #   new_training_samples = transformer.fit_transform(training_samples)
         | 
| 16 16 | 
             
                #   new_testing_samples = transformer.transform(testing_samples)
         | 
| 17 17 | 
             
                #
         | 
| @@ -39,12 +39,15 @@ module Rumale | |
| 39 39 |  | 
| 40 40 | 
             
                  # Create a new transformer for mapping to kernel feature space with Nystrom method.
         | 
| 41 41 | 
             
                  #
         | 
| 42 | 
            -
                  # @param kernel [String] The type of kernel | 
| 43 | 
            -
                  # @param gamma [Float] The parameter  | 
| 44 | 
            -
                  # @param  | 
| 42 | 
            +
                  # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
         | 
| 43 | 
            +
                  # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
         | 
| 44 | 
            +
                  # @param degree [Integer] The degree parameter in polynomial kernel function.
         | 
| 45 | 
            +
                  # @param coef [Float] The coefficient in poly/sigmoid kernel function.
         | 
| 46 | 
            +
                  # @param n_components [Integer] The number of dimensions of the kernel feature space.
         | 
| 45 47 | 
             
                  # @param random_seed [Integer] The seed value using to initialize the random generator.
         | 
| 46 | 
            -
                  def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
         | 
| 47 | 
            -
                     | 
| 48 | 
            +
                  def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
         | 
| 49 | 
            +
                    check_params_string(kernel: kernel)
         | 
| 50 | 
            +
                    check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
         | 
| 48 51 | 
             
                    check_params_numeric_or_nil(random_seed: random_seed)
         | 
| 49 52 | 
             
                    @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
         | 
| 50 53 | 
             
                    @params[:random_seed] ||= srand
         | 
| @@ -56,7 +59,7 @@ module Rumale | |
| 56 59 |  | 
| 57 60 | 
             
                  # Fit the model with given training data.
         | 
| 58 61 | 
             
                  #
         | 
| 59 | 
            -
                  # @overload fit(x) ->  | 
| 62 | 
            +
                  # @overload fit(x) -> Nystroem
         | 
| 60 63 | 
             
                  #   @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 61 64 | 
             
                  # @return [Nystroem] The learned transformer itself.
         | 
| 62 65 | 
             
                  def fit(x, _y = nil)
         | 
| @@ -70,10 +73,10 @@ module Rumale | |
| 70 73 |  | 
| 71 74 | 
             
                    # random sampling.
         | 
| 72 75 | 
             
                    @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
         | 
| 73 | 
            -
                    @components = x[@component_indices, true]
         | 
| 76 | 
            +
                    @components = x[@component_indices, true].dup
         | 
| 74 77 |  | 
| 75 78 | 
             
                    # calculate normalizing factor.
         | 
| 76 | 
            -
                    kernel_mat =  | 
| 79 | 
            +
                    kernel_mat = kernel_mat(@components)
         | 
| 77 80 | 
             
                    eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
         | 
| 78 81 | 
             
                    la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
         | 
| 79 82 | 
             
                    u = eig_vecs.reverse(1)
         | 
| @@ -98,9 +101,26 @@ module Rumale | |
| 98 101 | 
             
                  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
         | 
| 99 102 | 
             
                  def transform(x)
         | 
| 100 103 | 
             
                    x = check_convert_sample_array(x)
         | 
| 101 | 
            -
                    z =  | 
| 104 | 
            +
                    z = kernel_mat(x, @components)
         | 
| 102 105 | 
             
                    z.dot(@normalizer)
         | 
| 103 106 | 
             
                  end
         | 
| 107 | 
            +
             | 
| 108 | 
            +
                  private
         | 
| 109 | 
            +
             | 
| 110 | 
            +
                  def kernel_mat(x, y = nil)
         | 
| 111 | 
            +
                    case @params[:kernel]
         | 
| 112 | 
            +
                    when 'rbf'
         | 
| 113 | 
            +
                      Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
         | 
| 114 | 
            +
                    when 'poly'
         | 
| 115 | 
            +
                      Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
         | 
| 116 | 
            +
                    when 'sigmoid'
         | 
| 117 | 
            +
                      Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
         | 
| 118 | 
            +
                    when 'linear'
         | 
| 119 | 
            +
                      Rumale::PairwiseMetric.linear_kernel(x, y)
         | 
| 120 | 
            +
                    else
         | 
| 121 | 
            +
                      raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
         | 
| 122 | 
            +
                    end
         | 
| 123 | 
            +
                  end
         | 
| 104 124 | 
             
                end
         | 
| 105 125 | 
             
              end
         | 
| 106 126 | 
             
            end
         |