rumale 0.23.3 → 0.24.0
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
data/lib/rumale/ensemble/stacking_classifier.rb
@@ -1,215 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/classifier'
-require 'rumale/preprocessing/label_encoder'
-
-module Rumale
-  module Ensemble
-    # StackingClassifier is a class that implements a classifier with the stacking method.
-    #
-    # @example
-    #   estimators = {
-    #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
-    #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
-    #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
-    #   }
-    #   meta_estimator = Rumale::LinearModel::LogisticRegression.new(random_seed: 1)
-    #   classifier = Rumale::Ensemble::StackingClassifier.new(
-    #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
-    #   )
-    #   classifier.fit(training_samples, training_labels)
-    #   results = classifier.predict(testing_samples)
-    #
-    # *Reference*
-    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
-    class StackingClassifier
-      include Base::BaseEstimator
-      include Base::Classifier
-
-      # Return the base classifiers.
-      # @return [Hash<Symbol,Classifier>]
-      attr_reader :estimators
-
-      # Return the meta classifier.
-      # @return [Classifier]
-      attr_reader :meta_estimator
-
-      # Return the class labels.
-      # @return [Numo::Int32] (size: n_classes)
-      attr_reader :classes
-
-      # Return the method used by each base classifier.
-      # @return [Hash<Symbol,Symbol>]
-      attr_reader :stack_method
-
-      # Create a new classifier with the stacking method.
-      #
-      # @param estimators [Hash<Symbol,Classifier>] The base classifiers for extracting meta features.
-      # @param meta_estimator [Classifier/Nil] The meta classifier that predicts the class label.
-      #   If nil is given, LogisticRegression is used.
-      # @param n_splits [Integer] The number of folds for cross validation with stratified k-fold on meta feature extraction in the training phase.
-      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
-      # @param stack_method [String] The name of the base classifier method used for meta feature extraction.
-      #   If 'auto' is given, it searches for a callable method in the order 'predict_proba', 'decision_function', and 'predict'
-      #   on each classifier.
-      # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta classifier.
-      # @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
-      def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, stack_method: 'auto', passthrough: false, random_seed: nil)
-        check_params_type(Hash, estimators: estimators)
-        check_params_numeric(n_splits: n_splits)
-        check_params_string(stack_method: stack_method)
-        check_params_boolean(shuffle: shuffle, passthrough: passthrough)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        @estimators = estimators
-        @meta_estimator = meta_estimator || Rumale::LinearModel::LogisticRegression.new
-        @classes = nil
-        @stack_method = nil
-        @output_size = nil
-        @params = {}
-        @params[:n_splits] = n_splits
-        @params[:shuffle] = shuffle
-        @params[:stack_method] = stack_method
-        @params[:passthrough] = passthrough
-        @params[:random_seed] = random_seed || srand
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [StackingClassifier] The learned classifier itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-
-        n_samples, n_features = x.shape
-
-        @encoder = Rumale::Preprocessing::LabelEncoder.new
-        y_encoded = @encoder.fit_transform(y)
-        @classes = Numo::NArray[*@encoder.classes]
-
-        # training base classifiers with all training data.
-        @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
-
-        # detecting feature extraction method and its size of output for each base classifier.
-        @stack_method = detect_stack_method
-        @output_size = detect_output_size(n_features)
-
-        # extracting meta features with base classifiers.
-        n_components = @output_size.values.inject(:+)
-        z = Numo::DFloat.zeros(n_samples, n_components)
-
-        kf = Rumale::ModelSelection::StratifiedKFold.new(
-          n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
-        )
-
-        kf.split(x, y_encoded).each do |train_ids, valid_ids|
-          x_train = x[train_ids, true]
-          y_train = y_encoded[train_ids]
-          x_valid = x[valid_ids, true]
-          f_start = 0
-          @estimators.each_key do |name|
-            est_fold = Marshal.load(Marshal.dump(@estimators[name]))
-            f_last = f_start + @output_size[name]
-            f_position = @output_size[name] == 1 ? f_start : f_start...f_last
-            z[valid_ids, f_position] = est_fold.fit(x_train, y_train).public_send(@stack_method[name], x_valid)
-            f_start = f_last
-          end
-        end
-
-        # concatenating original features.
-        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
-
-        # training meta classifier.
-        @meta_estimator.fit(z, y_encoded)
-
-        self
-      end
-
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
-      def decision_function(x)
-        x = check_convert_sample_array(x)
-        z = transform(x)
-        @meta_estimator.decision_function(z)
-      end
-
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        z = transform(x)
-        Numo::Int32.cast(@encoder.inverse_transform(@meta_estimator.predict(z)))
-      end
-
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The predicted probability of each class per sample.
-      def predict_proba(x)
-        x = check_convert_sample_array(x)
-        z = transform(x)
-        @meta_estimator.predict_proba(z)
-      end
-
-      # Transform the given data with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        n_samples = x.shape[0]
-        n_components = @output_size.values.inject(:+)
-        z = Numo::DFloat.zeros(n_samples, n_components)
-        f_start = 0
-        @estimators.each_key do |name|
-          f_last = f_start + @output_size[name]
-          f_position = @output_size[name] == 1 ? f_start : f_start...f_last
-          z[true, f_position] = @estimators[name].public_send(@stack_method[name], x)
-          f_start = f_last
-        end
-        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
-        z
-      end
-
-      # Fit the model with training data, and then transform them with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
-      def fit_transform(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        fit(x, y).transform(x)
-      end
-
-      private
-
-      STACK_METHODS = %i[predict_proba decision_function predict].freeze
-
-      private_constant :STACK_METHODS
-
-      def detect_stack_method
-        if @params[:stack_method] == 'auto'
-          @estimators.each_key.with_object({}) { |name, obj| obj[name] = STACK_METHODS.detect { |m| @estimators[name].respond_to?(m) } }
-        else
-          @estimators.each_key.with_object({}) { |name, obj| obj[name] = @params[:stack_method].to_sym }
-        end
-      end
-
-      def detect_output_size(n_features)
-        x_dummy = Numo::DFloat.new(2, n_features).rand
-        @estimators.each_key.with_object({}) do |name, obj|
-          output_dummy = @estimators[name].public_send(@stack_method[name], x_dummy)
-          obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
-        end
-      end
-    end
-  end
-end
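The f_start/f_position bookkeeping in fit and transform above is how the meta-feature matrix is laid out: each base classifier owns a contiguous block of columns, a single column when its stack method returns a vector (predict) and one column per class when it returns a matrix (predict_proba or decision_function). A minimal standalone sketch of that layout, with hypothetical estimator names and output sizes:

require 'numo/narray'

# Hypothetical output sizes: two 3-class predict_proba outputs and one
# scalar predict output.
output_size = { lgr: 3, svc: 1, rnd: 3 }
n_samples = 4
n_components = output_size.values.inject(:+) # => 7

z = Numo::DFloat.zeros(n_samples, n_components)
f_start = 0
output_size.each do |_name, size|
  f_last = f_start + size
  # a scalar output indexes one column, a vector output a column range
  f_position = size == 1 ? f_start : f_start...f_last
  z[true, f_position] = f_start + 1 # stand-in for the estimator's output
  f_start = f_last
end

puts z.inspect # columns 0..2 belong to :lgr, column 3 to :svc, columns 4..6 to :rnd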
data/lib/rumale/ensemble/stacking_regressor.rb
@@ -1,163 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/regressor'
-
-module Rumale
-  module Ensemble
-    # StackingRegressor is a class that implements a regressor with the stacking method.
-    #
-    # @example
-    #   estimators = {
-    #     las: Rumale::LinearModel::Lasso.new(reg_param: 1e-2, random_seed: 1),
-    #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
-    #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
-    #   }
-    #   meta_estimator = Rumale::LinearModel::Ridge.new(random_seed: 1)
-    #   regressor = Rumale::Ensemble::StackingRegressor.new(
-    #     estimators: estimators, meta_estimator: meta_estimator, random_seed: 1
-    #   )
-    #   regressor.fit(training_samples, training_values)
-    #   results = regressor.predict(testing_samples)
-    #
-    # *Reference*
-    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
-    class StackingRegressor
-      include Base::BaseEstimator
-      include Base::Regressor
-
-      # Return the base regressors.
-      # @return [Hash<Symbol,Regressor>]
-      attr_reader :estimators
-
-      # Return the meta regressor.
-      # @return [Regressor]
-      attr_reader :meta_estimator
-
-      # Create a new regressor with the stacking method.
-      #
-      # @param estimators [Hash<Symbol,Regressor>] The base regressors for extracting meta features.
-      # @param meta_estimator [Regressor/Nil] The meta regressor that predicts values.
-      #   If nil is given, Ridge is used.
-      # @param n_splits [Integer] The number of folds for cross validation with k-fold on meta feature extraction in the training phase.
-      # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset on cross validation.
-      # @param passthrough [Boolean] The flag indicating whether to concatenate the original features and meta features when training the meta regressor.
-      # @param random_seed [Integer/Nil] The seed value used to initialize the random generator on cross validation.
-      def initialize(estimators:, meta_estimator: nil, n_splits: 5, shuffle: true, passthrough: false, random_seed: nil)
-        check_params_type(Hash, estimators: estimators)
-        check_params_numeric(n_splits: n_splits)
-        check_params_boolean(shuffle: shuffle, passthrough: passthrough)
-        check_params_numeric_or_nil(random_seed: random_seed)
-        @estimators = estimators
-        @meta_estimator = meta_estimator || Rumale::LinearModel::Ridge.new
-        @output_size = nil
-        @params = {}
-        @params[:n_splits] = n_splits
-        @params[:shuffle] = shuffle
-        @params[:passthrough] = passthrough
-        @params[:random_seed] = random_seed || srand
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
-      # @return [StackingRegressor] The learned regressor itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_samples, n_features = x.shape
-        n_outputs = y.ndim == 1 ? 1 : y.shape[1]
-
-        # training base regressors with all training data.
-        @estimators.each_key { |name| @estimators[name].fit(x, y) }
-
-        # detecting size of output for each base regressor.
-        @output_size = detect_output_size(n_features)
-
-        # extracting meta features with base regressors.
-        n_components = @output_size.values.inject(:+)
-        z = Numo::DFloat.zeros(n_samples, n_components)
-
-        kf = Rumale::ModelSelection::KFold.new(
-          n_splits: @params[:n_splits], shuffle: @params[:shuffle], random_seed: @params[:random_seed]
-        )
-
-        kf.split(x, y).each do |train_ids, valid_ids|
-          x_train = x[train_ids, true]
-          y_train = n_outputs == 1 ? y[train_ids] : y[train_ids, true]
-          x_valid = x[valid_ids, true]
-          f_start = 0
-          @estimators.each_key do |name|
-            est_fold = Marshal.load(Marshal.dump(@estimators[name]))
-            f_last = f_start + @output_size[name]
-            f_position = @output_size[name] == 1 ? f_start : f_start...f_last
-            z[valid_ids, f_position] = est_fold.fit(x_train, y_train).predict(x_valid)
-            f_start = f_last
-          end
-        end
-
-        # concatenating original features.
-        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
-
-        # training meta regressor.
-        @meta_estimator.fit(z, y)
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) The predicted values per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        z = transform(x)
-        @meta_estimator.predict(z)
-      end
-
-      # Transform the given data with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed with the learned model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for samples.
-      def transform(x)
-        x = check_convert_sample_array(x)
-        n_samples = x.shape[0]
-        n_components = @output_size.values.inject(:+)
-        z = Numo::DFloat.zeros(n_samples, n_components)
-        f_start = 0
-        @estimators.each_key do |name|
-          f_last = f_start + @output_size[name]
-          f_position = @output_size[name] == 1 ? f_start : f_start...f_last
-          z[true, f_position] = @estimators[name].predict(x)
-          f_start = f_last
-        end
-        z = Numo::NArray.hstack([z, x]) if @params[:passthrough]
-        z
-      end
-
-      # Fit the model with training data, and then transform them with the learned model.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target variables to be used for fitting the model.
-      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The meta features for training data.
-      def fit_transform(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_tvalue_array(y)
-        fit(x, y).transform(x)
-      end
-
-      private
-
-      def detect_output_size(n_features)
-        x_dummy = Numo::DFloat.new(2, n_features).rand
-        @estimators.each_key.with_object({}) do |name, obj|
-          output_dummy = @estimators[name].predict(x_dummy)
-          obj[name] = output_dummy.ndim == 1 ? 1 : output_dummy.shape[1]
-        end
-      end
-    end
-  end
-end
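Both stacking classes clone each base estimator once per fold with Marshal.load(Marshal.dump(...)). The Marshal round-trip is Ruby's stock deep copy, so fitting the per-fold clone on a subset cannot disturb the estimator that was already fitted on the full training set. A tiny sketch with a plain struct standing in for an estimator (the struct is hypothetical, not part of Rumale):

FakeEstimator = Struct.new(:weights)

est = FakeEstimator.new([0.0, 0.0])
est_fold = Marshal.load(Marshal.dump(est)) # deep copy, nested array included

est_fold.weights[0] = 42.0
p est.weights      # => [0.0, 0.0]  the original is untouched
p est_fold.weights # => [42.0, 0.0]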
data/lib/rumale/ensemble/voting_classifier.rb
@@ -1,126 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/classifier'
-require 'rumale/preprocessing/label_encoder'
-
-module Rumale
-  module Ensemble
-    # VotingClassifier is a class that implements a classifier with the voting ensemble method.
-    #
-    # @example
-    #   estimators = {
-    #     lgr: Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-2, random_seed: 1),
-    #     mlp: Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [256], random_seed: 1),
-    #     rnd: Rumale::Ensemble::RandomForestClassifier.new(random_seed: 1)
-    #   }
-    #   weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
-    #
-    #   classifier = Rumale::Ensemble::VotingClassifier.new(estimators: estimators, weights: weights, voting: 'soft')
-    #   classifier.fit(x_train, y_train)
-    #   results = classifier.predict(x_test)
-    #
-    # *Reference*
-    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
-    class VotingClassifier
-      include Base::BaseEstimator
-      include Base::Classifier
-
-      # Return the sub-classifiers that voted.
-      # @return [Hash<Symbol,Classifier>]
-      attr_reader :estimators
-
-      # Return the class labels.
-      # @return [Numo::Int32] (size: n_classes)
-      attr_reader :classes
-
-      # Create a new ensemble classifier with a voting rule.
-      #
-      # @param estimators [Hash<Symbol,Classifier>] The sub-classifiers to vote.
-      # @param weights [Hash<Symbol,Float>] The weight value for each classifier.
-      # @param voting [String] The voting rule for the predicted results of each classifier.
-      #   If 'hard' is given, the ensemble classifier predicts the class label by majority vote.
-      #   If 'soft' is given, the ensemble classifier uses the weighted average of predicted probabilities for the prediction.
-      def initialize(estimators:, weights: nil, voting: 'hard')
-        check_params_type(Hash, estimators: estimators)
-        check_params_type_or_nil(Hash, weights: weights)
-        check_params_string(voting: voting)
-        @estimators = estimators
-        @classes = nil
-        @params = {}
-        @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
-        @params[:voting] = voting
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [VotingClassifier] The learned classifier itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-
-        @encoder = Rumale::Preprocessing::LabelEncoder.new
-        y_encoded = @encoder.fit_transform(y)
-        @classes = Numo::NArray[*@encoder.classes]
-        @estimators.each_key { |name| @estimators[name].fit(x, y_encoded) }
-
-        self
-      end
-
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
-      def decision_function(x)
-        x = check_convert_sample_array(x)
-        return predict_proba(x) if soft_voting?
-
-        n_samples = x.shape[0]
-        n_classes = @classes.size
-        z = Numo::DFloat.zeros(n_samples, n_classes)
-        @estimators.each do |name, estimator|
-          estimator.predict(x).to_a.each_with_index { |c, i| z[i, c] += @params[:weights][name] }
-        end
-        z
-      end
-
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) The predicted class label per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        n_samples = x.shape[0]
-        n_classes = @classes.size
-        z = decision_function(x)
-        predicted = z.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
-        Numo::Int32.cast(@encoder.inverse_transform(predicted))
-      end
-
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-      def predict_proba(x)
-        x = check_convert_sample_array(x)
-        n_samples = x.shape[0]
-        n_classes = @classes.size
-        z = Numo::DFloat.zeros(n_samples, n_classes)
-        sum_weight = @params[:weights].each_value.inject(&:+)
-        @estimators.each do |name, estimator|
-          z += @params[:weights][name] * estimator.predict_proba(x)
-        end
-        z /= sum_weight
-      end
-
-      private
-
-      def soft_voting?
-        @params[:voting] == 'soft'
-      end
-    end
-  end
-end
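In the hard-voting branch of decision_function above, each estimator adds its weight to the cell of its predicted class, and predict then takes the row-wise argmax. Reduced to plain Ruby with hypothetical weights and per-estimator votes:

weights = { lgr: 0.2, mlp: 0.3, rnd: 0.5 }
votes   = { lgr: 1, mlp: 0, rnd: 1 } # encoded class predicted by each estimator

score = Hash.new(0.0)
votes.each { |name, label| score[label] += weights[name] }
p score                                # => {1=>0.7, 0=>0.3}
p score.max_by { |_label, s| s }.first # => 1, since 0.2 + 0.5 outweighs 0.3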
data/lib/rumale/ensemble/voting_regressor.rb
@@ -1,82 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/base/regressor'
-
-module Rumale
-  module Ensemble
-    # VotingRegressor is a class that implements a regressor with the voting ensemble method.
-    #
-    # @example
-    #   estimators = {
-    #     rdg: Rumale::LinearModel::Ridge.new(reg_param: 1e-2, random_seed: 1),
-    #     mlp: Rumale::NeuralNetwork::MLPRegressor.new(hidden_units: [256], random_seed: 1),
-    #     rnd: Rumale::Ensemble::RandomForestRegressor.new(random_seed: 1)
-    #   }
-    #   weights = { rdg: 0.2, mlp: 0.3, rnd: 0.5 }
-    #
-    #   regressor = Rumale::Ensemble::VotingRegressor.new(estimators: estimators, weights: weights)
-    #   regressor.fit(x_train, y_train)
-    #   results = regressor.predict(x_test)
-    #
-    # *Reference*
-    # - Zhou, Z-H., "Ensemble Methods - Foundations and Algorithms," CRC Press Taylor and Francis Group, Chapman and Hall/CRC, 2012.
-    class VotingRegressor
-      include Base::BaseEstimator
-      include Base::Regressor
-
-      # Return the sub-regressors that voted.
-      # @return [Hash<Symbol,Regressor>]
-      attr_reader :estimators
-
-      # Create a new ensemble regressor with a voting rule.
-      #
-      # @param estimators [Hash<Symbol,Regressor>] The sub-regressors to vote.
-      # @param weights [Hash<Symbol,Float>] The weight value for each regressor.
-      def initialize(estimators:, weights: nil)
-        check_params_type(Hash, estimators: estimators)
-        check_params_type_or_nil(Hash, weights: weights)
-        @estimators = estimators
-        @n_outputs = nil
-        @params = {}
-        @params[:weights] = weights || estimators.each_key.with_object({}) { |name, w| w[name] = 1.0 }
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [VotingRegressor] The learned regressor itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        @n_outputs = y.ndim > 1 ? y.shape[1] : 1
-        @estimators.each_key { |name| @estimators[name].fit(x, y) }
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        z = single_target? ? Numo::DFloat.zeros(x.shape[0]) : Numo::DFloat.zeros(x.shape[0], @n_outputs)
-        sum_weight = @params[:weights].each_value.inject(&:+)
-        @estimators.each do |name, estimator|
-          z += @params[:weights][name] * estimator.predict(x)
-        end
-        z / sum_weight
-      end
-
-      private
-
-      def single_target?
-        @n_outputs == 1
-      end
-    end
-  end
-end
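predict above is a weighted mean of the sub-regressors' outputs. With weights w_k and per-estimator predictions \hat{y}_k(x), the returned value is

  \hat{y}(x) = \frac{\sum_k w_k \, \hat{y}_k(x)}{\sum_k w_k}

which reduces to the plain average under the default weight of 1.0 per estimator.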
data/lib/rumale/evaluation_measure/accuracy.rb
@@ -1,29 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/evaluator'
-
-module Rumale
-  # This module consists of the classes for model evaluation.
-  module EvaluationMeasure
-    # Accuracy is a class that calculates the accuracy of a classifier from the predicted labels.
-    #
-    # @example
-    #   evaluator = Rumale::EvaluationMeasure::Accuracy.new
-    #   puts evaluator.score(ground_truth, predicted)
-    class Accuracy
-      include Base::Evaluator
-
-      # Calculate mean accuracy.
-      #
-      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
-      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
-      # @return [Float] Mean accuracy
-      def score(y_true, y_pred)
-        y_true = check_convert_label_array(y_true)
-        y_pred = check_convert_label_array(y_pred)
-
-        (y_true.to_a.map.with_index { |label, n| label == y_pred[n] ? 1 : 0 }).inject(:+) / y_true.size.to_f
-      end
-    end
-  end
-end
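The score expression above counts exact label matches and divides by the number of samples. A worked example with hypothetical labels:

y_true = [0, 1, 1, 0]
y_pred = [0, 1, 0, 0]
acc = y_true.zip(y_pred).count { |t, p| t == p } / y_true.size.to_f
p acc # => 0.75, since 3 of the 4 labels match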
data/lib/rumale/evaluation_measure/adjusted_rand_score.rb
@@ -1,74 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/evaluator'
-
-module Rumale
-  module EvaluationMeasure
-    # AdjustedRandScore is a class that calculates the adjusted Rand index.
-    #
-    # @example
-    #   evaluator = Rumale::EvaluationMeasure::AdjustedRandScore.new
-    #   puts evaluator.score(ground_truth, predicted)
-    #
-    # *Reference*
-    # - Vinh, N X., Epps, J., and Bailey, J., "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance", J. Machine Learning Research, Vol. 11, pp.2837--2854, 2010.
-    class AdjustedRandScore
-      include Base::Evaluator
-
-      # Calculate the adjusted Rand index.
-      #
-      # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
-      # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
-      # @return [Float] Adjusted Rand index.
-      def score(y_true, y_pred)
-        y_true = check_convert_label_array(y_true)
-        y_pred = check_convert_label_array(y_pred)
-
-        # initialize some variables.
-        n_samples = y_pred.size
-        n_classes = y_true.to_a.uniq.size
-        n_clusters = y_pred.to_a.uniq.size
-
-        # check special cases.
-        return 1.0 if special_cases?(n_samples, n_classes, n_clusters)
-
-        # calculate the adjusted Rand index.
-        table = contingency_table(y_true, y_pred)
-        sum_comb_a = table.sum(axis: 1).map { |v| comb_two(v) }.sum
-        sum_comb_b = table.sum(axis: 0).map { |v| comb_two(v) }.sum
-        sum_comb = table.flatten.map { |v| comb_two(v) }.sum
-        prod_comb = (sum_comb_a * sum_comb_b).fdiv(comb_two(n_samples))
-        mean_comb = (sum_comb_a + sum_comb_b).fdiv(2)
-        (sum_comb - prod_comb).fdiv(mean_comb - prod_comb)
-      end
-
-      private
-
-      def contingency_table(y_true, y_pred)
-        class_ids = y_true.to_a.uniq
-        cluster_ids = y_pred.to_a.uniq
-        n_classes = class_ids.size
-        n_clusters = cluster_ids.size
-        table = Numo::Int32.zeros(n_classes, n_clusters)
-        n_classes.times do |i|
-          b_true = y_true.eq(class_ids[i])
-          n_clusters.times do |j|
-            b_pred = y_pred.eq(cluster_ids[j])
-            table[i, j] = (b_true & b_pred).count
-          end
-        end
-        table
-      end
-
-      def special_cases?(n_samples, n_classes, n_clusters)
-        ((n_classes.zero? && n_clusters.zero?) ||
-          (n_classes == 1 && n_clusters == 1) ||
-          (n_classes == n_samples && n_clusters == n_samples))
-      end
-
-      def comb_two(k)
-        k * (k - 1) / 2
-      end
-    end
-  end
-end
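score above computes the adjusted-for-chance form of the Rand index from the Vinh et al. reference. With contingency-table entries n_{ij} (table), row sums a_i, column sums b_j, and n samples, the returned quantity is

  \mathrm{ARI} = \frac{\sum_{ij} \binom{n_{ij}}{2} - \bigl[\sum_i \binom{a_i}{2} \sum_j \binom{b_j}{2}\bigr] / \binom{n}{2}}{\frac{1}{2} \bigl[\sum_i \binom{a_i}{2} + \sum_j \binom{b_j}{2}\bigr] - \bigl[\sum_i \binom{a_i}{2} \sum_j \binom{b_j}{2}\bigr] / \binom{n}{2}}

where sum_comb, prod_comb, and mean_comb in the code correspond to the \sum_{ij} term, the product term, and the half-sum term, respectively.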