rumale 0.13.8 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4770a703a1c19b899b495006c36c19db5edf6fb1
+  data.tar.gz: 7a5493680c41332162c4434b6b5c1d0a07c1b456
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9c569cfef32fe2a161c2a1350b516f1dde3dfd72109fbc442118a91162c8193ede68a21dd3f2446d0e2b00d203800375c5594c9a9563867ac9fbe4509a3f8e88
+  data.tar.gz: 8f15681e92c08859745dd8f93acdfedf09b31e321dc5105196f73f161c449a33aeded9f6ff4483c0061044828f69ad5a7aeba7a884443ab163457011d6f5dd46
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
+# 0.14.0
+- Add classifier and regressor class with multi-layer perceptron.
+  - [MLPClassifier](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPClassifier.html)
+  - [MLPRegressor](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPRegressor.html)
+- Refactor specs.
+
+## Breaking changes
+- Change predict method of SVC, LogisticRegression, and FactorizationMachineClassifier classes to return the original label instead of -1 or 1 labels when binary classification problem.
+- Fix hyperparameter validation to check if the type of given value is Numeric type.
+- Fix array validation for samples, labels, and target values to accept Ruby Array.
+
+```ruby
+require 'rumale'
+
+samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
+labels = [0, 1, 1, 0]
+
+svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
+svc.fit(samples, labels)
+svc.predict([[-1, 0], [1, 0]])
+# => Numo::Int32#shape=[2]
+# [0, 1]
+```
+
 # 0.13.8
 - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating artificial dataset with gaussian blobs.
 - Add documents about Rumale::SVM.
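The headline addition above is the pair of multi-layer perceptron estimators. A minimal sketch of how MLPClassifier would be used, mirroring the SVC snippet in the changelog; the constructor keywords shown here (`hidden_units:`, `max_iter:`, `random_seed:`) are assumptions and do not appear in this diff.

```ruby
require 'rumale'

samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
labels = [0, 1, 1, 0]

# Hypothetical usage of the new class; keyword names are assumed, not shown in this diff.
mlp = Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [16], max_iter: 50, random_seed: 1)
mlp.fit(samples, labels)
mlp.predict([[-1, 0], [1, 0]])
# => Numo::Int32 of predicted labels in the original encoding
```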
data/README.md
CHANGED
@@ -10,11 +10,12 @@
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
-Rumale supports
-Logistic Regression,
-
-
-
+Rumale supports Support Vector Machine,
+Logistic Regression, Ridge, Lasso, Factorization Machine,
+Multi-layer Perceptron,
+Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
+K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
+Mutidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
 
 This project was formerly known as "SVMKit".
 If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
@@ -39,7 +40,6 @@ Or install it yourself as:
 
 ### Example 1. XOR data
 First, let's classify simple xor data.
-In Rumale, feature vectors and labels are represented by [Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ```ruby
 require 'rumale'
@@ -48,10 +48,6 @@ require 'rumale'
 features = [[0, 0], [0, 1], [1, 0], [1, 1]]
 labels = [0, 1, 1, 0]
 
-# Convert Ruby Array into Numo::NArray.
-x = Numo::DFloat.asarray(features)
-y = Numo::Int32.asarray(labels)
-
 # Train classifier with nearest neighbor rule.
 estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
 estimator.fit(x, y)
@@ -72,6 +68,8 @@ Numo::Int32#shape=[4]
 
 The basic usage of Rumale is to first train the model with the fit method
 and then estimate with the predict method.
+In addition, Rumale recommends using arrays such as feature vectors and labels with
+[Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ### Example 2. Pendigits dataset classification
 
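The note added to the README above recommends Numo::NArray for feature vectors and labels. For reference, the explicit conversion that the removed Example 1 lines performed looks like this (a minimal sketch grounded in those removed lines; since 0.14.0 the estimators also accept plain Ruby Array directly, as the CHANGELOG example shows).

```ruby
require 'rumale'

features = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels = [0, 1, 1, 0]

# Convert Ruby Array into Numo::NArray, as the pre-0.14.0 README spelled out.
x = Numo::DFloat.asarray(features)
y = Numo::Int32.asarray(labels)
```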
data/lib/rumale.rb
CHANGED
@@ -73,6 +73,9 @@ require 'rumale/decomposition/factor_analysis'
 require 'rumale/decomposition/fast_ica'
 require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
+require 'rumale/neural_network/base_mlp'
+require 'rumale/neural_network/mlp_regressor'
+require 'rumale/neural_network/mlp_classifier'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
data/lib/rumale/base/classifier.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Mean accuracy
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Accuracy.new
 evaluator.score(y, predict(x))
data/lib/rumale/base/cluster_analyzer.rb
CHANGED
@@ -20,8 +20,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Purity
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Purity.new
 evaluator.score(y, fit_predict(x))
data/lib/rumale/base/regressor.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
 # @return [Float] Coefficient of determination
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_tvalue_array(y)
 check_sample_tvalue_size(x, y)
 evaluator = Rumale::EvaluationMeasure::R2Score.new
 evaluator.score(y, predict(x))
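The check_convert_sample_array, check_convert_label_array, and check_convert_tvalue_array helpers used in the three base modules above come from data/lib/rumale/validation.rb (+32 -2), whose diff is not included in this section. A plausible sketch of the behavior the hunks rely on, casting Ruby Array input to the expected Numo type before validating it; the method bodies below are assumptions, only the method names appear in this diff.

```ruby
require 'numo/narray'

# Illustrative only: the real implementations live in rumale/validation.rb,
# which is not shown here. The casts and checks below are assumptions.
def check_convert_sample_array(x)
  x = Numo::DFloat.cast(x) unless x.is_a?(Numo::DFloat)
  raise ArgumentError, 'the sample array is expected to be 2-D' unless x.ndim == 2
  x
end

def check_convert_label_array(y)
  y = Numo::Int32.cast(y) unless y.is_a?(Numo::Int32)
  raise ArgumentError, 'the label array is expected to be 1-D' unless y.ndim == 1
  y
end
```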
data/lib/rumale/clustering/dbscan.rb
CHANGED
@@ -34,8 +34,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
-
-check_params_integer(min_samples: min_samples)
+check_params_numeric(eps: eps, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:eps] = eps
@@ -53,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [DBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 self
@@ -65,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 labels
data/lib/rumale/clustering/gaussian_mixture.rb
CHANGED
@@ -47,10 +47,9 @@ module Rumale
 # @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -74,7 +73,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [GaussianMixture] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit')

 n_samples = x.shape[0]
@@ -97,7 +96,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('predict')

 memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
@@ -109,7 +108,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit_predict')

 fit(x).predict(x)
data/lib/rumale/clustering/hdbscan.rb
CHANGED
@@ -34,8 +34,8 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
-
-
+check_params_numeric(min_samples: min_samples)
+check_params_numeric_or_nil(min_cluster_size: min_cluster_size)
 check_params_string(metric: metric)
 check_params_positive(min_samples: min_samples)
 @params = {}
@@ -53,7 +53,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [HDBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -65,7 +65,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/k_means.rb
CHANGED
@@ -36,10 +36,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -59,7 +58,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [KMeans] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 init_cluster_centers(x)
 @params[:max_iter].times do |_t|
 cluster_labels = assign_cluster(x)
@@ -79,7 +78,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 assign_cluster(x)
 end

@@ -88,7 +87,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 predict(x)
 end
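Because the KMeans hunks above now funnel every input through check_convert_sample_array, fit_predict can be called with a plain Ruby Array. A minimal sketch; the toy data is made up, and the constructor keywords are the ones shown in the initialize signature above.

```ruby
require 'rumale'

samples = [[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.1, 4.9]]

kmeans = Rumale::Clustering::KMeans.new(n_clusters: 2, max_iter: 50, random_seed: 1)
labels = kmeans.fit_predict(samples)
# => Numo::Int32 (shape: [4]) of predicted cluster labels
```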
data/lib/rumale/clustering/k_medoids.rb
CHANGED
@@ -37,10 +37,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(metric: metric, init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -63,7 +62,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [KMedoids] The learned cluster analyzer itself.
 def fit(x, _not_used = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 # initialize some varibales.
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -89,7 +88,7 @@ module Rumale
 # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
 if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
 raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
@@ -103,7 +102,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 if @params[:metric] == 'precomputed'
 predict(x[true, @medoid_ids])
data/lib/rumale/clustering/power_iteration.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param eps [Float] A small value close to zero to avoid zero division error.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -71,7 +69,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [PowerIteration] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -83,7 +81,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]

 affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
data/lib/rumale/clustering/single_linkage.rb
CHANGED
@@ -35,7 +35,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_clusters: 2, metric: 'euclidean')
-
+check_params_numeric(n_clusters: n_clusters)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -52,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [SingleLinkage] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -64,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/snn.rb
CHANGED
@@ -25,7 +25,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean')
-
+check_params_numeric(n_neighbors: n_neighbors, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_neighbors] = n_neighbors
data/lib/rumale/clustering/spectral_clustering.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -69,7 +67,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [SpectralClustering] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?

@@ -84,7 +82,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
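The guards in the SpectralClustering hunks above raise unless Numo::Linalg is loaded. A minimal sketch of the setup this implies; the `numo/linalg/autoloader` require is the usual way to load Numo::Linalg but is an assumption here, not something shown in this diff.

```ruby
require 'numo/linalg/autoloader' # assumed: loads Numo::Linalg so the enable_linalg? guard passes
require 'rumale'

samples = [[0.0, 0.0], [0.2, 0.1], [5.0, 5.0], [5.2, 4.9]]

sc = Rumale::Clustering::SpectralClustering.new(n_clusters: 2, random_seed: 1)
labels = sc.fit_predict(samples)
# => Numo::Int32 (shape: [4]) of predicted cluster labels
```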
data/lib/rumale/dataset.rb
CHANGED
@@ -60,11 +60,9 @@ module Rumale
 # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples, factor: factor)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_float(factor: factor)
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
 # initialize some variables.
@@ -99,10 +97,9 @@ module Rumale
 # If nil is given, no noise is added.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 # initialize some variables.
 rs = random_seed
@@ -142,11 +139,10 @@ module Rumale
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_blobs(n_samples = 1000, n_features = 2,
 centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
-Rumale::Validation.
-Rumale::Validation.check_params_float(cluster_std: cluster_std)
+Rumale::Validation.check_params_numeric(n_samples: n_samples, n_features: n_features, cluster_std: cluster_std)
 Rumale::Validation.check_params_type(Array, center_box: center_box)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric_or_nil(random_seed: random_seed)
 # initialize rng.
 rs = random_seed
 rs ||= srand
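For reference, a minimal sketch of calling the make_blobs method whose validation is updated above, using only keywords from the signature shown; the assumption that it returns a samples/labels pair follows its scikit-learn counterpart and is not confirmed by this hunk.

```ruby
require 'rumale'

# Assumed return value: samples (Numo::DFloat) and cluster labels (Numo::Int32).
x, y = Rumale::Dataset.make_blobs(300, 2, cluster_std: 0.5, random_seed: 1)
```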
|