rumale 0.13.8 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 581437e401cec76201212dbd1aa1e38ff6142eef
4
- data.tar.gz: 42ce62c892dcee05d41a5857a28c80627da95833
3
+ metadata.gz: 4770a703a1c19b899b495006c36c19db5edf6fb1
4
+ data.tar.gz: 7a5493680c41332162c4434b6b5c1d0a07c1b456
5
5
  SHA512:
6
- metadata.gz: 36d7bb281d676827b40f3382292a414b474a83c153e7260f5ceb8af2b7664d621ae0adb3ece7475f35e83debd6f6b1e031c804bf76260ec58be6aca315b0d431
7
- data.tar.gz: 367c273c90a4685913eb48dde5cb1d109e871e87ac1b2aab76d06b565a283996ea25ece3aa9030b7df64348902f8636c2ee3580a19ac533224fbbe62a495e45c
6
+ metadata.gz: 9c569cfef32fe2a161c2a1350b516f1dde3dfd72109fbc442118a91162c8193ede68a21dd3f2446d0e2b00d203800375c5594c9a9563867ac9fbe4509a3f8e88
7
+ data.tar.gz: 8f15681e92c08859745dd8f93acdfedf09b31e321dc5105196f73f161c449a33aeded9f6ff4483c0061044828f69ad5a7aeba7a884443ab163457011d6f5dd46
@@ -1,3 +1,27 @@
1
+ # 0.14.0
2
+ - Add classifier and regressor class with multi-layer perceptron.
3
+ - [MLPClassifier](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPClassifier.html)
4
+ - [MLPRegressor](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPRegressor.html)
5
+ - Refactor specs.
6
+
7
+ ## Breaking changes
8
+ - Change the predict method of the SVC, LogisticRegression, and FactorizationMachineClassifier classes to return the original labels instead of -1 or 1 for binary classification problems.
9
+ - Fix hyperparameter validation to check whether the given value is of Numeric type.
10
+ - Fix array validation for samples, labels, and target values to accept Ruby Array.
11
+
12
+ ```ruby
13
+ require 'rumale'
14
+
15
+ samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
16
+ labels = [0, 1, 1, 0]
17
+
18
+ svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
19
+ svc.fit(samples, labels)
20
+ svc.predict([[-1, 0], [1, 0]])
21
+ # => Numo::Int32#shape=[2]
22
+ # [0, 1]
23
+ ```
24
+
1
25
  # 0.13.8
2
26
  - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating artificial dataset with gaussian blobs.
3
27
  - Add documents about Rumale::SVM.
data/README.md CHANGED
@@ -10,11 +10,12 @@
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
13
- Rumale supports Linear / Kernel Support Vector Machine,
14
- Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
15
- Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
16
- K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
17
- Mutidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
13
+ Rumale supports Support Vector Machine,
14
+ Logistic Regression, Ridge, Lasso, Factorization Machine,
15
+ Multi-layer Perceptron,
16
+ Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
17
+ K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
18
+ Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
18
19
 
19
20
  This project was formerly known as "SVMKit".
20
21
  If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
@@ -39,7 +40,6 @@ Or install it yourself as:
39
40
 
40
41
  ### Example 1. XOR data
41
42
  First, let's classify simple xor data.
42
- In Rumale, feature vectors and labels are represented by [Numo::NArray](https://github.com/ruby-numo/numo-narray).
43
43
 
44
44
  ```ruby
45
45
  require 'rumale'
@@ -48,10 +48,6 @@ require 'rumale'
48
48
  features = [[0, 0], [0, 1], [1, 0], [1, 1]]
49
49
  labels = [0, 1, 1, 0]
50
50
 
51
- # Convert Ruby Array into Numo::NArray.
52
- x = Numo::DFloat.asarray(features)
53
- y = Numo::Int32.asarray(labels)
54
-
55
51
  # Train classifier with nearest neighbor rule.
56
52
  estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
57
53
  estimator.fit(x, y)
@@ -72,6 +68,8 @@ Numo::Int32#shape=[4]
72
68
 
73
69
  The basic usage of Rumale is to first train the model with the fit method
74
70
  and then estimate with the predict method.
71
+ In addition, Rumale recommends using arrays such as feature vectors and labels with
72
+ [Numo::NArray](https://github.com/ruby-numo/numo-narray).
75
73
 
76
74
  ### Example 2. Pendigits dataset classification
77
75
 
@@ -73,6 +73,9 @@ require 'rumale/decomposition/factor_analysis'
73
73
  require 'rumale/decomposition/fast_ica'
74
74
  require 'rumale/manifold/tsne'
75
75
  require 'rumale/manifold/mds'
76
+ require 'rumale/neural_network/base_mlp'
77
+ require 'rumale/neural_network/mlp_regressor'
78
+ require 'rumale/neural_network/mlp_classifier'
76
79
  require 'rumale/preprocessing/l2_normalizer'
77
80
  require 'rumale/preprocessing/min_max_scaler'
78
81
  require 'rumale/preprocessing/max_abs_scaler'
@@ -25,8 +25,8 @@ module Rumale
25
25
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
26
26
  # @return [Float] Mean accuracy
27
27
  def score(x, y)
28
- check_sample_array(x)
29
- check_label_array(y)
28
+ x = check_convert_sample_array(x)
29
+ y = check_convert_label_array(y)
30
30
  check_sample_label_size(x, y)
31
31
  evaluator = Rumale::EvaluationMeasure::Accuracy.new
32
32
  evaluator.score(y, predict(x))
@@ -20,8 +20,8 @@ module Rumale
20
20
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
21
21
  # @return [Float] Purity
22
22
  def score(x, y)
23
- check_sample_array(x)
24
- check_label_array(y)
23
+ x = check_convert_sample_array(x)
24
+ y = check_convert_label_array(y)
25
25
  check_sample_label_size(x, y)
26
26
  evaluator = Rumale::EvaluationMeasure::Purity.new
27
27
  evaluator.score(y, fit_predict(x))
@@ -25,8 +25,8 @@ module Rumale
25
25
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
26
26
  # @return [Float] Coefficient of determination
27
27
  def score(x, y)
28
- check_sample_array(x)
29
- check_tvalue_array(y)
28
+ x = check_convert_sample_array(x)
29
+ y = check_convert_tvalue_array(y)
30
30
  check_sample_tvalue_size(x, y)
31
31
  evaluator = Rumale::EvaluationMeasure::R2Score.new
32
32
  evaluator.score(y, predict(x))
@@ -34,8 +34,7 @@ module Rumale
34
34
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
35
35
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
36
36
  def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
37
- check_params_float(eps: eps)
38
- check_params_integer(min_samples: min_samples)
37
+ check_params_numeric(eps: eps, min_samples: min_samples)
39
38
  check_params_string(metric: metric)
40
39
  @params = {}
41
40
  @params[:eps] = eps
@@ -53,7 +52,7 @@ module Rumale
53
52
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
54
53
  # @return [DBSCAN] The learned cluster analyzer itself.
55
54
  def fit(x, _y = nil)
56
- check_sample_array(x)
55
+ x = check_convert_sample_array(x)
57
56
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
58
57
  partial_fit(x)
59
58
  self
@@ -65,7 +64,7 @@ module Rumale
65
64
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
66
65
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
67
66
  def fit_predict(x)
68
- check_sample_array(x)
67
+ x = check_convert_sample_array(x)
69
68
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
70
69
  partial_fit(x)
71
70
  labels
@@ -47,10 +47,9 @@ module Rumale
47
47
  # @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
48
48
  # @param random_seed [Integer] The seed value using to initialize the random generator.
49
49
  def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
50
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
51
- check_params_float(tol: tol)
50
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
52
51
  check_params_string(init: init)
53
- check_params_type_or_nil(Integer, random_seed: random_seed)
52
+ check_params_numeric_or_nil(random_seed: random_seed)
54
53
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
55
54
  @params = {}
56
55
  @params[:n_clusters] = n_clusters
@@ -74,7 +73,7 @@ module Rumale
74
73
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
75
74
  # @return [GaussianMixture] The learned cluster analyzer itself.
76
75
  def fit(x, _y = nil)
77
- check_sample_array(x)
76
+ x = check_convert_sample_array(x)
78
77
  check_enable_linalg('fit')
79
78
 
80
79
  n_samples = x.shape[0]
@@ -97,7 +96,7 @@ module Rumale
97
96
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
98
97
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
99
98
  def predict(x)
100
- check_sample_array(x)
99
+ x = check_convert_sample_array(x)
101
100
  check_enable_linalg('predict')
102
101
 
103
102
  memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
@@ -109,7 +108,7 @@ module Rumale
109
108
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
110
109
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
111
110
  def fit_predict(x)
112
- check_sample_array(x)
111
+ x = check_convert_sample_array(x)
113
112
  check_enable_linalg('fit_predict')
114
113
 
115
114
  fit(x).predict(x)
@@ -34,8 +34,8 @@ module Rumale
34
34
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
35
35
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
36
36
  def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
37
- check_params_integer(min_samples: min_samples)
38
- check_params_type_or_nil(Integer, min_cluster_size: min_cluster_size)
37
+ check_params_numeric(min_samples: min_samples)
38
+ check_params_numeric_or_nil(min_cluster_size: min_cluster_size)
39
39
  check_params_string(metric: metric)
40
40
  check_params_positive(min_samples: min_samples)
41
41
  @params = {}
@@ -53,7 +53,7 @@ module Rumale
53
53
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
54
54
  # @return [HDBSCAN] The learned cluster analyzer itself.
55
55
  def fit(x, _y = nil)
56
- check_sample_array(x)
56
+ x = check_convert_sample_array(x)
57
57
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
58
58
  fit_predict(x)
59
59
  self
@@ -65,7 +65,7 @@ module Rumale
65
65
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
66
66
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
67
67
  def fit_predict(x)
68
- check_sample_array(x)
68
+ x = check_convert_sample_array(x)
69
69
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
70
70
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
71
71
  @labels = partial_fit(distance_mat)
@@ -36,10 +36,9 @@ module Rumale
36
36
  # @param tol [Float] The tolerance of termination criterion.
37
37
  # @param random_seed [Integer] The seed value using to initialize the random generator.
38
38
  def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
39
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
40
- check_params_float(tol: tol)
39
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
41
40
  check_params_string(init: init)
42
- check_params_type_or_nil(Integer, random_seed: random_seed)
41
+ check_params_numeric_or_nil(random_seed: random_seed)
43
42
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
44
43
  @params = {}
45
44
  @params[:n_clusters] = n_clusters
@@ -59,7 +58,7 @@ module Rumale
59
58
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
60
59
  # @return [KMeans] The learned cluster analyzer itself.
61
60
  def fit(x, _y = nil)
62
- check_sample_array(x)
61
+ x = check_convert_sample_array(x)
63
62
  init_cluster_centers(x)
64
63
  @params[:max_iter].times do |_t|
65
64
  cluster_labels = assign_cluster(x)
@@ -79,7 +78,7 @@ module Rumale
79
78
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
80
79
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
81
80
  def predict(x)
82
- check_sample_array(x)
81
+ x = check_convert_sample_array(x)
83
82
  assign_cluster(x)
84
83
  end
85
84
 
@@ -88,7 +87,7 @@ module Rumale
88
87
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
89
88
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
90
89
  def fit_predict(x)
91
- check_sample_array(x)
90
+ x = check_convert_sample_array(x)
92
91
  fit(x)
93
92
  predict(x)
94
93
  end
@@ -37,10 +37,9 @@ module Rumale
37
37
  # @param tol [Float] The tolerance of termination criterion.
38
38
  # @param random_seed [Integer] The seed value using to initialize the random generator.
39
39
  def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
40
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
41
- check_params_float(tol: tol)
40
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
42
41
  check_params_string(metric: metric, init: init)
43
- check_params_type_or_nil(Integer, random_seed: random_seed)
42
+ check_params_numeric_or_nil(random_seed: random_seed)
44
43
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
45
44
  @params = {}
46
45
  @params[:n_clusters] = n_clusters
@@ -63,7 +62,7 @@ module Rumale
63
62
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
64
63
  # @return [KMedoids] The learned cluster analyzer itself.
65
64
  def fit(x, _not_used = nil)
66
- check_sample_array(x)
65
+ x = check_convert_sample_array(x)
67
66
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
68
67
  # initialize some variables.
69
68
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -89,7 +88,7 @@ module Rumale
89
88
  # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
90
89
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
91
90
  def predict(x)
92
- check_sample_array(x)
91
+ x = check_convert_sample_array(x)
93
92
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
94
93
  if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
95
94
  raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
@@ -103,7 +102,7 @@ module Rumale
103
102
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
104
103
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
105
104
  def fit_predict(x)
106
- check_sample_array(x)
105
+ x = check_convert_sample_array(x)
107
106
  fit(x)
108
107
  if @params[:metric] == 'precomputed'
109
108
  predict(x[true, @medoid_ids])
@@ -42,11 +42,9 @@ module Rumale
42
42
  # @param eps [Float] A small value close to zero to avoid zero division error.
43
43
  # @param random_seed [Integer] The seed value using to initialize the random generator.
44
44
  def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
45
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
46
- check_params_float(tol: tol, eps: eps)
45
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
46
+ check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
47
47
  check_params_string(affinity: affinity, init: init)
48
- check_params_type_or_nil(Float, gamma: gamma)
49
- check_params_type_or_nil(Integer, random_seed: random_seed)
50
48
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
51
49
  @params = {}
52
50
  @params[:n_clusters] = n_clusters
@@ -71,7 +69,7 @@ module Rumale
71
69
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
72
70
  # @return [PowerIteration] The learned cluster analyzer itself.
73
71
  def fit(x, _y = nil)
74
- check_sample_array(x)
72
+ x = check_convert_sample_array(x)
75
73
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
76
74
  fit_predict(x)
77
75
  self
@@ -83,7 +81,7 @@ module Rumale
83
81
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
84
82
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
85
83
  def fit_predict(x)
86
- check_sample_array(x)
84
+ x = check_convert_sample_array(x)
87
85
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
88
86
 
89
87
  affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
@@ -35,7 +35,7 @@ module Rumale
35
35
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
36
36
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
37
37
  def initialize(n_clusters: 2, metric: 'euclidean')
38
- check_params_integer(n_clusters: n_clusters)
38
+ check_params_numeric(n_clusters: n_clusters)
39
39
  check_params_string(metric: metric)
40
40
  @params = {}
41
41
  @params[:n_clusters] = n_clusters
@@ -52,7 +52,7 @@ module Rumale
52
52
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
53
53
  # @return [SingleLinkage] The learned cluster analyzer itself.
54
54
  def fit(x, _y = nil)
55
- check_sample_array(x)
55
+ x = check_convert_sample_array(x)
56
56
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
57
57
  fit_predict(x)
58
58
  self
@@ -64,7 +64,7 @@ module Rumale
64
64
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
65
65
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
66
66
  def fit_predict(x)
67
- check_sample_array(x)
67
+ x = check_convert_sample_array(x)
68
68
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
69
69
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
70
70
  @labels = partial_fit(distance_mat)
@@ -25,7 +25,7 @@ module Rumale
25
25
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
26
26
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
27
27
  def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean')
28
- check_params_integer(n_neighbors: n_neighbors, min_samples: min_samples)
28
+ check_params_numeric(n_neighbors: n_neighbors, min_samples: min_samples)
29
29
  check_params_string(metric: metric)
30
30
  @params = {}
31
31
  @params[:n_neighbors] = n_neighbors
@@ -42,11 +42,9 @@ module Rumale
42
42
  # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
43
43
  # @param random_seed [Integer] The seed value using to initialize the random generator.
44
44
  def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
45
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
46
- check_params_float(tol: tol)
45
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
46
+ check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
47
47
  check_params_string(affinity: affinity, init: init)
48
- check_params_type_or_nil(Float, gamma: gamma)
49
- check_params_type_or_nil(Integer, random_seed: random_seed)
50
48
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
51
49
  @params = {}
52
50
  @params[:n_clusters] = n_clusters
@@ -69,7 +67,7 @@ module Rumale
69
67
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
70
68
  # @return [SpectralClustering] The learned cluster analyzer itself.
71
69
  def fit(x, _y = nil)
72
- check_sample_array(x)
70
+ x = check_convert_sample_array(x)
73
71
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
74
72
  raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
75
73
 
@@ -84,7 +82,7 @@ module Rumale
84
82
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
85
83
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
86
84
  def fit_predict(x)
87
- check_sample_array(x)
85
+ x = check_convert_sample_array(x)
88
86
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
89
87
  raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
90
88
 
@@ -60,11 +60,9 @@ module Rumale
60
60
  # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
61
61
  # @param random_seed [Integer] The seed value using to initialize the random generator.
62
62
  def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
63
- Rumale::Validation.check_params_integer(n_samples: n_samples)
63
+ Rumale::Validation.check_params_numeric(n_samples: n_samples, factor: factor)
64
64
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
65
- Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
66
- Rumale::Validation.check_params_float(factor: factor)
67
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
65
+ Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
68
66
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
69
67
  raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
70
68
  # initialize some variables.
@@ -99,10 +97,9 @@ module Rumale
99
97
  # If nil is given, no noise is added.
100
98
  # @param random_seed [Integer] The seed value using to initialize the random generator.
101
99
  def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
102
- Rumale::Validation.check_params_integer(n_samples: n_samples)
100
+ Rumale::Validation.check_params_numeric(n_samples: n_samples)
103
101
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
104
- Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
105
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
102
+ Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
106
103
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
107
104
  # initialize some variables.
108
105
  rs = random_seed
@@ -142,11 +139,10 @@ module Rumale
142
139
  # @param random_seed [Integer] The seed value using to initialize the random generator.
143
140
  def make_blobs(n_samples = 1000, n_features = 2,
144
141
  centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
145
- Rumale::Validation.check_params_integer(n_samples: n_samples, n_features: n_features)
146
- Rumale::Validation.check_params_float(cluster_std: cluster_std)
142
+ Rumale::Validation.check_params_numeric(n_samples: n_samples, n_features: n_features, cluster_std: cluster_std)
147
143
  Rumale::Validation.check_params_type(Array, center_box: center_box)
148
144
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
149
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
145
+ Rumale::Validation.check_params_numeric_or_nil(random_seed: random_seed)
150
146
  # initialize rng.
151
147
  rs = random_seed
152
148
  rs ||= srand