rumale 0.13.8 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +24 -0
  3. data/README.md +8 -10
  4. data/lib/rumale.rb +3 -0
  5. data/lib/rumale/base/classifier.rb +2 -2
  6. data/lib/rumale/base/cluster_analyzer.rb +2 -2
  7. data/lib/rumale/base/regressor.rb +2 -2
  8. data/lib/rumale/clustering/dbscan.rb +3 -4
  9. data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
  10. data/lib/rumale/clustering/hdbscan.rb +4 -4
  11. data/lib/rumale/clustering/k_means.rb +5 -6
  12. data/lib/rumale/clustering/k_medoids.rb +5 -6
  13. data/lib/rumale/clustering/power_iteration.rb +4 -6
  14. data/lib/rumale/clustering/single_linkage.rb +3 -3
  15. data/lib/rumale/clustering/snn.rb +1 -1
  16. data/lib/rumale/clustering/spectral_clustering.rb +4 -6
  17. data/lib/rumale/dataset.rb +6 -10
  18. data/lib/rumale/decomposition/factor_analysis.rb +4 -4
  19. data/lib/rumale/decomposition/fast_ica.rb +6 -7
  20. data/lib/rumale/decomposition/nmf.rb +6 -7
  21. data/lib/rumale/decomposition/pca.rb +6 -7
  22. data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
  23. data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
  24. data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
  25. data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
  26. data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
  27. data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
  28. data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
  29. data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
  30. data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
  31. data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
  32. data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
  33. data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
  34. data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
  35. data/lib/rumale/evaluation_measure/f_score.rb +2 -2
  36. data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
  37. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
  38. data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
  39. data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
  40. data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
  41. data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
  42. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
  43. data/lib/rumale/evaluation_measure/precision.rb +2 -2
  44. data/lib/rumale/evaluation_measure/purity.rb +2 -2
  45. data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
  46. data/lib/rumale/evaluation_measure/recall.rb +2 -2
  47. data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
  48. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
  49. data/lib/rumale/kernel_approximation/rbf.rb +5 -6
  50. data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
  51. data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
  52. data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
  53. data/lib/rumale/linear_model/lasso.rb +5 -6
  54. data/lib/rumale/linear_model/linear_regression.rb +5 -6
  55. data/lib/rumale/linear_model/logistic_regression.rb +16 -15
  56. data/lib/rumale/linear_model/ridge.rb +5 -6
  57. data/lib/rumale/linear_model/svc.rb +34 -28
  58. data/lib/rumale/linear_model/svr.rb +5 -6
  59. data/lib/rumale/manifold/mds.rb +3 -4
  60. data/lib/rumale/manifold/tsne.rb +3 -5
  61. data/lib/rumale/model_selection/cross_validation.rb +6 -5
  62. data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
  63. data/lib/rumale/model_selection/k_fold.rb +3 -3
  64. data/lib/rumale/model_selection/shuffle_split.rb +3 -5
  65. data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
  66. data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
  67. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
  68. data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
  69. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
  70. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
  71. data/lib/rumale/neural_network/base_mlp.rb +244 -0
  72. data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
  73. data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
  74. data/lib/rumale/optimizer/ada_grad.rb +1 -1
  75. data/lib/rumale/optimizer/adam.rb +3 -3
  76. data/lib/rumale/optimizer/nadam.rb +1 -1
  77. data/lib/rumale/optimizer/rmsprop.rb +1 -1
  78. data/lib/rumale/optimizer/sgd.rb +1 -1
  79. data/lib/rumale/optimizer/yellow_fin.rb +1 -2
  80. data/lib/rumale/pairwise_metric.rb +17 -19
  81. data/lib/rumale/pipeline/pipeline.rb +10 -10
  82. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
  83. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
  84. data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
  85. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
  86. data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
  87. data/lib/rumale/preprocessing/label_encoder.rb +1 -1
  88. data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
  89. data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
  90. data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
  91. data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
  92. data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
  93. data/lib/rumale/tree/base_decision_tree.rb +1 -1
  94. data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
  95. data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
  96. data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
  97. data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
  98. data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
  99. data/lib/rumale/validation.rb +32 -2
  100. data/lib/rumale/version.rb +1 -1
  101. data/rumale.gemspec +7 -7
  102. metadata +11 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 581437e401cec76201212dbd1aa1e38ff6142eef
4
- data.tar.gz: 42ce62c892dcee05d41a5857a28c80627da95833
3
+ metadata.gz: 4770a703a1c19b899b495006c36c19db5edf6fb1
4
+ data.tar.gz: 7a5493680c41332162c4434b6b5c1d0a07c1b456
5
5
  SHA512:
6
- metadata.gz: 36d7bb281d676827b40f3382292a414b474a83c153e7260f5ceb8af2b7664d621ae0adb3ece7475f35e83debd6f6b1e031c804bf76260ec58be6aca315b0d431
7
- data.tar.gz: 367c273c90a4685913eb48dde5cb1d109e871e87ac1b2aab76d06b565a283996ea25ece3aa9030b7df64348902f8636c2ee3580a19ac533224fbbe62a495e45c
6
+ metadata.gz: 9c569cfef32fe2a161c2a1350b516f1dde3dfd72109fbc442118a91162c8193ede68a21dd3f2446d0e2b00d203800375c5594c9a9563867ac9fbe4509a3f8e88
7
+ data.tar.gz: 8f15681e92c08859745dd8f93acdfedf09b31e321dc5105196f73f161c449a33aeded9f6ff4483c0061044828f69ad5a7aeba7a884443ab163457011d6f5dd46
@@ -1,3 +1,27 @@
1
+ # 0.14.0
2
+ - Add classifier and regressor class with multi-layer perceptron.
3
+ - [MLPClassifier](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPClassifier.html)
4
+ - [MLPRegressor](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPRegressor.html)
5
+ - Refactor specs.
6
+
7
+ ## Breaking changes
8
+ - Change predict method of SVC, LogisticRegression, and FactorizationMachineClassifier classes to return the original label instead of -1 or 1 labels for binary classification problems.
9
+ - Fix hyperparameter validation to check if the type of given value is Numeric type.
10
+ - Fix array validation for samples, labels, and target values to accept Ruby Array.
11
+
12
+ ```ruby
13
+ require 'rumale'
14
+
15
+ samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
16
+ labels = [0, 1, 1, 0]
17
+
18
+ svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
19
+ svc.fit(samples, labels)
20
+ svc.predict([[-1, 0], [1, 0]])
21
+ # => Numo::Int32#shape=[2]
22
+ # [0, 1]
23
+ ```
24
+
1
25
  # 0.13.8
2
26
  - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating artificial dataset with gaussian blobs.
3
27
  - Add documents about Rumale::SVM.
data/README.md CHANGED
@@ -10,11 +10,12 @@
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
13
- Rumale supports Linear / Kernel Support Vector Machine,
14
- Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
15
- Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
16
- K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, HDBSCAN, SNN, Spectral Clustering, Power Iteration Clustering,
17
- Mutidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA and Non-negative Matrix Factorization.
13
+ Rumale supports Support Vector Machine,
14
+ Logistic Regression, Ridge, Lasso, Factorization Machine,
15
+ Multi-layer Perceptron,
16
+ Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
17
+ K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
18
+ Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
18
19
 
19
20
  This project was formerly known as "SVMKit".
20
21
  If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
@@ -39,7 +40,6 @@ Or install it yourself as:
39
40
 
40
41
  ### Example 1. XOR data
41
42
  First, let's classify simple xor data.
42
- In Rumale, feature vectors and labels are represented by [Numo::NArray](https://github.com/ruby-numo/numo-narray).
43
43
 
44
44
  ```ruby
45
45
  require 'rumale'
@@ -48,10 +48,6 @@ require 'rumale'
48
48
  features = [[0, 0], [0, 1], [1, 0], [1, 1]]
49
49
  labels = [0, 1, 1, 0]
50
50
 
51
- # Convert Ruby Array into Numo::NArray.
52
- x = Numo::DFloat.asarray(features)
53
- y = Numo::Int32.asarray(labels)
54
-
55
51
  # Train classifier with nearest neighbor rule.
56
52
  estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
57
53
  estimator.fit(x, y)
@@ -72,6 +68,8 @@ Numo::Int32#shape=[4]
72
68
 
73
69
  The basic usage of Rumale is to first train the model with the fit method
74
70
  and then estimate with the predict method.
71
+ In addition, Rumale recommends using arrays such as feature vectors and labels with
72
+ [Numo::NArray](https://github.com/ruby-numo/numo-narray).
75
73
 
76
74
  ### Example 2. Pendigits dataset classification
77
75
 
@@ -73,6 +73,9 @@ require 'rumale/decomposition/factor_analysis'
73
73
  require 'rumale/decomposition/fast_ica'
74
74
  require 'rumale/manifold/tsne'
75
75
  require 'rumale/manifold/mds'
76
+ require 'rumale/neural_network/base_mlp'
77
+ require 'rumale/neural_network/mlp_regressor'
78
+ require 'rumale/neural_network/mlp_classifier'
76
79
  require 'rumale/preprocessing/l2_normalizer'
77
80
  require 'rumale/preprocessing/min_max_scaler'
78
81
  require 'rumale/preprocessing/max_abs_scaler'
@@ -25,8 +25,8 @@ module Rumale
25
25
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
26
26
  # @return [Float] Mean accuracy
27
27
  def score(x, y)
28
- check_sample_array(x)
29
- check_label_array(y)
28
+ x = check_convert_sample_array(x)
29
+ y = check_convert_label_array(y)
30
30
  check_sample_label_size(x, y)
31
31
  evaluator = Rumale::EvaluationMeasure::Accuracy.new
32
32
  evaluator.score(y, predict(x))
@@ -20,8 +20,8 @@ module Rumale
20
20
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
21
21
  # @return [Float] Purity
22
22
  def score(x, y)
23
- check_sample_array(x)
24
- check_label_array(y)
23
+ x = check_convert_sample_array(x)
24
+ y = check_convert_label_array(y)
25
25
  check_sample_label_size(x, y)
26
26
  evaluator = Rumale::EvaluationMeasure::Purity.new
27
27
  evaluator.score(y, fit_predict(x))
@@ -25,8 +25,8 @@ module Rumale
25
25
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
26
26
  # @return [Float] Coefficient of determination
27
27
  def score(x, y)
28
- check_sample_array(x)
29
- check_tvalue_array(y)
28
+ x = check_convert_sample_array(x)
29
+ y = check_convert_tvalue_array(y)
30
30
  check_sample_tvalue_size(x, y)
31
31
  evaluator = Rumale::EvaluationMeasure::R2Score.new
32
32
  evaluator.score(y, predict(x))
@@ -34,8 +34,7 @@ module Rumale
34
34
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
35
35
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
36
36
  def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
37
- check_params_float(eps: eps)
38
- check_params_integer(min_samples: min_samples)
37
+ check_params_numeric(eps: eps, min_samples: min_samples)
39
38
  check_params_string(metric: metric)
40
39
  @params = {}
41
40
  @params[:eps] = eps
@@ -53,7 +52,7 @@ module Rumale
53
52
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
54
53
  # @return [DBSCAN] The learned cluster analyzer itself.
55
54
  def fit(x, _y = nil)
56
- check_sample_array(x)
55
+ x = check_convert_sample_array(x)
57
56
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
58
57
  partial_fit(x)
59
58
  self
@@ -65,7 +64,7 @@ module Rumale
65
64
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
66
65
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
67
66
  def fit_predict(x)
68
- check_sample_array(x)
67
+ x = check_convert_sample_array(x)
69
68
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
70
69
  partial_fit(x)
71
70
  labels
@@ -47,10 +47,9 @@ module Rumale
47
47
  # @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
48
48
  # @param random_seed [Integer] The seed value using to initialize the random generator.
49
49
  def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
50
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
51
- check_params_float(tol: tol)
50
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
52
51
  check_params_string(init: init)
53
- check_params_type_or_nil(Integer, random_seed: random_seed)
52
+ check_params_numeric_or_nil(random_seed: random_seed)
54
53
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
55
54
  @params = {}
56
55
  @params[:n_clusters] = n_clusters
@@ -74,7 +73,7 @@ module Rumale
74
73
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
75
74
  # @return [GaussianMixture] The learned cluster analyzer itself.
76
75
  def fit(x, _y = nil)
77
- check_sample_array(x)
76
+ x = check_convert_sample_array(x)
78
77
  check_enable_linalg('fit')
79
78
 
80
79
  n_samples = x.shape[0]
@@ -97,7 +96,7 @@ module Rumale
97
96
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
98
97
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
99
98
  def predict(x)
100
- check_sample_array(x)
99
+ x = check_convert_sample_array(x)
101
100
  check_enable_linalg('predict')
102
101
 
103
102
  memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
@@ -109,7 +108,7 @@ module Rumale
109
108
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
110
109
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
111
110
  def fit_predict(x)
112
- check_sample_array(x)
111
+ x = check_convert_sample_array(x)
113
112
  check_enable_linalg('fit_predict')
114
113
 
115
114
  fit(x).predict(x)
@@ -34,8 +34,8 @@ module Rumale
34
34
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
35
35
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
36
36
  def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
37
- check_params_integer(min_samples: min_samples)
38
- check_params_type_or_nil(Integer, min_cluster_size: min_cluster_size)
37
+ check_params_numeric(min_samples: min_samples)
38
+ check_params_numeric_or_nil(min_cluster_size: min_cluster_size)
39
39
  check_params_string(metric: metric)
40
40
  check_params_positive(min_samples: min_samples)
41
41
  @params = {}
@@ -53,7 +53,7 @@ module Rumale
53
53
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
54
54
  # @return [HDBSCAN] The learned cluster analyzer itself.
55
55
  def fit(x, _y = nil)
56
- check_sample_array(x)
56
+ x = check_convert_sample_array(x)
57
57
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
58
58
  fit_predict(x)
59
59
  self
@@ -65,7 +65,7 @@ module Rumale
65
65
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
66
66
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
67
67
  def fit_predict(x)
68
- check_sample_array(x)
68
+ x = check_convert_sample_array(x)
69
69
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
70
70
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
71
71
  @labels = partial_fit(distance_mat)
@@ -36,10 +36,9 @@ module Rumale
36
36
  # @param tol [Float] The tolerance of termination criterion.
37
37
  # @param random_seed [Integer] The seed value using to initialize the random generator.
38
38
  def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
39
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
40
- check_params_float(tol: tol)
39
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
41
40
  check_params_string(init: init)
42
- check_params_type_or_nil(Integer, random_seed: random_seed)
41
+ check_params_numeric_or_nil(random_seed: random_seed)
43
42
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
44
43
  @params = {}
45
44
  @params[:n_clusters] = n_clusters
@@ -59,7 +58,7 @@ module Rumale
59
58
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
60
59
  # @return [KMeans] The learned cluster analyzer itself.
61
60
  def fit(x, _y = nil)
62
- check_sample_array(x)
61
+ x = check_convert_sample_array(x)
63
62
  init_cluster_centers(x)
64
63
  @params[:max_iter].times do |_t|
65
64
  cluster_labels = assign_cluster(x)
@@ -79,7 +78,7 @@ module Rumale
79
78
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
80
79
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
81
80
  def predict(x)
82
- check_sample_array(x)
81
+ x = check_convert_sample_array(x)
83
82
  assign_cluster(x)
84
83
  end
85
84
 
@@ -88,7 +87,7 @@ module Rumale
88
87
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
89
88
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
90
89
  def fit_predict(x)
91
- check_sample_array(x)
90
+ x = check_convert_sample_array(x)
92
91
  fit(x)
93
92
  predict(x)
94
93
  end
@@ -37,10 +37,9 @@ module Rumale
37
37
  # @param tol [Float] The tolerance of termination criterion.
38
38
  # @param random_seed [Integer] The seed value using to initialize the random generator.
39
39
  def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
40
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
41
- check_params_float(tol: tol)
40
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
42
41
  check_params_string(metric: metric, init: init)
43
- check_params_type_or_nil(Integer, random_seed: random_seed)
42
+ check_params_numeric_or_nil(random_seed: random_seed)
44
43
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
45
44
  @params = {}
46
45
  @params[:n_clusters] = n_clusters
@@ -63,7 +62,7 @@ module Rumale
63
62
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
64
63
  # @return [KMedoids] The learned cluster analyzer itself.
65
64
  def fit(x, _not_used = nil)
66
- check_sample_array(x)
65
+ x = check_convert_sample_array(x)
67
66
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
68
67
  # initialize some variables.
69
68
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -89,7 +88,7 @@ module Rumale
89
88
  # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
90
89
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
91
90
  def predict(x)
92
- check_sample_array(x)
91
+ x = check_convert_sample_array(x)
93
92
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
94
93
  if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
95
94
  raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
@@ -103,7 +102,7 @@ module Rumale
103
102
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
104
103
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
105
104
  def fit_predict(x)
106
- check_sample_array(x)
105
+ x = check_convert_sample_array(x)
107
106
  fit(x)
108
107
  if @params[:metric] == 'precomputed'
109
108
  predict(x[true, @medoid_ids])
@@ -42,11 +42,9 @@ module Rumale
42
42
  # @param eps [Float] A small value close to zero to avoid zero division error.
43
43
  # @param random_seed [Integer] The seed value using to initialize the random generator.
44
44
  def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
45
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
46
- check_params_float(tol: tol, eps: eps)
45
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
46
+ check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
47
47
  check_params_string(affinity: affinity, init: init)
48
- check_params_type_or_nil(Float, gamma: gamma)
49
- check_params_type_or_nil(Integer, random_seed: random_seed)
50
48
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
51
49
  @params = {}
52
50
  @params[:n_clusters] = n_clusters
@@ -71,7 +69,7 @@ module Rumale
71
69
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
72
70
  # @return [PowerIteration] The learned cluster analyzer itself.
73
71
  def fit(x, _y = nil)
74
- check_sample_array(x)
72
+ x = check_convert_sample_array(x)
75
73
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
76
74
  fit_predict(x)
77
75
  self
@@ -83,7 +81,7 @@ module Rumale
83
81
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
84
82
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
85
83
  def fit_predict(x)
86
- check_sample_array(x)
84
+ x = check_convert_sample_array(x)
87
85
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
88
86
 
89
87
  affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
@@ -35,7 +35,7 @@ module Rumale
35
35
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
36
36
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
37
37
  def initialize(n_clusters: 2, metric: 'euclidean')
38
- check_params_integer(n_clusters: n_clusters)
38
+ check_params_numeric(n_clusters: n_clusters)
39
39
  check_params_string(metric: metric)
40
40
  @params = {}
41
41
  @params[:n_clusters] = n_clusters
@@ -52,7 +52,7 @@ module Rumale
52
52
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
53
53
  # @return [SingleLinkage] The learned cluster analyzer itself.
54
54
  def fit(x, _y = nil)
55
- check_sample_array(x)
55
+ x = check_convert_sample_array(x)
56
56
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
57
57
  fit_predict(x)
58
58
  self
@@ -64,7 +64,7 @@ module Rumale
64
64
  # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
65
65
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
66
66
  def fit_predict(x)
67
- check_sample_array(x)
67
+ x = check_convert_sample_array(x)
68
68
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
69
69
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
70
70
  @labels = partial_fit(distance_mat)
@@ -25,7 +25,7 @@ module Rumale
25
25
  # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
26
26
  # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
27
27
  def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean')
28
- check_params_integer(n_neighbors: n_neighbors, min_samples: min_samples)
28
+ check_params_numeric(n_neighbors: n_neighbors, min_samples: min_samples)
29
29
  check_params_string(metric: metric)
30
30
  @params = {}
31
31
  @params[:n_neighbors] = n_neighbors
@@ -42,11 +42,9 @@ module Rumale
42
42
  # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
43
43
  # @param random_seed [Integer] The seed value using to initialize the random generator.
44
44
  def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
45
- check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
46
- check_params_float(tol: tol)
45
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
46
+ check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
47
47
  check_params_string(affinity: affinity, init: init)
48
- check_params_type_or_nil(Float, gamma: gamma)
49
- check_params_type_or_nil(Integer, random_seed: random_seed)
50
48
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
51
49
  @params = {}
52
50
  @params[:n_clusters] = n_clusters
@@ -69,7 +67,7 @@ module Rumale
69
67
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
70
68
  # @return [SpectralClustering] The learned cluster analyzer itself.
71
69
  def fit(x, _y = nil)
72
- check_sample_array(x)
70
+ x = check_convert_sample_array(x)
73
71
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
74
72
  raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
75
73
 
@@ -84,7 +82,7 @@ module Rumale
84
82
  # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
85
83
  # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
86
84
  def fit_predict(x)
87
- check_sample_array(x)
85
+ x = check_convert_sample_array(x)
88
86
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
89
87
  raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
90
88
 
@@ -60,11 +60,9 @@ module Rumale
60
60
  # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
61
61
  # @param random_seed [Integer] The seed value using to initialize the random generator.
62
62
  def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
63
- Rumale::Validation.check_params_integer(n_samples: n_samples)
63
+ Rumale::Validation.check_params_numeric(n_samples: n_samples, factor: factor)
64
64
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
65
- Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
66
- Rumale::Validation.check_params_float(factor: factor)
67
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
65
+ Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
68
66
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
69
67
  raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
70
68
  # initialize some variables.
@@ -99,10 +97,9 @@ module Rumale
99
97
  # If nil is given, no noise is added.
100
98
  # @param random_seed [Integer] The seed value using to initialize the random generator.
101
99
  def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
102
- Rumale::Validation.check_params_integer(n_samples: n_samples)
100
+ Rumale::Validation.check_params_numeric(n_samples: n_samples)
103
101
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
104
- Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
105
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
102
+ Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
106
103
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
107
104
  # initialize some variables.
108
105
  rs = random_seed
@@ -142,11 +139,10 @@ module Rumale
142
139
  # @param random_seed [Integer] The seed value using to initialize the random generator.
143
140
  def make_blobs(n_samples = 1000, n_features = 2,
144
141
  centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
145
- Rumale::Validation.check_params_integer(n_samples: n_samples, n_features: n_features)
146
- Rumale::Validation.check_params_float(cluster_std: cluster_std)
142
+ Rumale::Validation.check_params_numeric(n_samples: n_samples, n_features: n_features, cluster_std: cluster_std)
147
143
  Rumale::Validation.check_params_type(Array, center_box: center_box)
148
144
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
149
- Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
145
+ Rumale::Validation.check_params_numeric_or_nil(random_seed: random_seed)
150
146
  # initialize rng.
151
147
  rs = random_seed
152
148
  rs ||= srand