rumale 0.13.8 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4770a703a1c19b899b495006c36c19db5edf6fb1
+  data.tar.gz: 7a5493680c41332162c4434b6b5c1d0a07c1b456
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9c569cfef32fe2a161c2a1350b516f1dde3dfd72109fbc442118a91162c8193ede68a21dd3f2446d0e2b00d203800375c5594c9a9563867ac9fbe4509a3f8e88
+  data.tar.gz: 8f15681e92c08859745dd8f93acdfedf09b31e321dc5105196f73f161c449a33aeded9f6ff4483c0061044828f69ad5a7aeba7a884443ab163457011d6f5dd46
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
+# 0.14.0
+- Add classifier and regressor class with multi-layer perceptron.
+  - [MLPClassifier](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPClassifier.html)
+  - [MLPRegressor](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPRegressor.html)
+- Refactor specs.
+
+## Breaking changes
+- Change predict method of SVC, LogisticRegression, and FactorizationMachineClassifier classes to return the original label instead of -1 or 1 labels when binary classification problem.
+- Fix hyperparameter validation to check if the type of given value is Numeric type.
+- Fix array validation for samples, labels, and target values to accept Ruby Array.
+
+```ruby
+require 'rumale'
+
+samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
+labels = [0, 1, 1, 0]
+
+svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
+svc.fit(samples, labels)
+svc.predict([[-1, 0], [1, 0]])
+# => Numo::Int32#shape=[2]
+# [0, 1]
+```
+
 # 0.13.8
 - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating artificial dataset with gaussian blobs.
 - Add documents about Rumale::SVM.
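The headline addition above is the pair of multi-layer perceptron estimators. A minimal sketch of how MLPClassifier would be used, mirroring the SVC snippet in the changelog; the constructor keywords shown here (`hidden_units:`, `max_iter:`, `random_seed:`) are assumptions and do not appear in this diff.

```ruby
require 'rumale'

samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
labels = [0, 1, 1, 0]

# Hypothetical usage of the new class; keyword names are assumed, not shown in this diff.
mlp = Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [16], max_iter: 50, random_seed: 1)
mlp.fit(samples, labels)
mlp.predict([[-1, 0], [1, 0]])
# => Numo::Int32 of predicted labels in the original encoding
```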
data/README.md
CHANGED
@@ -10,11 +10,12 @@
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
-Rumale supports
-Logistic Regression,
-
-
-
+Rumale supports Support Vector Machine,
+Logistic Regression, Ridge, Lasso, Factorization Machine,
+Multi-layer Perceptron,
+Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
+K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
+Mutidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
 
 This project was formerly known as "SVMKit".
 If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
@@ -39,7 +40,6 @@ Or install it yourself as:
 
 ### Example 1. XOR data
 First, let's classify simple xor data.
-In Rumale, feature vectors and labels are represented by [Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ```ruby
 require 'rumale'
@@ -48,10 +48,6 @@ require 'rumale'
 features = [[0, 0], [0, 1], [1, 0], [1, 1]]
 labels = [0, 1, 1, 0]
 
-# Convert Ruby Array into Numo::NArray.
-x = Numo::DFloat.asarray(features)
-y = Numo::Int32.asarray(labels)
-
 # Train classifier with nearest neighbor rule.
 estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
 estimator.fit(x, y)
@@ -72,6 +68,8 @@ Numo::Int32#shape=[4]
 
 The basic usage of Rumale is to first train the model with the fit method
 and then estimate with the predict method.
+In addition, Rumale recommends using arrays such as feature vectors and labels with
+[Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ### Example 2. Pendigits dataset classification
 
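The note added to the README above recommends Numo::NArray for feature vectors and labels. For reference, the explicit conversion that the removed Example 1 lines performed looks like this (a minimal sketch grounded in those removed lines; since 0.14.0 the estimators also accept plain Ruby Array directly, as the CHANGELOG example shows).

```ruby
require 'rumale'

features = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels = [0, 1, 1, 0]

# Convert Ruby Array into Numo::NArray, as the pre-0.14.0 README spelled out.
x = Numo::DFloat.asarray(features)
y = Numo::Int32.asarray(labels)
```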
data/lib/rumale.rb
CHANGED
@@ -73,6 +73,9 @@ require 'rumale/decomposition/factor_analysis'
 require 'rumale/decomposition/fast_ica'
 require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
+require 'rumale/neural_network/base_mlp'
+require 'rumale/neural_network/mlp_regressor'
+require 'rumale/neural_network/mlp_classifier'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
data/lib/rumale/base/classifier.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Mean accuracy
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Accuracy.new
 evaluator.score(y, predict(x))
data/lib/rumale/base/cluster_analyzer.rb
CHANGED
@@ -20,8 +20,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Purity
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Purity.new
 evaluator.score(y, fit_predict(x))
data/lib/rumale/base/regressor.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
 # @return [Float] Coefficient of determination
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_tvalue_array(y)
 check_sample_tvalue_size(x, y)
 evaluator = Rumale::EvaluationMeasure::R2Score.new
 evaluator.score(y, predict(x))
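The check_convert_sample_array, check_convert_label_array, and check_convert_tvalue_array helpers used in the three base modules above come from data/lib/rumale/validation.rb (+32 -2), whose diff is not included in this section. A plausible sketch of the behavior the hunks rely on, casting Ruby Array input to the expected Numo type before validating it; the method bodies below are assumptions, only the method names appear in this diff.

```ruby
require 'numo/narray'

# Illustrative only: the real implementations live in rumale/validation.rb,
# which is not shown here. The casts and checks below are assumptions.
def check_convert_sample_array(x)
  x = Numo::DFloat.cast(x) unless x.is_a?(Numo::DFloat)
  raise ArgumentError, 'the sample array is expected to be 2-D' unless x.ndim == 2
  x
end

def check_convert_label_array(y)
  y = Numo::Int32.cast(y) unless y.is_a?(Numo::Int32)
  raise ArgumentError, 'the label array is expected to be 1-D' unless y.ndim == 1
  y
end
```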
data/lib/rumale/clustering/dbscan.rb
CHANGED
@@ -34,8 +34,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
-
-check_params_integer(min_samples: min_samples)
+check_params_numeric(eps: eps, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:eps] = eps
@@ -53,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [DBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 self
@@ -65,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 labels
data/lib/rumale/clustering/gaussian_mixture.rb
CHANGED
@@ -47,10 +47,9 @@ module Rumale
 # @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -74,7 +73,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [GaussianMixture] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit')

 n_samples = x.shape[0]
@@ -97,7 +96,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('predict')

 memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
@@ -109,7 +108,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit_predict')

 fit(x).predict(x)
data/lib/rumale/clustering/hdbscan.rb
CHANGED
@@ -34,8 +34,8 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
-
-
+check_params_numeric(min_samples: min_samples)
+check_params_numeric_or_nil(min_cluster_size: min_cluster_size)
 check_params_string(metric: metric)
 check_params_positive(min_samples: min_samples)
 @params = {}
@@ -53,7 +53,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [HDBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -65,7 +65,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/k_means.rb
CHANGED
@@ -36,10 +36,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -59,7 +58,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [KMeans] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 init_cluster_centers(x)
 @params[:max_iter].times do |_t|
 cluster_labels = assign_cluster(x)
@@ -79,7 +78,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 assign_cluster(x)
 end

@@ -88,7 +87,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 predict(x)
 end
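Because the KMeans hunks above now funnel every input through check_convert_sample_array, fit_predict can be called with a plain Ruby Array. A minimal sketch; the toy data is made up, and the constructor keywords are the ones shown in the initialize signature above.

```ruby
require 'rumale'

samples = [[0.0, 0.0], [0.1, 0.2], [5.0, 5.0], [5.1, 4.9]]

kmeans = Rumale::Clustering::KMeans.new(n_clusters: 2, max_iter: 50, random_seed: 1)
labels = kmeans.fit_predict(samples)
# => Numo::Int32 (shape: [4]) of predicted cluster labels
```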
data/lib/rumale/clustering/k_medoids.rb
CHANGED
@@ -37,10 +37,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(metric: metric, init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -63,7 +62,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [KMedoids] The learned cluster analyzer itself.
 def fit(x, _not_used = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 # initialize some varibales.
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -89,7 +88,7 @@ module Rumale
 # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
 if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
 raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
@@ -103,7 +102,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 if @params[:metric] == 'precomputed'
 predict(x[true, @medoid_ids])
data/lib/rumale/clustering/power_iteration.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param eps [Float] A small value close to zero to avoid zero division error.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -71,7 +69,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [PowerIteration] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -83,7 +81,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]

 affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
data/lib/rumale/clustering/single_linkage.rb
CHANGED
@@ -35,7 +35,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_clusters: 2, metric: 'euclidean')
-
+check_params_numeric(n_clusters: n_clusters)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -52,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [SingleLinkage] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -64,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/snn.rb
CHANGED
@@ -25,7 +25,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean')
-
+check_params_numeric(n_neighbors: n_neighbors, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_neighbors] = n_neighbors
data/lib/rumale/clustering/spectral_clustering.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -69,7 +67,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [SpectralClustering] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?

@@ -84,7 +82,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
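The guards in the SpectralClustering hunks above raise unless Numo::Linalg is loaded. A minimal sketch of the setup this implies; the `numo/linalg/autoloader` require is the usual way to load Numo::Linalg but is an assumption here, not something shown in this diff.

```ruby
require 'numo/linalg/autoloader' # assumed: loads Numo::Linalg so the enable_linalg? guard passes
require 'rumale'

samples = [[0.0, 0.0], [0.2, 0.1], [5.0, 5.0], [5.2, 4.9]]

sc = Rumale::Clustering::SpectralClustering.new(n_clusters: 2, random_seed: 1)
labels = sc.fit_predict(samples)
# => Numo::Int32 (shape: [4]) of predicted cluster labels
```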
data/lib/rumale/dataset.rb
CHANGED
@@ -60,11 +60,9 @@ module Rumale
 # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples, factor: factor)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_float(factor: factor)
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
 # initialize some variables.
@@ -99,10 +97,9 @@ module Rumale
 # If nil is given, no noise is added.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 # initialize some variables.
 rs = random_seed
@@ -142,11 +139,10 @@ module Rumale
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_blobs(n_samples = 1000, n_features = 2,
 centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
-Rumale::Validation.
-Rumale::Validation.check_params_float(cluster_std: cluster_std)
+Rumale::Validation.check_params_numeric(n_samples: n_samples, n_features: n_features, cluster_std: cluster_std)
 Rumale::Validation.check_params_type(Array, center_box: center_box)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric_or_nil(random_seed: random_seed)
 # initialize rng.
 rs = random_seed
 rs ||= srand
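For reference, a minimal sketch of calling the make_blobs method whose validation is updated above, using only keywords from the signature shown; the assumption that it returns a samples/labels pair follows its scikit-learn counterpart and is not confirmed by this hunk.

```ruby
require 'rumale'

# Assumed return value: samples (Numo::DFloat) and cluster labels (Numo::Int32).
x, y = Rumale::Dataset.make_blobs(300, 2, cluster_std: 0.5, random_seed: 1)
```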
|