rumale 0.13.8 → 0.14.0
- checksums.yaml +4 -4
- data/CHANGELOG.md +24 -0
- data/README.md +8 -10
- data/lib/rumale.rb +3 -0
- data/lib/rumale/base/classifier.rb +2 -2
- data/lib/rumale/base/cluster_analyzer.rb +2 -2
- data/lib/rumale/base/regressor.rb +2 -2
- data/lib/rumale/clustering/dbscan.rb +3 -4
- data/lib/rumale/clustering/gaussian_mixture.rb +5 -6
- data/lib/rumale/clustering/hdbscan.rb +4 -4
- data/lib/rumale/clustering/k_means.rb +5 -6
- data/lib/rumale/clustering/k_medoids.rb +5 -6
- data/lib/rumale/clustering/power_iteration.rb +4 -6
- data/lib/rumale/clustering/single_linkage.rb +3 -3
- data/lib/rumale/clustering/snn.rb +1 -1
- data/lib/rumale/clustering/spectral_clustering.rb +4 -6
- data/lib/rumale/dataset.rb +6 -10
- data/lib/rumale/decomposition/factor_analysis.rb +4 -4
- data/lib/rumale/decomposition/fast_ica.rb +6 -7
- data/lib/rumale/decomposition/nmf.rb +6 -7
- data/lib/rumale/decomposition/pca.rb +6 -7
- data/lib/rumale/ensemble/ada_boost_classifier.rb +8 -8
- data/lib/rumale/ensemble/ada_boost_regressor.rb +7 -7
- data/lib/rumale/ensemble/extra_trees_classifier.rb +8 -8
- data/lib/rumale/ensemble/extra_trees_regressor.rb +7 -7
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +8 -8
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +8 -8
- data/lib/rumale/ensemble/random_forest_classifier.rb +8 -8
- data/lib/rumale/ensemble/random_forest_regressor.rb +7 -7
- data/lib/rumale/evaluation_measure/accuracy.rb +2 -2
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +2 -2
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +2 -2
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +2 -2
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +2 -2
- data/lib/rumale/evaluation_measure/f_score.rb +2 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +2 -2
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +2 -2
- data/lib/rumale/evaluation_measure/mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -2
- data/lib/rumale/evaluation_measure/precision.rb +2 -2
- data/lib/rumale/evaluation_measure/purity.rb +2 -2
- data/lib/rumale/evaluation_measure/r2_score.rb +2 -2
- data/lib/rumale/evaluation_measure/recall.rb +2 -2
- data/lib/rumale/evaluation_measure/roc_auc.rb +6 -3
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -2
- data/lib/rumale/kernel_approximation/rbf.rb +5 -6
- data/lib/rumale/kernel_machine/kernel_pca.rb +4 -4
- data/lib/rumale/kernel_machine/kernel_ridge.rb +3 -3
- data/lib/rumale/kernel_machine/kernel_svc.rb +7 -8
- data/lib/rumale/linear_model/lasso.rb +5 -6
- data/lib/rumale/linear_model/linear_regression.rb +5 -6
- data/lib/rumale/linear_model/logistic_regression.rb +16 -15
- data/lib/rumale/linear_model/ridge.rb +5 -6
- data/lib/rumale/linear_model/svc.rb +34 -28
- data/lib/rumale/linear_model/svr.rb +5 -6
- data/lib/rumale/manifold/mds.rb +3 -4
- data/lib/rumale/manifold/tsne.rb +3 -5
- data/lib/rumale/model_selection/cross_validation.rb +6 -5
- data/lib/rumale/model_selection/grid_search_cv.rb +6 -6
- data/lib/rumale/model_selection/k_fold.rb +3 -3
- data/lib/rumale/model_selection/shuffle_split.rb +3 -5
- data/lib/rumale/model_selection/stratified_k_fold.rb +4 -4
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +4 -6
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +4 -4
- data/lib/rumale/naive_bayes/naive_bayes.rb +14 -14
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +5 -5
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +4 -4
- data/lib/rumale/neural_network/base_mlp.rb +244 -0
- data/lib/rumale/neural_network/mlp_classifier.rb +119 -0
- data/lib/rumale/neural_network/mlp_regressor.rb +89 -0
- data/lib/rumale/optimizer/ada_grad.rb +1 -1
- data/lib/rumale/optimizer/adam.rb +3 -3
- data/lib/rumale/optimizer/nadam.rb +1 -1
- data/lib/rumale/optimizer/rmsprop.rb +1 -1
- data/lib/rumale/optimizer/sgd.rb +1 -1
- data/lib/rumale/optimizer/yellow_fin.rb +1 -2
- data/lib/rumale/pairwise_metric.rb +17 -19
- data/lib/rumale/pipeline/pipeline.rb +10 -10
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +29 -21
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +6 -6
- data/lib/rumale/preprocessing/bin_discretizer.rb +3 -3
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -2
- data/lib/rumale/preprocessing/label_binarizer.rb +2 -2
- data/lib/rumale/preprocessing/label_encoder.rb +1 -1
- data/lib/rumale/preprocessing/max_abs_scaler.rb +3 -3
- data/lib/rumale/preprocessing/min_max_scaler.rb +3 -3
- data/lib/rumale/preprocessing/one_hot_encoder.rb +4 -3
- data/lib/rumale/preprocessing/ordinal_encoder.rb +1 -1
- data/lib/rumale/preprocessing/standard_scaler.rb +3 -3
- data/lib/rumale/tree/base_decision_tree.rb +1 -1
- data/lib/rumale/tree/decision_tree_classifier.rb +7 -7
- data/lib/rumale/tree/decision_tree_regressor.rb +6 -6
- data/lib/rumale/tree/extra_tree_classifier.rb +7 -7
- data/lib/rumale/tree/extra_tree_regressor.rb +6 -6
- data/lib/rumale/tree/gradient_tree_regressor.rb +9 -9
- data/lib/rumale/validation.rb +32 -2
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +7 -7
- metadata +11 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4770a703a1c19b899b495006c36c19db5edf6fb1
+  data.tar.gz: 7a5493680c41332162c4434b6b5c1d0a07c1b456
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 9c569cfef32fe2a161c2a1350b516f1dde3dfd72109fbc442118a91162c8193ede68a21dd3f2446d0e2b00d203800375c5594c9a9563867ac9fbe4509a3f8e88
+  data.tar.gz: 8f15681e92c08859745dd8f93acdfedf09b31e321dc5105196f73f161c449a33aeded9f6ff4483c0061044828f69ad5a7aeba7a884443ab163457011d6f5dd46
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
+# 0.14.0
+- Add classifier and regressor classes based on the multi-layer perceptron.
+  - [MLPClassifier](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPClassifier.html)
+  - [MLPRegressor](https://yoshoku.github.io/rumale/doc/Rumale/NeuralNetwork/MLPRegressor.html)
+- Refactor specs.
+
+## Breaking changes
+- Change the predict method of SVC, LogisticRegression, and FactorizationMachineClassifier to return the original labels, instead of -1 or 1, on binary classification problems.
+- Fix hyperparameter validation to check whether the given value is a Numeric type.
+- Fix array validation for samples, labels, and target values to accept Ruby Arrays.
+
+```ruby
+require 'rumale'
+
+samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
+labels = [0, 1, 1, 0]
+
+svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
+svc.fit(samples, labels)
+svc.predict([[-1, 0], [1, 0]])
+# => Numo::Int32#shape=[2]
+# [0, 1]
+```
+
 # 0.13.8
 - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating artificial dataset with gaussian blobs.
 - Add documents about Rumale::SVM.
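The changelog entry above introduces MLPClassifier and MLPRegressor. A minimal usage sketch follows; the `hidden_units:`, `max_iter:`, and `random_seed:` keywords are assumptions taken from the linked MLPClassifier documentation rather than from this diff, so treat it as illustrative only.

```ruby
require 'rumale'

# Hypothetical sketch of the new multi-layer perceptron classifier in 0.14.0.
# The keyword arguments below are assumed from the MLPClassifier docs; they are not shown in this diff.
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels  = [0, 1, 1, 0]

mlp = Rumale::NeuralNetwork::MLPClassifier.new(hidden_units: [16, 16], max_iter: 500, random_seed: 1)
mlp.fit(samples, labels)       # plain Ruby Arrays are accepted after the 0.14.0 validation change
mlp.predict([[0, 1], [1, 1]])  # => Numo::Int32 of predicted class labels
```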
data/README.md
CHANGED
@@ -10,11 +10,12 @@
 
 Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
 Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
-Rumale supports
-Logistic Regression,
-
-
-
+Rumale supports Support Vector Machine,
+Logistic Regression, Ridge, Lasso, Factorization Machine,
+Multi-layer Perceptron,
+Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
+K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
+Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
 
 This project was formerly known as "SVMKit".
 If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
@@ -39,7 +40,6 @@ Or install it yourself as:
 
 ### Example 1. XOR data
 First, let's classify simple xor data.
-In Rumale, feature vectors and labels are represented by [Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ```ruby
 require 'rumale'
@@ -48,10 +48,6 @@ require 'rumale'
 features = [[0, 0], [0, 1], [1, 0], [1, 1]]
 labels = [0, 1, 1, 0]
 
-# Convert Ruby Array into Numo::NArray.
-x = Numo::DFloat.asarray(features)
-y = Numo::Int32.asarray(labels)
-
 # Train classifier with nearest neighbor rule.
 estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
 estimator.fit(x, y)
@@ -72,6 +68,8 @@ Numo::Int32#shape=[4]
 
 The basic usage of Rumale is to first train the model with the fit method
 and then estimate with the predict method.
+In addition, Rumale recommends representing arrays such as feature vectors and labels with
+[Numo::NArray](https://github.com/ruby-numo/numo-narray).
 
 ### Example 2. Pendigits dataset classification
 
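Example 1 above now feeds plain Ruby Arrays to the estimator, while the closing note still recommends Numo::NArray. For reference, a minimal sketch of the explicit conversion that was dropped from the README, using the estimator and data shown in the diff:

```ruby
require 'rumale'

features = [[0, 0], [0, 1], [1, 0], [1, 1]]
labels   = [0, 1, 1, 0]

# Optional since 0.14.0, but still the recommended representation:
# convert Ruby Arrays into Numo::NArray before training.
x = Numo::DFloat.asarray(features)
y = Numo::Int32.asarray(labels)

estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
estimator.fit(x, y)
estimator.predict(x)  # => Numo::Int32#shape=[4], the predicted labels
```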
data/lib/rumale.rb
CHANGED
@@ -73,6 +73,9 @@ require 'rumale/decomposition/factor_analysis'
 require 'rumale/decomposition/fast_ica'
 require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
+require 'rumale/neural_network/base_mlp'
+require 'rumale/neural_network/mlp_regressor'
+require 'rumale/neural_network/mlp_classifier'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
data/lib/rumale/base/classifier.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Mean accuracy
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Accuracy.new
 evaluator.score(y, predict(x))
data/lib/rumale/base/cluster_analyzer.rb
CHANGED
@@ -20,8 +20,8 @@ module Rumale
 # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
 # @return [Float] Purity
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_label_array(y)
 check_sample_label_size(x, y)
 evaluator = Rumale::EvaluationMeasure::Purity.new
 evaluator.score(y, fit_predict(x))
data/lib/rumale/base/regressor.rb
CHANGED
@@ -25,8 +25,8 @@ module Rumale
 # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
 # @return [Float] Coefficient of determination
 def score(x, y)
-
-
+x = check_convert_sample_array(x)
+y = check_convert_tvalue_array(y)
 check_sample_tvalue_size(x, y)
 evaluator = Rumale::EvaluationMeasure::R2Score.new
 evaluator.score(y, predict(x))
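These base mixins now convert their inputs with the check_convert_* helpers before scoring, so score accepts plain Ruby Arrays as well as Numo::NArray. A minimal sketch using the SVC configuration from the changelog example above:

```ruby
require 'rumale'

samples = [[-1, 1], [1, 1], [1, -1], [-1, -1]]
labels  = [0, 1, 1, 0]

svc = Rumale::LinearModel::SVC.new(reg_param: 1, batch_size: 1, random_seed: 1)
svc.fit(samples, labels)

# score converts the Ruby Arrays internally and returns mean accuracy as a Float.
svc.score([[-1, 0], [1, 0]], [0, 1])  # => 1.0, since the changelog example predicts [0, 1] for these points
```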
data/lib/rumale/clustering/dbscan.rb
CHANGED
@@ -34,8 +34,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(eps: 0.5, min_samples: 5, metric: 'euclidean')
-
-check_params_integer(min_samples: min_samples)
+check_params_numeric(eps: eps, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:eps] = eps
@@ -53,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [DBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 self
@@ -65,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 partial_fit(x)
 labels
data/lib/rumale/clustering/gaussian_mixture.rb
CHANGED
@@ -47,10 +47,9 @@ module Rumale
 # @param reg_covar [Float] The non-negative regularization to the diagonal of covariance.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', covariance_type: 'diag', max_iter: 50, tol: 1.0e-4, reg_covar: 1.0e-6, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -74,7 +73,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [GaussianMixture] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit')
 
 n_samples = x.shape[0]
@@ -97,7 +96,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('predict')
 
 memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
@@ -109,7 +108,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 check_enable_linalg('fit_predict')
 
 fit(x).predict(x)
data/lib/rumale/clustering/hdbscan.rb
CHANGED
@@ -34,8 +34,8 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(min_samples: 10, min_cluster_size: nil, metric: 'euclidean')
-
-
+check_params_numeric(min_samples: min_samples)
+check_params_numeric_or_nil(min_cluster_size: min_cluster_size)
 check_params_string(metric: metric)
 check_params_positive(min_samples: min_samples)
 @params = {}
@@ -53,7 +53,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [HDBSCAN] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -65,7 +65,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/k_means.rb
CHANGED
@@ -36,10 +36,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -59,7 +58,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [KMeans] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 init_cluster_centers(x)
 @params[:max_iter].times do |_t|
 cluster_labels = assign_cluster(x)
@@ -79,7 +78,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 assign_cluster(x)
 end
 
@@ -88,7 +87,7 @@ module Rumale
 # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 predict(x)
 end
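The clustering classes all follow the same pattern: fit, predict, and fit_predict now run their input through check_convert_sample_array. A minimal sketch with KMeans and a plain Ruby Array (the exact label assignment is illustrative; only the output shape is implied by the diff):

```ruby
require 'rumale'

# Two well-separated groups of points, passed as a plain Ruby Array.
x = [[0, 0], [0, 1], [10, 10], [10, 11]]

kmeans = Rumale::Clustering::KMeans.new(n_clusters: 2, max_iter: 50, random_seed: 1)
kmeans.fit_predict(x)  # => Numo::Int32#shape=[4], e.g. [0, 0, 1, 1]
```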
data/lib/rumale/clustering/k_medoids.rb
CHANGED
@@ -37,10 +37,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
-
-check_params_float(tol: tol)
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 check_params_string(metric: metric, init: init)
-
+check_params_numeric_or_nil(random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -63,7 +62,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [KMedoids] The learned cluster analyzer itself.
 def fit(x, _not_used = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 # initialize some varibales.
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -89,7 +88,7 @@ module Rumale
 # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def predict(x)
-
+x = check_convert_sample_array(x)
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
 if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
 raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
@@ -103,7 +102,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 fit(x)
 if @params[:metric] == 'precomputed'
 predict(x[true, @medoid_ids])
data/lib/rumale/clustering/power_iteration.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param eps [Float] A small value close to zero to avoid zero division error.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 8, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 1000, tol: 1.0e-8, eps: 1.0e-5, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol, eps: eps)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -71,7 +69,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [PowerIteration] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -83,7 +81,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 
 affinity_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
data/lib/rumale/clustering/single_linkage.rb
CHANGED
@@ -35,7 +35,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_clusters: 2, metric: 'euclidean')
-
+check_params_numeric(n_clusters: n_clusters)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -52,7 +52,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [SingleLinkage] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 fit_predict(x)
 self
@@ -64,7 +64,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
 distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
 @labels = partial_fit(distance_mat)
data/lib/rumale/clustering/snn.rb
CHANGED
@@ -25,7 +25,7 @@ module Rumale
 # If metric is 'euclidean', Euclidean distance is calculated for distance between points.
 # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
 def initialize(n_neighbors: 10, eps: 5, min_samples: 5, metric: 'euclidean')
-
+check_params_numeric(n_neighbors: n_neighbors, min_samples: min_samples)
 check_params_string(metric: metric)
 @params = {}
 @params[:n_neighbors] = n_neighbors
data/lib/rumale/clustering/spectral_clustering.rb
CHANGED
@@ -42,11 +42,9 @@ module Rumale
 # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
-
-
+check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
+check_params_numeric_or_nil(gamma: gamma, random_seed: random_seed)
 check_params_string(affinity: affinity, init: init)
-check_params_type_or_nil(Float, gamma: gamma)
-check_params_type_or_nil(Integer, random_seed: random_seed)
 check_params_positive(n_clusters: n_clusters, max_iter: max_iter, tol: tol)
 @params = {}
 @params[:n_clusters] = n_clusters
@@ -69,7 +67,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [SpectralClustering] The learned cluster analyzer itself.
 def fit(x, _y = nil)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
 
@@ -84,7 +82,7 @@ module Rumale
 # If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
 # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
 def fit_predict(x)
-
+x = check_convert_sample_array(x)
 raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
 raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded.' unless enable_linalg?
 
data/lib/rumale/dataset.rb
CHANGED
@@ -60,11 +60,9 @@ module Rumale
 # @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples, factor: factor)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_float(factor: factor)
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
 # initialize some variables.
@@ -99,10 +97,9 @@ module Rumale
 # If nil is given, no noise is added.
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric(n_samples: n_samples)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
-Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
 raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
 # initialize some variables.
 rs = random_seed
@@ -142,11 +139,10 @@ module Rumale
 # @param random_seed [Integer] The seed value using to initialize the random generator.
 def make_blobs(n_samples = 1000, n_features = 2,
 centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
-Rumale::Validation.
-Rumale::Validation.check_params_float(cluster_std: cluster_std)
+Rumale::Validation.check_params_numeric(n_samples: n_samples, n_features: n_features, cluster_std: cluster_std)
 Rumale::Validation.check_params_type(Array, center_box: center_box)
 Rumale::Validation.check_params_boolean(shuffle: shuffle)
-Rumale::Validation.
+Rumale::Validation.check_params_numeric_or_nil(random_seed: random_seed)
 # initialize rng.
 rs = random_seed
 rs ||= srand