rumale 0.23.3 → 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE.txt +5 -1
- data/README.md +3 -288
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +20 -131
- metadata +252 -150
- data/CHANGELOG.md +0 -643
- data/CODE_OF_CONDUCT.md +0 -74
- data/ext/rumale/extconf.rb +0 -37
- data/ext/rumale/rumaleext.c +0 -545
- data/ext/rumale/rumaleext.h +0 -12
- data/lib/rumale/base/base_estimator.rb +0 -49
- data/lib/rumale/base/classifier.rb +0 -36
- data/lib/rumale/base/cluster_analyzer.rb +0 -31
- data/lib/rumale/base/evaluator.rb +0 -17
- data/lib/rumale/base/regressor.rb +0 -36
- data/lib/rumale/base/splitter.rb +0 -21
- data/lib/rumale/base/transformer.rb +0 -22
- data/lib/rumale/clustering/dbscan.rb +0 -123
- data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
- data/lib/rumale/clustering/hdbscan.rb +0 -291
- data/lib/rumale/clustering/k_means.rb +0 -122
- data/lib/rumale/clustering/k_medoids.rb +0 -141
- data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
- data/lib/rumale/clustering/power_iteration.rb +0 -127
- data/lib/rumale/clustering/single_linkage.rb +0 -203
- data/lib/rumale/clustering/snn.rb +0 -76
- data/lib/rumale/clustering/spectral_clustering.rb +0 -115
- data/lib/rumale/dataset.rb +0 -246
- data/lib/rumale/decomposition/factor_analysis.rb +0 -150
- data/lib/rumale/decomposition/fast_ica.rb +0 -188
- data/lib/rumale/decomposition/nmf.rb +0 -124
- data/lib/rumale/decomposition/pca.rb +0 -159
- data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
- data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
- data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
- data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
- data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
- data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
- data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
- data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
- data/lib/rumale/ensemble/voting_classifier.rb +0 -126
- data/lib/rumale/ensemble/voting_regressor.rb +0 -82
- data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
- data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
- data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
- data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
- data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
- data/lib/rumale/evaluation_measure/f_score.rb +0 -50
- data/lib/rumale/evaluation_measure/function.rb +0 -147
- data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
- data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
- data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
- data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
- data/lib/rumale/evaluation_measure/precision.rb +0 -50
- data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
- data/lib/rumale/evaluation_measure/purity.rb +0 -40
- data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
- data/lib/rumale/evaluation_measure/recall.rb +0 -50
- data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
- data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
- data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
- data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
- data/lib/rumale/kernel_approximation/rbf.rb +0 -102
- data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
- data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
- data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
- data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
- data/lib/rumale/linear_model/base_sgd.rb +0 -285
- data/lib/rumale/linear_model/elastic_net.rb +0 -119
- data/lib/rumale/linear_model/lasso.rb +0 -115
- data/lib/rumale/linear_model/linear_regression.rb +0 -201
- data/lib/rumale/linear_model/logistic_regression.rb +0 -275
- data/lib/rumale/linear_model/nnls.rb +0 -137
- data/lib/rumale/linear_model/ridge.rb +0 -209
- data/lib/rumale/linear_model/svc.rb +0 -213
- data/lib/rumale/linear_model/svr.rb +0 -132
- data/lib/rumale/manifold/mds.rb +0 -155
- data/lib/rumale/manifold/tsne.rb +0 -222
- data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
- data/lib/rumale/metric_learning/mlkr.rb +0 -161
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
- data/lib/rumale/model_selection/cross_validation.rb +0 -125
- data/lib/rumale/model_selection/function.rb +0 -42
- data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
- data/lib/rumale/model_selection/group_k_fold.rb +0 -93
- data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
- data/lib/rumale/model_selection/k_fold.rb +0 -81
- data/lib/rumale/model_selection/shuffle_split.rb +0 -90
- data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
- data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
- data/lib/rumale/model_selection/time_series_split.rb +0 -91
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
- data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
- data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
- data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
- data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
- data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
- data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
- data/lib/rumale/neural_network/adam.rb +0 -56
- data/lib/rumale/neural_network/base_mlp.rb +0 -248
- data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
- data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
- data/lib/rumale/pairwise_metric.rb +0 -152
- data/lib/rumale/pipeline/feature_union.rb +0 -69
- data/lib/rumale/pipeline/pipeline.rb +0 -175
- data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
- data/lib/rumale/preprocessing/binarizer.rb +0 -60
- data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
- data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
- data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
- data/lib/rumale/preprocessing/label_encoder.rb +0 -79
- data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
- data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
- data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
- data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
- data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
- data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
- data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
- data/lib/rumale/probabilistic_output.rb +0 -114
- data/lib/rumale/tree/base_decision_tree.rb +0 -150
- data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
- data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
- data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
- data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
- data/lib/rumale/tree/node.rb +0 -39
- data/lib/rumale/utils.rb +0 -42
- data/lib/rumale/validation.rb +0 -128
- data/lib/rumale/values.rb +0 -13
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/classifier'
|
|
5
|
-
|
|
6
|
-
module Rumale
|
|
7
|
-
# This module consists of the classes that implement naive bayes models.
|
|
8
|
-
module NaiveBayes
|
|
9
|
-
# BaseNaiveBayes is a class that has methods for common processes of naive bayes classifier.
|
|
10
|
-
# This class is used internally.
|
|
11
|
-
class BaseNaiveBayes
  include Base::BaseEstimator
  include Base::Classifier

  # Predict class labels for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
    x = check_convert_sample_array(x)
    n_samples = x.shape.first
    decision_values = decision_function(x)
    # For every sample, pick the class whose decision value is the largest.
    Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

  # Predict log-probability for samples.
  #
  # The decision values are normalized per sample so that their exponentials sum to one.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
  def predict_log_proba(x)
    x = check_convert_sample_array(x)
    n_samples, = x.shape
    log_likelihoods = decision_function(x)
    # Log-sum-exp with the per-row maximum subtracted first: exponentiating strongly
    # negative log-likelihoods directly underflows to zero and makes the normalizer log(0).
    row_max = log_likelihoods.max(1).reshape(n_samples, 1)
    shifted = log_likelihoods - row_max
    shifted - Numo::NMath.log(Numo::NMath.exp(shifted).sum(1)).reshape(n_samples, 1)
  end

  # Predict probability for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
    x = check_convert_sample_array(x)
    Numo::NMath.exp(predict_log_proba(x)).abs
  end
end
|
|
46
|
-
end
|
|
47
|
-
end
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/naive_bayes/base_naive_bayes'
|
|
4
|
-
|
|
5
|
-
module Rumale
|
|
6
|
-
module NaiveBayes
|
|
7
|
-
# BernoulliNB is a class that implements Bernoulli Naive Bayes classifier.
|
|
8
|
-
#
|
|
9
|
-
# @example
|
|
10
|
-
# estimator = Rumale::NaiveBayes::BernoulliNB.new(smoothing_param: 1.0, bin_threshold: 0.0)
|
|
11
|
-
# estimator.fit(training_samples, training_labels)
|
|
12
|
-
# results = estimator.predict(testing_samples)
|
|
13
|
-
#
|
|
14
|
-
# *Reference*
|
|
15
|
-
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
|
16
|
-
class BernoulliNB < BaseNaiveBayes
  # Return the class labels.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # Return the prior probabilities of the classes.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # Return the conditional probabilities for features of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Create a new classifier with Bernoulli Naive Bayes.
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  # @param bin_threshold [Float] The threshold for binarizing of features.
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
    check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
    check_params_positive(smoothing_param: smoothing_param)
    @params = {}
    @params[:smoothing_param] = smoothing_param
    @params[:bin_threshold] = bin_threshold
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [BernoulliNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)
    n_samples, = x.shape
    # A feature counts as "present" when its value exceeds the threshold.
    bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
    # Priors use the raw class counts; smoothing is only applied to the feature estimates below.
    @class_priors = n_samples_each_class / n_samples
    count_features = Numo::DFloat[*@classes.to_a.map { |l| bin_x[y.eq(l).where, true].sum(0) }]
    count_features += @params[:smoothing_param]
    # Two outcomes per feature (present / absent), hence twice the smoothing term in the denominator.
    n_samples_each_class += 2.0 * @params[:smoothing_param]
    n_classes = @classes.size
    @feature_probs = count_features / n_samples_each_class.reshape(n_classes, 1)
    self
  end

  # Calculate confidence scores for samples.
  #
  # The score of a class is the log prior plus the Bernoulli log-likelihood of the
  # binarized sample: log(p) for each present feature and log(1 - p) for each absent one.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    n_classes = @classes.size
    bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
    not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
    log_likelihoods = Array.new(n_classes) do |l|
      # Both terms must be added; without the trailing `+` Ruby evaluates the two lines
      # as separate statements and only the "absent" term contributes.
      Math.log(@class_priors[l]) + (
        (bin_x * Numo::NMath.log(@feature_probs[l, true])).sum(1) +
        (not_bin_x * Numo::NMath.log(1.0 - @feature_probs[l, true])).sum(1))
    end
    Numo::DFloat[*log_likelihoods].transpose.dup
  end
end
|
|
81
|
-
end
|
|
82
|
-
end
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/naive_bayes/base_naive_bayes'
|
|
4
|
-
|
|
5
|
-
module Rumale
|
|
6
|
-
module NaiveBayes
|
|
7
|
-
# ComplementNB is a class that implements Complement Naive Bayes classifier.
|
|
8
|
-
#
|
|
9
|
-
# @example
|
|
10
|
-
# estimator = Rumale::NaiveBayes::ComplementNB.new(smoothing_param: 1.0)
|
|
11
|
-
# estimator.fit(training_samples, training_labels)
|
|
12
|
-
# results = estimator.predict(testing_samples)
|
|
13
|
-
#
|
|
14
|
-
# *Reference*
|
|
15
|
-
# - Rennie, J. D. M., Shih, L., Teevan, J., and Karger, D. R., "Tackling the Poor Assumptions of Naive Bayes Text Classifiers," ICML' 03, pp. 616--623, 2003.
|
|
16
|
-
class ComplementNB < BaseNaiveBayes
  # The sorted, distinct labels observed while fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The fraction of training samples that belongs to each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The smoothed feature probabilities, estimated for each class from the samples
  # that do NOT carry that class label.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Build a Complement Naive Bayes classifier.
  #
  # @param smoothing_param [Float] The smoothing parameter.
  # @param norm [Boolean] The flag indicating whether to normalize the weight vectors.
  def initialize(smoothing_param: 1.0, norm: false)
    check_params_numeric(smoothing_param: smoothing_param)
    check_params_positive(smoothing_param: smoothing_param)
    check_params_boolean(norm: norm)
    @params = { smoothing_param: smoothing_param, norm: norm }
  end

  # Estimate the class priors and the complement feature weights from training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [ComplementNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    class_labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*class_labels.map { |label| y.eq(label).count.fdiv(n_samples) }]
    @class_log_probs = Numo::NMath.log(@class_priors)

    # Per class: feature totals over every sample outside the class, plus smoothing.
    complement_sums = Numo::DFloat[*class_labels.map { |label| x[y.ne(label).where, true].sum(0) }]
    smoothed = complement_sums + @params[:smoothing_param]
    @feature_probs = smoothed / smoothed.sum(1).reshape(n_classes, 1)

    log_probs = Numo::NMath.log(@feature_probs)
    @weights = normalize? ? log_probs / log_probs.sum(1).reshape(n_classes, 1) : -log_probs
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    projected = x.dot(@weights.transpose)
    @class_log_probs + projected
  end

  private

  # Whether the weight vectors are divided by their per-class sum.
  def normalize?
    @params[:norm] == true
  end
end
|
|
84
|
-
end
|
|
85
|
-
end
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/naive_bayes/base_naive_bayes'
|
|
4
|
-
|
|
5
|
-
module Rumale
|
|
6
|
-
module NaiveBayes
|
|
7
|
-
# GaussianNB is a class that implements Gaussian Naive Bayes classifier.
|
|
8
|
-
#
|
|
9
|
-
# @example
|
|
10
|
-
# estimator = Rumale::NaiveBayes::GaussianNB.new
|
|
11
|
-
# estimator.fit(training_samples, training_labels)
|
|
12
|
-
# results = estimator.predict(testing_samples)
|
|
13
|
-
class GaussianNB < BaseNaiveBayes
  # The sorted, distinct labels observed while fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The fraction of training samples that belongs to each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The per-class mean of every feature.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :means

  # The per-class variance of every feature.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :variances

  # Build a Gaussian Naive Bayes classifier. It takes no hyperparameters.
  def initialize
    @params = {}
  end

  # Estimate class priors and per-class feature means and variances from training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [GaussianNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    class_labels = @classes.to_a

    @class_priors = Numo::DFloat[*class_labels.map { |label| y.eq(label).count / n_samples.to_f }]
    # Rows of x belonging to each class, reused for both statistics below.
    members = class_labels.map { |label| x[y.eq(label).where, true] }
    @means = Numo::DFloat[*members.map { |rows| rows.mean(0) }]
    @variances = Numo::DFloat[*members.map { |rows| rows.var(0) }]
    self
  end

  # Calculate confidence scores for samples.
  #
  # Each score is the log prior plus the sum over features of the Gaussian log-density.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    per_class = Array.new(@classes.size) do |l|
      mean = @means[l, true]
      variance = @variances[l, true]
      log_norm = Numo::NMath.log(2.0 * Math::PI * variance)
      scaled_sq_dev = (x - mean)**2 / variance
      Math.log(@class_priors[l]) - 0.5 * (log_norm + scaled_sq_dev).sum(1)
    end
    # per_class is class-major; transpose so that rows correspond to samples.
    Numo::DFloat[*per_class].transpose.dup
  end
end
|
|
68
|
-
end
|
|
69
|
-
end
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/naive_bayes/base_naive_bayes'
|
|
4
|
-
|
|
5
|
-
module Rumale
|
|
6
|
-
module NaiveBayes
|
|
7
|
-
# MultinomialNB is a class that implements Multinomial Naive Bayes classifier.
|
|
8
|
-
#
|
|
9
|
-
# @example
|
|
10
|
-
# estimator = Rumale::NaiveBayes::MultinomialNB.new(smoothing_param: 1.0)
|
|
11
|
-
# estimator.fit(training_samples, training_labels)
|
|
12
|
-
# results = estimator.predict(testing_samples)
|
|
13
|
-
#
|
|
14
|
-
# *Reference*
|
|
15
|
-
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
|
16
|
-
class MultinomialNB < BaseNaiveBayes
  # The sorted, distinct labels observed while fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The fraction of training samples that belongs to each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The smoothed, row-normalized feature totals of each class.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Build a Multinomial Naive Bayes classifier.
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  def initialize(smoothing_param: 1.0)
    check_params_numeric(smoothing_param: smoothing_param)
    check_params_positive(smoothing_param: smoothing_param)
    @params = { smoothing_param: smoothing_param }
  end

  # Estimate class priors and per-class feature probabilities from training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [MultinomialNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    class_labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*class_labels.map { |label| y.eq(label).count / n_samples.to_f }]
    # Per class: feature totals over the samples of that class, plus smoothing.
    feature_sums = Numo::DFloat[*class_labels.map { |label| x[y.eq(label).where, true].sum(0) }]
    smoothed = feature_sums + @params[:smoothing_param]
    @feature_probs = smoothed / smoothed.sum(1).reshape(n_classes, 1)
    self
  end

  # Calculate confidence scores for samples.
  #
  # NOTE(review): the samples are reduced to presence/absence (x > 0) before scoring,
  # so a feature value above zero contributes log(p) once regardless of its magnitude.
  # Confirm whether weighting by the raw counts was intended.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    presence = Numo::DFloat[*x.gt(0)]
    per_class = Array.new(@classes.size) do |l|
      log_probs = Numo::NMath.log(@feature_probs[l, true])
      Math.log(@class_priors[l]) + (presence * log_probs).sum(1)
    end
    # per_class is class-major; transpose so that rows correspond to samples.
    Numo::DFloat[*per_class].transpose.dup
  end
end
|
|
73
|
-
end
|
|
74
|
-
end
|
|
@@ -1,71 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/naive_bayes/base_naive_bayes'
|
|
4
|
-
|
|
5
|
-
module Rumale
|
|
6
|
-
module NaiveBayes
|
|
7
|
-
# NegationNB is a class that implements Negation Naive Bayes classifier.
|
|
8
|
-
#
|
|
9
|
-
# @example
|
|
10
|
-
# estimator = Rumale::NaiveBayes::NegationNB.new(smoothing_param: 1.0)
|
|
11
|
-
# estimator.fit(training_samples, training_labels)
|
|
12
|
-
# results = estimator.predict(testing_samples)
|
|
13
|
-
#
|
|
14
|
-
# *Reference*
|
|
15
|
-
# - Komiya, K., Sato, N., Fujimoto, K., and Kotani, Y., "Negation Naive Bayes for Categorization of Product Pages on the Web," RANLP' 11, pp. 586--592, 2011.
|
|
16
|
-
class NegationNB < BaseNaiveBayes
  # The sorted, distinct labels observed while fitting.
  # @return [Numo::Int32] (size: n_classes)
  attr_reader :classes

  # The fraction of training samples that belongs to each class.
  # @return [Numo::DFloat] (shape: [n_classes])
  attr_reader :class_priors

  # The smoothed feature probabilities, estimated for each class from the samples
  # that do NOT carry that class label.
  # @return [Numo::DFloat] (shape: [n_classes, n_features])
  attr_reader :feature_probs

  # Build a Negation Naive Bayes classifier.
  #
  # @param smoothing_param [Float] The smoothing parameter.
  def initialize(smoothing_param: 1.0)
    check_params_numeric(smoothing_param: smoothing_param)
    check_params_positive(smoothing_param: smoothing_param)
    @params = { smoothing_param: smoothing_param }
  end

  # Estimate the negated class priors and the complement feature weights from training data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
  #   to be used for fitting the model.
  # @return [NegationNB] The learned classifier itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_label_array(y)
    check_sample_label_size(x, y)

    n_samples = x.shape.first
    @classes = Numo::Int32[*y.to_a.uniq.sort]
    class_labels = @classes.to_a
    n_classes = @classes.size

    @class_priors = Numo::DFloat[*class_labels.map { |label| y.eq(label).count.fdiv(n_samples) }]
    # log(1 / (1 - prior)): the log of the inverse probability of NOT being in the class.
    @class_log_probs = Numo::NMath.log(1 / (1 - @class_priors))

    # Per class: feature totals over every sample outside the class, plus smoothing.
    complement_sums = Numo::DFloat[*class_labels.map { |label| x[y.ne(label).where, true].sum(0) }]
    smoothed = complement_sums + @params[:smoothing_param]
    @feature_probs = smoothed / smoothed.sum(1).reshape(n_classes, 1)
    @weights = Numo::NMath.log(@feature_probs)
    self
  end

  # Calculate confidence scores for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
    x = check_convert_sample_array(x)
    projected = x.dot(@weights.transpose)
    @class_log_probs - projected
  end
end
|
|
70
|
-
end
|
|
71
|
-
end
|
|
@@ -1,133 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/classifier'
|
|
5
|
-
|
|
6
|
-
module Rumale
|
|
7
|
-
# This module consists of the classes that implement estimators based on nearest neighbors rule.
|
|
8
|
-
module NearestNeighbors
|
|
9
|
-
# KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
|
|
10
|
-
# The current implementation uses the Euclidean distance for finding the neighbors.
|
|
11
|
-
#
|
|
12
|
-
# @example
|
|
13
|
-
# estimator =
|
|
14
|
-
# Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
|
|
15
|
-
#   estimator.fit(training_samples, training_labels)
|
|
16
|
-
# results = estimator.predict(testing_samples)
|
|
17
|
-
#
|
|
18
|
-
class KNeighborsClassifier
|
|
19
|
-
include Base::BaseEstimator
|
|
20
|
-
include Base::Classifier
|
|
21
|
-
|
|
22
|
-
# Return the prototypes for the nearest neighbor classifier.
|
|
23
|
-
# If the metric is 'precomputed', that returns nil.
|
|
24
|
-
# If the algorithm is 'vptree', that returns Rumale::NearestNeighbors::VPTree.
|
|
25
|
-
# @return [Numo::DFloat] (shape: [n_training_samples, n_features])
|
|
26
|
-
attr_reader :prototypes
|
|
27
|
-
|
|
28
|
-
# Return the labels of the prototypes
|
|
29
|
-
# @return [Numo::Int32] (size: n_training_samples)
|
|
30
|
-
attr_reader :labels
|
|
31
|
-
|
|
32
|
-
# Return the class labels.
|
|
33
|
-
# @return [Numo::Int32] (size: n_classes)
|
|
34
|
-
attr_reader :classes
|
|
35
|
-
|
|
36
|
-
# Create a new classifier with the nearest neighbor rule.
|
|
37
|
-
#
|
|
38
|
-
# @param n_neighbors [Integer] The number of neighbors.
|
|
39
|
-
# @param algorithm [String] The algorithm is used for finding the nearest neighbors.
|
|
40
|
-
# If algorithm is 'brute', brute-force search will be used.
|
|
41
|
-
# If algorithm is 'vptree', vantage point tree will be used.
|
|
42
|
-
# This parameter is ignored when metric parameter is 'precomputed'.
|
|
43
|
-
# @param metric [String] The metric to calculate the distances.
|
|
44
|
-
# If metric is 'euclidean', Euclidean distance is calculated for distance between points.
|
|
45
|
-
# If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
|
|
46
|
-
def initialize(n_neighbors: 5, algorithm: 'brute', metric: 'euclidean')
  check_params_numeric(n_neighbors: n_neighbors)
  check_params_positive(n_neighbors: n_neighbors)
  # Keys name the parameters being validated (presumably echoed in the helper's
  # error message), so they must match the real keyword names.
  check_params_string(algorithm: algorithm, metric: metric)
  @params = {}
  @params[:n_neighbors] = n_neighbors
  # Unrecognized values fall back to the defaults instead of raising.
  @params[:algorithm] = algorithm == 'vptree' ? 'vptree' : 'brute'
  @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
  # Learned state; populated by #fit.
  @prototypes = nil
  @labels = nil
  @classes = nil
end
|
|
58
|
-
|
|
59
|
-
# Fit the model with given training data.
|
|
60
|
-
#
|
|
61
|
-
# @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
|
|
62
|
-
# If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
|
|
63
|
-
# @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
|
|
64
|
-
# @return [KNeighborsClassifier] The learned classifier itself.
|
|
65
|
-
def fit(x, y)
  x = check_convert_sample_array(x)
  y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  # A precomputed training distance matrix must be n_training_samples-by-n_training_samples.
  if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
    raise ArgumentError, 'Expect the input distance matrix to be square.'
  end

  # Prototypes are only kept for the Euclidean metric: either a vantage point tree
  # built over x or a copy of x itself. Any other metric leaves them nil.
  @prototypes =
    if @params[:metric] != 'euclidean'
      nil
    elsif @params[:algorithm] == 'vptree'
      VPTree.new(x)
    else
      x.dup
    end

  label_list = y.to_a
  @labels = Numo::Int32.asarray(label_list)
  @classes = Numo::Int32.asarray(label_list.uniq.sort)
  self
end
|
|
82
|
-
|
|
83
|
-
# Calculate confidence scores for samples.
|
|
84
|
-
#
|
|
85
|
-
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to compute the scores.
|
|
86
|
-
# If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
|
|
87
|
-
# @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence scores per sample for each class.
|
|
88
|
-
def decision_function(x)
  # Scores are neighbor vote counts: scores[m, c] is the number of the
  # nearest training samples of test sample m whose label is @classes[c].
  x = check_convert_sample_array(x)
  if @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
    raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
  end

  n_prototypes = @labels.size
  # Never ask for more neighbors than there are training samples.
  n_neighbors = [@params[:n_neighbors], n_prototypes].min
  n_samples = x.shape[0]
  n_classes = @classes.size
  scores = Numo::DFloat.zeros(n_samples, n_classes)
  # Build the label => column lookup once instead of converting @classes to an
  # Array and scanning it for every single neighbor.
  class_columns = @classes.to_a.each_with_index.to_h

  if @params[:metric] == 'euclidean' && @params[:algorithm] == 'vptree'
    # Only the first value returned by query (the neighbor indices) is used.
    neighbor_ids, = @prototypes.query(x, n_neighbors)
    n_samples.times do |m|
      neighbor_ids[m, true].each { |n| scores[m, class_columns[@labels[n]]] += 1.0 }
    end
  else
    distance_matrix = @params[:metric] == 'precomputed' ? x : PairwiseMetric.euclidean_distance(x, @prototypes)
    n_samples.times do |m|
      # Sorting [distance, index] pairs orders by distance, ties broken by index.
      neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
      neighbor_ids.each { |n| scores[m, class_columns[@labels[n]]] += 1.0 }
    end
  end

  scores
end
|
|
115
|
-
|
|
116
|
-
# Predict class labels for samples.
|
|
117
|
-
#
|
|
118
|
-
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the labels.
|
|
119
|
-
# If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
|
|
120
|
-
# @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
|
|
121
|
-
def predict(x)
  x = check_convert_sample_array(x)
  # With a precomputed metric each row must hold one distance per training sample.
  shape_mismatch = @params[:metric] == 'precomputed' && x.shape[1] != @labels.size
  raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_training_samples.' if shape_mismatch

  votes = decision_function(x)
  # For every sample pick the class whose column has the highest score.
  winners = (0...x.shape[0]).map { |row| @classes[votes[row, true].max_index] }
  Numo::Int32.asarray(winners)
end
|
|
131
|
-
end
|
|
132
|
-
end
|
|
133
|
-
end
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
require 'rumale/base/base_estimator'
|
|
4
|
-
require 'rumale/base/regressor'
|
|
5
|
-
|
|
6
|
-
module Rumale
|
|
7
|
-
module NearestNeighbors
|
|
8
|
-
# KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
|
|
9
|
-
# The current implementation uses the Euclidean distance for finding the neighbors.
|
|
10
|
-
#
|
|
11
|
-
# @example
|
|
12
|
-
# estimator =
|
|
13
|
-
# Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
|
|
14
|
-
# estimator.fit(training_samples, training_target_values)
|
|
15
|
-
# results = estimator.predict(testing_samples)
|
|
16
|
-
#
|
|
17
|
-
class KNeighborsRegressor
  include Base::BaseEstimator
  include Base::Regressor

  # Return the prototypes for the nearest neighbor regressor.
  # If the metric is 'precomputed', that returns nil.
  # If the algorithm is 'vptree', that returns Rumale::NearestNeighbors::VPTree.
  # @return [Numo::DFloat] (shape: [n_training_samples, n_features])
  attr_reader :prototypes

  # Return the values of the prototypes
  # @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
  attr_reader :values

  # Create a new regressor with the nearest neighbor rule.
  #
  # @param n_neighbors [Integer] The number of neighbors.
  # @param algorithm [String] The algorithm is used for finding the nearest neighbors.
  #   If algorithm is 'brute', brute-force search will be used.
  #   If algorithm is 'vptree', vantage point tree will be used.
  #   This parameter is ignored when metric parameter is 'precomputed'.
  # @param metric [String] The metric to calculate the distances.
  #   If metric is 'euclidean', Euclidean distance is calculated for distance between points.
  #   If metric is 'precomputed', the fit and predict methods expect to be given a distance matrix.
  def initialize(n_neighbors: 5, algorithm: 'brute', metric: 'euclidean')
    check_params_numeric(n_neighbors: n_neighbors)
    check_params_positive(n_neighbors: n_neighbors)
    # Each keyword must carry the real parameter name (the original passed a
    # misspelled `algorith:`); presumably the validator reports that name when
    # a value is not a String.
    check_params_string(algorithm: algorithm, metric: metric)
    # Any algorithm other than 'vptree' falls back to 'brute', and any metric
    # other than 'precomputed' falls back to 'euclidean'.
    @params = {}
    @params[:n_neighbors] = n_neighbors
    @params[:algorithm] = algorithm == 'vptree' ? 'vptree' : 'brute'
    @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
    # Learned state; stays nil until fit is called.
    @prototypes = nil
    @values = nil
  end

  # Fit the model with given training data.
  #
  # @param x [Numo::DFloat] (shape: [n_training_samples, n_features]) The training data to be used for fitting the model.
  #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_training_samples, n_training_samples]).
  # @param y [Numo::DFloat] (shape: [n_training_samples, n_outputs]) The target values to be used for fitting the model.
  # @raise [ArgumentError] If the metric is 'precomputed' and x is not square.
  # @return [KNeighborsRegressor] The learned regressor itself.
  def fit(x, y)
    x = check_convert_sample_array(x)
    y = check_convert_tvalue_array(y)
    check_sample_tvalue_size(x, y)
    raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]

    # Prototypes are only kept for the euclidean metric: a VPTree index when
    # requested, otherwise a private copy of the samples. Precomputed leaves nil.
    @prototypes = if @params[:metric] == 'euclidean'
                    if @params[:algorithm] == 'vptree'
                      VPTree.new(x)
                    else
                      x.dup
                    end
                  end
    @values = y.dup
    self
  end

  # Predict values for samples.
  #
  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_features]) The samples to predict the values.
  #   If the metric is 'precomputed', x must be a distance matrix (shape: [n_testing_samples, n_training_samples]).
  # @raise [ArgumentError] If the metric is 'precomputed' and x does not have one column per training sample.
  # @return [Numo::DFloat] (shape: [n_testing_samples, n_outputs]) Predicted values per sample.
  def predict(x)
    x = check_convert_sample_array(x)
    if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
      raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
    end

    # Initialize some variables.
    n_samples = x.shape[0]
    # n_outputs is nil when @values is one-dimensional (single-output targets).
    n_prototypes, n_outputs = @values.shape
    # Never ask for more neighbors than there are training samples.
    n_neighbors = [@params[:n_neighbors], n_prototypes].min
    # Predict values for the given samples: the mean target of the nearest neighbors.
    if @params[:metric] == 'euclidean' && @params[:algorithm] == 'vptree'
      # Only the first value returned by query (the neighbor indices) is used.
      neighbor_ids, = @prototypes.query(x, n_neighbors)
      predicted_values = Array.new(n_samples) do |n|
        n_outputs.nil? ? @values[neighbor_ids[n, true]].mean : @values[neighbor_ids[n, true], true].mean(0).to_a
      end
    else
      distance_matrix = @params[:metric] == 'precomputed' ? x : PairwiseMetric.euclidean_distance(x, @prototypes)
      predicted_values = Array.new(n_samples) do |n|
        # Sorting [distance, index] pairs orders by distance, ties broken by index.
        neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
        n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
      end
    end
    Numo::DFloat[*predicted_values]
  end
end
|
|
107
|
-
end
|
|
108
|
-
end
|