rumale 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +1 -0
- data/.gitignore +20 -0
- data/.rspec +3 -0
- data/.rubocop.yml +47 -0
- data/.rubocop_todo.yml +58 -0
- data/.travis.yml +13 -0
- data/CHANGELOG.md +2 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +23 -0
- data/README.md +175 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/rumale.rb +70 -0
- data/lib/rumale/base/base_estimator.rb +13 -0
- data/lib/rumale/base/classifier.rb +36 -0
- data/lib/rumale/base/cluster_analyzer.rb +31 -0
- data/lib/rumale/base/evaluator.rb +17 -0
- data/lib/rumale/base/regressor.rb +36 -0
- data/lib/rumale/base/splitter.rb +21 -0
- data/lib/rumale/base/transformer.rb +22 -0
- data/lib/rumale/clustering/dbscan.rb +125 -0
- data/lib/rumale/clustering/k_means.rb +138 -0
- data/lib/rumale/dataset.rb +110 -0
- data/lib/rumale/decomposition/nmf.rb +141 -0
- data/lib/rumale/decomposition/pca.rb +148 -0
- data/lib/rumale/ensemble/ada_boost_classifier.rb +196 -0
- data/lib/rumale/ensemble/ada_boost_regressor.rb +178 -0
- data/lib/rumale/ensemble/random_forest_classifier.rb +180 -0
- data/lib/rumale/ensemble/random_forest_regressor.rb +141 -0
- data/lib/rumale/evaluation_measure/accuracy.rb +29 -0
- data/lib/rumale/evaluation_measure/f_score.rb +50 -0
- data/lib/rumale/evaluation_measure/log_loss.rb +45 -0
- data/lib/rumale/evaluation_measure/mean_absolute_error.rb +29 -0
- data/lib/rumale/evaluation_measure/mean_squared_error.rb +29 -0
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +62 -0
- data/lib/rumale/evaluation_measure/precision.rb +50 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +91 -0
- data/lib/rumale/evaluation_measure/purity.rb +40 -0
- data/lib/rumale/evaluation_measure/r2_score.rb +43 -0
- data/lib/rumale/evaluation_measure/recall.rb +50 -0
- data/lib/rumale/kernel_approximation/rbf.rb +121 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +193 -0
- data/lib/rumale/linear_model/base_linear_model.rb +89 -0
- data/lib/rumale/linear_model/lasso.rb +136 -0
- data/lib/rumale/linear_model/linear_regression.rb +110 -0
- data/lib/rumale/linear_model/logistic_regression.rb +159 -0
- data/lib/rumale/linear_model/ridge.rb +110 -0
- data/lib/rumale/linear_model/svc.rb +183 -0
- data/lib/rumale/linear_model/svr.rb +122 -0
- data/lib/rumale/model_selection/cross_validation.rb +123 -0
- data/lib/rumale/model_selection/grid_search_cv.rb +247 -0
- data/lib/rumale/model_selection/k_fold.rb +76 -0
- data/lib/rumale/model_selection/stratified_k_fold.rb +94 -0
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +100 -0
- data/lib/rumale/naive_bayes/naive_bayes.rb +315 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +111 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +93 -0
- data/lib/rumale/optimizer/nadam.rb +90 -0
- data/lib/rumale/optimizer/rmsprop.rb +69 -0
- data/lib/rumale/optimizer/sgd.rb +65 -0
- data/lib/rumale/optimizer/yellow_fin.rb +144 -0
- data/lib/rumale/pairwise_metric.rb +91 -0
- data/lib/rumale/pipeline/pipeline.rb +197 -0
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +99 -0
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +197 -0
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +131 -0
- data/lib/rumale/preprocessing/l2_normalizer.rb +62 -0
- data/lib/rumale/preprocessing/label_encoder.rb +94 -0
- data/lib/rumale/preprocessing/min_max_scaler.rb +92 -0
- data/lib/rumale/preprocessing/one_hot_encoder.rb +98 -0
- data/lib/rumale/preprocessing/standard_scaler.rb +86 -0
- data/lib/rumale/probabilistic_output.rb +112 -0
- data/lib/rumale/tree/base_decision_tree.rb +153 -0
- data/lib/rumale/tree/decision_tree_classifier.rb +163 -0
- data/lib/rumale/tree/decision_tree_regressor.rb +135 -0
- data/lib/rumale/tree/node.rb +70 -0
- data/lib/rumale/utils.rb +37 -0
- data/lib/rumale/validation.rb +79 -0
- data/lib/rumale/values.rb +13 -0
- data/lib/rumale/version.rb +6 -0
- data/rumale.gemspec +41 -0
- metadata +204 -0
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb
@@ -0,0 +1,111 @@
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/classifier'

module Rumale
  # This module consists of the classes that implement estimators based on the nearest neighbors rule.
  module NearestNeighbors
    # KNeighborsClassifier is a class that implements the classifier with the k-nearest neighbors rule.
    # The current implementation uses the Euclidean distance for finding the neighbors.
    #
    # @example
    #   estimator =
    #     Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 5)
    #   estimator.fit(training_samples, training_labels)
    #   results = estimator.predict(testing_samples)
    #
    class KNeighborsClassifier
      include Base::BaseEstimator
      include Base::Classifier

      # Return the prototypes for the nearest neighbor classifier.
      # @return [Numo::DFloat] (shape: [n_samples, n_features])
      attr_reader :prototypes

      # Return the labels of the prototypes.
      # @return [Numo::Int32] (size: n_samples)
      attr_reader :labels

      # Return the class labels.
      # @return [Numo::Int32] (size: n_classes)
      attr_reader :classes

      # Create a new classifier with the nearest neighbor rule.
      #
      # @param n_neighbors [Integer] The number of neighbors.
      def initialize(n_neighbors: 5)
        check_params_integer(n_neighbors: n_neighbors)
        check_params_positive(n_neighbors: n_neighbors)
        @params = {}
        @params[:n_neighbors] = n_neighbors
        @prototypes = nil
        @labels = nil
        @classes = nil
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
      # @return [KNeighborsClassifier] The learned classifier itself.
      def fit(x, y)
        check_sample_array(x)
        check_label_array(y)
        check_sample_label_size(x, y)
        @prototypes = Numo::DFloat.asarray(x.to_a)
        @labels = Numo::Int32.asarray(y.to_a)
        @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
        self
      end

      # Calculate confidence scores for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
      def decision_function(x)
        check_sample_array(x)
        distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
        n_samples, n_prototypes = distance_matrix.shape
        n_classes = @classes.size
        n_neighbors = [@params[:n_neighbors], n_prototypes].min
        scores = Numo::DFloat.zeros(n_samples, n_classes)
        n_samples.times do |m|
          neighbor_ids = distance_matrix[m, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
          neighbor_ids.each { |n| scores[m, @classes.to_a.index(@labels[n])] += 1.0 }
        end
        scores
      end

      # Predict class labels for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
      def predict(x)
        check_sample_array(x)
        n_samples = x.shape.first
        decision_values = decision_function(x)
        Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about KNeighborsClassifier.
      def marshal_dump
        { params: @params,
          prototypes: @prototypes,
          labels: @labels,
          classes: @classes }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @prototypes = obj[:prototypes]
        @labels = obj[:labels]
        @classes = obj[:classes]
        nil
      end
    end
  end
end
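The @example block above is schematic. A minimal runnable sketch of the same API might look like the following; it assumes the rumale and numo-narray gems are installed, and the toy arrays are invented for illustration.

require 'numo/narray'
require 'rumale'

# Two well-separated 2-D classes (toy data).
x = Numo::DFloat[[0.0, 0.0], [0.1, 0.2], [0.2, 0.1],
                 [5.0, 5.0], [5.1, 4.9], [4.9, 5.2]]
y = Numo::Int32[0, 0, 0, 1, 1, 1]

estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 3)
estimator.fit(x, y)

queries = Numo::DFloat[[0.1, 0.1], [5.0, 5.1]]
p estimator.predict(queries).to_a           # => [0, 1]
p estimator.decision_function(queries).to_a # raw neighbor vote counts per class

Note that decision_function returns vote counts (each of the k neighbors contributes 1.0 to its class), not calibrated probabilities.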
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb
@@ -0,0 +1,93 @@
# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/base/regressor'

module Rumale
  module NearestNeighbors
    # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
    # The current implementation uses the Euclidean distance for finding the neighbors.
    #
    # @example
    #   estimator =
    #     Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
    #   estimator.fit(training_samples, training_target_values)
    #   results = estimator.predict(testing_samples)
    #
    class KNeighborsRegressor
      include Base::BaseEstimator
      include Base::Regressor

      # Return the prototypes for the nearest neighbor regressor.
      # @return [Numo::DFloat] (shape: [n_samples, n_features])
      attr_reader :prototypes

      # Return the values of the prototypes.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs])
      attr_reader :values

      # Create a new regressor with the nearest neighbor rule.
      #
      # @param n_neighbors [Integer] The number of neighbors.
      def initialize(n_neighbors: 5)
        check_params_integer(n_neighbors: n_neighbors)
        check_params_positive(n_neighbors: n_neighbors)
        @params = {}
        @params[:n_neighbors] = n_neighbors
        @prototypes = nil
        @values = nil
      end

      # Fit the model with given training data.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
      # @return [KNeighborsRegressor] The learned regressor itself.
      def fit(x, y)
        check_sample_array(x)
        check_tvalue_array(y)
        check_sample_tvalue_size(x, y)
        @prototypes = x.dup
        @values = y.dup
        self
      end

      # Predict values for samples.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
      def predict(x)
        check_sample_array(x)
        # Initialize some variables.
        n_samples, = x.shape
        n_prototypes, n_outputs = @values.shape
        n_neighbors = [@params[:n_neighbors], n_prototypes].min
        # Calculate distance matrix.
        distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
        # Predict values for the given samples.
        predicted_values = Array.new(n_samples) do |n|
          neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
          n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
        end
        Numo::DFloat[*predicted_values]
      end

      # Dump marshal data.
      # @return [Hash] The marshal data about KNeighborsRegressor.
      def marshal_dump
        { params: @params,
          prototypes: @prototypes,
          values: @values }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @prototypes = obj[:prototypes]
        @values = obj[:values]
        nil
      end
    end
  end
end
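As predict above shows, each prediction is the mean target value of the k nearest prototypes. A minimal sketch under the same assumptions as before (rumale and numo-narray installed, toy data invented for illustration):

require 'numo/narray'
require 'rumale'

# Noisy samples around y = 2x (toy data).
x = Numo::DFloat[[1.0], [2.0], [3.0], [4.0]]
y = Numo::DFloat[2.1, 3.9, 6.2, 7.8]

estimator = Rumale::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 2)
estimator.fit(x, y)

# The two nearest prototypes to 2.5 are x = 2.0 and x = 3.0, so the
# prediction is (3.9 + 6.2) / 2 = 5.05 (up to floating-point rounding).
p estimator.predict(Numo::DFloat[[2.5]]).to_a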
data/lib/rumale/optimizer/nadam.rb
@@ -0,0 +1,90 @@
# frozen_string_literal: true

require 'rumale/validation'
require 'rumale/base/base_estimator'

module Rumale
  # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
  module Optimizer
    # Nadam is a class that implements the Nadam optimizer.
    #
    # @example
    #   optimizer = Rumale::Optimizer::Nadam.new(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
    #   estimator.fit(samples, values)
    #
    # *Reference*
    # - T. Dozat, "Incorporating Nesterov Momentum into Adam," Tech. Rep. Stanford University, 2015.
    class Nadam
      include Base::BaseEstimator
      include Validation

      # Create a new optimizer with Nadam.
      #
      # @param learning_rate [Float] The initial value of learning rate.
      # @param momentum [Float] The initial value of momentum.
      # @param decay1 [Float] The smoothing parameter for the first moment.
      # @param decay2 [Float] The smoothing parameter for the second moment.
      def initialize(learning_rate: 0.01, momentum: 0.9, decay1: 0.9, decay2: 0.999)
        check_params_float(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay1: decay1, decay2: decay2)
        @params = {}
        @params[:learning_rate] = learning_rate
        @params[:momentum] = momentum
        @params[:decay1] = decay1
        @params[:decay2] = decay2
        @fst_moment = nil
        @sec_moment = nil
        @decay1_prod = 1.0
        @iter = 0
      end

      # Calculate the updated weight with Nadam adaptive learning rate.
      #
      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
      def call(weight, gradient)
        @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
        @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])

        @iter += 1

        decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
        decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
        decay1_prod_curr = @decay1_prod * decay1_curr
        decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
        @decay1_prod = decay1_prod_curr

        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
        nm_gradient = gradient / (1.0 - decay1_prod_curr)
        nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)

        weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          fst_moment: @fst_moment,
          sec_moment: @sec_moment,
          decay1_prod: @decay1_prod,
          iter: @iter }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @fst_moment = obj[:fst_moment]
        @sec_moment = obj[:sec_moment]
        @decay1_prod = obj[:decay1_prod]
        @iter = obj[:iter]
        nil
      end
    end
  end
end
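Because call takes the current weight and gradient and returns the updated weight, an optimizer can also be driven by hand outside an estimator. A minimal sketch minimizing the toy objective f(w) = (w - 3)^2 (the objective and learning rate are invented for illustration; rumale and numo-narray are assumed installed):

require 'numo/narray'
require 'rumale'

optimizer = Rumale::Optimizer::Nadam.new(learning_rate: 0.1)
weight = Numo::DFloat.zeros(1)

# The gradient of f(w) = (w - 3)^2 is 2 * (w - 3).
1000.times { weight = optimizer.call(weight, 2.0 * (weight - 3.0)) }
p weight.to_a # should approach [3.0]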
data/lib/rumale/optimizer/rmsprop.rb
@@ -0,0 +1,69 @@
# frozen_string_literal: true

require 'rumale/validation'
require 'rumale/base/base_estimator'

module Rumale
  module Optimizer
    # RMSProp is a class that implements the RMSProp optimizer.
    #
    # @example
    #   optimizer = Rumale::Optimizer::RMSProp.new(learning_rate: 0.01, momentum: 0.9, decay: 0.9)
    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
    #   estimator.fit(samples, values)
    #
    # *Reference*
    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML '13, pp. 1139--1147, 2013.
    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
    class RMSProp
      include Base::BaseEstimator
      include Validation

      # Create a new optimizer with RMSProp.
      #
      # @param learning_rate [Float] The initial value of learning rate.
      # @param momentum [Float] The initial value of momentum.
      # @param decay [Float] The smoothing parameter.
      def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.9)
        check_params_float(learning_rate: learning_rate, momentum: momentum, decay: decay)
        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
        @params = {}
        @params[:learning_rate] = learning_rate
        @params[:momentum] = momentum
        @params[:decay] = decay
        @moment = nil
        @update = nil
      end

      # Calculate the updated weight with RMSProp adaptive learning rate.
      #
      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
      def call(weight, gradient)
        @moment ||= Numo::DFloat.zeros(weight.shape[0])
        @update ||= Numo::DFloat.zeros(weight.shape[0])
        @moment = @params[:decay] * @moment + (1.0 - @params[:decay]) * gradient**2
        @update = @params[:momentum] * @update - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
        weight + @update
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          moment: @moment,
          update: @update }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @moment = obj[:moment]
        @update = obj[:update]
        nil
      end
    end
  end
end
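A property of the update in call above: because the step divides by the running root mean square of the gradient, the first step has roughly the same magnitude whatever the gradient's scale. A small sketch (toy gradients invented for illustration):

require 'numo/narray'
require 'rumale'

small = Rumale::Optimizer::RMSProp.new(learning_rate: 0.01)
large = Rumale::Optimizer::RMSProp.new(learning_rate: 0.01)
w = Numo::DFloat.zeros(1)

# Gradients differing by a factor of 1000 yield nearly identical first steps,
# both close to -learning_rate / sqrt(1 - decay) = -0.01 / sqrt(0.1), about -0.0316.
p small.call(w, Numo::DFloat[0.001]).to_a
p large.call(w, Numo::DFloat[1.0]).to_a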
data/lib/rumale/optimizer/sgd.rb
@@ -0,0 +1,65 @@
# frozen_string_literal: true

require 'rumale/validation'
require 'rumale/base/base_estimator'

module Rumale
  module Optimizer
    # SGD is a class that implements the stochastic gradient descent (SGD) optimizer.
    #
    # @example
    #   optimizer = Rumale::Optimizer::SGD.new(learning_rate: 0.01, momentum: 0.9, decay: 0.9)
    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
    #   estimator.fit(samples, values)
    class SGD
      include Base::BaseEstimator
      include Validation

      # Create a new optimizer with SGD.
      #
      # @param learning_rate [Float] The initial value of learning rate.
      # @param momentum [Float] The initial value of momentum.
      # @param decay [Float] The decay parameter for the learning rate.
      def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
        check_params_float(learning_rate: learning_rate, momentum: momentum, decay: decay)
        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
        @params = {}
        @params[:learning_rate] = learning_rate
        @params[:momentum] = momentum
        @params[:decay] = decay
        @iter = 0
        @update = nil
      end

      # Calculate the updated weight with SGD.
      #
      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
      def call(weight, gradient)
        @update ||= Numo::DFloat.zeros(weight.shape[0])
        current_learning_rate = @params[:learning_rate] / (1.0 + @params[:decay] * @iter)
        @iter += 1
        @update = @params[:momentum] * @update - current_learning_rate * gradient
        weight + @update
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          iter: @iter,
          update: @update }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @iter = obj[:iter]
        @update = obj[:update]
        nil
      end
    end
  end
end
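The decay parameter defines a hyperbolic learning-rate schedule: at iteration t the effective rate is learning_rate / (1 + decay * t). A small sketch with a constant gradient (values invented for illustration):

require 'numo/narray'
require 'rumale'

optimizer = Rumale::Optimizer::SGD.new(learning_rate: 0.1, momentum: 0.0, decay: 0.5)
w = Numo::DFloat.zeros(1)
g = Numo::DFloat[1.0]

# With zero momentum, step t is -learning_rate / (1 + 0.5 * t) * gradient:
# -0.1, then about -0.0667, then -0.05.
3.times do
  w_next = optimizer.call(w, g)
  p((w_next - w).to_a)
  w = w_next
end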
data/lib/rumale/optimizer/yellow_fin.rb
@@ -0,0 +1,144 @@
# frozen_string_literal: true

require 'rumale/validation'
require 'rumale/base/base_estimator'

module Rumale
  module Optimizer
    # YellowFin is a class that implements the YellowFin optimizer.
    #
    # @example
    #   optimizer = Rumale::Optimizer::YellowFin.new(learning_rate: 0.01, momentum: 0.9, decay: 0.999, window_width: 20)
    #   estimator = Rumale::LinearModel::LinearRegression.new(optimizer: optimizer, random_seed: 1)
    #   estimator.fit(samples, values)
    #
    # *Reference*
    # - J. Zhang and I. Mitliagkas, "YellowFin and the Art of Momentum Tuning," CoRR abs/1706.03471, 2017.
    class YellowFin
      include Base::BaseEstimator
      include Validation

      # Create a new optimizer with YellowFin.
      #
      # @param learning_rate [Float] The initial value of learning rate.
      # @param momentum [Float] The initial value of momentum.
      # @param decay [Float] The smoothing parameter.
      # @param window_width [Integer] The sliding window width for searching curvature range.
      def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.999, window_width: 20)
        check_params_float(learning_rate: learning_rate, momentum: momentum, decay: decay)
        check_params_integer(window_width: window_width)
        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay, window_width: window_width)
        @params = {}
        @params[:learning_rate] = learning_rate
        @params[:momentum] = momentum
        @params[:decay] = decay
        @params[:window_width] = window_width
        @smth_learning_rate = learning_rate
        @smth_momentum = momentum
        @grad_norms = nil
        @grad_norm_min = 0.0
        @grad_norm_max = 0.0
        @grad_mean_sqr = 0.0
        @grad_mean = 0.0
        @grad_var = 0.0
        @grad_norm_mean = 0.0
        @curve_mean = 0.0
        @distance_mean = 0.0
        @update = nil
      end

      # Calculate the updated weight with adaptive momentum coefficient and learning rate.
      #
      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
      def call(weight, gradient)
        @update ||= Numo::DFloat.zeros(weight.shape[0])
        curvature_range(gradient)
        gradient_variance(gradient)
        distance_to_optimum(gradient)
        @smth_momentum = @params[:decay] * @smth_momentum + (1 - @params[:decay]) * current_momentum
        @smth_learning_rate = @params[:decay] * @smth_learning_rate + (1 - @params[:decay]) * current_learning_rate
        @update = @smth_momentum * @update - @smth_learning_rate * gradient
        weight + @update
      end

      private

      def current_momentum
        dr = Math.sqrt(@grad_norm_max / @grad_norm_min + 1.0e-8)
        [cubic_root**2, ((dr - 1) / (dr + 1))**2].max
      end

      def current_learning_rate
        (1.0 - Math.sqrt(@params[:momentum]))**2 / (@grad_norm_min + 1.0e-8)
      end

      def cubic_root
        p = (@distance_mean**2 * @grad_norm_min**2) / (2 * @grad_var + 1.0e-8)
        w3 = (-Math.sqrt(p**2 + 4.fdiv(27) * p**3) - p).fdiv(2)
        w = (w3 >= 0.0 ? 1 : -1) * w3.abs**1.fdiv(3)
        y = w - p / (3 * w + 1.0e-8)
        y + 1
      end

      def curvature_range(gradient)
        @grad_norms ||= []
        @grad_norms.push((gradient**2).sum)
        @grad_norms.shift(@grad_norms.size - @params[:window_width]) if @grad_norms.size > @params[:window_width]
        @grad_norm_min = @params[:decay] * @grad_norm_min + (1 - @params[:decay]) * @grad_norms.min
        @grad_norm_max = @params[:decay] * @grad_norm_max + (1 - @params[:decay]) * @grad_norms.max
      end

      def gradient_variance(gradient)
        @grad_mean_sqr = @params[:decay] * @grad_mean_sqr + (1 - @params[:decay]) * gradient**2
        @grad_mean = @params[:decay] * @grad_mean + (1 - @params[:decay]) * gradient
        @grad_var = (@grad_mean_sqr - @grad_mean**2).sum
      end

      def distance_to_optimum(gradient)
        grad_sqr = (gradient**2).sum
        @grad_norm_mean = @params[:decay] * @grad_norm_mean + (1 - @params[:decay]) * Math.sqrt(grad_sqr + 1.0e-8)
        @curve_mean = @params[:decay] * @curve_mean + (1 - @params[:decay]) * grad_sqr
        @distance_mean = @params[:decay] * @distance_mean + (1 - @params[:decay]) * (@grad_norm_mean / @curve_mean)
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          smth_learning_rate: @smth_learning_rate,
          smth_momentum: @smth_momentum,
          grad_norms: @grad_norms,
          grad_norm_min: @grad_norm_min,
          grad_norm_max: @grad_norm_max,
          grad_mean_sqr: @grad_mean_sqr,
          grad_mean: @grad_mean,
          grad_var: @grad_var,
          grad_norm_mean: @grad_norm_mean,
          curve_mean: @curve_mean,
          distance_mean: @distance_mean,
          update: @update }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @smth_learning_rate = obj[:smth_learning_rate]
        @smth_momentum = obj[:smth_momentum]
        @grad_norms = obj[:grad_norms]
        @grad_norm_min = obj[:grad_norm_min]
        @grad_norm_max = obj[:grad_norm_max]
        @grad_mean_sqr = obj[:grad_mean_sqr]
        @grad_mean = obj[:grad_mean]
        @grad_var = obj[:grad_var]
        @grad_norm_mean = obj[:grad_norm_mean]
        @curve_mean = obj[:curve_mean]
        @distance_mean = obj[:distance_mean]
        @update = obj[:update]
        nil
      end
    end
  end
end
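Every estimator and optimizer in this release defines marshal_dump and marshal_load, so trained or partially trained objects can be persisted with Ruby's built-in Marshal. A minimal sketch using the YellowFin optimizer above (the file name is invented for illustration):

require 'numo/narray'
require 'rumale'

optimizer = Rumale::Optimizer::YellowFin.new(learning_rate: 0.01, window_width: 20)
# Accumulate some internal state (sliding window of gradient norms, smoothed rates).
optimizer.call(Numo::DFloat.zeros(2), Numo::DFloat[0.5, -0.5])

# Marshal round-trips through the marshal_dump / marshal_load hooks defined above.
File.binwrite('yellowfin.dat', Marshal.dump(optimizer))
restored = Marshal.load(File.binread('yellowfin.dat'))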