rumale 0.18.7 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +66 -1
- data/CHANGELOG.md +46 -0
- data/Gemfile +2 -0
- data/README.md +5 -36
- data/lib/rumale.rb +5 -10
- data/lib/rumale/clustering/hdbscan.rb +1 -1
- data/lib/rumale/clustering/k_means.rb +1 -1
- data/lib/rumale/clustering/k_medoids.rb +1 -1
- data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
- data/lib/rumale/dataset.rb +3 -3
- data/lib/rumale/decomposition/pca.rb +23 -5
- data/lib/rumale/feature_extraction/feature_hasher.rb +14 -1
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/base_sgd.rb +1 -1
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +13 -1
- data/lib/rumale/model_selection/cross_validation.rb +3 -2
- data/lib/rumale/model_selection/k_fold.rb +1 -1
- data/lib/rumale/model_selection/shuffle_split.rb +1 -1
- data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
- data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
- data/lib/rumale/neural_network/adam.rb +1 -1
- data/lib/rumale/neural_network/base_mlp.rb +1 -1
- data/lib/rumale/preprocessing/binarizer.rb +60 -0
- data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
- data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
- data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -3
- metadata +11 -44
- data/lib/rumale/linear_model/base_linear_model.rb +0 -101
- data/lib/rumale/optimizer/ada_grad.rb +0 -39
- data/lib/rumale/optimizer/adam.rb +0 -53
- data/lib/rumale/optimizer/nadam.rb +0 -62
- data/lib/rumale/optimizer/rmsprop.rb +0 -47
- data/lib/rumale/optimizer/sgd.rb +0 -43
- data/lib/rumale/optimizer/yellow_fin.rb +0 -101
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -121
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -215
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -129
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Normalize samples to unit L1-norm.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# normalizer = Rumale::Preprocessing::L1Normalizer.new
|
12
|
+
# new_samples = normalizer.fit_transform(samples)
|
13
|
+
class L1Normalizer
|
14
|
+
include Base::BaseEstimator
|
15
|
+
include Base::Transformer
|
16
|
+
|
17
|
+
# Return the vector consisting of the L1-norm of each sample.
|
18
|
+
# @return [Numo::DFloat] (shape: [n_samples])
|
19
|
+
attr_reader :norm_vec # :nodoc:
|
20
|
+
|
21
|
+
# Create a new normalizer for normalizing to L1-norm.
|
22
|
+
def initialize
|
23
|
+
@params = {}
|
24
|
+
@norm_vec = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
# Calculate L1-norms of each sample.
|
28
|
+
#
|
29
|
+
# @overload fit(x) -> L1Normalizer
|
30
|
+
#
|
31
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
32
|
+
# @return [L1Normalizer]
|
33
|
+
def fit(x, _y = nil)
|
34
|
+
x = check_convert_sample_array(x)
|
35
|
+
@norm_vec = x.abs.sum(1)
|
36
|
+
@norm_vec[@norm_vec.eq(0)] = 1
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
41
|
+
#
|
42
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
+
#
|
44
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
45
|
+
# @return [Numo::DFloat] The normalized samples.
|
46
|
+
def fit_transform(x, _y = nil)
|
47
|
+
x = check_convert_sample_array(x)
|
48
|
+
fit(x)
|
49
|
+
x / @norm_vec.expand_dims(1)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Calculate L1-norms of each sample, and then normalize samples to L1-norm.
|
53
|
+
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
54
|
+
#
|
55
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
|
56
|
+
# @return [Numo::DFloat] The normalized samples.
|
57
|
+
def transform(x)
|
58
|
+
fit_transform(x)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -34,6 +34,7 @@ module Rumale
|
|
34
34
|
def fit(x, _y = nil)
|
35
35
|
x = check_convert_sample_array(x)
|
36
36
|
@norm_vec = Numo::NMath.sqrt((x**2).sum(1))
|
37
|
+
@norm_vec[@norm_vec.eq(0)] = 1
|
37
38
|
self
|
38
39
|
end
|
39
40
|
|
@@ -46,7 +47,7 @@ module Rumale
|
|
46
47
|
def fit_transform(x, _y = nil)
|
47
48
|
x = check_convert_sample_array(x)
|
48
49
|
fit(x)
|
49
|
-
x / @norm_vec.
|
50
|
+
x / @norm_vec.expand_dims(1)
|
50
51
|
end
|
51
52
|
|
52
53
|
# Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Normalize samples with the maximum of the absolute values.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# normalizer = Rumale::Preprocessing::MaxNormalizer.new
|
12
|
+
# new_samples = normalizer.fit_transform(samples)
|
13
|
+
class MaxNormalizer
|
14
|
+
include Base::BaseEstimator
|
15
|
+
include Base::Transformer
|
16
|
+
|
17
|
+
# Return the vector consisting of the maximum norm of each sample.
|
18
|
+
# @return [Numo::DFloat] (shape: [n_samples])
|
19
|
+
attr_reader :norm_vec # :nodoc:
|
20
|
+
|
21
|
+
# Create a new normalizer for normalizing to max-norm.
|
22
|
+
def initialize
|
23
|
+
@params = {}
|
24
|
+
@norm_vec = nil
|
25
|
+
end
|
26
|
+
|
27
|
+
# Calculate the maximum norms of each sample.
|
28
|
+
#
|
29
|
+
# @overload fit(x) -> MaxNormalizer
|
30
|
+
#
|
31
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
|
32
|
+
# @return [MaxNormalizer]
|
33
|
+
def fit(x, _y = nil)
|
34
|
+
x = check_convert_sample_array(x)
|
35
|
+
@norm_vec = x.abs.max(1)
|
36
|
+
@norm_vec[@norm_vec.eq(0)] = 1
|
37
|
+
self
|
38
|
+
end
|
39
|
+
|
40
|
+
# Calculate the maximum norms of each sample, and then normalize samples with the norms.
|
41
|
+
#
|
42
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
43
|
+
#
|
44
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
|
45
|
+
# @return [Numo::DFloat] The normalized samples.
|
46
|
+
def fit_transform(x, _y = nil)
|
47
|
+
x = check_convert_sample_array(x)
|
48
|
+
fit(x)
|
49
|
+
x / @norm_vec.expand_dims(1)
|
50
|
+
end
|
51
|
+
|
52
|
+
# Calculate the maximum norms of each sample, and then normalize samples with the norms.
|
53
|
+
# This method calls the fit_transform method. This method exists for the Pipeline class.
|
54
|
+
#
|
55
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
|
56
|
+
# @return [Numo::DFloat] The normalized samples.
|
57
|
+
def transform(x)
|
58
|
+
fit_transform(x)
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
|
|
16
16
|
Rumale is a machine learning library in Ruby.
|
17
17
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
18
18
|
Rumale supports Support Vector Machine,
|
19
|
-
Logistic Regression, Ridge, Lasso,
|
19
|
+
Logistic Regression, Ridge, Lasso,
|
20
20
|
Multi-layer Perceptron,
|
21
21
|
Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
|
22
22
|
K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
|
@@ -45,6 +45,4 @@ Gem::Specification.new do |spec|
|
|
45
45
|
}
|
46
46
|
|
47
47
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
48
|
-
spec.add_runtime_dependency 'mopti', '>= 0.1.0'
|
49
|
-
spec.add_runtime_dependency 'mmh3', '>= 0.1.0'
|
50
48
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.18.7
|
4
|
+
version: 0.20.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -24,39 +24,11 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.1
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: mopti
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 0.1.0
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 0.1.0
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: mmh3
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: 0.1.0
|
48
|
-
type: :runtime
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: 0.1.0
|
55
27
|
description: |
|
56
28
|
Rumale is a machine learning library in Ruby.
|
57
29
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
58
30
|
Rumale supports Support Vector Machine,
|
59
|
-
Logistic Regression, Ridge, Lasso,
|
31
|
+
Logistic Regression, Ridge, Lasso,
|
60
32
|
Multi-layer Perceptron,
|
61
33
|
Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
|
62
34
|
K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
|
@@ -100,6 +72,7 @@ files:
|
|
100
72
|
- lib/rumale/clustering/hdbscan.rb
|
101
73
|
- lib/rumale/clustering/k_means.rb
|
102
74
|
- lib/rumale/clustering/k_medoids.rb
|
75
|
+
- lib/rumale/clustering/mini_batch_k_means.rb
|
103
76
|
- lib/rumale/clustering/power_iteration.rb
|
104
77
|
- lib/rumale/clustering/single_linkage.rb
|
105
78
|
- lib/rumale/clustering/snn.rb
|
@@ -140,13 +113,13 @@ files:
|
|
140
113
|
- lib/rumale/evaluation_measure/silhouette_score.rb
|
141
114
|
- lib/rumale/feature_extraction/feature_hasher.rb
|
142
115
|
- lib/rumale/feature_extraction/hash_vectorizer.rb
|
116
|
+
- lib/rumale/feature_extraction/tfidf_transformer.rb
|
143
117
|
- lib/rumale/kernel_approximation/nystroem.rb
|
144
118
|
- lib/rumale/kernel_approximation/rbf.rb
|
145
119
|
- lib/rumale/kernel_machine/kernel_fda.rb
|
146
120
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
147
121
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|
148
122
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
149
|
-
- lib/rumale/linear_model/base_linear_model.rb
|
150
123
|
- lib/rumale/linear_model/base_sgd.rb
|
151
124
|
- lib/rumale/linear_model/elastic_net.rb
|
152
125
|
- lib/rumale/linear_model/lasso.rb
|
@@ -180,23 +153,17 @@ files:
|
|
180
153
|
- lib/rumale/neural_network/base_mlp.rb
|
181
154
|
- lib/rumale/neural_network/mlp_classifier.rb
|
182
155
|
- lib/rumale/neural_network/mlp_regressor.rb
|
183
|
-
- lib/rumale/optimizer/ada_grad.rb
|
184
|
-
- lib/rumale/optimizer/adam.rb
|
185
|
-
- lib/rumale/optimizer/nadam.rb
|
186
|
-
- lib/rumale/optimizer/rmsprop.rb
|
187
|
-
- lib/rumale/optimizer/sgd.rb
|
188
|
-
- lib/rumale/optimizer/yellow_fin.rb
|
189
156
|
- lib/rumale/pairwise_metric.rb
|
190
157
|
- lib/rumale/pipeline/feature_union.rb
|
191
158
|
- lib/rumale/pipeline/pipeline.rb
|
192
|
-
- lib/rumale/polynomial_model/base_factorization_machine.rb
|
193
|
-
- lib/rumale/polynomial_model/factorization_machine_classifier.rb
|
194
|
-
- lib/rumale/polynomial_model/factorization_machine_regressor.rb
|
195
159
|
- lib/rumale/preprocessing/bin_discretizer.rb
|
160
|
+
- lib/rumale/preprocessing/binarizer.rb
|
161
|
+
- lib/rumale/preprocessing/l1_normalizer.rb
|
196
162
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
197
163
|
- lib/rumale/preprocessing/label_binarizer.rb
|
198
164
|
- lib/rumale/preprocessing/label_encoder.rb
|
199
165
|
- lib/rumale/preprocessing/max_abs_scaler.rb
|
166
|
+
- lib/rumale/preprocessing/max_normalizer.rb
|
200
167
|
- lib/rumale/preprocessing/min_max_scaler.rb
|
201
168
|
- lib/rumale/preprocessing/one_hot_encoder.rb
|
202
169
|
- lib/rumale/preprocessing/ordinal_encoder.rb
|
@@ -224,7 +191,7 @@ metadata:
|
|
224
191
|
source_code_uri: https://github.com/yoshoku/rumale
|
225
192
|
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
226
193
|
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
227
|
-
post_install_message:
|
194
|
+
post_install_message:
|
228
195
|
rdoc_options: []
|
229
196
|
require_paths:
|
230
197
|
- lib
|
@@ -240,7 +207,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
240
207
|
version: '0'
|
241
208
|
requirements: []
|
242
209
|
rubygems_version: 3.1.2
|
243
|
-
signing_key:
|
210
|
+
signing_key:
|
244
211
|
specification_version: 4
|
245
212
|
summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
|
246
213
|
algorithms with interfaces similar to Scikit-Learn in Python.
|
@@ -1,101 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/base/base_estimator'
|
4
|
-
require 'rumale/optimizer/nadam'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module LinearModel
|
8
|
-
# @note
|
9
|
-
# In version 0.17.0, a new linear model abstract class called BaseSGD is introduced.
|
10
|
-
# BaseLinearModel is deprecated and will be removed in the future.
|
11
|
-
#
|
12
|
-
# BaseLinearModel is an abstract class for implementation of linear estimator
|
13
|
-
# with mini-batch stochastic gradient descent optimization.
|
14
|
-
# This class is used for internal process.
|
15
|
-
class BaseLinearModel
|
16
|
-
# :nocov:
|
17
|
-
include Base::BaseEstimator
|
18
|
-
|
19
|
-
# Initialize a linear estimator.
|
20
|
-
#
|
21
|
-
# @param reg_param [Float] The regularization parameter.
|
22
|
-
# @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
|
23
|
-
# @param bias_scale [Float] The scale of the bias term.
|
24
|
-
# @param max_iter [Integer] The maximum number of iterations.
|
25
|
-
# @param batch_size [Integer] The size of the mini batches.
|
26
|
-
# @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
|
27
|
-
# If nil is given, Nadam is used.
|
28
|
-
# @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
|
29
|
-
# If nil is given, the methods do not execute in parallel.
|
30
|
-
# If zero or less is given, it becomes equal to the number of processors.
|
31
|
-
# @param random_seed [Integer] The seed value using to initialize the random generator.
|
32
|
-
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
33
|
-
max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
|
34
|
-
warn 'warning: BaseLinearModel is deprecated. Use BaseSGD instead.'
|
35
|
-
@params = {}
|
36
|
-
@params[:reg_param] = reg_param
|
37
|
-
@params[:fit_bias] = fit_bias
|
38
|
-
@params[:bias_scale] = bias_scale
|
39
|
-
@params[:max_iter] = max_iter
|
40
|
-
@params[:batch_size] = batch_size
|
41
|
-
@params[:optimizer] = optimizer
|
42
|
-
@params[:optimizer] ||= Rumale::Optimizer::Nadam.new
|
43
|
-
@params[:n_jobs] = n_jobs
|
44
|
-
@params[:random_seed] = random_seed
|
45
|
-
@params[:random_seed] ||= srand
|
46
|
-
@weight_vec = nil
|
47
|
-
@bias_term = nil
|
48
|
-
@rng = Random.new(@params[:random_seed])
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def partial_fit(x, y)
|
54
|
-
# Expand feature vectors for bias term.
|
55
|
-
samples = @params[:fit_bias] ? expand_feature(x) : x
|
56
|
-
# Initialize some variables.
|
57
|
-
n_samples, n_features = samples.shape
|
58
|
-
rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
|
59
|
-
weight = Numo::DFloat.zeros(n_features)
|
60
|
-
optimizer = @params[:optimizer].dup
|
61
|
-
# Optimization.
|
62
|
-
@params[:max_iter].times do |_t|
|
63
|
-
# Random sampling
|
64
|
-
subset_ids = rand_ids.shift(@params[:batch_size])
|
65
|
-
rand_ids.concat(subset_ids)
|
66
|
-
sub_samples = samples[subset_ids, true]
|
67
|
-
sub_targets = y[subset_ids]
|
68
|
-
# Update weight.
|
69
|
-
loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
|
70
|
-
next if loss_gradient.ne(0.0).count.zero?
|
71
|
-
|
72
|
-
weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
|
73
|
-
end
|
74
|
-
split_weight(weight)
|
75
|
-
end
|
76
|
-
|
77
|
-
def calc_loss_gradient(_x, _y, _weight)
|
78
|
-
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
79
|
-
end
|
80
|
-
|
81
|
-
def calc_new_weight(optimizer, x, weight, loss_gradient)
|
82
|
-
weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
|
83
|
-
optimizer.call(weight, weight_gradient)
|
84
|
-
end
|
85
|
-
|
86
|
-
def expand_feature(x)
|
87
|
-
n_samples = x.shape[0]
|
88
|
-
Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
|
89
|
-
end
|
90
|
-
|
91
|
-
def split_weight(weight)
|
92
|
-
if @params[:fit_bias]
|
93
|
-
[weight[0...-1].dup, weight[-1]]
|
94
|
-
else
|
95
|
-
[weight, 0.0]
|
96
|
-
end
|
97
|
-
end
|
98
|
-
# :nocov:
|
99
|
-
end
|
100
|
-
end
|
101
|
-
end
|
@@ -1,39 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/validation'
|
4
|
-
require 'rumale/base/base_estimator'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Optimizer
|
8
|
-
# AdaGrad is a class that implements AdaGrad optimizer.
|
9
|
-
#
|
10
|
-
# *Reference*
|
11
|
-
# - Duchi, J., Hazan, E., and Singer, Y., "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
|
12
|
-
class AdaGrad
|
13
|
-
include Base::BaseEstimator
|
14
|
-
include Validation
|
15
|
-
|
16
|
-
# Create a new optimizer with AdaGrad.
|
17
|
-
#
|
18
|
-
# @param learning_rate [Float] The initial value of learning rate.
|
19
|
-
def initialize(learning_rate: 0.01)
|
20
|
-
check_params_numeric(learning_rate: learning_rate)
|
21
|
-
check_params_positive(learning_rate: learning_rate)
|
22
|
-
@params = {}
|
23
|
-
@params[:learning_rate] = learning_rate
|
24
|
-
@moment = nil
|
25
|
-
end
|
26
|
-
|
27
|
-
# Calculate the updated weight with AdaGrad adaptive learning rate.
|
28
|
-
#
|
29
|
-
# @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
|
30
|
-
# @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
|
31
|
-
# @return [Numo::DFloat] (shape: [n_features]) The updated weight.
|
32
|
-
def call(weight, gradient)
|
33
|
-
@moment ||= Numo::DFloat.zeros(weight.shape[0])
|
34
|
-
@moment += gradient**2
|
35
|
-
weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rumale/validation'
|
4
|
-
require 'rumale/base/base_estimator'
|
5
|
-
|
6
|
-
module Rumale
|
7
|
-
module Optimizer
|
8
|
-
# Adam is a class that implements Adam optimizer.
|
9
|
-
#
|
10
|
-
# *Reference*
|
11
|
-
# - Kingma, D P., and Ba, J., "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
|
12
|
-
class Adam
|
13
|
-
include Base::BaseEstimator
|
14
|
-
include Validation
|
15
|
-
|
16
|
-
# Create a new optimizer with Adam
|
17
|
-
#
|
18
|
-
# @param learning_rate [Float] The initial value of learning rate.
|
19
|
-
# @param decay1 [Float] The smoothing parameter for the first moment.
|
20
|
-
# @param decay2 [Float] The smoothing parameter for the second moment.
|
21
|
-
def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
|
22
|
-
check_params_numeric(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
|
23
|
-
check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
|
24
|
-
@params = {}
|
25
|
-
@params[:learning_rate] = learning_rate
|
26
|
-
@params[:decay1] = decay1
|
27
|
-
@params[:decay2] = decay2
|
28
|
-
@fst_moment = nil
|
29
|
-
@sec_moment = nil
|
30
|
-
@iter = 0
|
31
|
-
end
|
32
|
-
|
33
|
-
# Calculate the updated weight with Adam adaptive learning rate.
|
34
|
-
#
|
35
|
-
# @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
|
36
|
-
# @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
|
37
|
-
# @return [Numo::DFloat] (shape: [n_features]) The updated weight.
|
38
|
-
def call(weight, gradient)
|
39
|
-
@fst_moment ||= Numo::DFloat.zeros(weight.shape)
|
40
|
-
@sec_moment ||= Numo::DFloat.zeros(weight.shape)
|
41
|
-
|
42
|
-
@iter += 1
|
43
|
-
|
44
|
-
@fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
|
45
|
-
@sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
|
46
|
-
nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
|
47
|
-
nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
|
48
|
-
|
49
|
-
weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
53
|
-
end
|