rumale 0.19.3 → 0.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/rumale.rb +0 -10
- data/lib/rumale/version.rb +1 -1
- metadata +2 -12
- data/lib/rumale/linear_model/base_linear_model.rb +0 -102
- data/lib/rumale/optimizer/ada_grad.rb +0 -42
- data/lib/rumale/optimizer/adam.rb +0 -56
- data/lib/rumale/optimizer/nadam.rb +0 -67
- data/lib/rumale/optimizer/rmsprop.rb +0 -50
- data/lib/rumale/optimizer/sgd.rb +0 -46
- data/lib/rumale/optimizer/yellow_fin.rb +0 -104
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -125
- data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -220
- data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -134
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 358515f8785eb3de2e6571a957ca76cece6b774bb022c1a0951c92d44ab422b4
+  data.tar.gz: '0289b7eb382cd3300845412af0fd43626f4f827bb719083c879b574e3ab37eb0'
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f03fc0f27f99ed4acea3fb7d7bf34017c1dbf923b20dabc9a78d6d44f0b151bc9dc78ba24d122f81607a43fd1852e398a603b75b87656a2f79109f87c0db0d98
+  data.tar.gz: 69f6b8892f6bfb4c43706513245c3fba687dcb6a347c1c5185a70d5e45a024b2848a019bfae48726e1f49212878e8d6d67c811ec5f4a990fdbb3a2841efdfe9b
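These checksums describe the metadata.gz and data.tar.gz entries packed inside the released .gem archive. A minimal, hedged verification sketch, assuming the archive has been fetched locally as rumale-0.20.0.gem (for example with `gem fetch rumale -v 0.20.0`):

require 'digest'
require 'rubygems/package'

# A .gem file is a tar archive; its metadata.gz and data.tar.gz entries are the
# files the SHA256/SHA512 values above refer to. Recompute the SHA256 digests locally:
File.open('rumale-0.20.0.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)

      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end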
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
+# 0.20.0
+## Breaking changes
+- Delete deprecated estimators such as PolynomialModel, Optimizer, and BaseLinearModel.
+
 # 0.19.3
 - Add preprocessing class for [Binarizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/Binarizer.html)
 - Add preprocessing class for [MaxNormalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/MaxNormalizer.html)
@@ -13,6 +17,7 @@
 - Fix some typos.

 # 0.19.0
+## Breaking changes
 - Change mmh3 and mopti gem to non-runtime dependent library.
   - The mmh3 gem is used in [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
     You only need to require mmh3 gem when using FeatureHasher.
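For code that still referenced the removed namespaces, the deprecation notes in the deleted sources point at the replacements (BaseSGD for BaseLinearModel, the xlearn gem for the factorization machines). A hedged migration sketch, with training_samples, training_labels, and testing_samples standing in for user data:

require 'rumale'

# Gone in 0.20.0 (these only printed deprecation warnings in 0.19.x):
#   Rumale::PolynomialModel::FactorizationMachineClassifier
#   Rumale::Optimizer::Nadam, Rumale::LinearModel::BaseLinearModel, ...

# One in-library alternative for linear classification is the BaseSGD-based SVC:
estimator = Rumale::LinearModel::SVC.new(reg_param: 0.01, random_seed: 1)
estimator.fit(training_samples, training_labels)
results = estimator.predict(testing_samples)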
data/lib/rumale.rb
CHANGED
@@ -18,17 +18,10 @@ require 'rumale/base/cluster_analyzer'
 require 'rumale/base/transformer'
 require 'rumale/base/splitter'
 require 'rumale/base/evaluator'
-require 'rumale/optimizer/sgd'
-require 'rumale/optimizer/ada_grad'
-require 'rumale/optimizer/rmsprop'
-require 'rumale/optimizer/adam'
-require 'rumale/optimizer/nadam'
-require 'rumale/optimizer/yellow_fin'
 require 'rumale/pipeline/pipeline'
 require 'rumale/pipeline/feature_union'
 require 'rumale/kernel_approximation/rbf'
 require 'rumale/kernel_approximation/nystroem'
-require 'rumale/linear_model/base_linear_model'
 require 'rumale/linear_model/base_sgd'
 require 'rumale/linear_model/svc'
 require 'rumale/linear_model/svr'
@@ -41,9 +34,6 @@ require 'rumale/kernel_machine/kernel_svc'
 require 'rumale/kernel_machine/kernel_pca'
 require 'rumale/kernel_machine/kernel_fda'
 require 'rumale/kernel_machine/kernel_ridge'
-require 'rumale/polynomial_model/base_factorization_machine'
-require 'rumale/polynomial_model/factorization_machine_classifier'
-require 'rumale/polynomial_model/factorization_machine_regressor'
 require 'rumale/multiclass/one_vs_rest_classifier'
 require 'rumale/nearest_neighbors/vp_tree'
 require 'rumale/nearest_neighbors/k_neighbors_classifier'
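With the requires above removed, the umbrella `require 'rumale'` no longer defines the optimizer or polynomial model namespaces. A quick, illustrative check:

require 'rumale'

defined?(Rumale::LinearModel::SVC)   # => "constant" (still shipped)
defined?(Rumale::Optimizer)          # => nil in 0.20.0
defined?(Rumale::PolynomialModel)    # => nil in 0.20.0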
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 0.20.0
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-
+date: 2020-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -120,7 +120,6 @@ files:
 - lib/rumale/kernel_machine/kernel_pca.rb
 - lib/rumale/kernel_machine/kernel_ridge.rb
 - lib/rumale/kernel_machine/kernel_svc.rb
-- lib/rumale/linear_model/base_linear_model.rb
 - lib/rumale/linear_model/base_sgd.rb
 - lib/rumale/linear_model/elastic_net.rb
 - lib/rumale/linear_model/lasso.rb
@@ -154,18 +153,9 @@ files:
 - lib/rumale/neural_network/base_mlp.rb
 - lib/rumale/neural_network/mlp_classifier.rb
 - lib/rumale/neural_network/mlp_regressor.rb
-- lib/rumale/optimizer/ada_grad.rb
-- lib/rumale/optimizer/adam.rb
-- lib/rumale/optimizer/nadam.rb
-- lib/rumale/optimizer/rmsprop.rb
-- lib/rumale/optimizer/sgd.rb
-- lib/rumale/optimizer/yellow_fin.rb
 - lib/rumale/pairwise_metric.rb
 - lib/rumale/pipeline/feature_union.rb
 - lib/rumale/pipeline/pipeline.rb
-- lib/rumale/polynomial_model/base_factorization_machine.rb
-- lib/rumale/polynomial_model/factorization_machine_classifier.rb
-- lib/rumale/polynomial_model/factorization_machine_regressor.rb
 - lib/rumale/preprocessing/bin_discretizer.rb
 - lib/rumale/preprocessing/binarizer.rb
 - lib/rumale/preprocessing/l1_normalizer.rb
data/lib/rumale/linear_model/base_linear_model.rb
DELETED
@@ -1,102 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/optimizer/nadam'
-
-module Rumale
-  module LinearModel
-    # @note
-    #   In version 0.17.0, a new linear model abstract class called BaseSGD is introduced.
-    #   BaseLinearModel is deprecated and will be removed in the future.
-    # @deprecated Use BaseSGD class instead. This class will be deleted in version 0.20.0.
-    #
-    # BaseLinearModel is an abstract class for implementation of linear estimator
-    # with mini-batch stochastic gradient descent optimization.
-    # This class is used for internal process.
-    class BaseLinearModel
-      # :nocov:
-      include Base::BaseEstimator
-
-      # Initialize a linear estimator.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-      #   If nil is given, the methods do not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        warn 'warning: BaseLinearModel is deprecated. Use BaseSGD instead.'
-        @params = {}
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
-        @params[:n_jobs] = n_jobs
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-
-      private
-
-      def partial_fit(x, y)
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = Array(0...n_samples).shuffle(random: @rng.dup)
-        weight = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          sub_samples = samples[subset_ids, true]
-          sub_targets = y[subset_ids]
-          # Update weight.
-          loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
-          next if loss_gradient.ne(0.0).count.zero?
-
-          weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
-        end
-        split_weight(weight)
-      end
-
-      def calc_loss_gradient(_x, _y, _weight)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-
-      def calc_new_weight(optimizer, x, weight, loss_gradient)
-        weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
-        optimizer.call(weight, weight_gradient)
-      end
-
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-
-      def split_weight(weight)
-        if @params[:fit_bias]
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-      # :nocov:
-    end
-  end
-end
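BaseLinearModel only supplied the mini-batch loop; concrete estimators provided the loss gradient. A hypothetical sketch (TinyRidge is not part of Rumale) of how a subclass filled in the calc_loss_gradient hook before the class was removed:

# Illustrative only: a least-squares gradient plugged into the abstract loop above.
class TinyRidge < Rumale::LinearModel::BaseLinearModel
  def fit(x, y)
    @weight_vec, @bias_term = partial_fit(x, y)
    self
  end

  private

  # partial_fit passes a mini-batch and the current weight vector; the subclass
  # only has to return the per-sample loss gradient.
  def calc_loss_gradient(x, y, weight)
    x.dot(weight) - y
  end
end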
data/lib/rumale/optimizer/ada_grad.rb
DELETED
@@ -1,42 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  module Optimizer
-    # AdaGrad is a class that implements AdaGrad optimizer.
-    #
-    # @deprecated AdaGrad will be deleted in version 0.20.0.
-    #
-    # *Reference*
-    # - Duchi, J., Hazan, E., and Singer, Y., "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
-    class AdaGrad
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with AdaGrad.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      def initialize(learning_rate: 0.01)
-        warn 'warning: AdaGrad is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate)
-        check_params_positive(learning_rate: learning_rate)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @moment = nil
-      end
-
-      # Calculate the updated weight with AdaGrad adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @moment ||= Numo::DFloat.zeros(weight.shape[0])
-        @moment += gradient**2
-        weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
-      end
-    end
-  end
-end
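All of the removed optimizer classes shared this one-method interface: construct once, then repeatedly pass the current weight vector and its gradient to call. A minimal usage sketch (compute_gradient is a placeholder for a model-specific gradient function):

require 'numo/narray'

optimizer = Rumale::Optimizer::AdaGrad.new(learning_rate: 0.01)
weight = Numo::DFloat.zeros(5)

100.times do
  gradient = compute_gradient(weight) # placeholder; returns a Numo::DFloat of shape [5]
  weight = optimizer.call(weight, gradient)
end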
data/lib/rumale/optimizer/adam.rb
DELETED
@@ -1,56 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  module Optimizer
-    # Adam is a class that implements Adam optimizer.
-    #
-    # @deprecated Adam will be deleted in version 0.20.0.
-    #
-    # *Reference*
-    # - Kingma, D P., and Ba, J., "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
-    class Adam
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with Adam.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param decay1 [Float] The smoothing parameter for the first moment.
-      # @param decay2 [Float] The smoothing parameter for the second moment.
-      def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
-        warn 'warning: Adam is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:decay1] = decay1
-        @params[:decay2] = decay2
-        @fst_moment = nil
-        @sec_moment = nil
-        @iter = 0
-      end
-
-      # Calculate the updated weight with Adam adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @fst_moment ||= Numo::DFloat.zeros(weight.shape)
-        @sec_moment ||= Numo::DFloat.zeros(weight.shape)
-
-        @iter += 1
-
-        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
-        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
-        nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
-        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
-
-        weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
-      end
-    end
-  end
-end
data/lib/rumale/optimizer/nadam.rb
DELETED
@@ -1,67 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
-  #
-  # @deprecated Optimizer module will be deleted in version 0.20.0.
-  module Optimizer
-    # Nadam is a class that implements Nadam optimizer.
-    #
-    # @deprecated Nadam will be deleted in version 0.20.0.
-    #
-    # *Reference*
-    # - Dozat, T., "Incorporating Nesterov Momentum into Adam," Tech. Repo. Stanford University, 2015.
-    class Nadam
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with Nadam.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param decay1 [Float] The smoothing parameter for the first moment.
-      # @param decay2 [Float] The smoothing parameter for the second moment.
-      def initialize(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
-        warn 'warning: Nadam is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:decay1] = decay1
-        @params[:decay2] = decay2
-        @fst_moment = nil
-        @sec_moment = nil
-        @decay1_prod = 1.0
-        @iter = 0
-      end
-
-      # Calculate the updated weight with Nadam adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
-        @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
-
-        @iter += 1
-
-        decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
-        decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
-        decay1_prod_curr = @decay1_prod * decay1_curr
-        decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
-        @decay1_prod = decay1_prod_curr
-
-        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
-        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
-        nm_gradient = gradient / (1.0 - decay1_prod_curr)
-        nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
-        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
-
-        weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
-      end
-    end
-  end
-end
data/lib/rumale/optimizer/rmsprop.rb
DELETED
@@ -1,50 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  module Optimizer
-    # RMSProp is a class that implements RMSProp optimizer.
-    #
-    # @deprecated RMSProp will be deleted in version 0.20.0.
-    #
-    # *Reference*
-    # - Sutskever, I., Martens, J., Dahl, G., and Hinton, G., "On the importance of initialization and momentum in deep learning," Proc. ICML' 13, pp. 1139--1147, 2013.
-    # - Hinton, G., Srivastava, N., and Swersky, K., "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
-    class RMSProp
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with RMSProp.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param momentum [Float] The initial value of momentum.
-      # @param decay [Float] The smoothing parameter.
-      def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.9)
-        warn 'warning: RMSProp is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay)
-        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:momentum] = momentum
-        @params[:decay] = decay
-        @moment = nil
-        @update = nil
-      end
-
-      # Calculate the updated weight with RMSProp adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @moment ||= Numo::DFloat.zeros(weight.shape[0])
-        @update ||= Numo::DFloat.zeros(weight.shape[0])
-        @moment = @params[:decay] * @moment + (1.0 - @params[:decay]) * gradient**2
-        @update = @params[:momentum] * @update - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
-        weight + @update
-      end
-    end
-  end
-end
data/lib/rumale/optimizer/sgd.rb
DELETED
@@ -1,46 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  module Optimizer
-    # SGD is a class that implements SGD optimizer.
-    #
-    # @deprecated SGD will be deleted in version 0.20.0.
-    class SGD
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with SGD.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param momentum [Float] The initial value of momentum.
-      # @param decay [Float] The smoothing parameter.
-      def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
-        warn 'warning: SGD is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay)
-        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:momentum] = momentum
-        @params[:decay] = decay
-        @iter = 0
-        @update = nil
-      end
-
-      # Calculate the updated weight with SGD.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @update ||= Numo::DFloat.zeros(weight.shape[0])
-        current_learning_rate = @params[:learning_rate] / (1.0 + @params[:decay] * @iter)
-        @iter += 1
-        @update = @params[:momentum] * @update - current_learning_rate * gradient
-        weight + @update
-      end
-    end
-  end
-end
data/lib/rumale/optimizer/yellow_fin.rb
DELETED
@@ -1,104 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-
-module Rumale
-  module Optimizer
-    # YellowFin is a class that implements YellowFin optimizer.
-    #
-    # @deprecated YellowFin will be deleted in version 0.20.0.
-    #
-    # *Reference*
-    # - Zhang, J., and Mitliagkas, I., "YellowFin and the Art of Momentum Tuning," CoRR abs/1706.03471, 2017.
-    class YellowFin
-      include Base::BaseEstimator
-      include Validation
-
-      # Create a new optimizer with YellowFin.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param momentum [Float] The initial value of momentum.
-      # @param decay [Float] The smoothing parameter.
-      # @param window_width [Integer] The sliding window width for searching curvature range.
-      def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.999, window_width: 20)
-        warn 'warning: YellowFin is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay, window_width: window_width)
-        check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay, window_width: window_width)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:momentum] = momentum
-        @params[:decay] = decay
-        @params[:window_width] = window_width
-        @smth_learning_rate = learning_rate
-        @smth_momentum = momentum
-        @grad_norms = nil
-        @grad_norm_min = 0.0
-        @grad_norm_max = 0.0
-        @grad_mean_sqr = 0.0
-        @grad_mean = 0.0
-        @grad_var = 0.0
-        @grad_norm_mean = 0.0
-        @curve_mean = 0.0
-        @distance_mean = 0.0
-        @update = nil
-      end
-
-      # Calculate the updated weight with adaptive momentum coefficient and learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @update ||= Numo::DFloat.zeros(weight.shape[0])
-        curvature_range(gradient)
-        gradient_variance(gradient)
-        distance_to_optimum(gradient)
-        @smth_momentum = @params[:decay] * @smth_momentum + (1 - @params[:decay]) * current_momentum
-        @smth_learning_rate = @params[:decay] * @smth_learning_rate + (1 - @params[:decay]) * current_learning_rate
-        @update = @smth_momentum * @update - @smth_learning_rate * gradient
-        weight + @update
-      end
-
-      private
-
-      def current_momentum
-        dr = Math.sqrt(@grad_norm_max / @grad_norm_min + 1.0e-8)
-        [cubic_root**2, ((dr - 1) / (dr + 1))**2].max
-      end
-
-      def current_learning_rate
-        (1.0 - Math.sqrt(@params[:momentum]))**2 / (@grad_norm_min + 1.0e-8)
-      end
-
-      def cubic_root
-        p = (@distance_mean**2 * @grad_norm_min**2) / (2 * @grad_var + 1.0e-8)
-        w3 = (-Math.sqrt(p**2 + 4.fdiv(27) * p**3) - p).fdiv(2)
-        w = (w3 >= 0.0 ? 1 : -1) * w3.abs**1.fdiv(3)
-        y = w - p / (3 * w + 1.0e-8)
-        y + 1
-      end
-
-      def curvature_range(gradient)
-        @grad_norms ||= []
-        @grad_norms.push((gradient**2).sum)
-        @grad_norms.shift(@grad_norms.size - @params[:window_width]) if @grad_norms.size > @params[:window_width]
-        @grad_norm_min = @params[:decay] * @grad_norm_min + (1 - @params[:decay]) * @grad_norms.min
-        @grad_norm_max = @params[:decay] * @grad_norm_max + (1 - @params[:decay]) * @grad_norms.max
-      end
-
-      def gradient_variance(gradient)
-        @grad_mean_sqr = @params[:decay] * @grad_mean_sqr + (1 - @params[:decay]) * gradient**2
-        @grad_mean = @params[:decay] * @grad_mean + (1 - @params[:decay]) * gradient
-        @grad_var = (@grad_mean_sqr - @grad_mean**2).sum
-      end
-
-      def distance_to_optimum(gradient)
-        grad_sqr = (gradient**2).sum
-        @grad_norm_mean = @params[:decay] * @grad_norm_mean + (1 - @params[:decay]) * Math.sqrt(grad_sqr + 1.0e-8)
-        @curve_mean = @params[:decay] * @curve_mean + (1 - @params[:decay]) * grad_sqr
-        @distance_mean = @params[:decay] * @distance_mean + (1 - @params[:decay]) * (@grad_norm_mean / @curve_mean)
-      end
-    end
-  end
-end
data/lib/rumale/polynomial_model/base_factorization_machine.rb
DELETED
@@ -1,125 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/base_estimator'
-require 'rumale/optimizer/nadam'
-
-module Rumale
-  # This module consists of the classes that implement polynomial models.
-  #
-  # @deprecated PolynomialModel module will be deleted in version 0.20.0.
-  module PolynomialModel
-    # BaseFactorizationMachine is an abstract class for implementation of Factorization Machine-based estimators.
-    # This class is used internally.
-    #
-    # @deprecated BaseFactorizationMachine will be deleted in version 0.20.0.
-    class BaseFactorizationMachine
-      include Base::BaseEstimator
-
-      # Initialize a Factorization Machine-based estimator.
-      #
-      # @param n_factors [Integer] The number of factors.
-      # @param loss [String] The loss function ('hinge' or 'logistic' or nil).
-      # @param reg_param_linear [Float] The regularization parameter for linear model.
-      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param max_iter [Integer] The maximum number of epochs that indicates
-      #   how many times the whole data is given to the training process.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param tol [Float] The tolerance of loss for terminating optimization.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-      #   If nil is given, the methods do not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
-      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, loss: nil, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
-                     optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-        @params = {}
-        @params[:n_factors] = n_factors
-        @params[:loss] = loss unless loss.nil?
-        @params[:reg_param_linear] = reg_param_linear
-        @params[:reg_param_factor] = reg_param_factor
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:tol] = tol
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Optimizer::Nadam.new
-        @params[:n_jobs] = n_jobs
-        @params[:verbose] = verbose
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @factor_mat = nil
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-
-      private
-
-      def partial_fit(x, y)
-        # Initialize some variables.
-        class_name = self.class.to_s.split('::').last if @params[:verbose]
-        n_samples, n_features = x.shape
-        sub_rng = @rng.dup
-        weight_vec = Numo::DFloat.zeros(n_features + 1)
-        factor_mat = Rumale::Utils.rand_normal([@params[:n_factors], n_features], sub_rng)
-        weight_optimizer = @params[:optimizer].dup
-        factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
-        # Start optimization.
-        @params[:max_iter].times do |t|
-          sample_ids = Array(0...n_samples)
-          sample_ids.shuffle!(random: sub_rng)
-          until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
-            # Sampling.
-            sub_x = x[subset_ids, true]
-            sub_y = y[subset_ids]
-            ex_sub_x = expand_feature(sub_x)
-            # Calculate gradients for loss function.
-            loss_grad = loss_gradient(sub_x, ex_sub_x, sub_y, factor_mat, weight_vec)
-            next if loss_grad.ne(0.0).count.zero?
-
-            # Update each parameter.
-            weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_sub_x, weight_vec))
-            @params[:n_factors].times do |n|
-              factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
-                                                              factor_gradient(loss_grad, sub_x, factor_mat[n, true]))
-            end
-          end
-          loss = loss_func(x, expand_feature(x), y, factor_mat, weight_vec)
-          puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
-          break if loss < @params[:tol]
-        end
-        [factor_mat, *split_weight_vec_bias(weight_vec)]
-      end
-
-      def loss_func(_x, _expanded_x, _y, _factor, _weight)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-
-      def loss_gradient(_x, _expanded_x, _y, _factor, _weight)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-
-      def weight_gradient(loss_grad, data, weight)
-        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
-      end
-
-      def factor_gradient(loss_grad, data, factor)
-        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) +
-          @params[:reg_param_factor] * factor
-      end
-
-      def expand_feature(x)
-        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
-      end
-
-      def split_weight_vec_bias(weight_vec)
-        weights = weight_vec[0...-1].dup
-        bias = weight_vec[-1]
-        [weights, bias]
-      end
-    end
-  end
-end
data/lib/rumale/polynomial_model/factorization_machine_classifier.rb
DELETED
@@ -1,220 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/classifier'
-require 'rumale/polynomial_model/base_factorization_machine'
-
-module Rumale
-  # This module consists of the classes that implement polynomial models.
-  module PolynomialModel
-    # FactorizationMachineClassifier is a class that implements Factorization Machine
-    # with stochastic gradient descent (SGD) optimization.
-    # For multiclass classification problem, it uses one-vs-the-rest strategy.
-    #
-    # @deprecated
-    #   FactorizationMachineClassifier will be deleted in version 0.20.0.
-    #   The Rumale author recommends using the xlearn gem instead.
-    #
-    # @example
-    #   estimator =
-    #     Rumale::PolynomialModel::FactorizationMachineClassifier.new(
-    #       n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
-    #       max_iter: 500, batch_size: 50, random_seed: 1)
-    #   estimator.fit(training_samples, training_labels)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Rendle, S., "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
-    # - Rendle, S., "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
-    class FactorizationMachineClassifier < BaseFactorizationMachine
-      include Base::Classifier
-
-      # Return the factor matrix for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
-      attr_reader :factor_mat
-
-      # Return the weight vector for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_classes, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_classes])
-      attr_reader :bias_term
-
-      # Return the class labels.
-      # @return [Numo::Int32] (shape: [n_classes])
-      attr_reader :classes
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new classifier with Factorization Machine.
-      #
-      # @param n_factors [Integer] The number of factors.
-      # @param loss [String] The loss function ('hinge' or 'logistic').
-      # @param reg_param_linear [Float] The regularization parameter for linear model.
-      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param max_iter [Integer] The maximum number of epochs that indicates
-      #   how many times the whole data is given to the training process.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param tol [Float] The tolerance of loss for terminating optimization.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-      #   If nil is given, the methods do not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
-      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
-                     optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-        warn 'warning: FactorizationMachineClassifier is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size, tol: tol)
-        check_params_string(loss: loss)
-        check_params_boolean(verbose: verbose)
-        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
-        check_params_positive(n_factors: n_factors,
-                              reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                              max_iter: max_iter, batch_size: batch_size)
-        super
-        @classes = nil
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-      # @return [FactorizationMachineClassifier] The learned classifier itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_label_array(y)
-        check_sample_label_size(x, y)
-
-        @classes = Numo::Int32[*y.to_a.uniq.sort]
-
-        if multiclass_problem?
-          n_classes = @classes.size
-          n_features = x.shape[1]
-          @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          if enable_parallel?
-            # :nocov:
-            models = parallel_map(n_classes) do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              partial_fit(x, bin_y)
-            end
-            # :nocov:
-            n_classes.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_classes.times do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            end
-          end
-        else
-          negative_label = @classes[0]
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @factor_mat, @weight_vec, @bias_term = partial_fit(x, bin_y)
-        end
-
-        self
-      end
-
-      # Calculate confidence scores for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-      # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
-      def decision_function(x)
-        x = check_convert_sample_array(x)
-        linear_term = @bias_term + x.dot(@weight_vec.transpose)
-        factor_term = if multiclass_problem?
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
-                      else
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                      end
-        linear_term + factor_term
-      end
-
-      # Predict class labels for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-      # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-
-        n_samples = x.shape[0]
-        predicted = if multiclass_problem?
-                      decision_values = decision_function(x)
-                      if enable_parallel?
-                        parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
-                      else
-                        Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
-                      end
-                    else
-                      decision_values = decision_function(x).ge(0.0).to_a
-                      Array.new(n_samples) { |n| @classes[decision_values[n]] }
-                    end
-        Numo::Int32.asarray(predicted)
-      end
-
-      # Predict probability for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
-      # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-      def predict_proba(x)
-        x = check_convert_sample_array(x)
-        proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-        return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
-
-        n_samples, = x.shape
-        probs = Numo::DFloat.zeros(n_samples, 2)
-        probs[true, 1] = proba
-        probs[true, 0] = 1.0 - proba
-        probs
-      end
-
-      private
-
-      def bin_decision_function(x, ex_x, factor, weight)
-        ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-      end
-
-      def loss_func(x, ex_x, y, factor, weight)
-        z = bin_decision_function(x, ex_x, factor, weight)
-        if @params[:loss] == 'hinge'
-          z.class.maximum(0.0, 1 - y * z).sum.fdiv(y.shape[0])
-        else
-          Numo::NMath.log(1 + Numo::NMath.exp(-y * z)).sum.fdiv(y.shape[0])
-        end
-      end
-
-      def hinge_loss_gradient(x, ex_x, y, factor, weight)
-        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
-        gradient = Numo::DFloat.zeros(evaluated.size)
-        gradient[evaluated < 1.0] = -y[evaluated < 1.0]
-        gradient
-      end
-
-      def logistic_loss_gradient(x, ex_x, y, factor, weight)
-        evaluated = y * bin_decision_function(x, ex_x, factor, weight)
-        sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
-        (sigmoid_func - 1.0) * y
-      end
-
-      def loss_gradient(x, ex_x, y, factor, weight)
-        if @params[:loss] == 'hinge'
-          hinge_loss_gradient(x, ex_x, y, factor, weight)
-        else
-          logistic_loss_gradient(x, ex_x, y, factor, weight)
-        end
-      end
-
-      def multiclass_problem?
-        @classes.size > 2
-      end
-    end
-  end
-end
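For reference, bin_decision_function above computes the usual degree-2 factorization machine score (restated here in standard notation rather than quoted from the source); with linear weights w, bias w_0, and factor matrix V of shape [k, n]:

\hat{y}(\mathbf{x}) = w_0 + \sum_{i=1}^{n} w_i x_i + \frac{1}{2} \sum_{f=1}^{k} \left[ \Bigl( \sum_{i=1}^{n} v_{f,i} x_i \Bigr)^2 - \sum_{i=1}^{n} v_{f,i}^2 x_i^2 \right]

The bracketed term is what `0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)` evaluates per sample, and the hinge or logistic loss gradients below are taken with respect to this score.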
data/lib/rumale/polynomial_model/factorization_machine_regressor.rb
DELETED
@@ -1,134 +0,0 @@
-# frozen_string_literal: true
-
-require 'rumale/base/regressor'
-require 'rumale/polynomial_model/base_factorization_machine'
-
-module Rumale
-  module PolynomialModel
-    # FactorizationMachineRegressor is a class that implements Factorization Machine
-    # with stochastic gradient descent (SGD) optimization.
-    #
-    # @deprecated
-    #   FactorizationMachineRegressor will be deleted in version 0.20.0.
-    #   The Rumale author recommends using the xlearn gem instead.
-    #
-    # @example
-    #   estimator =
-    #     Rumale::PolynomialModel::FactorizationMachineRegressor.new(
-    #       n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
-    #       max_iter: 500, batch_size: 50, random_seed: 1)
-    #   estimator.fit(training_samples, training_values)
-    #   results = estimator.predict(testing_samples)
-    #
-    # *Reference*
-    # - Rendle, S., "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
-    # - Rendle, S., "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
-    class FactorizationMachineRegressor < BaseFactorizationMachine
-      include Base::Regressor
-
-      # Return the factor matrix for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_factors, n_features])
-      attr_reader :factor_mat
-
-      # Return the weight vector for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-      attr_reader :weight_vec
-
-      # Return the bias term for Factorization Machine.
-      # @return [Numo::DFloat] (shape: [n_outputs])
-      attr_reader :bias_term
-
-      # Return the random generator for random sampling.
-      # @return [Random]
-      attr_reader :rng
-
-      # Create a new regressor with Factorization Machine.
-      #
-      # @param n_factors [Integer] The number of factors.
-      # @param reg_param_linear [Float] The regularization parameter for linear model.
-      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-      # @param max_iter [Integer] The maximum number of epochs that indicates
-      #   how many times the whole data is given to the training process.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param tol [Float] The tolerance of loss for terminating optimization.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-      #   If nil is given, the method does not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
-      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
-                     optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-        warn 'warning: FactorizationMachineRegressor is deprecated. This class will be deleted in version 0.20.0.'
-        check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                             n_factors: n_factors, max_iter: max_iter, batch_size: batch_size, tol: tol)
-        check_params_boolean(verbose: verbose)
-        check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
-        check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                              max_iter: max_iter, batch_size: batch_size)
-        keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
-        super(**keywd_args)
-      end
-
-      # Fit the model with given training data.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-      # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-      # @return [FactorizationMachineRegressor] The learned regressor itself.
-      def fit(x, y)
-        x = check_convert_sample_array(x)
-        y = check_convert_tvalue_array(y)
-        check_sample_tvalue_size(x, y)
-
-        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-        _n_samples, n_features = x.shape
-
-        if n_outputs > 1
-          @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
-          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-          @bias_term = Numo::DFloat.zeros(n_outputs)
-          if enable_parallel?
-            models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-            n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-          end
-        else
-          @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
-        end
-
-        self
-      end
-
-      # Predict values for samples.
-      #
-      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-      def predict(x)
-        x = check_convert_sample_array(x)
-        linear_term = @bias_term + x.dot(@weight_vec.transpose)
-        factor_term = if @weight_vec.shape[1].nil?
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                      else
-                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
-                      end
-        linear_term + factor_term
-      end
-
-      private
-
-      def loss_func(x, ex_x, y, factor, weight)
-        z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-        ((z - y)**2).sum.fdiv(y.shape[0])
-      end
-
-      def loss_gradient(x, ex_x, y, factor, weight)
-        z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-        2.0 * (z - y)
-      end
-    end
-  end
-end