rumale 0.18.7 → 0.20.0

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +66 -1
  3. data/CHANGELOG.md +46 -0
  4. data/Gemfile +2 -0
  5. data/README.md +5 -36
  6. data/lib/rumale.rb +5 -10
  7. data/lib/rumale/clustering/hdbscan.rb +1 -1
  8. data/lib/rumale/clustering/k_means.rb +1 -1
  9. data/lib/rumale/clustering/k_medoids.rb +1 -1
  10. data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
  11. data/lib/rumale/dataset.rb +3 -3
  12. data/lib/rumale/decomposition/pca.rb +23 -5
  13. data/lib/rumale/feature_extraction/feature_hasher.rb +14 -1
  14. data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
  15. data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
  16. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  17. data/lib/rumale/linear_model/base_sgd.rb +1 -1
  18. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +13 -1
  19. data/lib/rumale/model_selection/cross_validation.rb +3 -2
  20. data/lib/rumale/model_selection/k_fold.rb +1 -1
  21. data/lib/rumale/model_selection/shuffle_split.rb +1 -1
  22. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
  23. data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
  24. data/lib/rumale/neural_network/adam.rb +1 -1
  25. data/lib/rumale/neural_network/base_mlp.rb +1 -1
  26. data/lib/rumale/preprocessing/binarizer.rb +60 -0
  27. data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
  28. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
  29. data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
  30. data/lib/rumale/version.rb +1 -1
  31. data/rumale.gemspec +1 -3
  32. metadata +11 -44
  33. data/lib/rumale/linear_model/base_linear_model.rb +0 -101
  34. data/lib/rumale/optimizer/ada_grad.rb +0 -39
  35. data/lib/rumale/optimizer/adam.rb +0 -53
  36. data/lib/rumale/optimizer/nadam.rb +0 -62
  37. data/lib/rumale/optimizer/rmsprop.rb +0 -47
  38. data/lib/rumale/optimizer/sgd.rb +0 -43
  39. data/lib/rumale/optimizer/yellow_fin.rb +0 -101
  40. data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -121
  41. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -215
  42. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -129
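
The largest removals in this release are the Rumale::Optimizer and Rumale::PolynomialModel namespaces (files 33-42 above). As an illustration only, code like the following, which is valid against 0.18.x and uses only classes shown removed in this diff, stops loading on 0.20.0 because both constants are gone; the sample data values here are placeholders.

    # Works on rumale 0.18.x; raises NameError on 0.20.0 (both classes below were removed).
    require 'rumale'

    x = Numo::DFloat[[0.0, 1.0], [1.0, 0.0], [0.9, 0.1], [0.1, 0.9]]  # toy samples
    y = Numo::Int32[0, 1, 1, 0]                                       # toy labels

    estimator = Rumale::PolynomialModel::FactorizationMachineClassifier.new(
      n_factors: 4, loss: 'logistic',
      optimizer: Rumale::Optimizer::Nadam.new(learning_rate: 0.01),
      random_seed: 1
    )
    estimator.fit(x, y)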
@@ -1,62 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/validation'
- require 'rumale/base/base_estimator'
-
- module Rumale
-   # This module consists of the classes that implement optimizers adaptively tuning hyperparameters.
-   module Optimizer
-     # Nadam is a class that implements Nadam optimizer.
-     #
-     # *Reference*
-     # - Dozat, T., "Incorporating Nesterov Momentum into Adam," Tech. Repo. Stanford University, 2015.
-     class Nadam
-       include Base::BaseEstimator
-       include Validation
-
-       # Create a new optimizer with Nadam
-       #
-       # @param learning_rate [Float] The initial value of learning rate.
-       # @param decay1 [Float] The smoothing parameter for the first moment.
-       # @param decay2 [Float] The smoothing parameter for the second moment.
-       def initialize(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
-         check_params_numeric(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-         check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-         @params = {}
-         @params[:learning_rate] = learning_rate
-         @params[:decay1] = decay1
-         @params[:decay2] = decay2
-         @fst_moment = nil
-         @sec_moment = nil
-         @decay1_prod = 1.0
-         @iter = 0
-       end
-
-       # Calculate the updated weight with Nadam adaptive learning rate.
-       #
-       # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-       # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-       # @return [Numo::DFloat] (shape: [n_feautres]) The updated weight.
-       def call(weight, gradient)
-         @fst_moment ||= Numo::DFloat.zeros(weight.shape[0])
-         @sec_moment ||= Numo::DFloat.zeros(weight.shape[0])
-
-         @iter += 1
-
-         decay1_curr = @params[:decay1] * (1.0 - 0.5 * 0.96**(@iter * 0.004))
-         decay1_next = @params[:decay1] * (1.0 - 0.5 * 0.96**((@iter + 1) * 0.004))
-         decay1_prod_curr = @decay1_prod * decay1_curr
-         decay1_prod_next = @decay1_prod * decay1_curr * decay1_next
-         @decay1_prod = decay1_prod_curr
-
-         @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
-         @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
-         nm_gradient = gradient / (1.0 - decay1_prod_curr)
-         nm_fst_moment = @fst_moment / (1.0 - decay1_prod_next)
-         nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
-
-         weight - (@params[:learning_rate] / (nm_sec_moment**0.5 + 1e-8)) * ((1 - decay1_curr) * nm_gradient + decay1_next * nm_fst_moment)
-       end
-     end
-   end
- end
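
Each of the removed optimizer classes exposes the same small surface shown above: keyword-argument construction plus call(weight, gradient), which returns the updated weight vector. A minimal sketch of driving that interface by hand against a toy quadratic objective (assumes a rumale release prior to 0.20.0, where Rumale::Optimizer::Nadam still exists; the objective and values are illustrative):

    # Hand-rolled gradient steps with the removed Nadam optimizer (illustrative only).
    require 'rumale'

    optimizer = Rumale::Optimizer::Nadam.new(learning_rate: 0.01, decay1: 0.9, decay2: 0.999)
    target = Numo::DFloat[1.0, -2.0, 0.5]
    weight = Numo::DFloat.zeros(3)

    200.times do
      gradient = 2.0 * (weight - target)         # gradient of ||w - target||^2
      weight = optimizer.call(weight, gradient)  # returns the updated weight vector
    end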
@@ -1,47 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/validation'
- require 'rumale/base/base_estimator'
-
- module Rumale
-   module Optimizer
-     # RMSProp is a class that implements RMSProp optimizer.
-     #
-     # *Reference*
-     # - Sutskever, I., Martens, J., Dahl, G., and Hinton, G., "On the importance of initialization and momentum in deep learning," Proc. ICML' 13, pp. 1139--1147, 2013.
-     # - Hinton, G., Srivastava, N., and Swersky, K., "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
-     class RMSProp
-       include Base::BaseEstimator
-       include Validation
-
-       # Create a new optimizer with RMSProp.
-       #
-       # @param learning_rate [Float] The initial value of learning rate.
-       # @param momentum [Float] The initial value of momentum.
-       # @param decay [Float] The smooting parameter.
-       def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.9)
-         check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay)
-         check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
-         @params = {}
-         @params[:learning_rate] = learning_rate
-         @params[:momentum] = momentum
-         @params[:decay] = decay
-         @moment = nil
-         @update = nil
-       end
-
-       # Calculate the updated weight with RMSProp adaptive learning rate.
-       #
-       # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-       # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-       # @return [Numo::DFloat] (shape: [n_feautres]) The updated weight.
-       def call(weight, gradient)
-         @moment ||= Numo::DFloat.zeros(weight.shape[0])
-         @update ||= Numo::DFloat.zeros(weight.shape[0])
-         @moment = @params[:decay] * @moment + (1.0 - @params[:decay]) * gradient**2
-         @update = @params[:momentum] * @update - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
-         weight + @update
-       end
-     end
-   end
- end
@@ -1,43 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/validation'
- require 'rumale/base/base_estimator'
-
- module Rumale
-   module Optimizer
-     # SGD is a class that implements SGD optimizer.
-     class SGD
-       include Base::BaseEstimator
-       include Validation
-
-       # Create a new optimizer with SGD.
-       #
-       # @param learning_rate [Float] The initial value of learning rate.
-       # @param momentum [Float] The initial value of momentum.
-       # @param decay [Float] The smooting parameter.
-       def initialize(learning_rate: 0.01, momentum: 0.0, decay: 0.0)
-         check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay)
-         check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay)
-         @params = {}
-         @params[:learning_rate] = learning_rate
-         @params[:momentum] = momentum
-         @params[:decay] = decay
-         @iter = 0
-         @update = nil
-       end
-
-       # Calculate the updated weight with SGD.
-       #
-       # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-       # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-       # @return [Numo::DFloat] (shape: [n_feautres]) The updated weight.
-       def call(weight, gradient)
-         @update ||= Numo::DFloat.zeros(weight.shape[0])
-         current_learning_rate = @params[:learning_rate] / (1.0 + @params[:decay] * @iter)
-         @iter += 1
-         @update = @params[:momentum] * @update - current_learning_rate * gradient
-         weight + @update
-       end
-     end
-   end
- end
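
The step size that SGD#call applies at iteration t (counting from zero) is learning_rate / (1.0 + decay * t), so with decay left at its default of 0.0 the schedule is constant. A quick worked check of that schedule, using an assumed decay of 0.01 purely for illustration:

    lr = 0.01
    decay = 0.01
    [0, 10, 100].map { |t| lr / (1.0 + decay * t) }
    # => [0.01, 0.00909..., 0.005]  (the step size halves by t = 100)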
@@ -1,101 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/validation'
- require 'rumale/base/base_estimator'
-
- module Rumale
-   module Optimizer
-     # YellowFin is a class that implements YellowFin optimizer.
-     #
-     # *Reference*
-     # - Zhang, J., and Mitliagkas, I., "YellowFin and the Art of Momentum Tuning," CoRR abs/1706.03471, 2017.
-     class YellowFin
-       include Base::BaseEstimator
-       include Validation
-
-       # Create a new optimizer with YellowFin.
-       #
-       # @param learning_rate [Float] The initial value of learning rate.
-       # @param momentum [Float] The initial value of momentum.
-       # @param decay [Float] The smooting parameter.
-       # @param window_width [Integer] The sliding window width for searching curvature range.
-       def initialize(learning_rate: 0.01, momentum: 0.9, decay: 0.999, window_width: 20)
-         check_params_numeric(learning_rate: learning_rate, momentum: momentum, decay: decay, window_width: window_width)
-         check_params_positive(learning_rate: learning_rate, momentum: momentum, decay: decay, window_width: window_width)
-         @params = {}
-         @params[:learning_rate] = learning_rate
-         @params[:momentum] = momentum
-         @params[:decay] = decay
-         @params[:window_width] = window_width
-         @smth_learning_rate = learning_rate
-         @smth_momentum = momentum
-         @grad_norms = nil
-         @grad_norm_min = 0.0
-         @grad_norm_max = 0.0
-         @grad_mean_sqr = 0.0
-         @grad_mean = 0.0
-         @grad_var = 0.0
-         @grad_norm_mean = 0.0
-         @curve_mean = 0.0
-         @distance_mean = 0.0
-         @update = nil
-       end
-
-       # Calculate the updated weight with adaptive momentum coefficient and learning rate.
-       #
-       # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-       # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-       # @return [Numo::DFloat] (shape: [n_feautres]) The updated weight.
-       def call(weight, gradient)
-         @update ||= Numo::DFloat.zeros(weight.shape[0])
-         curvature_range(gradient)
-         gradient_variance(gradient)
-         distance_to_optimum(gradient)
-         @smth_momentum = @params[:decay] * @smth_momentum + (1 - @params[:decay]) * current_momentum
-         @smth_learning_rate = @params[:decay] * @smth_learning_rate + (1 - @params[:decay]) * current_learning_rate
-         @update = @smth_momentum * @update - @smth_learning_rate * gradient
-         weight + @update
-       end
-
-       private
-
-       def current_momentum
-         dr = Math.sqrt(@grad_norm_max / @grad_norm_min + 1.0e-8)
-         [cubic_root**2, ((dr - 1) / (dr + 1))**2].max
-       end
-
-       def current_learning_rate
-         (1.0 - Math.sqrt(@params[:momentum]))**2 / (@grad_norm_min + 1.0e-8)
-       end
-
-       def cubic_root
-         p = (@distance_mean**2 * @grad_norm_min**2) / (2 * @grad_var + 1.0e-8)
-         w3 = (-Math.sqrt(p**2 + 4.fdiv(27) * p**3) - p).fdiv(2)
-         w = (w3 >= 0.0 ? 1 : -1) * w3.abs**1.fdiv(3)
-         y = w - p / (3 * w + 1.0e-8)
-         y + 1
-       end
-
-       def curvature_range(gradient)
-         @grad_norms ||= []
-         @grad_norms.push((gradient**2).sum)
-         @grad_norms.shift(@grad_norms.size - @params[:window_width]) if @grad_norms.size > @params[:window_width]
-         @grad_norm_min = @params[:decay] * @grad_norm_min + (1 - @params[:decay]) * @grad_norms.min
-         @grad_norm_max = @params[:decay] * @grad_norm_max + (1 - @params[:decay]) * @grad_norms.max
-       end
-
-       def gradient_variance(gradient)
-         @grad_mean_sqr = @params[:decay] * @grad_mean_sqr + (1 - @params[:decay]) * gradient**2
-         @grad_mean = @params[:decay] * @grad_mean + (1 - @params[:decay]) * gradient
-         @grad_var = (@grad_mean_sqr - @grad_mean**2).sum
-       end
-
-       def distance_to_optimum(gradient)
-         grad_sqr = (gradient**2).sum
-         @grad_norm_mean = @params[:decay] * @grad_norm_mean + (1 - @params[:decay]) * Math.sqrt(grad_sqr + 1.0e-8)
-         @curve_mean = @params[:decay] * @curve_mean + (1 - @params[:decay]) * grad_sqr
-         @distance_mean = @params[:decay] * @distance_mean + (1 - @params[:decay]) * (@grad_norm_mean / @curve_mean)
-       end
-     end
-   end
- end
@@ -1,121 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/base_estimator'
- require 'rumale/optimizer/nadam'
-
- module Rumale
-   # This module consists of the classes that implement polynomial models.
-   module PolynomialModel
-     # BaseFactorizationMachine is an abstract class for implementation of Factorization Machine-based estimators.
-     # This class is used internally.
-     class BaseFactorizationMachine
-       include Base::BaseEstimator
-
-       # Initialize a Factorization Machine-based estimator.
-       #
-       # @param n_factors [Integer] The maximum number of iterations.
-       # @param loss [String] The loss function ('hinge' or 'logistic' or nil).
-       # @param reg_param_linear [Float] The regularization parameter for linear model.
-       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-       # @param max_iter [Integer] The maximum number of epochs that indicates
-       #   how many times the whole data is given to the training process.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param tol [Float] The tolerance of loss for terminating optimization.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-       #   If nil is given, the methods do not execute in parallel.
-       #   If zero or less is given, it becomes equal to the number of processors.
-       #   This parameter is ignored if the Parallel gem is not loaded.
-       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-       # @param random_seed [Integer] The seed value using to initialize the random generator.
-       def initialize(n_factors: 2, loss: nil, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                      max_iter: 200, batch_size: 50, tol: 1e-4,
-                      optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-         @params = {}
-         @params[:n_factors] = n_factors
-         @params[:loss] = loss unless loss.nil?
-         @params[:reg_param_linear] = reg_param_linear
-         @params[:reg_param_factor] = reg_param_factor
-         @params[:max_iter] = max_iter
-         @params[:batch_size] = batch_size
-         @params[:tol] = tol
-         @params[:optimizer] = optimizer
-         @params[:optimizer] ||= Optimizer::Nadam.new
-         @params[:n_jobs] = n_jobs
-         @params[:verbose] = verbose
-         @params[:random_seed] = random_seed
-         @params[:random_seed] ||= srand
-         @factor_mat = nil
-         @weight_vec = nil
-         @bias_term = nil
-         @rng = Random.new(@params[:random_seed])
-       end
-
-       private
-
-       def partial_fit(x, y)
-         # Initialize some variables.
-         class_name = self.class.to_s.split('::').last if @params[:verbose]
-         n_samples, n_features = x.shape
-         sub_rng = @rng.dup
-         weight_vec = Numo::DFloat.zeros(n_features + 1)
-         factor_mat = Rumale::Utils.rand_normal([@params[:n_factors], n_features], sub_rng)
-         weight_optimizer = @params[:optimizer].dup
-         factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
-         # Start optimization.
-         @params[:max_iter].times do |t|
-           sample_ids = [*0...n_samples]
-           sample_ids.shuffle!(random: sub_rng)
-           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
-             # Sampling.
-             sub_x = x[subset_ids, true]
-             sub_y = y[subset_ids]
-             ex_sub_x = expand_feature(sub_x)
-             # Calculate gradients for loss function.
-             loss_grad = loss_gradient(sub_x, ex_sub_x, sub_y, factor_mat, weight_vec)
-             next if loss_grad.ne(0.0).count.zero?
-
-             # Update each parameter.
-             weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_sub_x, weight_vec))
-             @params[:n_factors].times do |n|
-               factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
-                                                               factor_gradient(loss_grad, sub_x, factor_mat[n, true]))
-             end
-           end
-           loss = loss_func(x, expand_feature(x), y, factor_mat, weight_vec)
-           puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
-           break if loss < @params[:tol]
-         end
-         [factor_mat, *split_weight_vec_bias(weight_vec)]
-       end
-
-       def loss_func(_x, _expanded_x, _y, _factor, _weight)
-         raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-       end
-
-       def loss_gradient(_x, _expanded_x, _y, _factor, _weight)
-         raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-       end
-
-       def weight_gradient(loss_grad, data, weight)
-         (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
-       end
-
-       def factor_gradient(loss_grad, data, factor)
-         (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) +
-           @params[:reg_param_factor] * factor
-       end
-
-       def expand_feature(x)
-         Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
-       end
-
-       def split_weight_vec_bias(weight_vec)
-         weights = weight_vec[0...-1].dup
-         bias = weight_vec[-1]
-         [weights, bias]
-       end
-     end
-   end
- end
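
partial_fit above folds the bias term into the weight vector by appending a constant-one column to each mini batch (expand_feature) and recovers the weights and bias afterwards with split_weight_vec_bias. A small sketch of that bookkeeping in isolation, with made-up values:

    # Illustration of the bias-column trick used by partial_fit.
    require 'numo/narray'

    x = Numo::DFloat[[1.0, 2.0], [3.0, 4.0]]
    ex_x = Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
    # ex_x => [[1.0, 2.0, 1.0], [3.0, 4.0, 1.0]]

    weight_vec = Numo::DFloat[0.5, -0.25, 0.1]  # last element acts as the bias term
    weights = weight_vec[0...-1].dup            # => Numo::DFloat[0.5, -0.25]
    bias = weight_vec[-1]                       # => 0.1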
@@ -1,215 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/classifier'
- require 'rumale/polynomial_model/base_factorization_machine'
-
- module Rumale
-   # This module consists of the classes that implement polynomial models.
-   module PolynomialModel
-     # FactorizationMachineClassifier is a class that implements Factorization Machine
-     # with stochastic gradient descent (SGD) optimization.
-     # For multiclass classification problem, it uses one-vs-the-rest strategy.
-     #
-     # @example
-     #   estimator =
-     #     Rumale::PolynomialModel::FactorizationMachineClassifier.new(
-     #      n_factors: 10, loss: 'hinge', reg_param_linear: 0.001, reg_param_factor: 0.001,
-     #      max_iter: 500, batch_size: 50, random_seed: 1)
-     #   estimator.fit(training_samples, traininig_labels)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # - Rendle, S., "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
-     # - Rendle, S., "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
-     class FactorizationMachineClassifier < BaseFactorizationMachine
-       include Base::Classifier
-
-       # Return the factor matrix for Factorization Machine.
-       # @return [Numo::DFloat] (shape: [n_classes, n_factors, n_features])
-       attr_reader :factor_mat
-
-       # Return the weight vector for Factorization Machine.
-       # @return [Numo::DFloat] (shape: [n_classes, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term for Factoriazation Machine.
-       # @return [Numo::DFloat] (shape: [n_classes])
-       attr_reader :bias_term
-
-       # Return the class labels.
-       # @return [Numo::Int32] (shape: [n_classes])
-       attr_reader :classes
-
-       # Return the random generator for random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new classifier with Factorization Machine.
-       #
-       # @param n_factors [Integer] The maximum number of iterations.
-       # @param loss [String] The loss function ('hinge' or 'logistic').
-       # @param reg_param_linear [Float] The regularization parameter for linear model.
-       # @param reg_param_factor [Float] The regularization parameter for factor matrix.
-       # @param max_iter [Integer] The maximum number of epochs that indicates
-       #   how many times the whole data is given to the training process.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param tol [Float] The tolerance of loss for terminating optimization.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-       #   If nil is given, the methods do not execute in parallel.
-       #   If zero or less is given, it becomes equal to the number of processors.
-       #   This parameter is ignored if the Parallel gem is not loaded.
-       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-       # @param random_seed [Integer] The seed value using to initialize the random generator.
-       def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
-                      max_iter: 200, batch_size: 50, tol: 1e-4,
-                      optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-         check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                              n_factors: n_factors, max_iter: max_iter, batch_size: batch_size, tol: tol)
-         check_params_string(loss: loss)
-         check_params_boolean(verbose: verbose)
-         check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
-         check_params_positive(n_factors: n_factors,
-                               reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                               max_iter: max_iter, batch_size: batch_size)
-         super
-         @classes = nil
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
-       # @return [FactorizationMachineClassifier] The learned classifier itself.
-       def fit(x, y)
-         x = check_convert_sample_array(x)
-         y = check_convert_label_array(y)
-         check_sample_label_size(x, y)
-
-         @classes = Numo::Int32[*y.to_a.uniq.sort]
-
-         if multiclass_problem?
-           n_classes = @classes.size
-           n_features = x.shape[1]
-           @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
-           @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-           @bias_term = Numo::DFloat.zeros(n_classes)
-           if enable_parallel?
-             # :nocov:
-             models = parallel_map(n_classes) do |n|
-               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-               partial_fit(x, bin_y)
-             end
-             # :nocov:
-             n_classes.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
-           else
-             n_classes.times do |n|
-               bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-               @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-             end
-           end
-         else
-           negative_label = @classes[0]
-           bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-           @factor_mat, @weight_vec, @bias_term = partial_fit(x, bin_y)
-         end
-
-         self
-       end
-
-       # Calculate confidence scores for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
-       # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
-       def decision_function(x)
-         x = check_convert_sample_array(x)
-         linear_term = @bias_term + x.dot(@weight_vec.transpose)
-         factor_term = if multiclass_problem?
-                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
-                       else
-                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                       end
-         linear_term + factor_term
-       end
-
-       # Predict class labels for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
-       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
-       def predict(x)
-         x = check_convert_sample_array(x)
-
-         n_samples = x.shape[0]
-         predicted = if multiclass_problem?
-                       decision_values = decision_function(x)
-                       if enable_parallel?
-                         parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
-                       else
-                         Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
-                       end
-                     else
-                       decision_values = decision_function(x).ge(0.0).to_a
-                       Array.new(n_samples) { |n| @classes[decision_values[n]] }
-                     end
-         Numo::Int32.asarray(predicted)
-       end
-
-       # Predict probability for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
-       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
-       def predict_proba(x)
-         x = check_convert_sample_array(x)
-         proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
-         return (proba.transpose / proba.sum(axis: 1)).transpose.dup if multiclass_problem?
-
-         n_samples, = x.shape
-         probs = Numo::DFloat.zeros(n_samples, 2)
-         probs[true, 1] = proba
-         probs[true, 0] = 1.0 - proba
-         probs
-       end
-
-       private
-
-       def bin_decision_function(x, ex_x, factor, weight)
-         ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-       end
-
-       def loss_func(x, ex_x, y, factor, weight)
-         z = bin_decision_function(x, ex_x, factor, weight)
-         if @params[:loss] == 'hinge'
-           z.class.maximum(0.0, 1 - y * z).sum.fdiv(y.shape[0])
-         else
-           Numo::NMath.log(1 + Numo::NMath.exp(-y * z)).sum.fdiv(y.shape[0])
-         end
-       end
-
-       def hinge_loss_gradient(x, ex_x, y, factor, weight)
-         evaluated = y * bin_decision_function(x, ex_x, factor, weight)
-         gradient = Numo::DFloat.zeros(evaluated.size)
-         gradient[evaluated < 1.0] = -y[evaluated < 1.0]
-         gradient
-       end
-
-       def logistic_loss_gradient(x, ex_x, y, factor, weight)
-         evaluated = y * bin_decision_function(x, ex_x, factor, weight)
-         sigmoid_func = 1.0 / (Numo::NMath.exp(-evaluated) + 1.0)
-         (sigmoid_func - 1.0) * y
-       end
-
-       def loss_gradient(x, ex_x, y, factor, weight)
-         if @params[:loss] == 'hinge'
-           hinge_loss_gradient(x, ex_x, y, factor, weight)
-         else
-           logistic_loss_gradient(x, ex_x, y, factor, weight)
-         end
-       end
-
-       def multiclass_problem?
-         @classes.size > 2
-       end
-     end
-   end
- end
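
The factor term in decision_function relies on the usual factorization machine identity: the pairwise interaction score, the sum over i < j of (v_i . v_j) * x_i * x_j, equals 0.5 times the sum over factors f of ((sum_i v_fi x_i)^2 - sum_i v_fi^2 x_i^2), which is what the vectorized expression 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0) computes per sample. A short sketch comparing the two forms on one made-up sample:

    # Check the vectorized factor term against the naive pairwise sum (illustrative values).
    require 'numo/narray'

    factor = Numo::DFloat[[0.2, -0.1, 0.4], [0.3, 0.5, -0.2]]  # shape: [n_factors, n_features]
    x = Numo::DFloat[[1.0, 2.0, -1.0]]                         # one sample, shape: [1, n_features]

    vectorized = 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)

    naive = 0.0
    n_features = x.shape[1]
    (0...n_features).each do |i|
      ((i + 1)...n_features).each do |j|
        naive += (factor[true, i] * factor[true, j]).sum * x[0, i] * x[0, j]
      end
    end

    # vectorized[0] and naive agree up to floating-point error.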