rumale 0.18.7 → 0.20.0

Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +66 -1
  3. data/CHANGELOG.md +46 -0
  4. data/Gemfile +2 -0
  5. data/README.md +5 -36
  6. data/lib/rumale.rb +5 -10
  7. data/lib/rumale/clustering/hdbscan.rb +1 -1
  8. data/lib/rumale/clustering/k_means.rb +1 -1
  9. data/lib/rumale/clustering/k_medoids.rb +1 -1
  10. data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
  11. data/lib/rumale/dataset.rb +3 -3
  12. data/lib/rumale/decomposition/pca.rb +23 -5
  13. data/lib/rumale/feature_extraction/feature_hasher.rb +14 -1
  14. data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
  15. data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
  16. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  17. data/lib/rumale/linear_model/base_sgd.rb +1 -1
  18. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +13 -1
  19. data/lib/rumale/model_selection/cross_validation.rb +3 -2
  20. data/lib/rumale/model_selection/k_fold.rb +1 -1
  21. data/lib/rumale/model_selection/shuffle_split.rb +1 -1
  22. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
  23. data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
  24. data/lib/rumale/neural_network/adam.rb +1 -1
  25. data/lib/rumale/neural_network/base_mlp.rb +1 -1
  26. data/lib/rumale/preprocessing/binarizer.rb +60 -0
  27. data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
  28. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
  29. data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
  30. data/lib/rumale/version.rb +1 -1
  31. data/rumale.gemspec +1 -3
  32. metadata +11 -44
  33. data/lib/rumale/linear_model/base_linear_model.rb +0 -101
  34. data/lib/rumale/optimizer/ada_grad.rb +0 -39
  35. data/lib/rumale/optimizer/adam.rb +0 -53
  36. data/lib/rumale/optimizer/nadam.rb +0 -62
  37. data/lib/rumale/optimizer/rmsprop.rb +0 -47
  38. data/lib/rumale/optimizer/sgd.rb +0 -43
  39. data/lib/rumale/optimizer/yellow_fin.rb +0 -101
  40. data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -121
  41. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -215
  42. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -129
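Version 0.20.0 adds several new classes (Rumale::Clustering::MiniBatchKMeans, Rumale::FeatureExtraction::TfidfTransformer, and the Rumale::Preprocessing::Binarizer, L1Normalizer, and MaxNormalizer transformers) and removes the deprecated Rumale::Optimizer and Rumale::PolynomialModel namespaces; the hunk below is the deletion of file 42, factorization_machine_regressor.rb. As a quick orientation before that hunk, here is a minimal usage sketch of the additions. It is illustrative only: it assumes the new classes follow the same fit_transform / fit_predict interface as the existing L2Normalizer and KMeans classes, and the threshold: keyword on Binarizer is inferred from the scikit-learn transformer of the same name, so check the class docs before relying on it.

  require 'rumale'

  x = Numo::DFloat[[1.0, -2.0, 3.0], [4.0, 0.5, -1.0]]

  # Map each feature to 0/1 against a threshold (keyword assumed).
  puts Rumale::Preprocessing::Binarizer.new(threshold: 0.0).fit_transform(x).inspect

  # Scale each sample to unit L1 norm.
  puts Rumale::Preprocessing::L1Normalizer.new.fit_transform(x).inspect

  # Cluster with the new mini-batch variant of k-means.
  samples = Numo::DFloat.new(100, 2).rand
  labels = Rumale::Clustering::MiniBatchKMeans.new(n_clusters: 3, random_seed: 1).fit_predict(samples)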
@@ -1,129 +0,0 @@
- # frozen_string_literal: true
-
- require 'rumale/base/regressor'
- require 'rumale/polynomial_model/base_factorization_machine'
-
- module Rumale
-   module PolynomialModel
-     # FactorizationMachineRegressor is a class that implements Factorization Machine
-     # with stochastic gradient descent (SGD) optimization.
-     #
-     # @example
-     #   estimator =
-     #     Rumale::PolynomialModel::FactorizationMachineRegressor.new(
-     #       n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
-     #       max_iter: 500, batch_size: 50, random_seed: 1)
-     #   estimator.fit(training_samples, training_values)
-     #   results = estimator.predict(testing_samples)
-     #
-     # *Reference*
-     # - Rendle, S., "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
-     # - Rendle, S., "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
-     class FactorizationMachineRegressor < BaseFactorizationMachine
-       include Base::Regressor
-
-       # Return the factor matrix for Factorization Machine.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_factors, n_features])
-       attr_reader :factor_mat
-
-       # Return the weight vector for Factorization Machine.
-       # @return [Numo::DFloat] (shape: [n_outputs, n_features])
-       attr_reader :weight_vec
-
-       # Return the bias term for Factorization Machine.
-       # @return [Numo::DFloat] (shape: [n_outputs])
-       attr_reader :bias_term
-
-       # Return the random generator for random sampling.
-       # @return [Random]
-       attr_reader :rng
-
-       # Create a new regressor with Factorization Machine.
-       #
-       # @param n_factors [Integer] The number of latent factors.
-       # @param reg_param_linear [Float] The regularization parameter for the linear model.
-       # @param reg_param_factor [Float] The regularization parameter for the factor matrix.
-       # @param max_iter [Integer] The maximum number of epochs that indicates
-       #   how many times the whole data is given to the training process.
-       # @param batch_size [Integer] The size of the mini batches.
-       # @param tol [Float] The tolerance of loss for terminating optimization.
-       # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-       #   If nil is given, Nadam is used.
-       # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
-       #   If nil is given, the method does not execute in parallel.
-       #   If zero or less is given, it becomes equal to the number of processors.
-       #   This parameter is ignored if the Parallel gem is not loaded.
-       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
-       # @param random_seed [Integer] The seed value used to initialize the random generator.
-       def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
-                      max_iter: 200, batch_size: 50, tol: 1e-4,
-                      optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
-         check_params_numeric(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                              n_factors: n_factors, max_iter: max_iter, batch_size: batch_size, tol: tol)
-         check_params_boolean(verbose: verbose)
-         check_params_numeric_or_nil(n_jobs: n_jobs, random_seed: random_seed)
-         check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
-                               max_iter: max_iter, batch_size: batch_size)
-         keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
-         super(**keywd_args)
-       end
-
-       # Fit the model with given training data.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
-       # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
-       # @return [FactorizationMachineRegressor] The learned regressor itself.
-       def fit(x, y)
-         x = check_convert_sample_array(x)
-         y = check_convert_tvalue_array(y)
-         check_sample_tvalue_size(x, y)
-
-         n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
-         _n_samples, n_features = x.shape
-
-         if n_outputs > 1
-           @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
-           @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
-           @bias_term = Numo::DFloat.zeros(n_outputs)
-           if enable_parallel?
-             models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
-             n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
-           else
-             n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
-           end
-         else
-           @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
-         end
-
-         self
-       end
-
-       # Predict values for samples.
-       #
-       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
-       # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
-       def predict(x)
-         x = check_convert_sample_array(x)
-         linear_term = @bias_term + x.dot(@weight_vec.transpose)
-         factor_term = if @weight_vec.shape[1].nil?
-                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
-                       else
-                         0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
-                       end
-         linear_term + factor_term
-       end
-
-       private
-
-       def loss_func(x, ex_x, y, factor, weight)
-         z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-         ((z - y)**2).sum.fdiv(y.shape[0])
-       end
-
-       def loss_gradient(x, ex_x, y, factor, weight)
-         z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
-         2.0 * (z - y)
-       end
-     end
-   end
- end
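For readers skimming the deleted code: predict and the private loss helpers all evaluate the factorization machine's pairwise interaction term through Rendle's reformulation, which is exactly what the repeated 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)) expression computes. In the notation of the papers cited in the class comment, the model and the identity it relies on are

  \hat{y}(\mathbf{x}) = w_0 + \sum_{i=1}^{n} w_i x_i + \sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle \mathbf{v}_i, \mathbf{v}_j \rangle x_i x_j

  \sum_{i=1}^{n} \sum_{j=i+1}^{n} \langle \mathbf{v}_i, \mathbf{v}_j \rangle x_i x_j = \frac{1}{2} \sum_{f=1}^{k} \left[ \Big( \sum_{i=1}^{n} v_{i,f} x_i \Big)^{2} - \sum_{i=1}^{n} v_{i,f}^{2} x_i^{2} \right]

where w_0 is the bias term, w the weight vector, and v_i the factor vector for feature i in the k-by-n factor matrix; the right-hand side costs O(kn) per sample instead of the naive O(kn^2).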