rumale 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (85) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +20 -0
  4. data/.rspec +3 -0
  5. data/.rubocop.yml +47 -0
  6. data/.rubocop_todo.yml +58 -0
  7. data/.travis.yml +13 -0
  8. data/CHANGELOG.md +2 -0
  9. data/CODE_OF_CONDUCT.md +74 -0
  10. data/Gemfile +4 -0
  11. data/LICENSE.txt +23 -0
  12. data/README.md +175 -0
  13. data/Rakefile +6 -0
  14. data/bin/console +14 -0
  15. data/bin/setup +8 -0
  16. data/lib/rumale.rb +70 -0
  17. data/lib/rumale/base/base_estimator.rb +13 -0
  18. data/lib/rumale/base/classifier.rb +36 -0
  19. data/lib/rumale/base/cluster_analyzer.rb +31 -0
  20. data/lib/rumale/base/evaluator.rb +17 -0
  21. data/lib/rumale/base/regressor.rb +36 -0
  22. data/lib/rumale/base/splitter.rb +21 -0
  23. data/lib/rumale/base/transformer.rb +22 -0
  24. data/lib/rumale/clustering/dbscan.rb +125 -0
  25. data/lib/rumale/clustering/k_means.rb +138 -0
  26. data/lib/rumale/dataset.rb +110 -0
  27. data/lib/rumale/decomposition/nmf.rb +141 -0
  28. data/lib/rumale/decomposition/pca.rb +148 -0
  29. data/lib/rumale/ensemble/ada_boost_classifier.rb +196 -0
  30. data/lib/rumale/ensemble/ada_boost_regressor.rb +178 -0
  31. data/lib/rumale/ensemble/random_forest_classifier.rb +180 -0
  32. data/lib/rumale/ensemble/random_forest_regressor.rb +141 -0
  33. data/lib/rumale/evaluation_measure/accuracy.rb +29 -0
  34. data/lib/rumale/evaluation_measure/f_score.rb +50 -0
  35. data/lib/rumale/evaluation_measure/log_loss.rb +45 -0
  36. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +29 -0
  37. data/lib/rumale/evaluation_measure/mean_squared_error.rb +29 -0
  38. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +62 -0
  39. data/lib/rumale/evaluation_measure/precision.rb +50 -0
  40. data/lib/rumale/evaluation_measure/precision_recall.rb +91 -0
  41. data/lib/rumale/evaluation_measure/purity.rb +40 -0
  42. data/lib/rumale/evaluation_measure/r2_score.rb +43 -0
  43. data/lib/rumale/evaluation_measure/recall.rb +50 -0
  44. data/lib/rumale/kernel_approximation/rbf.rb +121 -0
  45. data/lib/rumale/kernel_machine/kernel_svc.rb +193 -0
  46. data/lib/rumale/linear_model/base_linear_model.rb +89 -0
  47. data/lib/rumale/linear_model/lasso.rb +136 -0
  48. data/lib/rumale/linear_model/linear_regression.rb +110 -0
  49. data/lib/rumale/linear_model/logistic_regression.rb +159 -0
  50. data/lib/rumale/linear_model/ridge.rb +110 -0
  51. data/lib/rumale/linear_model/svc.rb +183 -0
  52. data/lib/rumale/linear_model/svr.rb +122 -0
  53. data/lib/rumale/model_selection/cross_validation.rb +123 -0
  54. data/lib/rumale/model_selection/grid_search_cv.rb +247 -0
  55. data/lib/rumale/model_selection/k_fold.rb +76 -0
  56. data/lib/rumale/model_selection/stratified_k_fold.rb +94 -0
  57. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +100 -0
  58. data/lib/rumale/naive_bayes/naive_bayes.rb +315 -0
  59. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +111 -0
  60. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +93 -0
  61. data/lib/rumale/optimizer/nadam.rb +90 -0
  62. data/lib/rumale/optimizer/rmsprop.rb +69 -0
  63. data/lib/rumale/optimizer/sgd.rb +65 -0
  64. data/lib/rumale/optimizer/yellow_fin.rb +144 -0
  65. data/lib/rumale/pairwise_metric.rb +91 -0
  66. data/lib/rumale/pipeline/pipeline.rb +197 -0
  67. data/lib/rumale/polynomial_model/base_factorization_machine.rb +99 -0
  68. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +197 -0
  69. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +131 -0
  70. data/lib/rumale/preprocessing/l2_normalizer.rb +62 -0
  71. data/lib/rumale/preprocessing/label_encoder.rb +94 -0
  72. data/lib/rumale/preprocessing/min_max_scaler.rb +92 -0
  73. data/lib/rumale/preprocessing/one_hot_encoder.rb +98 -0
  74. data/lib/rumale/preprocessing/standard_scaler.rb +86 -0
  75. data/lib/rumale/probabilistic_output.rb +112 -0
  76. data/lib/rumale/tree/base_decision_tree.rb +153 -0
  77. data/lib/rumale/tree/decision_tree_classifier.rb +163 -0
  78. data/lib/rumale/tree/decision_tree_regressor.rb +135 -0
  79. data/lib/rumale/tree/node.rb +70 -0
  80. data/lib/rumale/utils.rb +37 -0
  81. data/lib/rumale/validation.rb +79 -0
  82. data/lib/rumale/values.rb +13 -0
  83. data/lib/rumale/version.rb +6 -0
  84. data/rumale.gemspec +41 -0
  85. metadata +204 -0
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/regressor'
4
+ require 'rumale/polynomial_model/base_factorization_machine'
5
+
6
+ module Rumale
7
+ module PolynomialModel
8
+ # FactorizationMachineRegressor is a class that implements Factorization Machine
9
+ # with stochastic gradient descent (SGD) optimization.
10
+ #
11
+ # @example
12
+ # estimator =
13
+ # Rumale::PolynomialModel::FactorizationMachineRegressor.new(
14
+ # n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
15
+ # max_iter: 5000, batch_size: 50, random_seed: 1)
16
+ # estimator.fit(training_samples, training_values)
17
+ # results = estimator.predict(testing_samples)
18
+ #
19
+ # *Reference*
20
+ # - S. Rendle, "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
21
+ # - S. Rendle, "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
22
+ class FactorizationMachineRegressor < BaseFactorizationMachine
23
+ include Base::Regressor
24
+
25
+ # Return the factor matrix for Factorization Machine.
26
+ # @return [Numo::DFloat] (shape: [n_outputs, n_factors, n_features])
27
+ attr_reader :factor_mat
28
+
29
+ # Return the weight vector for Factorization Machine.
30
+ # @return [Numo::DFloat] (shape: [n_outputs, n_features])
31
+ attr_reader :weight_vec
32
+
33
+ # Return the bias term for Factorization Machine.
34
+ # @return [Numo::DFloat] (shape: [n_outputs])
35
+ attr_reader :bias_term
36
+
37
+ # Return the random generator for random sampling.
38
+ # @return [Random]
39
+ attr_reader :rng
40
+
41
+ # Create a new regressor with Factorization Machine.
42
+ #
43
+ # @param n_factors [Integer] The number of latent factors (rows of the factor matrix) used for the interaction term.
44
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
45
+ # @param reg_param_factor [Float] The regularization parameter for factor matrix.
46
+ # @param max_iter [Integer] The maximum number of iterations.
47
+ # @param batch_size [Integer] The size of the mini batches.
48
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
49
+ # If nil is given, Nadam is used.
50
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
51
+ def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
52
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
53
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
54
+ check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
55
+ check_params_type_or_nil(Integer, random_seed: random_seed)
56
+ check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
57
+ max_iter: max_iter, batch_size: batch_size)
58
+ keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
59
+ super(keywd_args)
60
+ end
61
+
62
+ # Fit the model with given training data.
63
+ #
64
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
65
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
66
+ # @return [FactorizationMachineRegressor] The learned regressor itself.
67
+ def fit(x, y)
68
+ check_sample_array(x)
69
+ check_tvalue_array(y)
70
+ check_sample_tvalue_size(x, y)
71
+
72
+ n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
73
+ _n_samples, n_features = x.shape
74
+
75
+ if n_outputs > 1
76
+ @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
77
+ @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
78
+ @bias_term = Numo::DFloat.zeros(n_outputs)
79
+ n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
80
+ else
81
+ @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
82
+ end
83
+
84
+ self
85
+ end
86
+
87
+ # Predict values for samples.
88
+ #
89
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
90
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
91
+ def predict(x)
92
+ check_sample_array(x)
93
+ linear_term = @bias_term + x.dot(@weight_vec.transpose)
94
+ factor_term = if @weight_vec.shape[1].nil?
95
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
96
+ else
97
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
98
+ end
99
+ linear_term + factor_term
100
+ end
101
+
102
+ # Dump marshal data.
103
+ # @return [Hash] The marshal data about FactorizationMachineRegressor.
104
+ def marshal_dump
105
+ { params: @params,
106
+ factor_mat: @factor_mat,
107
+ weight_vec: @weight_vec,
108
+ bias_term: @bias_term,
109
+ rng: @rng }
110
+ end
111
+
112
+ # Load marshal data.
113
+ # @return [nil]
114
+ def marshal_load(obj)
115
+ @params = obj[:params]
116
+ @factor_mat = obj[:factor_mat]
117
+ @weight_vec = obj[:weight_vec]
118
+ @bias_term = obj[:bias_term]
119
+ @rng = obj[:rng]
120
+ nil
121
+ end
122
+
123
+ private
124
+
125
+ def loss_gradient(x, ex_x, y, factor, weight)
126
+ z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
127
+ 2.0 * (z - y)
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ # This module consists of the classes that perform preprocessings.
8
+ module Preprocessing
9
+ # Normalize samples to unit L2-norm.
10
+ #
11
+ # @example
12
+ # normalizer = Rumale::Preprocessing::L2Normalizer.new
13
+ # new_samples = normalizer.fit_transform(samples)
14
+ class L2Normalizer
15
+ include Base::BaseEstimator
16
+ include Base::Transformer
17
+
18
+ # Return the vector consisting of the L2-norm of each sample.
19
+ # @return [Numo::DFloat] (shape: [n_samples])
20
+ attr_reader :norm_vec # :nodoc:
21
+
22
+ # Create a new normalizer for normalizing to unit L2-norm.
23
+ def initialize
24
+ @params = {}
25
+ @norm_vec = nil
26
+ end
27
+
28
+ # Calculate L2-norms of each sample.
29
+ #
30
+ # @overload fit(x) -> L2Normalizer
31
+ #
32
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
33
+ # @return [L2Normalizer]
34
+ def fit(x, _y = nil)
35
+ check_sample_array(x)
36
+ @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
37
+ self
38
+ end
39
+
40
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ check_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.tile(x.shape[1], 1).transpose
50
+ end
51
+
52
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to values between 0 and n_classes - 1.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::LabelEncoder.new
12
+ # labels = Numo::Int32[1, 8, 8, 15, 0]
13
+ # encoded_labels = encoder.fit_transform(labels)
14
+ # # > pp encoded_labels
15
+ # # Numo::Int32#shape=[5]
16
+ # # [1, 2, 2, 3, 0]
17
+ # decoded_labels = encoder.inverse_transform(encoded_labels)
18
+ # # > pp decoded_labels
19
+ # # [1, 8, 8, 15, 0]
20
+ class LabelEncoder
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Return the class labels.
25
+ # @return [Array] (size: [n_classes])
26
+ attr_reader :classes
27
+
28
+ # Create a new encoder for encoding labels to values between 0 and n_classes - 1.
29
+ def initialize
30
+ @params = {}
31
+ @classes = nil
32
+ end
33
+
34
+ # Fit label-encoder to labels.
35
+ #
36
+ # @overload fit(x) -> LabelEncoder
37
+ #
38
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
39
+ # @return [LabelEncoder]
40
+ def fit(x, _y = nil)
41
+ x = x.to_a if x.is_a?(Numo::NArray)
42
+ check_params_type(Array, x: x)
43
+ @classes = x.sort.uniq
44
+ self
45
+ end
46
+
47
+ # Fit label-encoder to labels, then return encoded labels.
48
+ #
49
+ # @overload fit_transform(x) -> Numo::Int32
50
+ #
51
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
52
+ # @return [Numo::Int32] The encoded labels.
53
+ def fit_transform(x, _y = nil)
54
+ x = x.to_a if x.is_a?(Numo::NArray)
55
+ check_params_type(Array, x: x)
56
+ fit(x).transform(x)
57
+ end
58
+
59
+ # Encode labels.
60
+ #
61
+ # @param x [Array] (shape: [n_samples]) The labels to be encoded.
62
+ # @return [Numo::Int32] The encoded labels.
63
+ def transform(x)
64
+ x = x.to_a if x.is_a?(Numo::NArray)
65
+ check_params_type(Array, x: x)
66
+ Numo::Int32[*(x.map { |v| @classes.index(v) })]
67
+ end
68
+
69
+ # Decode encoded labels.
70
+ #
71
+ # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
72
+ # @return [Array] The decoded labels.
73
+ def inverse_transform(x)
74
+ check_label_array(x)
75
+ x.to_a.map { |n| @classes[n] }
76
+ end
77
+
78
+ # Dump marshal data.
79
+ # @return [Hash] The marshal data about LabelEncoder
80
+ def marshal_dump
81
+ { params: @params,
82
+ classes: @classes }
83
+ end
84
+
85
+ # Load marshal data.
86
+ # @return [nil]
87
+ def marshal_load(obj)
88
+ @params = obj[:params]
89
+ @classes = obj[:classes]
90
+ nil
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ # This module consists of the classes that perform preprocessings.
8
+ module Preprocessing
9
+ # Normalize samples by scaling each feature to a given range.
10
+ #
11
+ # @example
12
+ # normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
13
+ # new_training_samples = normalizer.fit_transform(training_samples)
14
+ # new_testing_samples = normalizer.transform(testing_samples)
15
+ class MinMaxScaler
16
+ include Base::BaseEstimator
17
+ include Base::Transformer
18
+
19
+ # Return the vector consisting of the minimum value of each feature.
20
+ # @return [Numo::DFloat] (shape: [n_features])
21
+ attr_reader :min_vec
22
+
23
+ # Return the vector consisting of the maximum value of each feature.
24
+ # @return [Numo::DFloat] (shape: [n_features])
25
+ attr_reader :max_vec
26
+
27
+ # Creates a new normalizer for scaling each feature to a given range.
28
+ #
29
+ # @param feature_range [Array<Float>] The desired range of samples.
30
+ def initialize(feature_range: [0.0, 1.0])
31
+ check_params_type(Array, feature_range: feature_range)
32
+ @params = {}
33
+ @params[:feature_range] = feature_range
34
+ @min_vec = nil
35
+ @max_vec = nil
36
+ end
37
+
38
+ # Calculate the minimum and maximum value of each feature for scaling.
39
+ #
40
+ # @overload fit(x) -> MinMaxScaler
41
+ #
42
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
43
+ # @return [MinMaxScaler]
44
+ def fit(x, _y = nil)
45
+ check_sample_array(x)
46
+ @min_vec = x.min(0)
47
+ @max_vec = x.max(0)
48
+ self
49
+ end
50
+
51
+ # Calculate the minimum and maximum values, and then normalize samples to feature_range.
52
+ #
53
+ # @overload fit_transform(x) -> Numo::DFloat
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
56
+ # @return [Numo::DFloat] The scaled samples.
57
+ def fit_transform(x, _y = nil)
58
+ check_sample_array(x)
59
+ fit(x).transform(x)
60
+ end
61
+
62
+ # Perform scaling the given samples according to feature_range.
63
+ #
64
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
65
+ # @return [Numo::DFloat] The scaled samples.
66
+ def transform(x)
67
+ check_sample_array(x)
68
+ n_samples, = x.shape
69
+ dif_vec = @max_vec - @min_vec
70
+ nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
71
+ nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
72
+ end
73
+
74
+ # Dump marshal data.
75
+ # @return [Hash] The marshal data about MinMaxScaler.
76
+ def marshal_dump
77
+ { params: @params,
78
+ min_vec: @min_vec,
79
+ max_vec: @max_vec }
80
+ end
81
+
82
+ # Load marshal data.
83
+ # @return [nil]
84
+ def marshal_load(obj)
85
+ @params = obj[:params]
86
+ @min_vec = obj[:min_vec]
87
+ @max_vec = obj[:max_vec]
88
+ nil
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode categorical integer features to one-hot-vectors.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::OneHotEncoder.new
12
+ # labels = Numo::Int32[0, 0, 2, 3, 2, 1]
13
+ # one_hot_vectors = encoder.fit_transform(labels)
14
+ # # > pp one_hot_vectors
15
+ # # Numo::DFloat#shape=[6, 4]
16
+ # # [[1, 0, 0, 0],
17
+ # # [1, 0, 0, 0],
18
+ # # [0, 0, 1, 0],
19
+ # # [0, 0, 0, 1],
20
+ # # [0, 0, 1, 0],
21
+ # # [0, 1, 0, 0]]
22
+ class OneHotEncoder
23
+ include Base::BaseEstimator
24
+ include Base::Transformer
25
+
26
+ # Return the number of values (maximum value + 1) for each feature.
27
+ # @return [Numo::Int32] (shape: [n_features])
28
+ attr_reader :n_values
29
+
30
+ # Return the indices to feature ranges.
31
+ # @return [Numo::Int32] (shape: [n_features + 1])
32
+ attr_reader :feature_indices
33
+
34
+ # Create a new encoder for encoding categorical integer features to one-hot-vectors
35
+ def initialize
36
+ @params = {}
37
+ @n_values = nil
38
+ @feature_indices = nil
39
+ end
40
+
41
+ # Fit one-hot-encoder to samples.
42
+ #
43
+ # @overload fit(x) -> OneHotEncoder
44
+ #
45
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
46
+ # @return [OneHotEncoder]
47
+ def fit(x, _y = nil)
48
+ check_params_type(Numo::Int32, x: x)
49
+ @n_values = x.max(0) + 1
50
+ @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
51
+ self
52
+ end
53
+
54
+ # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors
55
+ #
56
+ # @overload fit_transform(x) -> Numo::DFloat
57
+ #
58
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
59
+ # @return [Numo::DFloat] The one-hot-vectors.
60
+ def fit_transform(x, _y = nil)
61
+ check_params_type(Numo::Int32, x: x)
62
+ fit(x).transform(x)
63
+ end
64
+
65
+ # Encode samples into one-hot-vectors.
66
+ #
67
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
68
+ # @return [Numo::DFloat] The one-hot-vectors.
69
+ def transform(x)
70
+ check_params_type(Numo::Int32, x: x)
71
+ n_samples, n_features = x.shape
72
+ n_features = 1 if n_features.nil?
73
+ column_indices = (x + @feature_indices[0...-1]).flatten.to_a
74
+ row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
75
+ codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
76
+ row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
77
+ codes
78
+ end
79
+
80
+ # Dump marshal data.
81
+ # @return [Hash] The marshal data about OneHotEncoder.
82
+ def marshal_dump
83
+ { params: @params,
84
+ n_values: @n_values,
85
+ feature_indices: @feature_indices }
86
+ end
87
+
88
+ # Load marshal data.
89
+ # @return [nil]
90
+ def marshal_load(obj)
91
+ @params = obj[:params]
92
+ @n_values = obj[:n_values]
93
+ @feature_indices = obj[:feature_indices]
94
+ nil
95
+ end
96
+ end
97
+ end
98
+ end