rumale 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +1 -0
  3. data/.gitignore +20 -0
  4. data/.rspec +3 -0
  5. data/.rubocop.yml +47 -0
  6. data/.rubocop_todo.yml +58 -0
  7. data/.travis.yml +13 -0
  8. data/CHANGELOG.md +2 -0
  9. data/CODE_OF_CONDUCT.md +74 -0
  10. data/Gemfile +4 -0
  11. data/LICENSE.txt +23 -0
  12. data/README.md +175 -0
  13. data/Rakefile +6 -0
  14. data/bin/console +14 -0
  15. data/bin/setup +8 -0
  16. data/lib/rumale.rb +70 -0
  17. data/lib/rumale/base/base_estimator.rb +13 -0
  18. data/lib/rumale/base/classifier.rb +36 -0
  19. data/lib/rumale/base/cluster_analyzer.rb +31 -0
  20. data/lib/rumale/base/evaluator.rb +17 -0
  21. data/lib/rumale/base/regressor.rb +36 -0
  22. data/lib/rumale/base/splitter.rb +21 -0
  23. data/lib/rumale/base/transformer.rb +22 -0
  24. data/lib/rumale/clustering/dbscan.rb +125 -0
  25. data/lib/rumale/clustering/k_means.rb +138 -0
  26. data/lib/rumale/dataset.rb +110 -0
  27. data/lib/rumale/decomposition/nmf.rb +141 -0
  28. data/lib/rumale/decomposition/pca.rb +148 -0
  29. data/lib/rumale/ensemble/ada_boost_classifier.rb +196 -0
  30. data/lib/rumale/ensemble/ada_boost_regressor.rb +178 -0
  31. data/lib/rumale/ensemble/random_forest_classifier.rb +180 -0
  32. data/lib/rumale/ensemble/random_forest_regressor.rb +141 -0
  33. data/lib/rumale/evaluation_measure/accuracy.rb +29 -0
  34. data/lib/rumale/evaluation_measure/f_score.rb +50 -0
  35. data/lib/rumale/evaluation_measure/log_loss.rb +45 -0
  36. data/lib/rumale/evaluation_measure/mean_absolute_error.rb +29 -0
  37. data/lib/rumale/evaluation_measure/mean_squared_error.rb +29 -0
  38. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +62 -0
  39. data/lib/rumale/evaluation_measure/precision.rb +50 -0
  40. data/lib/rumale/evaluation_measure/precision_recall.rb +91 -0
  41. data/lib/rumale/evaluation_measure/purity.rb +40 -0
  42. data/lib/rumale/evaluation_measure/r2_score.rb +43 -0
  43. data/lib/rumale/evaluation_measure/recall.rb +50 -0
  44. data/lib/rumale/kernel_approximation/rbf.rb +121 -0
  45. data/lib/rumale/kernel_machine/kernel_svc.rb +193 -0
  46. data/lib/rumale/linear_model/base_linear_model.rb +89 -0
  47. data/lib/rumale/linear_model/lasso.rb +136 -0
  48. data/lib/rumale/linear_model/linear_regression.rb +110 -0
  49. data/lib/rumale/linear_model/logistic_regression.rb +159 -0
  50. data/lib/rumale/linear_model/ridge.rb +110 -0
  51. data/lib/rumale/linear_model/svc.rb +183 -0
  52. data/lib/rumale/linear_model/svr.rb +122 -0
  53. data/lib/rumale/model_selection/cross_validation.rb +123 -0
  54. data/lib/rumale/model_selection/grid_search_cv.rb +247 -0
  55. data/lib/rumale/model_selection/k_fold.rb +76 -0
  56. data/lib/rumale/model_selection/stratified_k_fold.rb +94 -0
  57. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +100 -0
  58. data/lib/rumale/naive_bayes/naive_bayes.rb +315 -0
  59. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +111 -0
  60. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +93 -0
  61. data/lib/rumale/optimizer/nadam.rb +90 -0
  62. data/lib/rumale/optimizer/rmsprop.rb +69 -0
  63. data/lib/rumale/optimizer/sgd.rb +65 -0
  64. data/lib/rumale/optimizer/yellow_fin.rb +144 -0
  65. data/lib/rumale/pairwise_metric.rb +91 -0
  66. data/lib/rumale/pipeline/pipeline.rb +197 -0
  67. data/lib/rumale/polynomial_model/base_factorization_machine.rb +99 -0
  68. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +197 -0
  69. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +131 -0
  70. data/lib/rumale/preprocessing/l2_normalizer.rb +62 -0
  71. data/lib/rumale/preprocessing/label_encoder.rb +94 -0
  72. data/lib/rumale/preprocessing/min_max_scaler.rb +92 -0
  73. data/lib/rumale/preprocessing/one_hot_encoder.rb +98 -0
  74. data/lib/rumale/preprocessing/standard_scaler.rb +86 -0
  75. data/lib/rumale/probabilistic_output.rb +112 -0
  76. data/lib/rumale/tree/base_decision_tree.rb +153 -0
  77. data/lib/rumale/tree/decision_tree_classifier.rb +163 -0
  78. data/lib/rumale/tree/decision_tree_regressor.rb +135 -0
  79. data/lib/rumale/tree/node.rb +70 -0
  80. data/lib/rumale/utils.rb +37 -0
  81. data/lib/rumale/validation.rb +79 -0
  82. data/lib/rumale/values.rb +13 -0
  83. data/lib/rumale/version.rb +6 -0
  84. data/rumale.gemspec +41 -0
  85. metadata +204 -0
@@ -0,0 +1,131 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/regressor'
4
+ require 'rumale/polynomial_model/base_factorization_machine'
5
+
6
+ module Rumale
7
+ module PolynomialModel
8
+ # FactorizationMachineRegressor is a class that implements Factorization Machine
9
+ # with stochastic gradient descent (SGD) optimization.
10
+ #
11
+ # @example
12
+ # estimator =
13
+ # Rumale::PolynomialModel::FactorizationMachineRegressor.new(
14
+ # n_factors: 10, reg_param_linear: 0.1, reg_param_factor: 0.1,
15
+ # max_iter: 5000, batch_size: 50, random_seed: 1)
16
+ # estimator.fit(training_samples, training_values)
17
+ # results = estimator.predict(testing_samples)
18
+ #
19
+ # *Reference*
20
+ # - S. Rendle, "Factorization Machines with libFM," ACM TIST, vol. 3 (3), pp. 57:1--57:22, 2012.
21
+ # - S. Rendle, "Factorization Machines," Proc. ICDM'10, pp. 995--1000, 2010.
22
+ class FactorizationMachineRegressor < BaseFactorizationMachine
23
+ include Base::Regressor
24
+
25
+ # Return the factor matrix for Factorization Machine.
26
+ # @return [Numo::DFloat] (shape: [n_outputs, n_factors, n_features])
27
+ attr_reader :factor_mat
28
+
29
+ # Return the weight vector for Factorization Machine.
30
+ # @return [Numo::DFloat] (shape: [n_outputs, n_features])
31
+ attr_reader :weight_vec
32
+
33
+ # Return the bias term for Factorization Machine.
34
+ # @return [Numo::DFloat] (shape: [n_outputs])
35
+ attr_reader :bias_term
36
+
37
+ # Return the random generator for random sampling.
38
+ # @return [Random]
39
+ attr_reader :rng
40
+
41
+ # Create a new regressor with Factorization Machine.
42
+ #
43
+ # @param n_factors [Integer] The number of factors (rows of the factor matrix) used to model feature interactions.
44
+ # @param reg_param_linear [Float] The regularization parameter for linear model.
45
+ # @param reg_param_factor [Float] The regularization parameter for factor matrix.
46
+ # @param max_iter [Integer] The maximum number of iterations.
47
+ # @param batch_size [Integer] The size of the mini batches.
48
+ # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
49
+ # If nil is given, Nadam is used.
50
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
51
+ def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
52
+ max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
53
+ check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
54
+ check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
55
+ check_params_type_or_nil(Integer, random_seed: random_seed)
56
+ check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
57
+ max_iter: max_iter, batch_size: batch_size)
58
+ keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
59
+ super(keywd_args)
60
+ end
61
+
62
+ # Fit the model with given training data.
63
+ #
64
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
65
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
66
+ # @return [FactorizationMachineRegressor] The learned regressor itself.
67
+ def fit(x, y)
68
+ check_sample_array(x)
69
+ check_tvalue_array(y)
70
+ check_sample_tvalue_size(x, y)
71
+
72
+ n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
73
+ _n_samples, n_features = x.shape
74
+
75
+ if n_outputs > 1
76
+ @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
77
+ @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
78
+ @bias_term = Numo::DFloat.zeros(n_outputs)
79
+ n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
80
+ else
81
+ @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
82
+ end
83
+
84
+ self
85
+ end
86
+
87
+ # Predict values for samples.
88
+ #
89
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
90
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
91
+ def predict(x)
92
+ check_sample_array(x)
93
+ linear_term = @bias_term + x.dot(@weight_vec.transpose)
94
+ factor_term = if @weight_vec.shape[1].nil?
95
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
96
+ else
97
+ 0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
98
+ end
99
+ linear_term + factor_term
100
+ end
101
+
102
+ # Dump marshal data.
103
+ # @return [Hash] The marshal data about FactorizationMachineRegressor.
104
+ def marshal_dump
105
+ { params: @params,
106
+ factor_mat: @factor_mat,
107
+ weight_vec: @weight_vec,
108
+ bias_term: @bias_term,
109
+ rng: @rng }
110
+ end
111
+
112
+ # Load marshal data.
113
+ # @return [nil]
114
+ def marshal_load(obj)
115
+ @params = obj[:params]
116
+ @factor_mat = obj[:factor_mat]
117
+ @weight_vec = obj[:weight_vec]
118
+ @bias_term = obj[:bias_term]
119
+ @rng = obj[:rng]
120
+ nil
121
+ end
122
+
123
+ private
124
+
125
+ def loss_gradient(x, ex_x, y, factor, weight)
126
+ z = ex_x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
127
+ 2.0 * (z - y)
128
+ end
129
+ end
130
+ end
131
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ # This module consists of the classes that perform preprocessing.
8
+ module Preprocessing
9
+ # Normalize samples to unit L2-norm.
10
+ #
11
+ # @example
12
+ # normalizer = Rumale::Preprocessing::L2Normalizer.new
13
+ # new_samples = normalizer.fit_transform(samples)
14
+ class L2Normalizer
15
+ include Base::BaseEstimator
16
+ include Base::Transformer
17
+
18
+ # Return the vector consisting of the L2-norm of each sample.
19
+ # @return [Numo::DFloat] (shape: [n_samples])
20
+ attr_reader :norm_vec # :nodoc:
21
+
22
+ # Create a new normalizer for normalizing to unit L2-norm.
23
+ def initialize
24
+ @params = {}
25
+ @norm_vec = nil
26
+ end
27
+
28
+ # Calculate L2-norms of each sample.
29
+ #
30
+ # @overload fit(x) -> L2Normalizer
31
+ #
32
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
33
+ # @return [L2Normalizer]
34
+ def fit(x, _y = nil)
35
+ check_sample_array(x)
36
+ @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
37
+ self
38
+ end
39
+
40
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ check_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.tile(x.shape[1], 1).transpose
50
+ end
51
+
52
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to values between 0 and n_classes - 1.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::LabelEncoder.new
12
+ # labels = Numo::Int32[1, 8, 8, 15, 0]
13
+ # encoded_labels = encoder.fit_transform(labels)
14
+ # # > pp encoded_labels
15
+ # # Numo::Int32#shape=[5]
16
+ # # [1, 2, 2, 3, 0]
17
+ # decoded_labels = encoder.inverse_transform(encoded_labels)
18
+ # # > pp decoded_labels
19
+ # # [1, 8, 8, 15, 0]
20
+ class LabelEncoder
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Return the class labels.
25
+ # @return [Array] (size: [n_classes])
26
+ attr_reader :classes
27
+
28
+ # Create a new encoder for encoding labels to values between 0 and n_classes - 1.
29
+ def initialize
30
+ @params = {}
31
+ @classes = nil
32
+ end
33
+
34
+ # Fit label-encoder to labels.
35
+ #
36
+ # @overload fit(x) -> LabelEncoder
37
+ #
38
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
39
+ # @return [LabelEncoder]
40
+ def fit(x, _y = nil)
41
+ x = x.to_a if x.is_a?(Numo::NArray)
42
+ check_params_type(Array, x: x)
43
+ @classes = x.sort.uniq
44
+ self
45
+ end
46
+
47
+ # Fit label-encoder to labels, then return encoded labels.
48
+ #
49
+ # @overload fit_transform(x) -> Numo::Int32
50
+ #
51
+ # @param x [Array] (shape: [n_samples]) The labels to fit label-encoder.
52
+ # @return [Numo::Int32] The encoded labels.
53
+ def fit_transform(x, _y = nil)
54
+ x = x.to_a if x.is_a?(Numo::NArray)
55
+ check_params_type(Array, x: x)
56
+ fit(x).transform(x)
57
+ end
58
+
59
+ # Encode labels.
60
+ #
61
+ # @param x [Array] (shape: [n_samples]) The labels to be encoded.
62
+ # @return [Numo::Int32] The encoded labels.
63
+ def transform(x)
64
+ x = x.to_a if x.is_a?(Numo::NArray)
65
+ check_params_type(Array, x: x)
66
+ Numo::Int32[*(x.map { |v| @classes.index(v) })]
67
+ end
68
+
69
+ # Decode encoded labels.
70
+ #
71
+ # @param x [Numo::Int32] (shape: [n_samples]) The labels to be decoded.
72
+ # @return [Array] The decoded labels.
73
+ def inverse_transform(x)
74
+ check_label_array(x)
75
+ x.to_a.map { |n| @classes[n] }
76
+ end
77
+
78
+ # Dump marshal data.
79
+ # @return [Hash] The marshal data about LabelEncoder
80
+ def marshal_dump
81
+ { params: @params,
82
+ classes: @classes }
83
+ end
84
+
85
+ # Load marshal data.
86
+ # @return [nil]
87
+ def marshal_load(obj)
88
+ @params = obj[:params]
89
+ @classes = obj[:classes]
90
+ nil
91
+ end
92
+ end
93
+ end
94
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ # This module consists of the classes that perform preprocessing.
8
+ module Preprocessing
9
+ # Normalize samples by scaling each feature to a given range.
10
+ #
11
+ # @example
12
+ # normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
13
+ # new_training_samples = normalizer.fit_transform(training_samples)
14
+ # new_testing_samples = normalizer.transform(testing_samples)
15
+ class MinMaxScaler
16
+ include Base::BaseEstimator
17
+ include Base::Transformer
18
+
19
+ # Return the vector consisting of the minimum value of each feature.
20
+ # @return [Numo::DFloat] (shape: [n_features])
21
+ attr_reader :min_vec
22
+
23
+ # Return the vector consisting of the maximum value of each feature.
24
+ # @return [Numo::DFloat] (shape: [n_features])
25
+ attr_reader :max_vec
26
+
27
+ # Creates a new normalizer for scaling each feature to a given range.
28
+ #
29
+ # @param feature_range [Array<Float>] The desired range of samples.
30
+ def initialize(feature_range: [0.0, 1.0])
31
+ check_params_type(Array, feature_range: feature_range)
32
+ @params = {}
33
+ @params[:feature_range] = feature_range
34
+ @min_vec = nil
35
+ @max_vec = nil
36
+ end
37
+
38
+ # Calculate the minimum and maximum value of each feature for scaling.
39
+ #
40
+ # @overload fit(x) -> MinMaxScaler
41
+ #
42
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
43
+ # @return [MinMaxScaler]
44
+ def fit(x, _y = nil)
45
+ check_sample_array(x)
46
+ @min_vec = x.min(0)
47
+ @max_vec = x.max(0)
48
+ self
49
+ end
50
+
51
+ # Calculate the minimum and maximum values, and then normalize samples to feature_range.
52
+ #
53
+ # @overload fit_transform(x) -> Numo::DFloat
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
56
+ # @return [Numo::DFloat] The scaled samples.
57
+ def fit_transform(x, _y = nil)
58
+ check_sample_array(x)
59
+ fit(x).transform(x)
60
+ end
61
+
62
+ # Scale the given samples to feature_range.
63
+ #
64
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
65
+ # @return [Numo::DFloat] The scaled samples.
66
+ def transform(x)
67
+ check_sample_array(x)
68
+ n_samples, = x.shape
69
+ dif_vec = @max_vec - @min_vec
70
+ nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
71
+ nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
72
+ end
73
+
74
+ # Dump marshal data.
75
+ # @return [Hash] The marshal data about MinMaxScaler.
76
+ def marshal_dump
77
+ { params: @params,
78
+ min_vec: @min_vec,
79
+ max_vec: @max_vec }
80
+ end
81
+
82
+ # Load marshal data.
83
+ # @return [nil]
84
+ def marshal_load(obj)
85
+ @params = obj[:params]
86
+ @min_vec = obj[:min_vec]
87
+ @max_vec = obj[:max_vec]
88
+ nil
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode categorical integer features to one-hot-vectors.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::OneHotEncoder.new
12
+ # labels = Numo::Int32[0, 0, 2, 3, 2, 1]
13
+ # one_hot_vectors = encoder.fit_transform(labels)
14
+ # # > pp one_hot_vectors
15
+ # # Numo::DFloat#shape=[6, 4]
16
+ # # [[1, 0, 0, 0],
17
+ # # [1, 0, 0, 0],
18
+ # # [0, 0, 1, 0],
19
+ # # [0, 0, 0, 1],
20
+ # # [0, 0, 1, 0],
21
+ # # [0, 1, 0, 0]]
22
+ class OneHotEncoder
23
+ include Base::BaseEstimator
24
+ include Base::Transformer
25
+
26
+ # Return the number of values for each feature (maximum value + 1).
27
+ # @return [Numo::Int32] (shape: [n_features])
28
+ attr_reader :n_values
29
+
30
+ # Return the indices to feature ranges.
31
+ # @return [Numo::Int32] (shape: [n_features + 1])
32
+ attr_reader :feature_indices
33
+
34
+ # Create a new encoder for encoding categorical integer features to one-hot-vectors.
35
+ def initialize
36
+ @params = {}
37
+ @n_values = nil
38
+ @feature_indices = nil
39
+ end
40
+
41
+ # Fit one-hot-encoder to samples.
42
+ #
43
+ # @overload fit(x) -> OneHotEncoder
44
+ #
45
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
46
+ # @return [OneHotEncoder]
47
+ def fit(x, _y = nil)
48
+ check_params_type(Numo::Int32, x: x)
49
+ @n_values = x.max(0) + 1
50
+ @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
51
+ self
52
+ end
53
+
54
+ # Fit one-hot-encoder to samples, then encode samples into one-hot-vectors.
55
+ #
56
+ # @overload fit_transform(x) -> Numo::DFloat
57
+ #
58
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
59
+ # @return [Numo::DFloat] The one-hot-vectors.
60
+ def fit_transform(x, _y = nil)
61
+ check_params_type(Numo::Int32, x: x)
62
+ fit(x).transform(x)
63
+ end
64
+
65
+ # Encode samples into one-hot-vectors.
66
+ #
67
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to encode into one-hot-vectors.
68
+ # @return [Numo::DFloat] The one-hot-vectors.
69
+ def transform(x)
70
+ check_params_type(Numo::Int32, x: x)
71
+ n_samples, n_features = x.shape
72
+ n_features = 1 if n_features.nil?
73
+ column_indices = (x + @feature_indices[0...-1]).flatten.to_a
74
+ row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
75
+ codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
76
+ row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
77
+ codes
78
+ end
79
+
80
+ # Dump marshal data.
81
+ # @return [Hash] The marshal data about OneHotEncoder.
82
+ def marshal_dump
83
+ { params: @params,
84
+ n_values: @n_values,
85
+ feature_indices: @feature_indices }
86
+ end
87
+
88
+ # Load marshal data.
89
+ # @return [nil]
90
+ def marshal_load(obj)
91
+ @params = obj[:params]
92
+ @n_values = obj[:n_values]
93
+ @feature_indices = obj[:feature_indices]
94
+ nil
95
+ end
96
+ end
97
+ end
98
+ end