rumale 0.19.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -29
  3. data/CHANGELOG.md +28 -0
  4. data/lib/rumale.rb +7 -10
  5. data/lib/rumale/clustering/hdbscan.rb +3 -3
  6. data/lib/rumale/clustering/k_means.rb +1 -1
  7. data/lib/rumale/clustering/k_medoids.rb +1 -1
  8. data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
  9. data/lib/rumale/dataset.rb +4 -4
  10. data/lib/rumale/decomposition/nmf.rb +2 -2
  11. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  12. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  13. data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
  14. data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
  15. data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
  16. data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  18. data/lib/rumale/linear_model/base_sgd.rb +1 -1
  19. data/lib/rumale/manifold/tsne.rb +1 -1
  20. data/lib/rumale/model_selection/cross_validation.rb +3 -2
  21. data/lib/rumale/model_selection/group_k_fold.rb +93 -0
  22. data/lib/rumale/model_selection/group_shuffle_split.rb +115 -0
  23. data/lib/rumale/model_selection/k_fold.rb +1 -1
  24. data/lib/rumale/model_selection/shuffle_split.rb +5 -5
  25. data/lib/rumale/model_selection/stratified_k_fold.rb +1 -1
  26. data/lib/rumale/model_selection/stratified_shuffle_split.rb +13 -9
  27. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
  28. data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
  29. data/lib/rumale/neural_network/adam.rb +1 -1
  30. data/lib/rumale/neural_network/base_mlp.rb +1 -1
  31. data/lib/rumale/preprocessing/binarizer.rb +60 -0
  32. data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
  33. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
  34. data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
  35. data/lib/rumale/probabilistic_output.rb +1 -1
  36. data/lib/rumale/version.rb +1 -1
  37. metadata +12 -15
  38. data/lib/rumale/linear_model/base_linear_model.rb +0 -102
  39. data/lib/rumale/optimizer/ada_grad.rb +0 -42
  40. data/lib/rumale/optimizer/adam.rb +0 -56
  41. data/lib/rumale/optimizer/nadam.rb +0 -67
  42. data/lib/rumale/optimizer/rmsprop.rb +0 -50
  43. data/lib/rumale/optimizer/sgd.rb +0 -46
  44. data/lib/rumale/optimizer/yellow_fin.rb +0 -104
  45. data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -125
  46. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -220
  47. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -134
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1cd1cdc16e6c72743d064db7d254c74eb98ca33e97cfdf9e8a76cc1fbe5dd29b
4
- data.tar.gz: 2077cae2629f2c403cc0afc415dc4b4151a2eac2ab7a3230402bf761bb653829
3
+ metadata.gz: 0f361026cd2922a2d36846a817eee855bf0c000156ed6c756bca29d2e42d67a2
4
+ data.tar.gz: 016fa40aa2546824cacbc32353263cbfc9427f0ceabb7e703f99854914bb9a2e
5
5
  SHA512:
6
- metadata.gz: 2bbcdce6d0a31c95500a81a7d4a55407786068fac65ce1e5ede1bc3f56d97b2ec93fd9ca1dc52fc1a24782dba469099b25d0398a5993716da011851f18f8179c
7
- data.tar.gz: cc9fc19ea73dfa76e8ede18df7cb57f931cccdea2c546f414746ed681afbd272e3866913c75534bbdce6b275d057baa2c793b4046c8e46f697e30d5b87dba066
6
+ metadata.gz: 7a53a958db7ec8b56236018505370b9908ae81a9afc9d7c8ff0b16d83971539c1ad729b5ab350eb49ae9b90ada43a8912ed2404a37eef97a4d34dad90b1d3e9f
7
+ data.tar.gz: 2f2b3d48625c7120464179bc7759c01ba7de85cb0d54720665eaf1e4822f24c1870474ebc24a47cff123e44a8626b0e0fac6a7e81216c057286071770ea5ba79
@@ -3,6 +3,7 @@ require:
3
3
  - rubocop-rspec
4
4
 
5
5
  AllCops:
6
+ NewCops: enable
6
7
  TargetRubyVersion: 2.5
7
8
  DisplayCopNames: true
8
9
  DisplayStyleGuide: true
@@ -15,25 +16,12 @@ AllCops:
15
16
  Style/Documentation:
16
17
  Enabled: false
17
18
 
18
- Style/HashEachMethods:
19
- Enabled: true
20
-
21
- Style/HashTransformKeys:
22
- Enabled: true
23
-
24
- Style/HashTransformValues:
25
- Enabled: true
26
-
27
- Lint/RaiseException:
28
- Enabled: true
29
-
30
- Lint/StructNewOverride:
31
- Enabled: true
32
-
33
19
  Layout/LineLength:
34
20
  Max: 145
35
21
  IgnoredPatterns: ['(\A|\s)#']
36
22
 
23
+ Lint/MissingSuper:
24
+ Enabled: false
37
25
 
38
26
  Metrics/ModuleLength:
39
27
  Max: 200
@@ -70,26 +58,14 @@ Naming/MethodParameterName:
70
58
  Naming/ConstantName:
71
59
  Enabled: false
72
60
 
73
- Style/ExponentialNotation:
74
- Enabled: true
75
-
76
61
  Style/FormatStringToken:
77
62
  Enabled: false
78
63
 
79
64
  Style/NumericLiterals:
80
65
  Enabled: false
81
66
 
82
- Style/SlicingWithRange:
83
- Enabled: true
84
-
85
- Layout/EmptyLineAfterGuardClause:
86
- Enabled: true
87
-
88
- Layout/EmptyLinesAroundAttributeAccessor:
89
- Enabled: true
90
-
91
- Layout/SpaceAroundMethodCallOperator:
92
- Enabled: true
67
+ Style/StringConcatenation:
68
+ Enabled: false
93
69
 
94
70
  RSpec/MultipleExpectations:
95
71
  Enabled: false
@@ -1,4 +1,32 @@
1
+ # 0.20.1
2
+ - Add cross-validator classes that split data according to group labels.
3
+ - [GroupKFold](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/GroupKFold.html)
4
+ - [GroupShuffleSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/GroupShuffleSplit.html)
5
+ - Fix the handling of fractional numbers of samples in the shuffle split cross-validator classes.
6
+ - [ShuffleSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/ShuffleSplit.html)
7
+ - [StratifiedShuffleSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/StratifiedShuffleSplit.html)
8
+ - Refactor some code with RuboCop.
9
+
10
+ # 0.20.0
11
+ ## Breaking changes
12
+ - Delete deprecated estimators such as PolynomialModel, Optimizer, and BaseLinearModel.
13
+
14
+ # 0.19.3
15
+ - Add preprocessing class for [Binarizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/Binarizer.html)
16
+ - Add preprocessing class for [MaxNormalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/MaxNormalizer.html)
17
+ - Refactor some code with RuboCop.
18
+
19
+ # 0.19.2
20
+ - Fix L2Normalizer to avoid zero divide.
21
+ - Add preprocessing class for [L1Normalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/L1Normalizer.html).
22
+ - Add transformer class for [TfidfTransformer](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/TfidfTransformer.html).
23
+
24
+ # 0.19.1
25
+ - Add cluster analysis class for [mini-batch K-Means](https://yoshoku.github.io/rumale/doc/Rumale/Clustering/MiniBatchKMeans.html).
26
+ - Fix some typos.
27
+
1
28
  # 0.19.0
29
+ ## Breaking changes
2
30
  - Change mmh3 and mopti gem to non-runtime dependent library.
3
31
  - The mmh3 gem is used in [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
4
32
  You only need to require mmh3 gem when using FeatureHasher.
@@ -18,17 +18,10 @@ require 'rumale/base/cluster_analyzer'
18
18
  require 'rumale/base/transformer'
19
19
  require 'rumale/base/splitter'
20
20
  require 'rumale/base/evaluator'
21
- require 'rumale/optimizer/sgd'
22
- require 'rumale/optimizer/ada_grad'
23
- require 'rumale/optimizer/rmsprop'
24
- require 'rumale/optimizer/adam'
25
- require 'rumale/optimizer/nadam'
26
- require 'rumale/optimizer/yellow_fin'
27
21
  require 'rumale/pipeline/pipeline'
28
22
  require 'rumale/pipeline/feature_union'
29
23
  require 'rumale/kernel_approximation/rbf'
30
24
  require 'rumale/kernel_approximation/nystroem'
31
- require 'rumale/linear_model/base_linear_model'
32
25
  require 'rumale/linear_model/base_sgd'
33
26
  require 'rumale/linear_model/svc'
34
27
  require 'rumale/linear_model/svr'
@@ -41,9 +34,6 @@ require 'rumale/kernel_machine/kernel_svc'
41
34
  require 'rumale/kernel_machine/kernel_pca'
42
35
  require 'rumale/kernel_machine/kernel_fda'
43
36
  require 'rumale/kernel_machine/kernel_ridge'
44
- require 'rumale/polynomial_model/base_factorization_machine'
45
- require 'rumale/polynomial_model/factorization_machine_classifier'
46
- require 'rumale/polynomial_model/factorization_machine_regressor'
47
37
  require 'rumale/multiclass/one_vs_rest_classifier'
48
38
  require 'rumale/nearest_neighbors/vp_tree'
49
39
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -70,6 +60,7 @@ require 'rumale/ensemble/random_forest_regressor'
70
60
  require 'rumale/ensemble/extra_trees_classifier'
71
61
  require 'rumale/ensemble/extra_trees_regressor'
72
62
  require 'rumale/clustering/k_means'
63
+ require 'rumale/clustering/mini_batch_k_means'
73
64
  require 'rumale/clustering/k_medoids'
74
65
  require 'rumale/clustering/gaussian_mixture'
75
66
  require 'rumale/clustering/dbscan'
@@ -92,7 +83,10 @@ require 'rumale/neural_network/mlp_regressor'
92
83
  require 'rumale/neural_network/mlp_classifier'
93
84
  require 'rumale/feature_extraction/hash_vectorizer'
94
85
  require 'rumale/feature_extraction/feature_hasher'
86
+ require 'rumale/feature_extraction/tfidf_transformer'
95
87
  require 'rumale/preprocessing/l2_normalizer'
88
+ require 'rumale/preprocessing/l1_normalizer'
89
+ require 'rumale/preprocessing/max_normalizer'
96
90
  require 'rumale/preprocessing/min_max_scaler'
97
91
  require 'rumale/preprocessing/max_abs_scaler'
98
92
  require 'rumale/preprocessing/standard_scaler'
@@ -101,10 +95,13 @@ require 'rumale/preprocessing/label_binarizer'
101
95
  require 'rumale/preprocessing/label_encoder'
102
96
  require 'rumale/preprocessing/one_hot_encoder'
103
97
  require 'rumale/preprocessing/ordinal_encoder'
98
+ require 'rumale/preprocessing/binarizer'
104
99
  require 'rumale/preprocessing/polynomial_features'
105
100
  require 'rumale/model_selection/k_fold'
101
+ require 'rumale/model_selection/group_k_fold'
106
102
  require 'rumale/model_selection/stratified_k_fold'
107
103
  require 'rumale/model_selection/shuffle_split'
104
+ require 'rumale/model_selection/group_shuffle_split'
108
105
  require 'rumale/model_selection/stratified_shuffle_split'
109
106
  require 'rumale/model_selection/cross_validation'
110
107
  require 'rumale/model_selection/grid_search_cv'
@@ -136,7 +136,7 @@ module Rumale
136
136
  res
137
137
  end
138
138
 
139
- # rubocop:disable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
139
+ # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
140
140
  def condense_tree(hierarchy, min_cluster_size)
141
141
  n_edges = hierarchy.size
142
142
  root = 2 * n_edges
@@ -232,7 +232,7 @@ module Rumale
232
232
  end
233
233
 
234
234
  def flatten(tree, stabilities)
235
- node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
235
+ node_ids = stabilities.keys.sort.reverse.slice(0, stabilities.size - 1)
236
236
 
237
237
  cluster_tree = tree.select { |edge| edge.n_elements > 1 }
238
238
  is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }
@@ -265,7 +265,7 @@ module Rumale
265
265
  end
266
266
  res
267
267
  end
268
- # rubocop:enable Metrics/AbcSize, Metrics/MethodLength, Metrics/PerceivedComplexity
268
+ # rubocop:enable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/MethodLength, Metrics/PerceivedComplexity
269
269
  end
270
270
  end
271
271
  end
@@ -103,7 +103,7 @@ module Rumale
103
103
  # random initialize
104
104
  n_samples = x.shape[0]
105
105
  sub_rng = @rng.dup
106
- rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
106
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
107
107
  @cluster_centers = x[rand_id, true].dup
108
108
  return unless @params[:init] == 'k-means++'
109
109
 
@@ -124,7 +124,7 @@ module Rumale
124
124
  # random initialize
125
125
  n_samples = distance_mat.shape[0]
126
126
  sub_rng = @rng.dup
127
- @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
127
+ @medoid_ids = Numo::Int32.asarray(Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng))
128
128
  return unless @params[:init] == 'k-means++'
129
129
 
130
130
  # k-means++ initialize
@@ -0,0 +1,139 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Clustering
9
+ # MiniBatchKMeans is a class that implements K-Means cluster analysis
10
+ # with mini-batch stochastic gradient descent (SGD).
11
+ #
12
+ # @example
13
+ # analyzer = Rumale::Clustering::MiniBatchKMeans.new(n_clusters: 10, max_iter: 50, batch_size: 50, random_seed: 1)
14
+ # cluster_labels = analyzer.fit_predict(samples)
15
+ #
16
+ # *Reference*
17
+ # - Sculley, D., "Web-scale k-means clustering," Proc. WWW'10, pp. 1177--1178, 2010.
18
+ class MiniBatchKMeans
19
+ include Base::BaseEstimator
20
+ include Base::ClusterAnalyzer
21
+
22
+ # Return the centroids.
23
+ # @return [Numo::DFloat] (shape: [n_clusters, n_features])
24
+ attr_reader :cluster_centers
25
+
26
+ # Return the random generator.
27
+ # @return [Random]
28
+ attr_reader :rng
29
+
30
+ # Create a new cluster analyzer with K-Means method with mini-batch SGD.
31
+ #
32
+ # @param n_clusters [Integer] The number of clusters.
33
+ # @param init [String] The initialization method for centroids ('random' or 'k-means++').
34
+ # @param max_iter [Integer] The maximum number of iterations.
35
+ # @param batch_size [Integer] The size of the mini batches.
36
+ # @param tol [Float] The tolerance of termination criterion.
37
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
38
+ def initialize(n_clusters: 8, init: 'k-means++', max_iter: 100, batch_size: 100, tol: 1.0e-4, random_seed: nil)
39
+ check_params_numeric(n_clusters: n_clusters, max_iter: max_iter, batch_size: batch_size, tol: tol)
40
+ check_params_string(init: init)
41
+ check_params_numeric_or_nil(random_seed: random_seed)
42
+ check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
43
+ @params = {}
44
+ @params[:n_clusters] = n_clusters
45
+ @params[:init] = init == 'random' ? 'random' : 'k-means++'
46
+ @params[:max_iter] = max_iter
47
+ @params[:batch_size] = batch_size
48
+ @params[:tol] = tol
49
+ @params[:random_seed] = random_seed
50
+ @params[:random_seed] ||= srand
51
+ @cluster_centers = nil
52
+ @rng = Random.new(@params[:random_seed])
53
+ end
54
+
55
+ # Analyze clusters with the given training data.
56
+ #
57
+ # @overload fit(x) -> MiniBatchKMeans
58
+ #
59
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
60
+ # @return [MiniBatchKMeans] The learned cluster analyzer itself.
61
+ def fit(x, _y = nil)
62
+ x = check_convert_sample_array(x)
63
+ # initialization.
64
+ n_samples = x.shape[0]
65
+ update_counter = Numo::Int32.zeros(@params[:n_clusters])
66
+ sub_rng = @rng.dup
67
+ init_cluster_centers(x, sub_rng)
68
+ # optimization with mini-batch sgd.
69
+ @params[:max_iter].times do |_t|
70
+ sample_ids = Array(0...n_samples).shuffle(random: sub_rng)
71
+ old_centers = @cluster_centers.dup
72
+ until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
73
+ # sub sampling
74
+ sub_x = x[subset_ids, true]
75
+ # assign nearest centroids
76
+ cluster_labels = assign_cluster(sub_x)
77
+ # update centroids
78
+ @params[:n_clusters].times do |c|
79
+ assigned_bits = cluster_labels.eq(c)
80
+ next unless assigned_bits.count.positive?
81
+
82
+ update_counter[c] += 1
83
+ learning_rate = 1.fdiv(update_counter[c])
84
+ update = sub_x[assigned_bits.where, true].mean(axis: 0)
85
+ @cluster_centers[c, true] = (1 - learning_rate) * @cluster_centers[c, true] + learning_rate * update
86
+ end
87
+ end
88
+ error = Numo::NMath.sqrt(((old_centers - @cluster_centers)**2).sum(axis: 1)).mean
89
+ break if error <= @params[:tol]
90
+ end
91
+ self
92
+ end
93
+
94
+ # Predict cluster labels for samples.
95
+ #
96
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
97
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
98
+ def predict(x)
99
+ x = check_convert_sample_array(x)
100
+ assign_cluster(x)
101
+ end
102
+
103
+ # Analyze clusters and assign samples to clusters.
104
+ #
105
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
106
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
107
+ def fit_predict(x)
108
+ x = check_convert_sample_array(x)
109
+ fit(x)
110
+ predict(x)
111
+ end
112
+
113
+ private
114
+
115
+ def assign_cluster(x)
116
+ distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers)
117
+ distance_matrix.min_index(axis: 1) - Numo::Int32[*0.step(distance_matrix.size - 1, @cluster_centers.shape[0])]
118
+ end
119
+
120
+ def init_cluster_centers(x, sub_rng)
121
+ # random initialize
122
+ n_samples = x.shape[0]
123
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
124
+ @cluster_centers = x[rand_id, true].dup
125
+ return unless @params[:init] == 'k-means++'
126
+
127
+ # k-means++ initialize
128
+ (1...@params[:n_clusters]).each do |n|
129
+ distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers[0...n, true])
130
+ min_distances = distance_matrix.flatten[distance_matrix.min_index(axis: 1)]
131
+ probs = min_distances**2 / (min_distances**2).sum
132
+ cum_probs = probs.cumsum
133
+ selected_id = cum_probs.gt(sub_rng.rand).where.to_a.first
134
+ @cluster_centers[n, true] = x[selected_id, true].dup
135
+ end
136
+ end
137
+ end
138
+ end
139
+ end
@@ -81,7 +81,7 @@ module Rumale
81
81
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
82
82
  # shuffle data indices.
83
83
  if shuffle
84
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
84
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
85
85
  x = x[rand_ids, true].dup
86
86
  y = y[rand_ids].dup
87
87
  end
@@ -118,7 +118,7 @@ module Rumale
118
118
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
119
119
  # shuffle data indices.
120
120
  if shuffle
121
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
121
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
122
122
  x = x[rand_ids, true].dup
123
123
  y = y[rand_ids].dup
124
124
  end
@@ -173,7 +173,7 @@ module Rumale
173
173
  end
174
174
  # shuffle data.
175
175
  if shuffle
176
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
176
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
177
177
  x = x[rand_ids, true].dup
178
178
  y = y[rand_ids].dup
179
179
  end
@@ -225,7 +225,7 @@ module Rumale
225
225
  line = dump_label(label, label_type.to_s)
226
226
  ftvec.to_a.each_with_index do |val, n|
227
227
  idx = n + (zero_based == false ? 1 : 0)
228
- line += format(" %d:#{value_type}", idx, val) if val != 0.0
228
+ line += format(" %d:#{value_type}", idx, val) if val != 0
229
229
  end
230
230
  line
231
231
  end
@@ -77,7 +77,7 @@ module Rumale
77
77
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
78
78
  def transform(x)
79
79
  x = check_convert_sample_array(x)
80
- partial_fit(x, false)
80
+ partial_fit(x, update_comps: false)
81
81
  end
82
82
 
83
83
  # Inverse transform the given transformed data with the learned model.
@@ -91,7 +91,7 @@ module Rumale
91
91
 
92
92
  private
93
93
 
94
- def partial_fit(x, update_comps = true)
94
+ def partial_fit(x, update_comps: true)
95
95
  # initialize some variables.
96
96
  n_samples, n_features = x.shape
97
97
  scale = Math.sqrt(x.mean / @params[:n_components])
@@ -85,7 +85,7 @@ module Rumale
85
85
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
86
86
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
87
87
  # @return [RandomForestClassifier] The learned classifier itself.
88
- def fit(x, y)
88
+ def fit(x, y) # rubocop:disable Metrics/AbcSize
89
89
  x = check_convert_sample_array(x)
90
90
  y = check_convert_label_array(y)
91
91
  check_sample_label_size(x, y)
@@ -79,7 +79,7 @@ module Rumale
79
79
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
80
80
  # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
81
81
  # @return [RandomForestRegressor] The learned regressor itself.
82
- def fit(x, y)
82
+ def fit(x, y) # rubocop:disable Metrics/AbcSize
83
83
  x = check_convert_sample_array(x)
84
84
  y = check_convert_tvalue_array(y)
85
85
  check_sample_tvalue_size(x, y)
@@ -67,7 +67,7 @@ module Rumale
67
67
  def transform(x)
68
68
  raise 'FeatureHasher#transform requires Mmh3 but that is not loaded.' unless enable_mmh3?
69
69
 
70
- x = [x] unless x.is_a?(Array)
70
+ x = [x] unless x.is_a?(Array) # rubocop:disable Style/ArrayCoercion
71
71
  n_samples = x.size
72
72
 
73
73
  z = Numo::DFloat.zeros(n_samples, n_features)
@@ -99,7 +99,7 @@ module Rumale
99
99
  # @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
100
100
  # @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
101
101
  def transform(x)
102
- x = [x] unless x.is_a?(Array)
102
+ x = [x] unless x.is_a?(Array) # rubocop:disable Style/ArrayCoercion
103
103
  n_samples = x.size
104
104
  n_features = @vocabulary.size
105
105
  z = Numo::DFloat.zeros(n_samples, n_features)