rumale 0.19.0 → 0.20.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +5 -29
  3. data/CHANGELOG.md +28 -0
  4. data/lib/rumale.rb +7 -10
  5. data/lib/rumale/clustering/hdbscan.rb +3 -3
  6. data/lib/rumale/clustering/k_means.rb +1 -1
  7. data/lib/rumale/clustering/k_medoids.rb +1 -1
  8. data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
  9. data/lib/rumale/dataset.rb +4 -4
  10. data/lib/rumale/decomposition/nmf.rb +2 -2
  11. data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
  12. data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
  13. data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
  14. data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
  15. data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
  16. data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
  17. data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
  18. data/lib/rumale/linear_model/base_sgd.rb +1 -1
  19. data/lib/rumale/manifold/tsne.rb +1 -1
  20. data/lib/rumale/model_selection/cross_validation.rb +3 -2
  21. data/lib/rumale/model_selection/group_k_fold.rb +93 -0
  22. data/lib/rumale/model_selection/group_shuffle_split.rb +115 -0
  23. data/lib/rumale/model_selection/k_fold.rb +1 -1
  24. data/lib/rumale/model_selection/shuffle_split.rb +5 -5
  25. data/lib/rumale/model_selection/stratified_k_fold.rb +1 -1
  26. data/lib/rumale/model_selection/stratified_shuffle_split.rb +13 -9
  27. data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
  28. data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
  29. data/lib/rumale/neural_network/adam.rb +1 -1
  30. data/lib/rumale/neural_network/base_mlp.rb +1 -1
  31. data/lib/rumale/preprocessing/binarizer.rb +60 -0
  32. data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
  33. data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
  34. data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
  35. data/lib/rumale/probabilistic_output.rb +1 -1
  36. data/lib/rumale/version.rb +1 -1
  37. metadata +12 -15
  38. data/lib/rumale/linear_model/base_linear_model.rb +0 -102
  39. data/lib/rumale/optimizer/ada_grad.rb +0 -42
  40. data/lib/rumale/optimizer/adam.rb +0 -56
  41. data/lib/rumale/optimizer/nadam.rb +0 -67
  42. data/lib/rumale/optimizer/rmsprop.rb +0 -50
  43. data/lib/rumale/optimizer/sgd.rb +0 -46
  44. data/lib/rumale/optimizer/yellow_fin.rb +0 -104
  45. data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -125
  46. data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -220
  47. data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -134
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/base/base_estimator.rb'
4
- require 'rumale/base/classifier.rb'
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
5
 
6
6
  module Rumale
7
7
  # This module consists of the classes that implement multi-class classification strategy.
@@ -30,7 +30,7 @@ module Rumale
30
30
  @params = {}
31
31
  @params[:min_samples_leaf] = min_samples_leaf
32
32
  @data = x
33
- @tree = build_tree(Numo::Int32.cast([*0...@data.shape[0]]))
33
+ @tree = build_tree(Numo::Int32.cast(Array(0...@data.shape[0])))
34
34
  end
35
35
 
36
36
  # Search k-nearest neighbors of given query point.
@@ -32,7 +32,7 @@ module Rumale
32
32
  end
33
33
 
34
34
  # @!visibility private
35
- # Calculate the updated weight with Nadam adaptive learning rate.
35
+ # Calculate the updated weight with Adam adaptive learning rate.
36
36
  #
37
37
  # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
38
38
  # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
@@ -222,7 +222,7 @@ module Rumale
222
222
  n_samples = x.shape[0]
223
223
 
224
224
  @params[:max_iter].times do |t|
225
- sample_ids = [*0...n_samples]
225
+ sample_ids = Array(0...n_samples)
226
226
  sample_ids.shuffle!(random: srng)
227
227
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
228
228
  # random sampling
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Binarize samples according to a threshold
9
+ #
10
+ # @example
11
+ # binarizer = Rumale::Preprocessing::Binarizer.new
12
+ # x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
13
+ # b = binarizer.transform(x)
14
+ # p b
15
+ #
16
+ # # Numo::DFloat#shape=[3, 2]
17
+ # # [[0, 1],
18
+ # # [1, 0],
19
+ # # [1, 1]]
20
+ class Binarizer
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Create a new transformer for binarization.
25
+ # @param threshold [Float] The threshold value for binarization.
26
+ def initialize(threshold: 0.0)
27
+ check_params_numeric(threshold: threshold)
28
+ @params = { threshold: threshold }
29
+ end
30
+
31
+ # This method does nothing and returns the object itself.
32
+ # This method exists for compatibility with other transformers.
33
+ #
34
+ # @overload fit() -> Binarizer
35
+ #
36
+ # @return [Binarizer]
37
+ def fit(_x = nil, _y = nil)
38
+ self
39
+ end
40
+
41
+ # Binarize each sample.
42
+ #
43
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
44
+ # @return [Numo::DFloat] The binarized samples.
45
+ def transform(x)
46
+ x = check_convert_sample_array(x)
47
+ x.class.cast(x.gt(@params[:threshold]))
48
+ end
49
+
50
+ # The output of this method is the same as that of the transform method.
51
+ # This method exists for compatibility with other transformers.
52
+ #
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
54
+ # @return [Numo::DFloat] The binarized samples.
55
+ def fit_transform(x, _y = nil)
56
+ fit(x).transform(x)
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Normalize samples to unit L1-norm.
9
+ #
10
+ # @example
11
+ # normalizer = Rumale::Preprocessing::L1Normalizer.new
12
+ # new_samples = normalizer.fit_transform(samples)
13
+ class L1Normalizer
14
+ include Base::BaseEstimator
15
+ include Base::Transformer
16
+
17
+ # Return the vector consisting of the L1-norm of each sample.
18
+ # @return [Numo::DFloat] (shape: [n_samples])
19
+ attr_reader :norm_vec # :nodoc:
20
+
21
+ # Create a new normalizer for normalizing to L1-norm.
22
+ def initialize
23
+ @params = {}
24
+ @norm_vec = nil
25
+ end
26
+
27
+ # Calculate L1-norms of each sample.
28
+ #
29
+ # @overload fit(x) -> L1Normalizer
30
+ #
31
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
32
+ # @return [L1Normalizer]
33
+ def fit(x, _y = nil)
34
+ x = check_convert_sample_array(x)
35
+ @norm_vec = x.abs.sum(1)
36
+ @norm_vec[@norm_vec.eq(0)] = 1
37
+ self
38
+ end
39
+
40
+ # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ x = check_convert_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.expand_dims(1)
50
+ end
51
+
52
+ # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -34,6 +34,7 @@ module Rumale
34
34
  def fit(x, _y = nil)
35
35
  x = check_convert_sample_array(x)
36
36
  @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
37
+ @norm_vec[@norm_vec.eq(0)] = 1
37
38
  self
38
39
  end
39
40
 
@@ -46,7 +47,7 @@ module Rumale
46
47
  def fit_transform(x, _y = nil)
47
48
  x = check_convert_sample_array(x)
48
49
  fit(x)
49
- x / @norm_vec.tile(x.shape[1], 1).transpose
50
+ x / @norm_vec.expand_dims(1)
50
51
  end
51
52
 
52
53
  # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Normalize samples with the maximum of the absolute values.
9
+ #
10
+ # @example
11
+ # normalizer = Rumale::Preprocessing::MaxNormalizer.new
12
+ # new_samples = normalizer.fit_transform(samples)
13
+ class MaxNormalizer
14
+ include Base::BaseEstimator
15
+ include Base::Transformer
16
+
17
+ # Return the vector consisting of the maximum norm of each sample.
18
+ # @return [Numo::DFloat] (shape: [n_samples])
19
+ attr_reader :norm_vec # :nodoc:
20
+
21
+ # Create a new normalizer for normalizing to max-norm.
22
+ def initialize
23
+ @params = {}
24
+ @norm_vec = nil
25
+ end
26
+
27
+ # Calculate the maximum norms of each sample.
28
+ #
29
+ # @overload fit(x) -> MaxNormalizer
30
+ #
31
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
32
+ # @return [MaxNormalizer]
33
+ def fit(x, _y = nil)
34
+ x = check_convert_sample_array(x)
35
+ @norm_vec = x.abs.max(1)
36
+ @norm_vec[@norm_vec.eq(0)] = 1
37
+ self
38
+ end
39
+
40
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ x = check_convert_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.expand_dims(1)
50
+ end
51
+
52
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -98,7 +98,7 @@ module Rumale
98
98
 
99
99
  def hessian_matrix(probs, df, sigma)
100
100
  sub = probs * (1 - probs)
101
- h11 = (df * df * sub).sum + sigma
101
+ h11 = (df**2 * sub).sum + sigma
102
102
  h22 = sub.sum + sigma
103
103
  h21 = (df * sub).sum
104
104
  Numo::DFloat[[h11, h21], [h21, h22]]
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.19.0'
6
+ VERSION = '0.20.1'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.0
4
+ version: 0.20.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-23 00:00:00.000000000 Z
11
+ date: 2020-08-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -72,6 +72,7 @@ files:
72
72
  - lib/rumale/clustering/hdbscan.rb
73
73
  - lib/rumale/clustering/k_means.rb
74
74
  - lib/rumale/clustering/k_medoids.rb
75
+ - lib/rumale/clustering/mini_batch_k_means.rb
75
76
  - lib/rumale/clustering/power_iteration.rb
76
77
  - lib/rumale/clustering/single_linkage.rb
77
78
  - lib/rumale/clustering/snn.rb
@@ -112,13 +113,13 @@ files:
112
113
  - lib/rumale/evaluation_measure/silhouette_score.rb
113
114
  - lib/rumale/feature_extraction/feature_hasher.rb
114
115
  - lib/rumale/feature_extraction/hash_vectorizer.rb
116
+ - lib/rumale/feature_extraction/tfidf_transformer.rb
115
117
  - lib/rumale/kernel_approximation/nystroem.rb
116
118
  - lib/rumale/kernel_approximation/rbf.rb
117
119
  - lib/rumale/kernel_machine/kernel_fda.rb
118
120
  - lib/rumale/kernel_machine/kernel_pca.rb
119
121
  - lib/rumale/kernel_machine/kernel_ridge.rb
120
122
  - lib/rumale/kernel_machine/kernel_svc.rb
121
- - lib/rumale/linear_model/base_linear_model.rb
122
123
  - lib/rumale/linear_model/base_sgd.rb
123
124
  - lib/rumale/linear_model/elastic_net.rb
124
125
  - lib/rumale/linear_model/lasso.rb
@@ -134,6 +135,8 @@ files:
134
135
  - lib/rumale/model_selection/cross_validation.rb
135
136
  - lib/rumale/model_selection/function.rb
136
137
  - lib/rumale/model_selection/grid_search_cv.rb
138
+ - lib/rumale/model_selection/group_k_fold.rb
139
+ - lib/rumale/model_selection/group_shuffle_split.rb
137
140
  - lib/rumale/model_selection/k_fold.rb
138
141
  - lib/rumale/model_selection/shuffle_split.rb
139
142
  - lib/rumale/model_selection/stratified_k_fold.rb
@@ -152,23 +155,17 @@ files:
152
155
  - lib/rumale/neural_network/base_mlp.rb
153
156
  - lib/rumale/neural_network/mlp_classifier.rb
154
157
  - lib/rumale/neural_network/mlp_regressor.rb
155
- - lib/rumale/optimizer/ada_grad.rb
156
- - lib/rumale/optimizer/adam.rb
157
- - lib/rumale/optimizer/nadam.rb
158
- - lib/rumale/optimizer/rmsprop.rb
159
- - lib/rumale/optimizer/sgd.rb
160
- - lib/rumale/optimizer/yellow_fin.rb
161
158
  - lib/rumale/pairwise_metric.rb
162
159
  - lib/rumale/pipeline/feature_union.rb
163
160
  - lib/rumale/pipeline/pipeline.rb
164
- - lib/rumale/polynomial_model/base_factorization_machine.rb
165
- - lib/rumale/polynomial_model/factorization_machine_classifier.rb
166
- - lib/rumale/polynomial_model/factorization_machine_regressor.rb
167
161
  - lib/rumale/preprocessing/bin_discretizer.rb
162
+ - lib/rumale/preprocessing/binarizer.rb
163
+ - lib/rumale/preprocessing/l1_normalizer.rb
168
164
  - lib/rumale/preprocessing/l2_normalizer.rb
169
165
  - lib/rumale/preprocessing/label_binarizer.rb
170
166
  - lib/rumale/preprocessing/label_encoder.rb
171
167
  - lib/rumale/preprocessing/max_abs_scaler.rb
168
+ - lib/rumale/preprocessing/max_normalizer.rb
172
169
  - lib/rumale/preprocessing/min_max_scaler.rb
173
170
  - lib/rumale/preprocessing/one_hot_encoder.rb
174
171
  - lib/rumale/preprocessing/ordinal_encoder.rb
@@ -196,7 +193,7 @@ metadata:
196
193
  source_code_uri: https://github.com/yoshoku/rumale
197
194
  documentation_uri: https://yoshoku.github.io/rumale/doc/
198
195
  bug_tracker_uri: https://github.com/yoshoku/rumale/issues
199
- post_install_message:
196
+ post_install_message:
200
197
  rdoc_options: []
201
198
  require_paths:
202
199
  - lib
@@ -212,7 +209,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
212
209
  version: '0'
213
210
  requirements: []
214
211
  rubygems_version: 3.1.2
215
- signing_key:
212
+ signing_key:
216
213
  specification_version: 4
217
214
  summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
218
215
  algorithms with interfaces similar to Scikit-Learn in Python.
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rumale/base/base_estimator'
4
- require 'rumale/optimizer/nadam'
5
-
6
- module Rumale
7
- module LinearModel
8
- # @note
9
- # In version 0.17.0, a new linear model abstract class called BaseSGD is introduced.
10
- # BaseLinearModel is deprecated and will be removed in the future.
11
- # @deprecated Use BaseSGD class instead. This class will be deleted in version 0.20.0.
12
- #
13
- # BaseLinearModel is an abstract class for implementation of linear estimator
14
- # with mini-batch stochastic gradient descent optimization.
15
- # This class is used for internal process.
16
- class BaseLinearModel
17
- # :nocov:
18
- include Base::BaseEstimator
19
-
20
- # Initialize a linear estimator.
21
- #
22
- # @param reg_param [Float] The regularization parameter.
23
- # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
24
- # @param bias_scale [Float] The scale of the bias term.
25
- # @param max_iter [Integer] The maximum number of iterations.
26
- # @param batch_size [Integer] The size of the mini batches.
27
- # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
28
- # If nil is given, Nadam is used.
29
- # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
30
- # If nil is given, the methods do not execute in parallel.
31
- # If zero or less is given, it becomes equal to the number of processors.
32
- # @param random_seed [Integer] The seed value using to initialize the random generator.
33
- def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
34
- max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
35
- warn 'warning: BaseLinearModel is deprecated. Use BaseSGD instead.'
36
- @params = {}
37
- @params[:reg_param] = reg_param
38
- @params[:fit_bias] = fit_bias
39
- @params[:bias_scale] = bias_scale
40
- @params[:max_iter] = max_iter
41
- @params[:batch_size] = batch_size
42
- @params[:optimizer] = optimizer
43
- @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
44
- @params[:n_jobs] = n_jobs
45
- @params[:random_seed] = random_seed
46
- @params[:random_seed] ||= srand
47
- @weight_vec = nil
48
- @bias_term = nil
49
- @rng = Random.new(@params[:random_seed])
50
- end
51
-
52
- private
53
-
54
- def partial_fit(x, y)
55
- # Expand feature vectors for bias term.
56
- samples = @params[:fit_bias] ? expand_feature(x) : x
57
- # Initialize some variables.
58
- n_samples, n_features = samples.shape
59
- rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
60
- weight = Numo::DFloat.zeros(n_features)
61
- optimizer = @params[:optimizer].dup
62
- # Optimization.
63
- @params[:max_iter].times do |_t|
64
- # Random sampling
65
- subset_ids = rand_ids.shift(@params[:batch_size])
66
- rand_ids.concat(subset_ids)
67
- sub_samples = samples[subset_ids, true]
68
- sub_targets = y[subset_ids]
69
- # Update weight.
70
- loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
71
- next if loss_gradient.ne(0.0).count.zero?
72
-
73
- weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
74
- end
75
- split_weight(weight)
76
- end
77
-
78
- def calc_loss_gradient(_x, _y, _weight)
79
- raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
80
- end
81
-
82
- def calc_new_weight(optimizer, x, weight, loss_gradient)
83
- weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
84
- optimizer.call(weight, weight_gradient)
85
- end
86
-
87
- def expand_feature(x)
88
- n_samples = x.shape[0]
89
- Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
90
- end
91
-
92
- def split_weight(weight)
93
- if @params[:fit_bias]
94
- [weight[0...-1].dup, weight[-1]]
95
- else
96
- [weight, 0.0]
97
- end
98
- end
99
- # :nocov:
100
- end
101
- end
102
- end