rumale 0.19.2 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bff2e1e6182aa954be00ed107ed1bd81220298f89514b4b31304f8890ff27c4
4
- data.tar.gz: '09b185f468baf9dbec6280fa6c06984c95919308f1d2247277bf30348ed392bc'
3
+ metadata.gz: dc3413c05ad7c365117adc4abbc304ff1851fa9a3ff69fef3c69e730d9a2b834
4
+ data.tar.gz: 8895cce8b350c4e245aabb5e3e4c4036655fa8d24a72f6481e0d2f8c9869fa54
5
5
  SHA512:
6
- metadata.gz: 6d8f1fcaffcd6714c6156fc615d87e6b6950e82ab40fc7434cfc5a014d6c08eb0170ee7c45d8fed978c2a52f839b1ce647fd6e088cbab2ea45e517b34c88407a
7
- data.tar.gz: b255ae4c24cdc91ebad59f79ee5a58c5d2a5ffa79bda0ac221e3a33bd824d2fd94e5cd83f3a06e54a2dc537a074276cea5a71651deeee2a304d23e963ff92c9d
6
+ metadata.gz: fe10c975f286a4c9ac155d29310d61d1f180cbcc909ec7bdba3925973b6b9857635befc9bf4938cf28a6ef50c8011894b7b15768735f6200c27ce912907e5fb1
7
+ data.tar.gz: 327cce25145c1ca3f5623f84b4163560bdbee8245009c3d8e1c3318f61dec94b58a7395ebc3538e7b04a9702af08b077e7f426c54f7c5d4fd3b0fcf11c4744cf
@@ -24,6 +24,15 @@ Style/HashTransformKeys:
24
24
  Style/HashTransformValues:
25
25
  Enabled: true
26
26
 
27
+ Lint/DeprecatedOpenSSLConstant:
28
+ Enabled: true
29
+
30
+ Lint/DuplicateElsifCondition:
31
+ Enabled: true
32
+
33
+ Lint/MixedRegexpCaptureTypes:
34
+ Enabled: true
35
+
27
36
  Lint/RaiseException:
28
37
  Enabled: true
29
38
 
@@ -34,7 +43,6 @@ Layout/LineLength:
34
43
  Max: 145
35
44
  IgnoredPatterns: ['(\A|\s)#']
36
45
 
37
-
38
46
  Metrics/ModuleLength:
39
47
  Max: 200
40
48
 
@@ -70,15 +78,48 @@ Naming/MethodParameterName:
70
78
  Naming/ConstantName:
71
79
  Enabled: false
72
80
 
81
+ Style/AccessorGrouping:
82
+ Enabled: true
83
+
84
+ Style/ArrayCoercion:
85
+ Enabled: true
86
+
87
+ Style/BisectedAttrAccessor:
88
+ Enabled: true
89
+
90
+ Style/CaseLikeIf:
91
+ Enabled: true
92
+
73
93
  Style/ExponentialNotation:
74
94
  Enabled: true
75
95
 
76
96
  Style/FormatStringToken:
77
97
  Enabled: false
78
98
 
99
+ Style/HashAsLastArrayItem:
100
+ Enabled: true
101
+
102
+ Style/HashLikeCase:
103
+ Enabled: true
104
+
79
105
  Style/NumericLiterals:
80
106
  Enabled: false
81
107
 
108
+ Style/RedundantAssignment:
109
+ Enabled: true
110
+
111
+ Style/RedundantFetchBlock:
112
+ Enabled: true
113
+
114
+ Style/RedundantFileExtensionInRequire:
115
+ Enabled: true
116
+
117
+ Style/RedundantRegexpCharacterClass:
118
+ Enabled: true
119
+
120
+ Style/RedundantRegexpEscape:
121
+ Enabled: true
122
+
82
123
  Style/SlicingWithRange:
83
124
  Enabled: true
84
125
 
@@ -91,6 +132,30 @@ Layout/EmptyLinesAroundAttributeAccessor:
91
132
  Layout/SpaceAroundMethodCallOperator:
92
133
  Enabled: true
93
134
 
135
+ Performance/AncestorsInclude:
136
+ Enabled: true
137
+
138
+ Performance/BigDecimalWithNumericArgument:
139
+ Enabled: true
140
+
141
+ Performance/RedundantSortBlock:
142
+ Enabled: true
143
+
144
+ Performance/RedundantStringChars:
145
+ Enabled: true
146
+
147
+ Performance/ReverseFirst:
148
+ Enabled: true
149
+
150
+ Performance/SortReverse:
151
+ Enabled: true
152
+
153
+ Performance/Squeeze:
154
+ Enabled: true
155
+
156
+ Performance/StringInclude:
157
+ Enabled: true
158
+
94
159
  RSpec/MultipleExpectations:
95
160
  Enabled: false
96
161
 
@@ -1,3 +1,8 @@
1
+ # 0.19.3
2
+ - Add preprocessing class for [Binarizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/Binarizer.html)
3
+ - Add preprocessing class for [MaxNormalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/MaxNormalizer.html)
4
+ - Refactor some code with RuboCop.
5
+
1
6
  # 0.19.2
2
7
  - Fix L2Normalizer to avoid zero divide.
3
8
  - Add preprocessing class for [L1Normalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/L1Normalizer.html).
@@ -96,6 +96,7 @@ require 'rumale/feature_extraction/feature_hasher'
96
96
  require 'rumale/feature_extraction/tfidf_transformer'
97
97
  require 'rumale/preprocessing/l2_normalizer'
98
98
  require 'rumale/preprocessing/l1_normalizer'
99
+ require 'rumale/preprocessing/max_normalizer'
99
100
  require 'rumale/preprocessing/min_max_scaler'
100
101
  require 'rumale/preprocessing/max_abs_scaler'
101
102
  require 'rumale/preprocessing/standard_scaler'
@@ -104,6 +105,7 @@ require 'rumale/preprocessing/label_binarizer'
104
105
  require 'rumale/preprocessing/label_encoder'
105
106
  require 'rumale/preprocessing/one_hot_encoder'
106
107
  require 'rumale/preprocessing/ordinal_encoder'
108
+ require 'rumale/preprocessing/binarizer'
107
109
  require 'rumale/preprocessing/polynomial_features'
108
110
  require 'rumale/model_selection/k_fold'
109
111
  require 'rumale/model_selection/stratified_k_fold'
@@ -232,7 +232,7 @@ module Rumale
232
232
  end
233
233
 
234
234
  def flatten(tree, stabilities)
235
- node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
235
+ node_ids = stabilities.keys.sort.reverse.slice(0, stabilities.size - 1)
236
236
 
237
237
  cluster_tree = tree.select { |edge| edge.n_elements > 1 }
238
238
  is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }
@@ -103,7 +103,7 @@ module Rumale
103
103
  # random initialize
104
104
  n_samples = x.shape[0]
105
105
  sub_rng = @rng.dup
106
- rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
106
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
107
107
  @cluster_centers = x[rand_id, true].dup
108
108
  return unless @params[:init] == 'k-means++'
109
109
 
@@ -124,7 +124,7 @@ module Rumale
124
124
  # random initialize
125
125
  n_samples = distance_mat.shape[0]
126
126
  sub_rng = @rng.dup
127
- @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
127
+ @medoid_ids = Numo::Int32.asarray(Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng))
128
128
  return unless @params[:init] == 'k-means++'
129
129
 
130
130
  # k-means++ initialize
@@ -67,7 +67,7 @@ module Rumale
67
67
  init_cluster_centers(x, sub_rng)
68
68
  # optimization with mini-batch sgd.
69
69
  @params[:max_iter].times do |_t|
70
- sample_ids = [*0...n_samples].shuffle(random: sub_rng)
70
+ sample_ids = Array(0...n_samples).shuffle(random: sub_rng)
71
71
  old_centers = @cluster_centers.dup
72
72
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
73
73
  # sub sampling
@@ -120,7 +120,7 @@ module Rumale
120
120
  def init_cluster_centers(x, sub_rng)
121
121
  # random initialize
122
122
  n_samples = x.shape[0]
123
- rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
123
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
124
124
  @cluster_centers = x[rand_id, true].dup
125
125
  return unless @params[:init] == 'k-means++'
126
126
 
@@ -81,7 +81,7 @@ module Rumale
81
81
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
82
82
  # shuffle data indices.
83
83
  if shuffle
84
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
84
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
85
85
  x = x[rand_ids, true].dup
86
86
  y = y[rand_ids].dup
87
87
  end
@@ -118,7 +118,7 @@ module Rumale
118
118
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
119
119
  # shuffle data indices.
120
120
  if shuffle
121
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
121
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
122
122
  x = x[rand_ids, true].dup
123
123
  y = y[rand_ids].dup
124
124
  end
@@ -173,7 +173,7 @@ module Rumale
173
173
  end
174
174
  # shuffle data.
175
175
  if shuffle
176
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
176
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
177
177
  x = x[rand_ids, true].dup
178
178
  y = y[rand_ids].dup
179
179
  end
@@ -69,7 +69,7 @@ module Rumale
69
69
  n_components = [1, [@params[:n_components], n_samples].min].max
70
70
 
71
71
  # random sampling.
72
- @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
72
+ @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
73
73
  @components = x[@component_indices, true]
74
74
 
75
75
  # calculate normalizing factor.
@@ -172,7 +172,7 @@ module Rumale
172
172
  # Start optimization.
173
173
  @params[:max_iter].times do |t|
174
174
  # random sampling
175
- rand_ids = [*0...n_training_samples].shuffle(random: sub_rng) if rand_ids.empty?
175
+ rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
176
176
  target_id = rand_ids.shift
177
177
  # update the weight vector
178
178
  func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
@@ -56,7 +56,7 @@ module Rumale
56
56
  samples = @params[:fit_bias] ? expand_feature(x) : x
57
57
  # Initialize some variables.
58
58
  n_samples, n_features = samples.shape
59
- rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
59
+ rand_ids = Array(0...n_samples).shuffle(random: @rng.dup)
60
60
  weight = Numo::DFloat.zeros(n_features)
61
61
  optimizer = @params[:optimizer].dup
62
62
  # Optimization.
@@ -209,7 +209,7 @@ module Rumale
209
209
  l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
210
210
  # Optimization.
211
211
  @params[:max_iter].times do |t|
212
- sample_ids = [*0...n_samples]
212
+ sample_ids = Array(0...n_samples)
213
213
  sample_ids.shuffle!(random: sub_rng)
214
214
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
215
215
  # sampling
@@ -69,10 +69,11 @@ module Rumale
69
69
  # the return_train_score is false.
70
70
  def perform(x, y)
71
71
  x = check_convert_sample_array(x)
72
- if @estimator.is_a?(Rumale::Base::Classifier)
72
+ case @estimator
73
+ when Rumale::Base::Classifier
73
74
  y = check_convert_label_array(y)
74
75
  check_sample_label_size(x, y)
75
- elsif @estimator.is_a?(Rumale::Base::Regressor)
76
+ when Rumale::Base::Regressor
76
77
  y = check_convert_tvalue_array(y)
77
78
  check_sample_tvalue_size(x, y)
78
79
  else
@@ -62,7 +62,7 @@ module Rumale
62
62
  end
63
63
  sub_rng = @rng.dup
64
64
  # Splits dataset ids to each fold.
65
- dataset_ids = [*0...n_samples]
65
+ dataset_ids = Array(0...n_samples)
66
66
  dataset_ids.shuffle!(random: sub_rng) if @shuffle
67
67
  fold_sets = Array.new(@n_splits) do |n|
68
68
  n_fold_samples = n_samples / @n_splits
@@ -74,7 +74,7 @@ module Rumale
74
74
  end
75
75
  sub_rng = @rng.dup
76
76
  # Returns array consisting of the training and testing ids for each fold.
77
- dataset_ids = [*0...n_samples]
77
+ dataset_ids = Array(0...n_samples)
78
78
  Array.new(@n_splits) do
79
79
  test_ids = dataset_ids.sample(n_test_samples, random: sub_rng)
80
80
  train_ids = if @train_size.nil?
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/base/base_estimator.rb'
4
- require 'rumale/base/classifier.rb'
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
5
 
6
6
  module Rumale
7
7
  # This module consists of the classes that implement multi-class classification strategy.
@@ -30,7 +30,7 @@ module Rumale
30
30
  @params = {}
31
31
  @params[:min_samples_leaf] = min_samples_leaf
32
32
  @data = x
33
- @tree = build_tree(Numo::Int32.cast([*0...@data.shape[0]]))
33
+ @tree = build_tree(Numo::Int32.cast(Array(0...@data.shape[0])))
34
34
  end
35
35
 
36
36
  # Search k-nearest neighbors of given query point.
@@ -222,7 +222,7 @@ module Rumale
222
222
  n_samples = x.shape[0]
223
223
 
224
224
  @params[:max_iter].times do |t|
225
- sample_ids = [*0...n_samples]
225
+ sample_ids = Array(0...n_samples)
226
226
  sample_ids.shuffle!(random: srng)
227
227
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
228
228
  # random sampling
@@ -69,7 +69,7 @@ module Rumale
69
69
  factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
70
70
  # Start optimization.
71
71
  @params[:max_iter].times do |t|
72
- sample_ids = [*0...n_samples]
72
+ sample_ids = Array(0...n_samples)
73
73
  sample_ids.shuffle!(random: sub_rng)
74
74
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
75
75
  # Sampling.
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Binarize samples according to a threshold
9
+ #
10
+ # @example
11
+ # binarizer = Rumale::Preprocessing::Binarizer.new
12
+ # x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
13
+ # b = binarizer.transform(x)
14
+ # p b
15
+ #
16
+ # # Numo::DFloat#shape=[3, 2]
17
+ # # [[0, 1],
18
+ # # [1, 0],
19
+ # # [1, 1]]
20
+ class Binarizer
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Create a new transformer for binarization.
25
+ # @param threshold [Float] The threshold value for binarization.
26
+ def initialize(threshold: 0.0)
27
+ check_params_numeric(threshold: threshold)
28
+ @params = { threshold: threshold }
29
+ end
30
+
31
+ # This method does nothing and returns the object itself.
32
+ # This method exists for compatibility with other transformers.
33
+ #
34
+ # @overload fit() -> Binarizer
35
+ #
36
+ # @return [Binarizer]
37
+ def fit(_x = nil, _y = nil)
38
+ self
39
+ end
40
+
41
+ # Binarize each sample.
42
+ #
43
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
44
+ # @return [Numo::DFloat] The binarized samples.
45
+ def transform(x)
46
+ x = check_convert_sample_array(x)
47
+ x.class.cast(x.gt(@params[:threshold]))
48
+ end
49
+
50
+ # The output of this method is the same as that of the transform method.
51
+ # This method exists for compatibility with other transformers.
52
+ #
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
54
+ # @return [Numo::DFloat] The binarized samples.
55
+ def fit_transform(x, _y = nil)
56
+ fit(x).transform(x)
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Normalize samples with the maximum of the absolute values.
9
+ #
10
+ # @example
11
+ # normalizer = Rumale::Preprocessing::MaxNormalizer.new
12
+ # new_samples = normalizer.fit_transform(samples)
13
+ class MaxNormalizer
14
+ include Base::BaseEstimator
15
+ include Base::Transformer
16
+
17
+ # Return the vector consisting of the maximum norm of each sample.
18
+ # @return [Numo::DFloat] (shape: [n_samples])
19
+ attr_reader :norm_vec # :nodoc:
20
+
21
+ # Create a new normalizer for normalizing to max-norm.
22
+ def initialize
23
+ @params = {}
24
+ @norm_vec = nil
25
+ end
26
+
27
+ # Calculate the maximum norms of each sample.
28
+ #
29
+ # @overload fit(x) -> MaxNormalizer
30
+ #
31
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
32
+ # @return [MaxNormalizer]
33
+ def fit(x, _y = nil)
34
+ x = check_convert_sample_array(x)
35
+ @norm_vec = x.abs.max(1)
36
+ @norm_vec[@norm_vec.eq(0)] = 1
37
+ self
38
+ end
39
+
40
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ x = check_convert_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.expand_dims(1)
50
+ end
51
+
52
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.19.2'
6
+ VERSION = '0.19.3'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.2
4
+ version: 0.19.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-20 00:00:00.000000000 Z
11
+ date: 2020-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -167,11 +167,13 @@ files:
167
167
  - lib/rumale/polynomial_model/factorization_machine_classifier.rb
168
168
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
169
169
  - lib/rumale/preprocessing/bin_discretizer.rb
170
+ - lib/rumale/preprocessing/binarizer.rb
170
171
  - lib/rumale/preprocessing/l1_normalizer.rb
171
172
  - lib/rumale/preprocessing/l2_normalizer.rb
172
173
  - lib/rumale/preprocessing/label_binarizer.rb
173
174
  - lib/rumale/preprocessing/label_encoder.rb
174
175
  - lib/rumale/preprocessing/max_abs_scaler.rb
176
+ - lib/rumale/preprocessing/max_normalizer.rb
175
177
  - lib/rumale/preprocessing/min_max_scaler.rb
176
178
  - lib/rumale/preprocessing/one_hot_encoder.rb
177
179
  - lib/rumale/preprocessing/ordinal_encoder.rb