rumale 0.19.2 → 0.19.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1bff2e1e6182aa954be00ed107ed1bd81220298f89514b4b31304f8890ff27c4
4
- data.tar.gz: '09b185f468baf9dbec6280fa6c06984c95919308f1d2247277bf30348ed392bc'
3
+ metadata.gz: dc3413c05ad7c365117adc4abbc304ff1851fa9a3ff69fef3c69e730d9a2b834
4
+ data.tar.gz: 8895cce8b350c4e245aabb5e3e4c4036655fa8d24a72f6481e0d2f8c9869fa54
5
5
  SHA512:
6
- metadata.gz: 6d8f1fcaffcd6714c6156fc615d87e6b6950e82ab40fc7434cfc5a014d6c08eb0170ee7c45d8fed978c2a52f839b1ce647fd6e088cbab2ea45e517b34c88407a
7
- data.tar.gz: b255ae4c24cdc91ebad59f79ee5a58c5d2a5ffa79bda0ac221e3a33bd824d2fd94e5cd83f3a06e54a2dc537a074276cea5a71651deeee2a304d23e963ff92c9d
6
+ metadata.gz: fe10c975f286a4c9ac155d29310d61d1f180cbcc909ec7bdba3925973b6b9857635befc9bf4938cf28a6ef50c8011894b7b15768735f6200c27ce912907e5fb1
7
+ data.tar.gz: 327cce25145c1ca3f5623f84b4163560bdbee8245009c3d8e1c3318f61dec94b58a7395ebc3538e7b04a9702af08b077e7f426c54f7c5d4fd3b0fcf11c4744cf
@@ -24,6 +24,15 @@ Style/HashTransformKeys:
24
24
  Style/HashTransformValues:
25
25
  Enabled: true
26
26
 
27
+ Lint/DeprecatedOpenSSLConstant:
28
+ Enabled: true
29
+
30
+ Lint/DuplicateElsifCondition:
31
+ Enabled: true
32
+
33
+ Lint/MixedRegexpCaptureTypes:
34
+ Enabled: true
35
+
27
36
  Lint/RaiseException:
28
37
  Enabled: true
29
38
 
@@ -34,7 +43,6 @@ Layout/LineLength:
34
43
  Max: 145
35
44
  IgnoredPatterns: ['(\A|\s)#']
36
45
 
37
-
38
46
  Metrics/ModuleLength:
39
47
  Max: 200
40
48
 
@@ -70,15 +78,48 @@ Naming/MethodParameterName:
70
78
  Naming/ConstantName:
71
79
  Enabled: false
72
80
 
81
+ Style/AccessorGrouping:
82
+ Enabled: true
83
+
84
+ Style/ArrayCoercion:
85
+ Enabled: true
86
+
87
+ Style/BisectedAttrAccessor:
88
+ Enabled: true
89
+
90
+ Style/CaseLikeIf:
91
+ Enabled: true
92
+
73
93
  Style/ExponentialNotation:
74
94
  Enabled: true
75
95
 
76
96
  Style/FormatStringToken:
77
97
  Enabled: false
78
98
 
99
+ Style/HashAsLastArrayItem:
100
+ Enabled: true
101
+
102
+ Style/HashLikeCase:
103
+ Enabled: true
104
+
79
105
  Style/NumericLiterals:
80
106
  Enabled: false
81
107
 
108
+ Style/RedundantAssignment:
109
+ Enabled: true
110
+
111
+ Style/RedundantFetchBlock:
112
+ Enabled: true
113
+
114
+ Style/RedundantFileExtensionInRequire:
115
+ Enabled: true
116
+
117
+ Style/RedundantRegexpCharacterClass:
118
+ Enabled: true
119
+
120
+ Style/RedundantRegexpEscape:
121
+ Enabled: true
122
+
82
123
  Style/SlicingWithRange:
83
124
  Enabled: true
84
125
 
@@ -91,6 +132,30 @@ Layout/EmptyLinesAroundAttributeAccessor:
91
132
  Layout/SpaceAroundMethodCallOperator:
92
133
  Enabled: true
93
134
 
135
+ Performance/AncestorsInclude:
136
+ Enabled: true
137
+
138
+ Performance/BigDecimalWithNumericArgument:
139
+ Enabled: true
140
+
141
+ Performance/RedundantSortBlock:
142
+ Enabled: true
143
+
144
+ Performance/RedundantStringChars:
145
+ Enabled: true
146
+
147
+ Performance/ReverseFirst:
148
+ Enabled: true
149
+
150
+ Performance/SortReverse:
151
+ Enabled: true
152
+
153
+ Performance/Squeeze:
154
+ Enabled: true
155
+
156
+ Performance/StringInclude:
157
+ Enabled: true
158
+
94
159
  RSpec/MultipleExpectations:
95
160
  Enabled: false
96
161
 
@@ -1,3 +1,8 @@
1
+ # 0.19.3
2
+ - Add preprocessing class for [Binarizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/Binarizer.html)
3
+ - Add preprocessing class for [MaxNormalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/MaxNormalizer.html)
4
+ - Refactor some code with RuboCop.
5
+
1
6
  # 0.19.2
2
7
  - Fix L2Normalizer to avoid zero divide.
3
8
  - Add preprocessing class for [L1Normalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/L1Normalizer.html).
@@ -96,6 +96,7 @@ require 'rumale/feature_extraction/feature_hasher'
96
96
  require 'rumale/feature_extraction/tfidf_transformer'
97
97
  require 'rumale/preprocessing/l2_normalizer'
98
98
  require 'rumale/preprocessing/l1_normalizer'
99
+ require 'rumale/preprocessing/max_normalizer'
99
100
  require 'rumale/preprocessing/min_max_scaler'
100
101
  require 'rumale/preprocessing/max_abs_scaler'
101
102
  require 'rumale/preprocessing/standard_scaler'
@@ -104,6 +105,7 @@ require 'rumale/preprocessing/label_binarizer'
104
105
  require 'rumale/preprocessing/label_encoder'
105
106
  require 'rumale/preprocessing/one_hot_encoder'
106
107
  require 'rumale/preprocessing/ordinal_encoder'
108
+ require 'rumale/preprocessing/binarizer'
107
109
  require 'rumale/preprocessing/polynomial_features'
108
110
  require 'rumale/model_selection/k_fold'
109
111
  require 'rumale/model_selection/stratified_k_fold'
@@ -232,7 +232,7 @@ module Rumale
232
232
  end
233
233
 
234
234
  def flatten(tree, stabilities)
235
- node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
235
+ node_ids = stabilities.keys.sort.reverse.slice(0, stabilities.size - 1)
236
236
 
237
237
  cluster_tree = tree.select { |edge| edge.n_elements > 1 }
238
238
  is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }
@@ -103,7 +103,7 @@ module Rumale
103
103
  # random initialize
104
104
  n_samples = x.shape[0]
105
105
  sub_rng = @rng.dup
106
- rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
106
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
107
107
  @cluster_centers = x[rand_id, true].dup
108
108
  return unless @params[:init] == 'k-means++'
109
109
 
@@ -124,7 +124,7 @@ module Rumale
124
124
  # random initialize
125
125
  n_samples = distance_mat.shape[0]
126
126
  sub_rng = @rng.dup
127
- @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
127
+ @medoid_ids = Numo::Int32.asarray(Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng))
128
128
  return unless @params[:init] == 'k-means++'
129
129
 
130
130
  # k-means++ initialize
@@ -67,7 +67,7 @@ module Rumale
67
67
  init_cluster_centers(x, sub_rng)
68
68
  # optimization with mini-batch sgd.
69
69
  @params[:max_iter].times do |_t|
70
- sample_ids = [*0...n_samples].shuffle(random: sub_rng)
70
+ sample_ids = Array(0...n_samples).shuffle(random: sub_rng)
71
71
  old_centers = @cluster_centers.dup
72
72
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
73
73
  # sub sampling
@@ -120,7 +120,7 @@ module Rumale
120
120
  def init_cluster_centers(x, sub_rng)
121
121
  # random initialize
122
122
  n_samples = x.shape[0]
123
- rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
123
+ rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
124
124
  @cluster_centers = x[rand_id, true].dup
125
125
  return unless @params[:init] == 'k-means++'
126
126
 
@@ -81,7 +81,7 @@ module Rumale
81
81
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
82
82
  # shuffle data indices.
83
83
  if shuffle
84
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
84
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
85
85
  x = x[rand_ids, true].dup
86
86
  y = y[rand_ids].dup
87
87
  end
@@ -118,7 +118,7 @@ module Rumale
118
118
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
119
119
  # shuffle data indices.
120
120
  if shuffle
121
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
121
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
122
122
  x = x[rand_ids, true].dup
123
123
  y = y[rand_ids].dup
124
124
  end
@@ -173,7 +173,7 @@ module Rumale
173
173
  end
174
174
  # shuffle data.
175
175
  if shuffle
176
- rand_ids = [*0...n_samples].shuffle(random: rng.dup)
176
+ rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
177
177
  x = x[rand_ids, true].dup
178
178
  y = y[rand_ids].dup
179
179
  end
@@ -69,7 +69,7 @@ module Rumale
69
69
  n_components = [1, [@params[:n_components], n_samples].min].max
70
70
 
71
71
  # random sampling.
72
- @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
72
+ @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
73
73
  @components = x[@component_indices, true]
74
74
 
75
75
  # calculate normalizing factor.
@@ -172,7 +172,7 @@ module Rumale
172
172
  # Start optimization.
173
173
  @params[:max_iter].times do |t|
174
174
  # random sampling
175
- rand_ids = [*0...n_training_samples].shuffle(random: sub_rng) if rand_ids.empty?
175
+ rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
176
176
  target_id = rand_ids.shift
177
177
  # update the weight vector
178
178
  func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f
@@ -56,7 +56,7 @@ module Rumale
56
56
  samples = @params[:fit_bias] ? expand_feature(x) : x
57
57
  # Initialize some variables.
58
58
  n_samples, n_features = samples.shape
59
- rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
59
+ rand_ids = Array(0...n_samples).shuffle(random: @rng.dup)
60
60
  weight = Numo::DFloat.zeros(n_features)
61
61
  optimizer = @params[:optimizer].dup
62
62
  # Optimization.
@@ -209,7 +209,7 @@ module Rumale
209
209
  l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
210
210
  # Optimization.
211
211
  @params[:max_iter].times do |t|
212
- sample_ids = [*0...n_samples]
212
+ sample_ids = Array(0...n_samples)
213
213
  sample_ids.shuffle!(random: sub_rng)
214
214
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
215
215
  # sampling
@@ -69,10 +69,11 @@ module Rumale
69
69
  # the return_train_score is false.
70
70
  def perform(x, y)
71
71
  x = check_convert_sample_array(x)
72
- if @estimator.is_a?(Rumale::Base::Classifier)
72
+ case @estimator
73
+ when Rumale::Base::Classifier
73
74
  y = check_convert_label_array(y)
74
75
  check_sample_label_size(x, y)
75
- elsif @estimator.is_a?(Rumale::Base::Regressor)
76
+ when Rumale::Base::Regressor
76
77
  y = check_convert_tvalue_array(y)
77
78
  check_sample_tvalue_size(x, y)
78
79
  else
@@ -62,7 +62,7 @@ module Rumale
62
62
  end
63
63
  sub_rng = @rng.dup
64
64
  # Splits dataset ids to each fold.
65
- dataset_ids = [*0...n_samples]
65
+ dataset_ids = Array(0...n_samples)
66
66
  dataset_ids.shuffle!(random: sub_rng) if @shuffle
67
67
  fold_sets = Array.new(@n_splits) do |n|
68
68
  n_fold_samples = n_samples / @n_splits
@@ -74,7 +74,7 @@ module Rumale
74
74
  end
75
75
  sub_rng = @rng.dup
76
76
  # Returns array consisting of the training and testing ids for each fold.
77
- dataset_ids = [*0...n_samples]
77
+ dataset_ids = Array(0...n_samples)
78
78
  Array.new(@n_splits) do
79
79
  test_ids = dataset_ids.sample(n_test_samples, random: sub_rng)
80
80
  train_ids = if @train_size.nil?
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'rumale/base/base_estimator.rb'
4
- require 'rumale/base/classifier.rb'
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
5
 
6
6
  module Rumale
7
7
  # This module consists of the classes that implement multi-class classification strategy.
@@ -30,7 +30,7 @@ module Rumale
30
30
  @params = {}
31
31
  @params[:min_samples_leaf] = min_samples_leaf
32
32
  @data = x
33
- @tree = build_tree(Numo::Int32.cast([*0...@data.shape[0]]))
33
+ @tree = build_tree(Numo::Int32.cast(Array(0...@data.shape[0])))
34
34
  end
35
35
 
36
36
  # Search k-nearest neighbors of given query point.
@@ -222,7 +222,7 @@ module Rumale
222
222
  n_samples = x.shape[0]
223
223
 
224
224
  @params[:max_iter].times do |t|
225
- sample_ids = [*0...n_samples]
225
+ sample_ids = Array(0...n_samples)
226
226
  sample_ids.shuffle!(random: srng)
227
227
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
228
228
  # random sampling
@@ -69,7 +69,7 @@ module Rumale
69
69
  factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
70
70
  # Start optimization.
71
71
  @params[:max_iter].times do |t|
72
- sample_ids = [*0...n_samples]
72
+ sample_ids = Array(0...n_samples)
73
73
  sample_ids.shuffle!(random: sub_rng)
74
74
  until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
75
75
  # Sampling.
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Binarize samples according to a threshold
9
+ #
10
+ # @example
11
+ # binarizer = Rumale::Preprocessing::Binarizer.new
12
+ # x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
13
+ # b = binarizer.transform(x)
14
+ # p b
15
+ #
16
+ # # Numo::DFloat#shape=[3, 2]
17
+ # # [[0, 1],
18
+ # # [1, 0],
19
+ # # [1, 1]]
20
+ class Binarizer
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Create a new transformer for binarization.
25
+ # @param threshold [Float] The threshold value for binarization.
26
+ def initialize(threshold: 0.0)
27
+ check_params_numeric(threshold: threshold)
28
+ @params = { threshold: threshold }
29
+ end
30
+
31
+ # This method does nothing and returns the object itself.
32
+ # This method exists for compatibility with other transformers.
33
+ #
34
+ # @overload fit() -> Binarizer
35
+ #
36
+ # @return [Binarizer]
37
+ def fit(_x = nil, _y = nil)
38
+ self
39
+ end
40
+
41
+ # Binarize each sample.
42
+ #
43
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
44
+ # @return [Numo::DFloat] The binarized samples.
45
+ def transform(x)
46
+ x = check_convert_sample_array(x)
47
+ x.class.cast(x.gt(@params[:threshold]))
48
+ end
49
+
50
+ # The output of this method is the same as that of the transform method.
51
+ # This method exists for compatibility with other transformers.
52
+ #
53
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
54
+ # @return [Numo::DFloat] The binarized samples.
55
+ def fit_transform(x, _y = nil)
56
+ fit(x).transform(x)
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Normalize samples with the maximum of the absolute values.
9
+ #
10
+ # @example
11
+ # normalizer = Rumale::Preprocessing::MaxNormalizer.new
12
+ # new_samples = normalizer.fit_transform(samples)
13
+ class MaxNormalizer
14
+ include Base::BaseEstimator
15
+ include Base::Transformer
16
+
17
+ # Return the vector consisting of the maximum norm of each sample.
18
+ # @return [Numo::DFloat] (shape: [n_samples])
19
+ attr_reader :norm_vec # :nodoc:
20
+
21
+ # Create a new normalizer for normalizing to max-norm.
22
+ def initialize
23
+ @params = {}
24
+ @norm_vec = nil
25
+ end
26
+
27
+ # Calculate the maximum norms of each sample.
28
+ #
29
+ # @overload fit(x) -> MaxNormalizer
30
+ #
31
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
32
+ # @return [MaxNormalizer]
33
+ def fit(x, _y = nil)
34
+ x = check_convert_sample_array(x)
35
+ @norm_vec = x.abs.max(1)
36
+ @norm_vec[@norm_vec.eq(0)] = 1
37
+ self
38
+ end
39
+
40
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
41
+ #
42
+ # @overload fit_transform(x) -> Numo::DFloat
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
45
+ # @return [Numo::DFloat] The normalized samples.
46
+ def fit_transform(x, _y = nil)
47
+ x = check_convert_sample_array(x)
48
+ fit(x)
49
+ x / @norm_vec.expand_dims(1)
50
+ end
51
+
52
+ # Calculate the maximum norms of each sample, and then normalize samples with the norms.
53
+ # This method calls the fit_transform method. This method exists for the Pipeline class.
54
+ #
55
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
56
+ # @return [Numo::DFloat] The normalized samples.
57
+ def transform(x)
58
+ fit_transform(x)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.19.2'
6
+ VERSION = '0.19.3'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.19.2
4
+ version: 0.19.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-06-20 00:00:00.000000000 Z
11
+ date: 2020-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -167,11 +167,13 @@ files:
167
167
  - lib/rumale/polynomial_model/factorization_machine_classifier.rb
168
168
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
169
169
  - lib/rumale/preprocessing/bin_discretizer.rb
170
+ - lib/rumale/preprocessing/binarizer.rb
170
171
  - lib/rumale/preprocessing/l1_normalizer.rb
171
172
  - lib/rumale/preprocessing/l2_normalizer.rb
172
173
  - lib/rumale/preprocessing/label_binarizer.rb
173
174
  - lib/rumale/preprocessing/label_encoder.rb
174
175
  - lib/rumale/preprocessing/max_abs_scaler.rb
176
+ - lib/rumale/preprocessing/max_normalizer.rb
175
177
  - lib/rumale/preprocessing/min_max_scaler.rb
176
178
  - lib/rumale/preprocessing/one_hot_encoder.rb
177
179
  - lib/rumale/preprocessing/ordinal_encoder.rb