RubyGems - rumale - Versions diffs - 0.19.2 → 0.19.3 - Mend

rumale 0.19.2 → 0.19.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/.rubocop.yml +66 -1
data/CHANGELOG.md +5 -0
data/lib/rumale.rb +2 -0
data/lib/rumale/clustering/hdbscan.rb +1 -1
data/lib/rumale/clustering/k_means.rb +1 -1
data/lib/rumale/clustering/k_medoids.rb +1 -1
data/lib/rumale/clustering/mini_batch_k_means.rb +2 -2
data/lib/rumale/dataset.rb +3 -3
data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
data/lib/rumale/linear_model/base_linear_model.rb +1 -1
data/lib/rumale/linear_model/base_sgd.rb +1 -1
data/lib/rumale/model_selection/cross_validation.rb +3 -2
data/lib/rumale/model_selection/k_fold.rb +1 -1
data/lib/rumale/model_selection/shuffle_split.rb +1 -1
data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
data/lib/rumale/neural_network/base_mlp.rb +1 -1
data/lib/rumale/polynomial_model/base_factorization_machine.rb +1 -1
data/lib/rumale/preprocessing/binarizer.rb +60 -0
data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
data/lib/rumale/version.rb +1 -1
metadata +4 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1bff2e1e6182aa954be00ed107ed1bd81220298f89514b4b31304f8890ff27c4
-  data.tar.gz: '09b185f468baf9dbec6280fa6c06984c95919308f1d2247277bf30348ed392bc'
+  metadata.gz: dc3413c05ad7c365117adc4abbc304ff1851fa9a3ff69fef3c69e730d9a2b834
+  data.tar.gz: 8895cce8b350c4e245aabb5e3e4c4036655fa8d24a72f6481e0d2f8c9869fa54
 SHA512:
-  metadata.gz: 6d8f1fcaffcd6714c6156fc615d87e6b6950e82ab40fc7434cfc5a014d6c08eb0170ee7c45d8fed978c2a52f839b1ce647fd6e088cbab2ea45e517b34c88407a
-  data.tar.gz: b255ae4c24cdc91ebad59f79ee5a58c5d2a5ffa79bda0ac221e3a33bd824d2fd94e5cd83f3a06e54a2dc537a074276cea5a71651deeee2a304d23e963ff92c9d
+  metadata.gz: fe10c975f286a4c9ac155d29310d61d1f180cbcc909ec7bdba3925973b6b9857635befc9bf4938cf28a6ef50c8011894b7b15768735f6200c27ce912907e5fb1
+  data.tar.gz: 327cce25145c1ca3f5623f84b4163560bdbee8245009c3d8e1c3318f61dec94b58a7395ebc3538e7b04a9702af08b077e7f426c54f7c5d4fd3b0fcf11c4744cf

data/.rubocop.yml CHANGED

@@ -24,6 +24,15 @@ Style/HashTransformKeys:
 Style/HashTransformValues:
   Enabled: true
+Lint/DeprecatedOpenSSLConstant:
+  Enabled: true
+Lint/DuplicateElsifCondition:
+  Enabled: true
+Lint/MixedRegexpCaptureTypes:
+  Enabled: true
 Lint/RaiseException:
   Enabled: true
@@ -34,7 +43,6 @@ Layout/LineLength:
   Max: 145
   IgnoredPatterns: ['(\A|\s)#']
 Metrics/ModuleLength:
   Max: 200
@@ -70,15 +78,48 @@ Naming/MethodParameterName:
 Naming/ConstantName:
   Enabled: false
+Style/AccessorGrouping:
+  Enabled: true
+Style/ArrayCoercion:
+  Enabled: true
+Style/BisectedAttrAccessor:
+  Enabled: true
+Style/CaseLikeIf:
+  Enabled: true
 Style/ExponentialNotation:
   Enabled: true
 Style/FormatStringToken:
   Enabled: false
+Style/HashAsLastArrayItem:
+  Enabled: true
+Style/HashLikeCase:
+  Enabled: true
 Style/NumericLiterals:
   Enabled: false
+Style/RedundantAssignment:
+  Enabled: true
+Style/RedundantFetchBlock:
+  Enabled: true
+Style/RedundantFileExtensionInRequire:
+  Enabled: true
+Style/RedundantRegexpCharacterClass:
+  Enabled: true
+Style/RedundantRegexpEscape:
+  Enabled: true
 Style/SlicingWithRange:
   Enabled: true
@@ -91,6 +132,30 @@ Layout/EmptyLinesAroundAttributeAccessor:
 Layout/SpaceAroundMethodCallOperator:
   Enabled: true
+Performance/AncestorsInclude:
+  Enabled: true
+Performance/BigDecimalWithNumericArgument:
+  Enabled: true
+Performance/RedundantSortBlock:
+  Enabled: true
+Performance/RedundantStringChars:
+  Enabled: true
+Performance/ReverseFirst:
+  Enabled: true
+Performance/SortReverse:
+  Enabled: true
+Performance/Squeeze:
+  Enabled: true
+Performance/StringInclude:
+  Enabled: true
 RSpec/MultipleExpectations:
   Enabled: false

data/CHANGELOG.md CHANGED

@@ -1,3 +1,8 @@
+# 0.19.3
+- Add preprocessing class for [Binarizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/Binarizer.html)
+- Add preprocessing class for [MaxNormalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/MaxNormalizer.html)
+- Refactor some code with RuboCop.
 # 0.19.2
 - Fix L2Normalizer to avoid zero divide.
 - Add preprocssing class for [L1Normalizer](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/L1Normalizer.html).

data/lib/rumale.rb CHANGED

@@ -96,6 +96,7 @@ require 'rumale/feature_extraction/feature_hasher'
 require 'rumale/feature_extraction/tfidf_transformer'
 require 'rumale/preprocessing/l2_normalizer'
 require 'rumale/preprocessing/l1_normalizer'
+require 'rumale/preprocessing/max_normalizer'
 require 'rumale/preprocessing/min_max_scaler'
 require 'rumale/preprocessing/max_abs_scaler'
 require 'rumale/preprocessing/standard_scaler'
@@ -104,6 +105,7 @@ require 'rumale/preprocessing/label_binarizer'
 require 'rumale/preprocessing/label_encoder'
 require 'rumale/preprocessing/one_hot_encoder'
 require 'rumale/preprocessing/ordinal_encoder'
+require 'rumale/preprocessing/binarizer'
 require 'rumale/preprocessing/polynomial_features'
 require 'rumale/model_selection/k_fold'
 require 'rumale/model_selection/stratified_k_fold'

data/lib/rumale/clustering/hdbscan.rb CHANGED

@@ -232,7 +232,7 @@ module Rumale
       end
       def flatten(tree, stabilities)
-        node_ids = stabilities.keys.sort { |a, b| b <=> a }.slice(0, stabilities.size - 1)
+        node_ids = stabilities.keys.sort.reverse.slice(0, stabilities.size - 1)
         cluster_tree = tree.select { |edge| edge.n_elements > 1 }
         is_cluster = node_ids.each_with_object({}) { |n_id, h| h[n_id] = true }

data/lib/rumale/clustering/k_means.rb CHANGED

@@ -103,7 +103,7 @@ module Rumale
         # random initialize
         n_samples = x.shape[0]
         sub_rng = @rng.dup
-        rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
+        rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
         @cluster_centers = x[rand_id, true].dup
         return unless @params[:init] == 'k-means++'

data/lib/rumale/clustering/k_medoids.rb CHANGED

@@ -124,7 +124,7 @@ module Rumale
         # random initialize
         n_samples = distance_mat.shape[0]
         sub_rng = @rng.dup
-        @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
+        @medoid_ids = Numo::Int32.asarray(Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng))
         return unless @params[:init] == 'k-means++'
         # k-means++ initialize

data/lib/rumale/clustering/mini_batch_k_means.rb CHANGED

@@ -67,7 +67,7 @@ module Rumale
         init_cluster_centers(x, sub_rng)
         # optimization with mini-batch sgd.
         @params[:max_iter].times do |_t|
-          sample_ids = [*0...n_samples].shuffle(random: sub_rng)
+          sample_ids = Array(0...n_samples).shuffle(random: sub_rng)
           old_centers = @cluster_centers.dup
           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
             # sub sampling
@@ -120,7 +120,7 @@ module Rumale
       def init_cluster_centers(x, sub_rng)
         # random initialize
         n_samples = x.shape[0]
-        rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
+        rand_id = Array(0...n_samples).sample(@params[:n_clusters], random: sub_rng)
         @cluster_centers = x[rand_id, true].dup
         return unless @params[:init] == 'k-means++'

data/lib/rumale/dataset.rb CHANGED

@@ -81,7 +81,7 @@ module Rumale
         y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
         # shuffle data indices.
         if shuffle
-          rand_ids = [*0...n_samples].shuffle(random: rng.dup)
+          rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
           x = x[rand_ids, true].dup
           y = y[rand_ids].dup
         end
@@ -118,7 +118,7 @@ module Rumale
         y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
         # shuffle data indices.
         if shuffle
-          rand_ids = [*0...n_samples].shuffle(random: rng.dup)
+          rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
           x = x[rand_ids, true].dup
           y = y[rand_ids].dup
         end
@@ -173,7 +173,7 @@ module Rumale
         end
         # shuffle data.
         if shuffle
-          rand_ids = [*0...n_samples].shuffle(random: rng.dup)
+          rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
           x = x[rand_ids, true].dup
           y = y[rand_ids].dup
         end

data/lib/rumale/kernel_approximation/nystroem.rb CHANGED

@@ -69,7 +69,7 @@ module Rumale
         n_components = [1, [@params[:n_components], n_samples].min].max
         # random sampling.
-        @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
+        @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
         @components = x[@component_indices, true]
         # calculate normalizing factor.

data/lib/rumale/kernel_machine/kernel_svc.rb CHANGED

@@ -172,7 +172,7 @@ module Rumale
         # Start optimization.
         @params[:max_iter].times do |t|
           # random sampling
-          rand_ids = [*0...n_training_samples].shuffle(random: sub_rng) if rand_ids.empty?
+          rand_ids = Array(0...n_training_samples).shuffle(random: sub_rng) if rand_ids.empty?
           target_id = rand_ids.shift
           # update the weight vector
           func = (weight_vec * bin_y).dot(x[target_id, true].transpose).to_f

data/lib/rumale/linear_model/base_linear_model.rb CHANGED

@@ -56,7 +56,7 @@ module Rumale
         samples = @params[:fit_bias] ? expand_feature(x) : x
         # Initialize some variables.
         n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
+        rand_ids = Array(0...n_samples).shuffle(random: @rng.dup)
         weight = Numo::DFloat.zeros(n_features)
         optimizer = @params[:optimizer].dup
         # Optimization.

data/lib/rumale/linear_model/base_sgd.rb CHANGED

@@ -209,7 +209,7 @@ module Rumale
         l1_penalty = LinearModel::Penalty::L1Penalty.new(reg_param: l1_reg_param) if apply_l1_penalty?
         # Optimization.
         @params[:max_iter].times do |t|
-          sample_ids = [*0...n_samples]
+          sample_ids = Array(0...n_samples)
           sample_ids.shuffle!(random: sub_rng)
           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
             # sampling

data/lib/rumale/model_selection/cross_validation.rb CHANGED

@@ -69,10 +69,11 @@ module Rumale
       #     the return_train_score is false.
       def perform(x, y)
         x = check_convert_sample_array(x)
-        if @estimator.is_a?(Rumale::Base::Classifier)
+        case @estimator
+        when Rumale::Base::Classifier
           y = check_convert_label_array(y)
           check_sample_label_size(x, y)
-        elsif @estimator.is_a?(Rumale::Base::Regressor)
+        when Rumale::Base::Regressor
           y = check_convert_tvalue_array(y)
           check_sample_tvalue_size(x, y)
         else

data/lib/rumale/model_selection/k_fold.rb CHANGED

@@ -62,7 +62,7 @@ module Rumale
         end
         sub_rng = @rng.dup
         # Splits dataset ids to each fold.
-        dataset_ids = [*0...n_samples]
+        dataset_ids = Array(0...n_samples)
         dataset_ids.shuffle!(random: sub_rng) if @shuffle
         fold_sets = Array.new(@n_splits) do |n|
           n_fold_samples = n_samples / @n_splits

data/lib/rumale/model_selection/shuffle_split.rb CHANGED

@@ -74,7 +74,7 @@ module Rumale
         end
         sub_rng = @rng.dup
         # Returns array consisting of the training and testing ids for each fold.
-        dataset_ids = [*0...n_samples]
+        dataset_ids = Array(0...n_samples)
         Array.new(@n_splits) do
           test_ids = dataset_ids.sample(n_test_samples, random: sub_rng)
           train_ids = if @train_size.nil?

data/lib/rumale/multiclass/one_vs_rest_classifier.rb CHANGED

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
-require 'rumale/base/base_estimator.rb'
-require 'rumale/base/classifier.rb'
+require 'rumale/base/base_estimator'
+require 'rumale/base/classifier'
 module Rumale
   # This module consists of the classes that implement multi-class classification strategy.

data/lib/rumale/nearest_neighbors/vp_tree.rb CHANGED

@@ -30,7 +30,7 @@ module Rumale
         @params = {}
         @params[:min_samples_leaf] = min_samples_leaf
         @data = x
-        @tree = build_tree(Numo::Int32.cast([*0...@data.shape[0]]))
+        @tree = build_tree(Numo::Int32.cast(Array(0...@data.shape[0])))
       end
       # Search k-nearest neighbors of given query point.

data/lib/rumale/neural_network/base_mlp.rb CHANGED

@@ -222,7 +222,7 @@ module Rumale
         n_samples = x.shape[0]
         @params[:max_iter].times do |t|
-          sample_ids = [*0...n_samples]
+          sample_ids = Array(0...n_samples)
           sample_ids.shuffle!(random: srng)
           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
             # random sampling

data/lib/rumale/polynomial_model/base_factorization_machine.rb CHANGED

@@ -69,7 +69,7 @@ module Rumale
         factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
         # Start optimization.
         @params[:max_iter].times do |t|
-          sample_ids = [*0...n_samples]
+          sample_ids = Array(0...n_samples)
           sample_ids.shuffle!(random: sub_rng)
           until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
             # Sampling.

data/lib/rumale/preprocessing/binarizer.rb ADDED

@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Preprocessing
+    # Binarize samples according to a threshold
+    #
+    # @example
+    #   binarizer = Rumale::Preprocessing::Binarizer.new
+    #   x = Numo::DFloat[[-1.2, 3.2], [2.4, -0.5], [4.5, 0.8]]
+    #   b = binarizer.transform(x)
+    #   p b
+    #
+    #   # Numo::DFloat#shape=[3, 2]
+    #   # [[0, 1],
+    #   #  [1, 0],
+    #   #  [1, 1]]
+    class Binarizer
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Create a new transformer for binarization.
+      # @param threshold [Float] The threshold value for binarization.
+      def initialize(threshold: 0.0)
+        check_params_numeric(threshold: threshold)
+        @params = { threshold: threshold }
+      end
+      # This method does nothing and returns the object itself.
+      # This method exists for compatibility with other transformers.
+      #
+      # @overload fit() -> Binarizer
+      #
+      # @return [Binarizer]
+      def fit(_x = nil, _y = nil)
+        self
+      end
+      # Binarize each sample.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
+      # @return [Numo::DFloat] The binarized samples.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.class.cast(x.gt(@params[:threshold]))
+      end
+      # The output of this method is the same as that of the transform method.
+      # This method exists for compatibility with other transformers.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be binarized.
+      # @return [Numo::DFloat] The binarized samples.
+      def fit_transform(x, _y = nil)
+        fit(x).transform(x)
+      end
+    end
+  end
+end

data/lib/rumale/preprocessing/max_normalizer.rb ADDED

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Preprocessing
+    # Normalize samples with the maximum of the absolute values.
+    #
+    # @example
+    #   normalizer = Rumale::Preprocessing::MaxNormalizer.new
+    #   new_samples = normalizer.fit_transform(samples)
+    class MaxNormalizer
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Return the vector consisting of the maximum norm of each sample.
+      # @return [Numo::DFloat] (shape: [n_samples])
+      attr_reader :norm_vec # :nodoc:
+      # Create a new normalizer for normalizing to max-norm.
+      def initialize
+        @params = {}
+        @norm_vec = nil
+      end
+      # Calculate the maximum norms of each sample.
+      #
+      # @overload fit(x) -> MaxNormalizer
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
+      # @return [MaxNormalizer]
+      def fit(x, _y = nil)
+        x = check_convert_sample_array(x)
+        @norm_vec = x.abs.max(1)
+        @norm_vec[@norm_vec.eq(0)] = 1
+        self
+      end
+      # Calculate the maximum norms of each sample, and then normalize samples with the norms.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def fit_transform(x, _y = nil)
+        x = check_convert_sample_array(x)
+        fit(x)
+        x / @norm_vec.expand_dims(1)
+      end
+      # Calculate the maximum norms of each sample, and then normalize samples with the norms.
+      # This method calls the fit_transform method. This method exists for the Pipeline class.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def transform(x)
+        fit_transform(x)
+      end
+    end
+  end
+end

data/lib/rumale/version.rb CHANGED

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.19.2'
+  VERSION = '0.19.3'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.19.2
+  version: 0.19.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-06-20 00:00:00.000000000 Z
+date: 2020-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -167,11 +167,13 @@ files:
 - lib/rumale/polynomial_model/factorization_machine_classifier.rb
 - lib/rumale/polynomial_model/factorization_machine_regressor.rb
 - lib/rumale/preprocessing/bin_discretizer.rb
+- lib/rumale/preprocessing/binarizer.rb
 - lib/rumale/preprocessing/l1_normalizer.rb
 - lib/rumale/preprocessing/l2_normalizer.rb
 - lib/rumale/preprocessing/label_binarizer.rb
 - lib/rumale/preprocessing/label_encoder.rb
 - lib/rumale/preprocessing/max_abs_scaler.rb
+- lib/rumale/preprocessing/max_normalizer.rb
 - lib/rumale/preprocessing/min_max_scaler.rb
 - lib/rumale/preprocessing/one_hot_encoder.rb
 - lib/rumale/preprocessing/ordinal_encoder.rb