RubyGems - rumale - Versions diffs - 0.18.7 → 0.20.0 - Mend

rumale 0.18.7 → 0.20.0

Files changed (42) hide show

checksums.yaml +4 -4
data/.rubocop.yml +66 -1
data/CHANGELOG.md +46 -0
data/Gemfile +2 -0
data/README.md +5 -36
data/lib/rumale.rb +5 -10
data/lib/rumale/clustering/hdbscan.rb +1 -1
data/lib/rumale/clustering/k_means.rb +1 -1
data/lib/rumale/clustering/k_medoids.rb +1 -1
data/lib/rumale/clustering/mini_batch_k_means.rb +139 -0
data/lib/rumale/dataset.rb +3 -3
data/lib/rumale/decomposition/pca.rb +23 -5
data/lib/rumale/feature_extraction/feature_hasher.rb +14 -1
data/lib/rumale/feature_extraction/tfidf_transformer.rb +113 -0
data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
data/lib/rumale/linear_model/base_sgd.rb +1 -1
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +13 -1
data/lib/rumale/model_selection/cross_validation.rb +3 -2
data/lib/rumale/model_selection/k_fold.rb +1 -1
data/lib/rumale/model_selection/shuffle_split.rb +1 -1
data/lib/rumale/multiclass/one_vs_rest_classifier.rb +2 -2
data/lib/rumale/nearest_neighbors/vp_tree.rb +1 -1
data/lib/rumale/neural_network/adam.rb +1 -1
data/lib/rumale/neural_network/base_mlp.rb +1 -1
data/lib/rumale/preprocessing/binarizer.rb +60 -0
data/lib/rumale/preprocessing/l1_normalizer.rb +62 -0
data/lib/rumale/preprocessing/l2_normalizer.rb +2 -1
data/lib/rumale/preprocessing/max_normalizer.rb +62 -0
data/lib/rumale/version.rb +1 -1
data/rumale.gemspec +1 -3
metadata +11 -44
data/lib/rumale/linear_model/base_linear_model.rb +0 -101
data/lib/rumale/optimizer/ada_grad.rb +0 -39
data/lib/rumale/optimizer/adam.rb +0 -53
data/lib/rumale/optimizer/nadam.rb +0 -62
data/lib/rumale/optimizer/rmsprop.rb +0 -47
data/lib/rumale/optimizer/sgd.rb +0 -43
data/lib/rumale/optimizer/yellow_fin.rb +0 -101
data/lib/rumale/polynomial_model/base_factorization_machine.rb +0 -121
data/lib/rumale/polynomial_model/factorization_machine_classifier.rb +0 -215
data/lib/rumale/polynomial_model/factorization_machine_regressor.rb +0 -129

data/lib/rumale/preprocessing/l1_normalizer.rb ADDED

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Preprocessing
+    # Normalize samples to unit L1-norm.
+    #
+    # @example
+    #   normalizer = Rumale::Preprocessing::L1Normalizer.new
+    #   new_samples = normalizer.fit_transform(samples)
+    class L1Normalizer
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Return the vector consisting of the L1-norm of each sample.
+      # @return [Numo::DFloat] (shape: [n_samples])
+      attr_reader :norm_vec # :nodoc:
+      # Create a new normalizer for normalizing to unit L1-norm.
+      def initialize
+        @params = {}
+        @norm_vec = nil
+      end
+      # Calculate L1-norms of each sample.
+      #
+      # @overload fit(x) -> L1Normalizer
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [L1Normalizer]
+      def fit(x, _y = nil)
+        x = check_convert_sample_array(x)
+        @norm_vec = x.abs.sum(1)
+        @norm_vec[@norm_vec.eq(0)] = 1
+        self
+      end
+      # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def fit_transform(x, _y = nil)
+        x = check_convert_sample_array(x)
+        fit(x)
+        x / @norm_vec.expand_dims(1)
+      end
+      # Calculate L1-norms of each sample, and then normalize samples to L1-norm.
+      # This method calls the fit_transform method. This method exists for the Pipeline class.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L1-norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def transform(x)
+        fit_transform(x)
+      end
+    end
+  end
+end

data/lib/rumale/preprocessing/l2_normalizer.rb CHANGED

@@ -34,6 +34,7 @@ module Rumale
       def fit(x, _y = nil)
         x = check_convert_sample_array(x)
         @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
+        @norm_vec[@norm_vec.eq(0)] = 1
         self
       end
@@ -46,7 +47,7 @@ module Rumale
       def fit_transform(x, _y = nil)
         x = check_convert_sample_array(x)
         fit(x)
-        x / @norm_vec.tile(x.shape[1], 1).transpose
+        x / @norm_vec.expand_dims(1)
       end
       # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.

data/lib/rumale/preprocessing/max_normalizer.rb ADDED

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+module Rumale
+  module Preprocessing
+    # Normalize samples with the maximum of the absolute values.
+    #
+    # @example
+    #   normalizer = Rumale::Preprocessing::MaxNormalizer.new
+    #   new_samples = normalizer.fit_transform(samples)
+    class MaxNormalizer
+      include Base::BaseEstimator
+      include Base::Transformer
+      # Return the vector consisting of the maximum norm of each sample.
+      # @return [Numo::DFloat] (shape: [n_samples])
+      attr_reader :norm_vec # :nodoc:
+      # Create a new normalizer for normalizing to max-norm.
+      def initialize
+        @params = {}
+        @norm_vec = nil
+      end
+      # Calculate the maximum norms of each sample.
+      #
+      # @overload fit(x) -> MaxNormalizer
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the maximum norms.
+      # @return [MaxNormalizer]
+      def fit(x, _y = nil)
+        x = check_convert_sample_array(x)
+        @norm_vec = x.abs.max(1)
+        @norm_vec[@norm_vec.eq(0)] = 1
+        self
+      end
+      # Calculate the maximum norms of each sample, and then normalize samples with the norms.
+      #
+      # @overload fit_transform(x) -> Numo::DFloat
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def fit_transform(x, _y = nil)
+        x = check_convert_sample_array(x)
+        fit(x)
+        x / @norm_vec.expand_dims(1)
+      end
+      # Calculate the maximum norms of each sample, and then normalize samples with the norms.
+      # This method calls the fit_transform method. This method exists for the Pipeline class.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate maximum norms.
+      # @return [Numo::DFloat] The normalized samples.
+      def transform(x)
+        fit_transform(x)
+      end
+    end
+  end
+end

data/lib/rumale/version.rb CHANGED

@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.18.7'
+  VERSION = '0.20.0'
 end

data/rumale.gemspec CHANGED

@@ -16,7 +16,7 @@ Gem::Specification.new do |spec|
     Rumale is a machine learning library in Ruby.
     Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
     Rumale supports Support Vector Machine,
-    Logistic Regression, Ridge, Lasso, Factorization Machine,
+    Logistic Regression, Ridge, Lasso,
     Multi-layer Perceptron,
     Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
     K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
@@ -45,6 +45,4 @@ Gem::Specification.new do |spec|
   }
   spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
-  spec.add_runtime_dependency 'mopti', '>= 0.1.0'
-  spec.add_runtime_dependency 'mmh3', '>= 0.1.0'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.18.7
+  version: 0.20.0
 platform: ruby
 authors:
 - yoshoku
-autorequire:
+autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-05-16 00:00:00.000000000 Z
+date: 2020-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -24,39 +24,11 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 0.9.1
-- !ruby/object:Gem::Dependency
-  name: mopti
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.1.0
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.1.0
-- !ruby/object:Gem::Dependency
-  name: mmh3
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.1.0
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 0.1.0
 description: |
   Rumale is a machine learning library in Ruby.
   Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
   Rumale supports Support Vector Machine,
-  Logistic Regression, Ridge, Lasso, Factorization Machine,
+  Logistic Regression, Ridge, Lasso,
   Multi-layer Perceptron,
   Naive Bayes, Decision Tree, Gradient Tree Boosting, Random Forest,
   K-Means, Gaussian Mixture Model, DBSCAN, Spectral Clustering,
@@ -100,6 +72,7 @@ files:
 - lib/rumale/clustering/hdbscan.rb
 - lib/rumale/clustering/k_means.rb
 - lib/rumale/clustering/k_medoids.rb
+- lib/rumale/clustering/mini_batch_k_means.rb
 - lib/rumale/clustering/power_iteration.rb
 - lib/rumale/clustering/single_linkage.rb
 - lib/rumale/clustering/snn.rb
@@ -140,13 +113,13 @@ files:
 - lib/rumale/evaluation_measure/silhouette_score.rb
 - lib/rumale/feature_extraction/feature_hasher.rb
 - lib/rumale/feature_extraction/hash_vectorizer.rb
+- lib/rumale/feature_extraction/tfidf_transformer.rb
 - lib/rumale/kernel_approximation/nystroem.rb
 - lib/rumale/kernel_approximation/rbf.rb
 - lib/rumale/kernel_machine/kernel_fda.rb
 - lib/rumale/kernel_machine/kernel_pca.rb
 - lib/rumale/kernel_machine/kernel_ridge.rb
 - lib/rumale/kernel_machine/kernel_svc.rb
-- lib/rumale/linear_model/base_linear_model.rb
 - lib/rumale/linear_model/base_sgd.rb
 - lib/rumale/linear_model/elastic_net.rb
 - lib/rumale/linear_model/lasso.rb
@@ -180,23 +153,17 @@ files:
 - lib/rumale/neural_network/base_mlp.rb
 - lib/rumale/neural_network/mlp_classifier.rb
 - lib/rumale/neural_network/mlp_regressor.rb
-- lib/rumale/optimizer/ada_grad.rb
-- lib/rumale/optimizer/adam.rb
-- lib/rumale/optimizer/nadam.rb
-- lib/rumale/optimizer/rmsprop.rb
-- lib/rumale/optimizer/sgd.rb
-- lib/rumale/optimizer/yellow_fin.rb
 - lib/rumale/pairwise_metric.rb
 - lib/rumale/pipeline/feature_union.rb
 - lib/rumale/pipeline/pipeline.rb
-- lib/rumale/polynomial_model/base_factorization_machine.rb
-- lib/rumale/polynomial_model/factorization_machine_classifier.rb
-- lib/rumale/polynomial_model/factorization_machine_regressor.rb
 - lib/rumale/preprocessing/bin_discretizer.rb
+- lib/rumale/preprocessing/binarizer.rb
+- lib/rumale/preprocessing/l1_normalizer.rb
 - lib/rumale/preprocessing/l2_normalizer.rb
 - lib/rumale/preprocessing/label_binarizer.rb
 - lib/rumale/preprocessing/label_encoder.rb
 - lib/rumale/preprocessing/max_abs_scaler.rb
+- lib/rumale/preprocessing/max_normalizer.rb
 - lib/rumale/preprocessing/min_max_scaler.rb
 - lib/rumale/preprocessing/one_hot_encoder.rb
 - lib/rumale/preprocessing/ordinal_encoder.rb
@@ -224,7 +191,7 @@ metadata:
   source_code_uri: https://github.com/yoshoku/rumale
   documentation_uri: https://yoshoku.github.io/rumale/doc/
   bug_tracker_uri: https://github.com/yoshoku/rumale/issues
-post_install_message:
+post_install_message:
 rdoc_options: []
 require_paths:
 - lib
@@ -240,7 +207,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubygems_version: 3.1.2
-signing_key:
+signing_key:
 specification_version: 4
 summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
   algorithms with interfaces similar to Scikit-Learn in Python.

data/lib/rumale/linear_model/base_linear_model.rb DELETED

@@ -1,101 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/base/base_estimator'
-require 'rumale/optimizer/nadam'
-module Rumale
-  module LinearModel
-    # @note
-    #   In version 0.17.0, a new linear model abstract class called BaseSGD is introduced.
-    #   BaseLinearModel is deprecated and will be removed in the future.
-    #
-    # BaseLinearModel is an abstract class for implementation of linear estimator
-    # with mini-batch stochastic gradient descent optimization.
-    # This class is used for internal process.
-    class BaseLinearModel
-      # :nocov:
-      include Base::BaseEstimator
-      # Initialize a linear estimator.
-      #
-      # @param reg_param [Float] The regularization parameter.
-      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param bias_scale [Float] The scale of the bias term.
-      # @param max_iter [Integer] The maximum number of iterations.
-      # @param batch_size [Integer] The size of the mini batches.
-      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
-      #   If nil is given, Nadam is used.
-      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
-      #   If nil is given, the methods do not execute in parallel.
-      #   If zero or less is given, it becomes equal to the number of processors.
-      # @param random_seed [Integer] The seed value using to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
-                     max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
-        warn 'warning: BaseLinearModel is deprecated. Use BaseSGD instead.'
-        @params = {}
-        @params[:reg_param] = reg_param
-        @params[:fit_bias] = fit_bias
-        @params[:bias_scale] = bias_scale
-        @params[:max_iter] = max_iter
-        @params[:batch_size] = batch_size
-        @params[:optimizer] = optimizer
-        @params[:optimizer] ||= Rumale::Optimizer::Nadam.new
-        @params[:n_jobs] = n_jobs
-        @params[:random_seed] = random_seed
-        @params[:random_seed] ||= srand
-        @weight_vec = nil
-        @bias_term = nil
-        @rng = Random.new(@params[:random_seed])
-      end
-      private
-      def partial_fit(x, y)
-        # Expand feature vectors for bias term.
-        samples = @params[:fit_bias] ? expand_feature(x) : x
-        # Initialize some variables.
-        n_samples, n_features = samples.shape
-        rand_ids = [*0...n_samples].shuffle(random: @rng.dup)
-        weight = Numo::DFloat.zeros(n_features)
-        optimizer = @params[:optimizer].dup
-        # Optimization.
-        @params[:max_iter].times do |_t|
-          # Random sampling
-          subset_ids = rand_ids.shift(@params[:batch_size])
-          rand_ids.concat(subset_ids)
-          sub_samples = samples[subset_ids, true]
-          sub_targets = y[subset_ids]
-          # Update weight.
-          loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
-          next if loss_gradient.ne(0.0).count.zero?
-          weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
-        end
-        split_weight(weight)
-      end
-      def calc_loss_gradient(_x, _y, _weight)
-        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
-      end
-      def calc_new_weight(optimizer, x, weight, loss_gradient)
-        weight_gradient = x.transpose.dot(loss_gradient) / @params[:batch_size] + @params[:reg_param] * weight
-        optimizer.call(weight, weight_gradient)
-      end
-      def expand_feature(x)
-        n_samples = x.shape[0]
-        Numo::NArray.hstack([x, Numo::DFloat.ones([n_samples, 1]) * @params[:bias_scale]])
-      end
-      def split_weight(weight)
-        if @params[:fit_bias]
-          [weight[0...-1].dup, weight[-1]]
-        else
-          [weight, 0.0]
-        end
-      end
-      # :nocov:
-    end
-  end
-end

data/lib/rumale/optimizer/ada_grad.rb DELETED

@@ -1,39 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-module Rumale
-  module Optimizer
-    # AdaGrad is a class that implements AdaGrad optimizer.
-    #
-    # *Reference*
-    # - Duchi, J., Hazan, E., and Singer, Y., "Adaptive Subgradient Methods for Online Learning and Stochastic Optimization," J. Machine Learning Research, vol. 12, pp. 2121--2159, 2011.
-    class AdaGrad
-      include Base::BaseEstimator
-      include Validation
-      # Create a new optimizer with AdaGrad.
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      def initialize(learning_rate: 0.01)
-        check_params_numeric(learning_rate: learning_rate)
-        check_params_positive(learning_rate: learning_rate)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @moment = nil
-      end
-      # Calculate the updated weight with AdaGrad adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @moment ||= Numo::DFloat.zeros(weight.shape[0])
-        @moment += gradient**2
-        weight - (@params[:learning_rate] / (@moment**0.5 + 1.0e-8)) * gradient
-      end
-    end
-  end
-end

data/lib/rumale/optimizer/adam.rb DELETED

@@ -1,53 +0,0 @@
-# frozen_string_literal: true
-require 'rumale/validation'
-require 'rumale/base/base_estimator'
-module Rumale
-  module Optimizer
-    # Adam is a class that implements Adam optimizer.
-    #
-    # *Reference*
-    # - Kingma, D P., and Ba, J., "Adam: A Method for Stochastic Optimization," Proc. ICLR'15, 2015.
-    class Adam
-      include Base::BaseEstimator
-      include Validation
-      # Create a new optimizer with Adam
-      #
-      # @param learning_rate [Float] The initial value of learning rate.
-      # @param decay1 [Float] The smoothing parameter for the first moment.
-      # @param decay2 [Float] The smoothing parameter for the second moment.
-      def initialize(learning_rate: 0.001, decay1: 0.9, decay2: 0.999)
-        check_params_numeric(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        check_params_positive(learning_rate: learning_rate, decay1: decay1, decay2: decay2)
-        @params = {}
-        @params[:learning_rate] = learning_rate
-        @params[:decay1] = decay1
-        @params[:decay2] = decay2
-        @fst_moment = nil
-        @sec_moment = nil
-        @iter = 0
-      end
-      # Calculate the updated weight with Adam adaptive learning rate.
-      #
-      # @param weight [Numo::DFloat] (shape: [n_features]) The weight to be updated.
-      # @param gradient [Numo::DFloat] (shape: [n_features]) The gradient for updating the weight.
-      # @return [Numo::DFloat] (shape: [n_features]) The updated weight.
-      def call(weight, gradient)
-        @fst_moment ||= Numo::DFloat.zeros(weight.shape)
-        @sec_moment ||= Numo::DFloat.zeros(weight.shape)
-        @iter += 1
-        @fst_moment = @params[:decay1] * @fst_moment + (1.0 - @params[:decay1]) * gradient
-        @sec_moment = @params[:decay2] * @sec_moment + (1.0 - @params[:decay2]) * gradient**2
-        nm_fst_moment = @fst_moment / (1.0 - @params[:decay1]**@iter)
-        nm_sec_moment = @sec_moment / (1.0 - @params[:decay2]**@iter)
-        weight - @params[:learning_rate] * nm_fst_moment / (nm_sec_moment**0.5 + 1e-8)
-      end
-    end
-  end
-end