svmkit 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/README.md +3 -5
- data/lib/svmkit.rb +4 -6
- data/lib/svmkit/dataset.rb +90 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +28 -35
- data/lib/svmkit/kernel_machine/kernel_svc.rb +27 -34
- data/lib/svmkit/linear_model/logistic_regression.rb +43 -35
- data/lib/svmkit/linear_model/{pegasos_svc.rb → svc.rb} +45 -39
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +20 -31
- data/lib/svmkit/pairwise_metric.rb +20 -20
- data/lib/svmkit/preprocessing/l2_normalizer.rb +9 -12
- data/lib/svmkit/preprocessing/min_max_scaler.rb +17 -24
- data/lib/svmkit/preprocessing/standard_scaler.rb +16 -17
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +15 -3
- metadata +43 -9
- data/lib/svmkit/utils.rb +0 -24
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 4a53bee5e11b90721544b873d144b149b38aafe1
         | 
| 4 | 
            +
              data.tar.gz: f1ded6552e6cbdd8af3c29c4d8d403d3c8a62128
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: c3e3073f1afd4470cc21e1241d1f3666bbfefcd871700f711cfe377bb04c490f2f3ff10bc4d8ef764e05e0015faf09aef114e45ad0affde35a18641b064ed389
         | 
| 7 | 
            +
              data.tar.gz: 90144eea5e5f848dffb1325cd4576f27dbf61e5032917493e4957f5acba96489cc2b556f88a2078f5f2d9b5d2842c32454e6e56c003bfd2e36f6d5263cefc4c6
         | 
    
        data/HISTORY.md
    CHANGED
    
    | @@ -1,3 +1,7 @@ | |
| 1 | 
            +
            # 0.2.0
         | 
| 2 | 
            +
            - Migrated the linear algebra library to Numo::NArray.
         | 
| 3 | 
            +
            - Added module for loading and saving libsvm format file.
         | 
| 4 | 
            +
             | 
| 1 5 | 
             
            # 0.1.3
         | 
| 2 6 | 
             
            - Added class for Kernel Support Vector Machine with Pegasos algorithm.
         | 
| 3 7 | 
             
            - Added module for calculating pairwise kernel functions and Euclidean distances.
         | 
    
        data/README.md
    CHANGED
    
    | @@ -30,9 +30,8 @@ Training phase: | |
| 30 30 |  | 
| 31 31 | 
             
            ```ruby
         | 
| 32 32 | 
             
            require 'svmkit'
         | 
| 33 | 
            -
            require 'libsvmloader'
         | 
| 34 33 |  | 
| 35 | 
            -
            samples, labels =  | 
| 34 | 
            +
            samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
         | 
| 36 35 |  | 
| 37 36 | 
             
            normalizer = SVMKit::Preprocessing::MinMaxScaler.new
         | 
| 38 37 | 
             
            normalized = normalizer.fit_transform(samples)
         | 
| @@ -41,7 +40,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102 | |
| 41 40 | 
             
            transformed = transformer.fit_transform(normalized)
         | 
| 42 41 |  | 
| 43 42 | 
             
            base_classifier =
         | 
| 44 | 
            -
              SVMKit::LinearModel:: | 
| 43 | 
            +
              SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
         | 
| 45 44 | 
             
            classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
         | 
| 46 45 | 
             
            classifier.fit(transformed, labels)
         | 
| 47 46 |  | 
| @@ -54,9 +53,8 @@ Testing phase: | |
| 54 53 |  | 
| 55 54 | 
             
            ```ruby
         | 
| 56 55 | 
             
            require 'svmkit'
         | 
| 57 | 
            -
            require 'libsvmloader'
         | 
| 58 56 |  | 
| 59 | 
            -
            samples, labels =  | 
| 57 | 
            +
            samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits.t')
         | 
| 60 58 |  | 
| 61 59 | 
             
            normalizer = Marshal.load(File.binread('trained_normalizer.dat'))
         | 
| 62 60 | 
             
            transformer = Marshal.load(File.binread('trained_transformer.dat'))
         | 
    
        data/lib/svmkit.rb
    CHANGED
    
    | @@ -1,16 +1,14 @@ | |
| 1 | 
            -
             | 
| 2 | 
            -
             | 
| 3 | 
            -
            rescue LoadError
         | 
| 4 | 
            -
            end
         | 
| 1 | 
            +
             | 
| 2 | 
            +
            require 'numo/narray'
         | 
| 5 3 |  | 
| 6 4 | 
             
            require 'svmkit/version'
         | 
| 7 | 
            -
            require 'svmkit/utils'
         | 
| 8 5 | 
             
            require 'svmkit/pairwise_metric'
         | 
| 6 | 
            +
            require 'svmkit/dataset'
         | 
| 9 7 | 
             
            require 'svmkit/base/base_estimator'
         | 
| 10 8 | 
             
            require 'svmkit/base/classifier'
         | 
| 11 9 | 
             
            require 'svmkit/base/transformer'
         | 
| 12 10 | 
             
            require 'svmkit/kernel_approximation/rbf'
         | 
| 13 | 
            -
            require 'svmkit/linear_model/ | 
| 11 | 
            +
            require 'svmkit/linear_model/svc'
         | 
| 14 12 | 
             
            require 'svmkit/linear_model/logistic_regression'
         | 
| 15 13 | 
             
            require 'svmkit/kernel_machine/kernel_svc'
         | 
| 16 14 | 
             
            require 'svmkit/multiclass/one_vs_rest_classifier'
         | 
| @@ -0,0 +1,90 @@ | |
| 1 | 
            +
            module SVMKit
         | 
| 2 | 
            +
              # Module for loading and saving a dataset file.
         | 
| 3 | 
            +
              module Dataset
         | 
| 4 | 
            +
                class << self
         | 
| 5 | 
            +
                  # Load a dataset with the libsvm file format into Numo::NArray.
         | 
| 6 | 
            +
                  #
         | 
| 7 | 
            +
                  # @param filename [String] A path to a dataset file.
         | 
| 8 | 
            +
                  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
         | 
| 9 | 
            +
                  #
         | 
| 10 | 
            +
                  # @return [Array<Numo::NArray>]
         | 
| 11 | 
            +
                  #   Returns array containing the (n_samples x n_features) matrix for feature vectors
         | 
| 12 | 
            +
                  #   and (n_samples) vector for labels or target values.
         | 
| 13 | 
            +
                  def load_libsvm_file(filename, zero_based: false)
         | 
| 14 | 
            +
                    ftvecs = []
         | 
| 15 | 
            +
                    labels = []
         | 
| 16 | 
            +
                    n_features = 0
         | 
| 17 | 
            +
                    File.read(filename).split("\n").each do |line|
         | 
| 18 | 
            +
                      label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
         | 
| 19 | 
            +
                      labels.push(label)
         | 
| 20 | 
            +
                      ftvecs.push(ftvec)
         | 
| 21 | 
            +
                      n_features = [n_features, max_idx].max
         | 
| 22 | 
            +
                    end
         | 
| 23 | 
            +
                    [convert_to_matrix(ftvecs, n_features), Numo::NArray.asarray(labels)]
         | 
| 24 | 
            +
                  end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                  # Dump the dataset with the libsvm file format.
         | 
| 27 | 
            +
                  #
         | 
| 28 | 
            +
                  # @param data [Numo::NArray] (shape: [n_samples, n_features]) matrix consisting of feature vectors.
         | 
| 29 | 
            +
                  # @param labels [Numo::NArray] (shape: [n_samples]) vector consisting of labels or target values.
         | 
| 30 | 
            +
                  # @param filename [String] A path to the output libsvm file.
         | 
| 31 | 
            +
                  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
         | 
| 32 | 
            +
                  def dump_libsvm_file(data, labels, filename, zero_based: false)
         | 
| 33 | 
            +
                    n_samples = [data.shape[0], labels.shape[0]].min
         | 
| 34 | 
            +
                    label_type = detect_dtype(labels)
         | 
| 35 | 
            +
                    value_type = detect_dtype(data)
         | 
| 36 | 
            +
                    File.open(filename, 'w') do |file|
         | 
| 37 | 
            +
                      n_samples.times do |n|
         | 
| 38 | 
            +
                        file.puts(dump_libsvm_line(labels[n], data[n, true],
         | 
| 39 | 
            +
                                                   label_type, value_type, zero_based))
         | 
| 40 | 
            +
                      end
         | 
| 41 | 
            +
                    end
         | 
| 42 | 
            +
                  end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  private
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                  def parse_libsvm_line(line, zero_based)
         | 
| 47 | 
            +
                    tokens = line.split
         | 
| 48 | 
            +
                    label = tokens.shift
         | 
| 49 | 
            +
                    label = label.to_i.to_s == label ? label.to_i : label.to_f
         | 
| 50 | 
            +
                    ftvec = tokens.map do |el|
         | 
| 51 | 
            +
                      idx, val = el.split(':')
         | 
| 52 | 
            +
                      idx = idx.to_i - (zero_based == false ? 1 : 0)
         | 
| 53 | 
            +
                      val = val.to_i.to_s == val ? val.to_i : val.to_f
         | 
| 54 | 
            +
                      [idx, val]
         | 
| 55 | 
            +
                    end
         | 
| 56 | 
            +
                    max_idx = ftvec.map { |el| el[0] }.max
         | 
| 57 | 
            +
                    max_idx ||= 0
         | 
| 58 | 
            +
                    [label, ftvec, max_idx]
         | 
| 59 | 
            +
                  end
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                  def convert_to_matrix(data, n_features)
         | 
| 62 | 
            +
                    mat = []
         | 
| 63 | 
            +
                    data.each do |ft|
         | 
| 64 | 
            +
                      vec = Array.new(n_features) { 0 }
         | 
| 65 | 
            +
                      ft.each { |el| vec[el[0]] = el[1] }
         | 
| 66 | 
            +
                      mat.push(vec)
         | 
| 67 | 
            +
                    end
         | 
| 68 | 
            +
                    Numo::NArray.asarray(mat)
         | 
| 69 | 
            +
                  end
         | 
| 70 | 
            +
             | 
| 71 | 
            +
                  def detect_dtype(data)
         | 
| 72 | 
            +
                    arr_type_str = Numo::NArray.array_type(data).to_s
         | 
| 73 | 
            +
                    type = '%s'
         | 
| 74 | 
            +
                    type = '%d' if ['Numo::Int8', 'Numo::Int16', 'Numo::Int32', 'Numo::Int64'].include?(arr_type_str)
         | 
| 75 | 
            +
                    type = '%d' if ['Numo::UInt8', 'Numo::UInt16', 'Numo::UInt32', 'Numo::UInt64'].include?(arr_type_str)
         | 
| 76 | 
            +
                    type = '%.10g' if ['Numo::SFloat', 'Numo::DFloat'].include?(arr_type_str)
         | 
| 77 | 
            +
                    type
         | 
| 78 | 
            +
                  end
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                  def dump_libsvm_line(label, ftvec, label_type, value_type, zero_based)
         | 
| 81 | 
            +
                    line = format(label_type.to_s, label)
         | 
| 82 | 
            +
                    ftvec.to_a.each_with_index do |val, n|
         | 
| 83 | 
            +
                      idx = n + (zero_based == false ? 1 : 0)
         | 
| 84 | 
            +
                      line += format(" %d:#{value_type}", idx, val) if val != 0.0
         | 
| 85 | 
            +
                    end
         | 
| 86 | 
            +
                    line
         | 
| 87 | 
            +
                  end
         | 
| 88 | 
            +
                end
         | 
| 89 | 
            +
              end
         | 
| 90 | 
            +
            end
         | 
| @@ -17,19 +17,12 @@ module SVMKit | |
| 17 17 | 
             
                  include Base::BaseEstimator
         | 
| 18 18 | 
             
                  include Base::Transformer
         | 
| 19 19 |  | 
| 20 | 
            -
                  # @!visibility private
         | 
| 21 | 
            -
                  DEFAULT_PARAMS = {
         | 
| 22 | 
            -
                    gamma: 1.0,
         | 
| 23 | 
            -
                    n_components: 128,
         | 
| 24 | 
            -
                    random_seed: nil
         | 
| 25 | 
            -
                  }.freeze
         | 
| 26 | 
            -
             | 
| 27 20 | 
             
                  # Return the random matrix for transformation.
         | 
| 28 | 
            -
                  # @return [ | 
| 21 | 
            +
                  # @return [Numo::DFloat] (shape: [n_features, n_components])
         | 
| 29 22 | 
             
                  attr_reader :random_mat
         | 
| 30 23 |  | 
| 31 24 | 
             
                  # Return the random vector for transformation.
         | 
| 32 | 
            -
                  # @return [ | 
| 25 | 
            +
                  # @return [Numo::DFloat] (shape: [n_components])
         | 
| 33 26 | 
             
                  attr_reader :random_vec
         | 
| 34 27 |  | 
| 35 28 | 
             
                  # Return the random generator for transformation.
         | 
| @@ -38,14 +31,14 @@ module SVMKit | |
| 38 31 |  | 
| 39 32 | 
             
                  # Create a new transformer for mapping to RBF kernel feature space.
         | 
| 40 33 | 
             
                  #
         | 
| 41 | 
            -
                  # @ | 
| 42 | 
            -
                  #
         | 
| 43 | 
            -
                  # @param  | 
| 44 | 
            -
                   | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
                    self.params =  | 
| 34 | 
            +
                  # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
         | 
| 35 | 
            +
                  # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
         | 
| 36 | 
            +
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 37 | 
            +
                  def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
         | 
| 38 | 
            +
                    self.params = {}
         | 
| 39 | 
            +
                    self.params[:gamma] = gamma
         | 
| 40 | 
            +
                    self.params[:n_components] = n_components
         | 
| 41 | 
            +
                    self.params[:random_seed] = random_seed
         | 
| 49 42 | 
             
                    self.params[:random_seed] ||= srand
         | 
| 50 43 | 
             
                    @rng = Random.new(self.params[:random_seed])
         | 
| 51 44 | 
             
                    @random_mat = nil
         | 
| @@ -56,7 +49,7 @@ module SVMKit | |
| 56 49 | 
             
                  #
         | 
| 57 50 | 
             
                  # @overload fit(x) -> RBF
         | 
| 58 51 | 
             
                  #
         | 
| 59 | 
            -
                  # @param x [ | 
| 52 | 
            +
                  # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 60 53 | 
             
                  #   This method uses only the number of features of the data.
         | 
| 61 54 | 
             
                  # @return [RBF] The learned transformer itself.
         | 
| 62 55 | 
             
                  def fit(x, _y = nil)
         | 
| @@ -64,40 +57,40 @@ module SVMKit | |
| 64 57 | 
             
                    params[:n_components] = 2 * n_features if params[:n_components] <= 0
         | 
| 65 58 | 
             
                    @random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5
         | 
| 66 59 | 
             
                    n_half_components = params[:n_components] / 2
         | 
| 67 | 
            -
                    @random_vec =  | 
| 68 | 
            -
                       | 
| 60 | 
            +
                    @random_vec = Numo::DFloat.zeros(params[:n_components] - n_half_components).concatenate(
         | 
| 61 | 
            +
                      Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
         | 
| 69 62 | 
             
                    )
         | 
| 70 63 | 
             
                    self
         | 
| 71 64 | 
             
                  end
         | 
| 72 65 |  | 
| 73 66 | 
             
                  # Fit the model with training data, and then transform them with the learned model.
         | 
| 74 67 | 
             
                  #
         | 
| 75 | 
            -
                  # @overload fit_transform(x) ->  | 
| 68 | 
            +
                  # @overload fit_transform(x) -> Numo::DFloat
         | 
| 76 69 | 
             
                  #
         | 
| 77 | 
            -
                  # @param x [ | 
| 78 | 
            -
                  # @return [ | 
| 70 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 71 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
         | 
| 79 72 | 
             
                  def fit_transform(x, _y = nil)
         | 
| 80 73 | 
             
                    fit(x).transform(x)
         | 
| 81 74 | 
             
                  end
         | 
| 82 75 |  | 
| 83 76 | 
             
                  # Transform the given data with the learned model.
         | 
| 84 77 | 
             
                  #
         | 
| 85 | 
            -
                  # @overload transform(x) ->  | 
| 78 | 
            +
                  # @overload transform(x) -> Numo::DFloat
         | 
| 86 79 | 
             
                  #
         | 
| 87 | 
            -
                  # @param x [ | 
| 88 | 
            -
                  # @return [ | 
| 80 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
         | 
| 81 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
         | 
| 89 82 | 
             
                  def transform(x)
         | 
| 90 83 | 
             
                    n_samples, = x.shape
         | 
| 91 | 
            -
                    projection = x.dot(@random_mat) + @random_vec. | 
| 92 | 
            -
                     | 
| 84 | 
            +
                    projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
         | 
| 85 | 
            +
                    Numo::NMath.sin(projection) * ((2.0 / params[:n_components])**0.5)
         | 
| 93 86 | 
             
                  end
         | 
| 94 87 |  | 
| 95 88 | 
             
                  # Dump marshal data.
         | 
| 96 89 | 
             
                  # @return [Hash] The marshal data about RBF.
         | 
| 97 90 | 
             
                  def marshal_dump
         | 
| 98 91 | 
             
                    { params: params,
         | 
| 99 | 
            -
                      random_mat:  | 
| 100 | 
            -
                      random_vec:  | 
| 92 | 
            +
                      random_mat: @random_mat,
         | 
| 93 | 
            +
                      random_vec: @random_vec,
         | 
| 101 94 | 
             
                      rng: @rng }
         | 
| 102 95 | 
             
                  end
         | 
| 103 96 |  | 
| @@ -105,8 +98,8 @@ module SVMKit | |
| 105 98 | 
             
                  # @return [nil]
         | 
| 106 99 | 
             
                  def marshal_load(obj)
         | 
| 107 100 | 
             
                    self.params = obj[:params]
         | 
| 108 | 
            -
                    @random_mat =  | 
| 109 | 
            -
                    @random_vec =  | 
| 101 | 
            +
                    @random_mat = obj[:random_mat]
         | 
| 102 | 
            +
                    @random_vec = obj[:random_vec]
         | 
| 110 103 | 
             
                    @rng = obj[:rng]
         | 
| 111 104 | 
             
                    nil
         | 
| 112 105 | 
             
                  end
         | 
| @@ -115,15 +108,15 @@ module SVMKit | |
| 115 108 |  | 
| 116 109 | 
             
                  # Generate the uniform random matrix with the given shape.
         | 
| 117 110 | 
             
                  def rand_uniform(shape)
         | 
| 118 | 
            -
                    rnd_vals = Array.new( | 
| 119 | 
            -
                     | 
| 111 | 
            +
                    rnd_vals = Array.new(shape.inject(:*)) { @rng.rand }
         | 
| 112 | 
            +
                    Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
         | 
| 120 113 | 
             
                  end
         | 
| 121 114 |  | 
| 122 115 | 
             
                  # Generate the normal random matrix with the given shape, mean, and standard deviation.
         | 
| 123 116 | 
             
                  def rand_normal(shape, mu = 0.0, sigma = 1.0)
         | 
| 124 117 | 
             
                    a = rand_uniform(shape)
         | 
| 125 118 | 
             
                    b = rand_uniform(shape)
         | 
| 126 | 
            -
                    (( | 
| 119 | 
            +
                    (Numo::NMath.sqrt(Numo::NMath.log(a) * -2.0) * Numo::NMath.sin(b * 2.0 * Math::PI)) * sigma + mu
         | 
| 127 120 | 
             
                  end
         | 
| 128 121 | 
             
                end
         | 
| 129 122 | 
             
              end
         | 
| @@ -2,7 +2,7 @@ require 'svmkit/base/base_estimator' | |
| 2 2 | 
             
            require 'svmkit/base/classifier'
         | 
| 3 3 |  | 
| 4 4 | 
             
            module SVMKit
         | 
| 5 | 
            -
              # This module consists of the classes that implement  | 
| 5 | 
            +
              # This module consists of the classes that implement kernel method-based estimators.
         | 
| 6 6 | 
             
              module KernelMachine
         | 
| 7 7 | 
             
                # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
         | 
| 8 8 | 
             
                #
         | 
| @@ -20,15 +20,8 @@ module SVMKit | |
| 20 20 | 
             
                  include Base::BaseEstimator
         | 
| 21 21 | 
             
                  include Base::Classifier
         | 
| 22 22 |  | 
| 23 | 
            -
                  # @!visibility private
         | 
| 24 | 
            -
                  DEFAULT_PARAMS = {
         | 
| 25 | 
            -
                    reg_param: 1.0,
         | 
| 26 | 
            -
                    max_iter: 1000,
         | 
| 27 | 
            -
                    random_seed: nil
         | 
| 28 | 
            -
                  }.freeze
         | 
| 29 | 
            -
             | 
| 30 23 | 
             
                  # Return the weight vector for Kernel SVC.
         | 
| 31 | 
            -
                  # @return [ | 
| 24 | 
            +
                  # @return [Numo::DFloat] (shape: [n_training_samples])
         | 
| 32 25 | 
             
                  attr_reader :weight_vec
         | 
| 33 26 |  | 
| 34 27 | 
             
                  # Return the random generator for performing random sampling in the Pegasos algorithm.
         | 
| @@ -37,14 +30,14 @@ module SVMKit | |
| 37 30 |  | 
| 38 31 | 
             
                  # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
         | 
| 39 32 | 
             
                  #
         | 
| 40 | 
            -
                  # @ | 
| 41 | 
            -
                  #
         | 
| 42 | 
            -
                  # @param  | 
| 43 | 
            -
                   | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
                    self.params =  | 
| 33 | 
            +
                  # @param reg_param [Float] The regularization parameter.
         | 
| 34 | 
            +
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 35 | 
            +
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 36 | 
            +
                  def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
         | 
| 37 | 
            +
                    self.params = {}
         | 
| 38 | 
            +
                    self.params[:reg_param] = reg_param
         | 
| 39 | 
            +
                    self.params[:max_iter] = max_iter
         | 
| 40 | 
            +
                    self.params[:random_seed] = random_seed
         | 
| 48 41 | 
             
                    self.params[:random_seed] ||= srand
         | 
| 49 42 | 
             
                    @weight_vec = nil
         | 
| 50 43 | 
             
                    @rng = Random.new(self.params[:random_seed])
         | 
| @@ -52,74 +45,74 @@ module SVMKit | |
| 52 45 |  | 
| 53 46 | 
             
                  # Fit the model with given training data.
         | 
| 54 47 | 
             
                  #
         | 
| 55 | 
            -
                  # @param x [ | 
| 48 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
         | 
| 56 49 | 
             
                  #   The kernel matrix of the training data to be used for fitting the model.
         | 
| 57 | 
            -
                  # @param y [ | 
| 50 | 
            +
                  # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
         | 
| 58 51 | 
             
                  # @return [KernelSVC] The learned classifier itself.
         | 
| 59 52 | 
             
                  def fit(x, y)
         | 
| 60 53 | 
             
                    # Generate binary labels
         | 
| 61 | 
            -
                    negative_label = y.uniq.sort.shift
         | 
| 62 | 
            -
                    bin_y = y. | 
| 54 | 
            +
                    negative_label = y.to_a.uniq.sort.shift
         | 
| 55 | 
            +
                    bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
         | 
| 63 56 | 
             
                    # Initialize some variables.
         | 
| 64 57 | 
             
                    n_training_samples = x.shape[0]
         | 
| 65 58 | 
             
                    rand_ids = []
         | 
| 66 | 
            -
                    weight_vec =  | 
| 59 | 
            +
                    weight_vec = Numo::DFloat.zeros(n_training_samples)
         | 
| 67 60 | 
             
                    # Start optimization.
         | 
| 68 61 | 
             
                    params[:max_iter].times do |t|
         | 
| 69 62 | 
             
                      # random sampling
         | 
| 70 63 | 
             
                      rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
         | 
| 71 64 | 
             
                      target_id = rand_ids.shift
         | 
| 72 65 | 
             
                      # update the weight vector
         | 
| 73 | 
            -
                      func = (weight_vec * bin_y[target_id]).dot(x | 
| 66 | 
            +
                      func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
         | 
| 74 67 | 
             
                      func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
         | 
| 75 68 | 
             
                      weight_vec[target_id] += 1.0 if func < 1.0
         | 
| 76 69 | 
             
                    end
         | 
| 77 70 | 
             
                    # Store the learned model.
         | 
| 78 | 
            -
                    @weight_vec = weight_vec *  | 
| 71 | 
            +
                    @weight_vec = weight_vec * Numo::DFloat.asarray(bin_y)
         | 
| 79 72 | 
             
                    self
         | 
| 80 73 | 
             
                  end
         | 
| 81 74 |  | 
| 82 75 | 
             
                  # Calculate confidence scores for samples.
         | 
| 83 76 | 
             
                  #
         | 
| 84 | 
            -
                  # @param x [ | 
| 77 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
         | 
| 85 78 | 
             
                  #     The kernel matrix between testing samples and training samples to compute the scores.
         | 
| 86 | 
            -
                  # @return [ | 
| 79 | 
            +
                  # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
         | 
| 87 80 | 
             
                  def decision_function(x)
         | 
| 88 81 | 
             
                    @weight_vec.dot(x.transpose)
         | 
| 89 82 | 
             
                  end
         | 
| 90 83 |  | 
| 91 84 | 
             
                  # Predict class labels for samples.
         | 
| 92 85 | 
             
                  #
         | 
| 93 | 
            -
                  # @param x [ | 
| 86 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
         | 
| 94 87 | 
             
                  #     The kernel matrix between testing samples and training samples to predict the labels.
         | 
| 95 | 
            -
                  # @return [ | 
| 88 | 
            +
                  # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
         | 
| 96 89 | 
             
                  def predict(x)
         | 
| 97 | 
            -
                    decision_function(x).map { |v| v >= 0 ? 1 : -1 }
         | 
| 90 | 
            +
                    Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
         | 
| 98 91 | 
             
                  end
         | 
| 99 92 |  | 
| 100 93 | 
             
                  # Calculate the mean accuracy of the given testing data.
         | 
| 101 94 | 
             
                  #
         | 
| 102 | 
            -
                  # @param x [ | 
| 95 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
         | 
| 103 96 | 
             
                  #     The kernel matrix between testing samples and training samples.
         | 
| 104 | 
            -
                  # @param y [ | 
| 97 | 
            +
                  # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
         | 
| 105 98 | 
             
                  # @return [Float] Mean accuracy
         | 
| 106 99 | 
             
                  def score(x, y)
         | 
| 107 100 | 
             
                    p = predict(x)
         | 
| 108 | 
            -
                    n_hits = (y. | 
| 101 | 
            +
                    n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         | 
| 109 102 | 
             
                    n_hits / y.size.to_f
         | 
| 110 103 | 
             
                  end
         | 
| 111 104 |  | 
| 112 105 | 
             
                  # Dump marshal data.
         | 
| 113 106 | 
             
                  # @return [Hash] The marshal data about KernelSVC.
         | 
| 114 107 | 
             
                  def marshal_dump
         | 
| 115 | 
            -
                    { params: params, weight_vec:  | 
| 108 | 
            +
                    { params: params, weight_vec: @weight_vec, rng: @rng }
         | 
| 116 109 | 
             
                  end
         | 
| 117 110 |  | 
| 118 111 | 
             
                  # Load marshal data.
         | 
| 119 112 | 
             
                  # @return [nil]
         | 
| 120 113 | 
             
                  def marshal_load(obj)
         | 
| 121 114 | 
             
                    self.params = obj[:params]
         | 
| 122 | 
            -
                    @weight_vec =  | 
| 115 | 
            +
                    @weight_vec = obj[:weight_vec]
         | 
| 123 116 | 
             
                    @rng = obj[:rng]
         | 
| 124 117 | 
             
                    nil
         | 
| 125 118 | 
             
                  end
         | 
| @@ -31,7 +31,7 @@ module SVMKit | |
| 31 31 | 
             
                  }.freeze
         | 
| 32 32 |  | 
| 33 33 | 
             
                  # Return the weight vector for Logistic Regression.
         | 
| 34 | 
            -
                  # @return [ | 
| 34 | 
            +
                  # @return [Numo::DFloat] (shape: [n_features])
         | 
| 35 35 | 
             
                  attr_reader :weight_vec
         | 
| 36 36 |  | 
| 37 37 | 
             
                  # Return the bias term (a.k.a. intercept) for Logistic Regression.
         | 
| @@ -44,18 +44,21 @@ module SVMKit | |
| 44 44 |  | 
| 45 45 | 
             
                  # Create a new classifier with Logistic Regression by the SGD optimization.
         | 
| 46 46 | 
             
                  #
         | 
| 47 | 
            -
                  # @ | 
| 48 | 
            -
                  #
         | 
| 49 | 
            -
                  # @param  | 
| 50 | 
            -
                  # @option params [Float]   :reg_param (1.0) The regularization parameter.
         | 
| 51 | 
            -
                  # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
         | 
| 52 | 
            -
                  # @option params [Float]   :bias_scale (1.0) The scale of the bias term.
         | 
| 47 | 
            +
                  # @param reg_param [Float] The regularization parameter.
         | 
| 48 | 
            +
                  # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
         | 
| 49 | 
            +
                  # @param bias_scale [Float] The scale of the bias term.
         | 
| 53 50 | 
             
                  #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
         | 
| 54 | 
            -
                  # @ | 
| 55 | 
            -
                  # @ | 
| 56 | 
            -
                  # @ | 
| 57 | 
            -
                  def initialize( | 
| 58 | 
            -
                    self.params =  | 
| 51 | 
            +
                  # @param max_iter [Integer] The maximum number of iterations.
         | 
| 52 | 
            +
                  # @param batch_size [Integer] The size of the mini batches.
         | 
| 53 | 
            +
                  # @param random_seed [Integer] The seed value used to initialize the random generator.
         | 
| 54 | 
            +
                  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
         | 
| 55 | 
            +
                    self.params = {}
         | 
| 56 | 
            +
                    self.params[:reg_param] = reg_param
         | 
| 57 | 
            +
                    self.params[:fit_bias] = fit_bias
         | 
| 58 | 
            +
                    self.params[:bias_scale] = bias_scale
         | 
| 59 | 
            +
                    self.params[:max_iter] = max_iter
         | 
| 60 | 
            +
                    self.params[:batch_size] = batch_size
         | 
| 61 | 
            +
                    self.params[:random_seed] = random_seed
         | 
| 59 62 | 
             
                    self.params[:random_seed] ||= srand
         | 
| 60 63 | 
             
                    @weight_vec = nil
         | 
| 61 64 | 
             
                    @bias_term = 0.0
         | 
| @@ -64,21 +67,25 @@ module SVMKit | |
| 64 67 |  | 
| 65 68 | 
             
                  # Fit the model with given training data.
         | 
| 66 69 | 
             
                  #
         | 
| 67 | 
            -
                  # @param x [ | 
| 68 | 
            -
                  # @param y [ | 
| 70 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
         | 
| 71 | 
            +
                  # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
         | 
| 69 72 | 
             
                  #   to be used for fitting the model.
         | 
| 70 73 | 
             
                  # @return [LogisticRegression] The learned classifier itself.
         | 
| 71 74 | 
             
                  def fit(x, y)
         | 
| 72 75 | 
             
                    # Generate binary labels.
         | 
| 73 | 
            -
                    negative_label = y.uniq.sort.shift
         | 
| 74 | 
            -
                    bin_y = y. | 
| 76 | 
            +
                    negative_label = y.to_a.uniq.sort.shift
         | 
| 77 | 
            +
                    bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
         | 
| 75 78 | 
             
                    # Expand feature vectors for bias term.
         | 
| 76 79 | 
             
                    samples = x
         | 
| 77 | 
            -
                     | 
| 80 | 
            +
                    if params[:fit_bias]
         | 
| 81 | 
            +
                      samples = Numo::NArray.hstack(
         | 
| 82 | 
            +
                        [samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
         | 
| 83 | 
            +
                      )
         | 
| 84 | 
            +
                    end
         | 
| 78 85 | 
             
                    # Initialize some variables.
         | 
| 79 86 | 
             
                    n_samples, n_features = samples.shape
         | 
| 80 87 | 
             
                    rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
         | 
| 81 | 
            -
                    weight_vec =  | 
| 88 | 
            +
                    weight_vec = Numo::DFloat.zeros(n_features)
         | 
| 82 89 | 
             
                    # Start optimization.
         | 
| 83 90 | 
             
                    params[:max_iter].times do |t|
         | 
| 84 91 | 
             
                      # random sampling
         | 
| @@ -86,16 +93,17 @@ module SVMKit | |
| 86 93 | 
             
                      rand_ids.concat(subset_ids)
         | 
| 87 94 | 
             
                      # update the weight vector.
         | 
| 88 95 | 
             
                      eta = 1.0 / (params[:reg_param] * (t + 1))
         | 
| 89 | 
            -
                      mean_vec =  | 
| 96 | 
            +
                      mean_vec = Numo::DFloat.zeros(n_features)
         | 
| 90 97 | 
             
                      subset_ids.each do |n|
         | 
| 91 | 
            -
                        z = weight_vec.dot(samples | 
| 98 | 
            +
                        z = weight_vec.dot(samples[n, true])
         | 
| 92 99 | 
             
                        coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
         | 
| 93 | 
            -
                        mean_vec += samples | 
| 100 | 
            +
                        mean_vec += samples[n, true] * coef
         | 
| 94 101 | 
             
                      end
         | 
| 95 102 | 
             
                      mean_vec *= eta / params[:batch_size]
         | 
| 96 103 | 
             
                      weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
         | 
| 97 104 | 
             
                      # scale the weight vector.
         | 
| 98 | 
            -
                       | 
| 105 | 
            +
                      norm = Math.sqrt(weight_vec.dot(weight_vec))
         | 
| 106 | 
            +
                      scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
         | 
| 99 107 | 
             
                      weight_vec *= [1.0, scaler].min
         | 
| 100 108 | 
             
                    end
         | 
| 101 109 | 
             
                    # Store the learned model.
         | 
| @@ -111,51 +119,51 @@ module SVMKit | |
| 111 119 |  | 
| 112 120 | 
             
                  # Calculate confidence scores for samples.
         | 
| 113 121 | 
             
                  #
         | 
| 114 | 
            -
                  # @param x [ | 
| 115 | 
            -
                  # @return [ | 
| 122 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
         | 
| 123 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
         | 
| 116 124 | 
             
                  def decision_function(x)
         | 
| 117 | 
            -
                    w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0) | 
| 125 | 
            +
                    w = Numo::NMath.exp(((@weight_vec.dot(x.transpose) + @bias_term) * -1.0)) + 1.0
         | 
| 118 126 | 
             
                    w.map { |v| 1.0 / v }
         | 
| 119 127 | 
             
                  end
         | 
| 120 128 |  | 
| 121 129 | 
             
                  # Predict class labels for samples.
         | 
| 122 130 | 
             
                  #
         | 
| 123 | 
            -
                  # @param x [ | 
| 124 | 
            -
                  # @return [ | 
| 131 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
         | 
| 132 | 
            +
                  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
         | 
| 125 133 | 
             
                  def predict(x)
         | 
| 126 | 
            -
                    decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
         | 
| 134 | 
            +
                    Numo::Int32.cast(decision_function(x).map { |v| v >= 0.5 ? 1 : -1 })
         | 
| 127 135 | 
             
                  end
         | 
| 128 136 |  | 
| 129 137 | 
             
                  # Predict probability for samples.
         | 
| 130 138 | 
             
                  #
         | 
| 131 | 
            -
                  # @param x [ | 
| 132 | 
            -
                  # @return [ | 
| 139 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
         | 
| 140 | 
            +
                  # @return [Numo::DFloat] (shape: [n_samples]) Predicted probability per sample.
         | 
| 133 141 | 
             
                  def predict_proba(x)
         | 
| 134 142 | 
             
                    decision_function(x)
         | 
| 135 143 | 
             
                  end
         | 
| 136 144 |  | 
| 137 145 | 
             
                  # Calculate the mean accuracy of the given testing data.
         | 
| 138 146 | 
             
                  #
         | 
| 139 | 
            -
                  # @param x [ | 
| 140 | 
            -
                  # @param y [ | 
| 147 | 
            +
                  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
         | 
| 148 | 
            +
                  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
         | 
| 141 149 | 
             
                  # @return [Float] Mean accuracy
         | 
| 142 150 | 
             
                  def score(x, y)
         | 
| 143 151 | 
             
                    p = predict(x)
         | 
| 144 | 
            -
                    n_hits = (y. | 
| 152 | 
            +
                    n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
         | 
| 145 153 | 
             
                    n_hits / y.size.to_f
         | 
| 146 154 | 
             
                  end
         | 
| 147 155 |  | 
| 148 156 | 
             
                  # Dump marshal data.
         | 
| 149 157 | 
             
                  # @return [Hash] The marshal data about LogisticRegression.
         | 
| 150 158 | 
             
                  def marshal_dump
         | 
| 151 | 
            -
                    { params: params, weight_vec:  | 
| 159 | 
            +
                    { params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
         | 
| 152 160 | 
             
                  end
         | 
| 153 161 |  | 
| 154 162 | 
             
                  # Load marshal data.
         | 
| 155 163 | 
             
                  # @return [nil]
         | 
| 156 164 | 
             
                  def marshal_load(obj)
         | 
| 157 165 | 
             
                    self.params = obj[:params]
         | 
| 158 | 
            -
                    @weight_vec =  | 
| 166 | 
            +
                    @weight_vec = obj[:weight_vec]
         | 
| 159 167 | 
             
                    @bias_term = obj[:bias_term]
         | 
| 160 168 | 
             
                    @rng = obj[:rng]
         | 
| 161 169 | 
             
                    nil
         |