RubyGems - svmkit - Versions diffs - 0.2.2 → 0.2.3 - Mend

svmkit 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/.travis.yml +4 -3
data/HISTORY.md +6 -1
data/README.md +21 -0
data/lib/svmkit.rb +1 -0
data/lib/svmkit/base/splitter.rb +1 -1
data/lib/svmkit/kernel_machine/kernel_svc.rb +2 -2
data/lib/svmkit/linear_model/logistic_regression.rb +1 -1
data/lib/svmkit/linear_model/svc.rb +1 -1
data/lib/svmkit/model_selection/cross_validation.rb +82 -0
data/lib/svmkit/model_selection/k_fold.rb +1 -1
data/lib/svmkit/model_selection/stratified_k_fold.rb +6 -2
data/lib/svmkit/version.rb +1 -1
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c7c326db290b1847234f890914fe5d670a4b1d36
-  data.tar.gz: ee5a624c92bf6b35edcccf4df469f9f552c2174d
+  metadata.gz: 6271a50754a13199f7c3c12c6f1b9e2a0a2075d5
+  data.tar.gz: ecdc84a2987f22d49ad1b8435397862771f96f37
 SHA512:
-  metadata.gz: 9256fc3d36e6247fae44ac1a14672eaf9b3ba414176b48c592be5aa8631d232dbaddba9f3884198e0ba751616a3017ad461da2fb7ec40ef26b9ab2b2417aadf5
-  data.tar.gz: e198dcbe0c7e782162a31e7131b961f619e76bf509b43b596299412ba5bb5ea16ea02e21bb2a79909d70bb230c81484083df94ec65db6e770e4e5adc712da174
+  metadata.gz: 9974eb62cd19ebca32ca92cafbcbf2e34a978d41c0aa9bb0765c001d56963189aa43f71806fa5a2492a4b3e50bcd939a018aa784198f5aef08e2159682682dd2
+  data.tar.gz: eee0f089449a71f79576165aa083c41fe1d9812fcc251c31c2a5c7cf06e6dfd4293dddd6685fa6285c8d89a3744bb3a72bd9db6f36b7cbc586b7a71800c1fb78

data/.travis.yml CHANGED Viewed

@@ -3,8 +3,9 @@ os: linux
 dist: trusty
 language: ruby
 rvm:
-  - 2.2.9
-  - 2.3.6
-  - 2.4.3
+  - 2.2
+  - 2.3
+  - 2.4
+  - 2.5
 before_install:
   - gem install --no-document bundler -v '~> 1.16'

data/HISTORY.md CHANGED Viewed

@@ -1,5 +1,10 @@
+# 0.2.3
+- Added class for cross validation.
+- Added specs for base modules.
+- Fixed validation of the number of splits when a negative label is given.
 # 0.2.2
-- Added classes for K-fold cross validation.
+- Added data splitter classes for K-fold cross validation.
 # 0.2.1
 - Added class for K-nearest neighbors classifier.

data/README.md CHANGED Viewed

@@ -66,6 +66,27 @@ transformed = transformer.transform(normalized)
 puts(sprintf("Accuracy: %.1f%%", 100.0 * classifier.score(transformed, labels)))
 ```
+5-fold cross-validation:
+```ruby
+require 'svmkit'
+samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
+kernel_svc =
+  SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
+ovr_kernel_svc = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: kernel_svc)
+kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
+cv = SVMKit::ModelSelection::CrossValidation.new(estimator: ovr_kernel_svc, splitter: kf)
+kernel_mat = SVMKit::PairwiseMetric::rbf_kernel(samples, nil, 0.005)
+report = cv.perform(kernel_mat, labels)
+mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
+puts(sprintf("Mean Accuracy: %.1f%%", 100.0 * mean_accuracy))
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/svmkit.rb CHANGED Viewed

@@ -19,3 +19,4 @@ require 'svmkit/preprocessing/min_max_scaler'
 require 'svmkit/preprocessing/standard_scaler'
 require 'svmkit/model_selection/k_fold'
 require 'svmkit/model_selection/stratified_k_fold'
+require 'svmkit/model_selection/cross_validation'

data/lib/svmkit/base/splitter.rb CHANGED Viewed

@@ -9,7 +9,7 @@ module SVMKit
       # An abstract method for splitting dataset.
       def split
-        raise NoImplementedError, "#{__method__} has to be implemented in #{self.class}."
+        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
       end
     end
   end

data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED Viewed

@@ -68,7 +68,7 @@ module SVMKit
           weight_vec[target_id] += 1.0 if func < 1.0
         end
         # Store the learned model.
-        @weight_vec = weight_vec * Numo::DFloat.asarray(bin_y)
+        @weight_vec = weight_vec * Numo::DFloat[*bin_y]
         self
       end
@@ -78,7 +78,7 @@ module SVMKit
       #     The kernel matrix between testing samples and training samples to compute the scores.
       # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
       def decision_function(x)
-        @weight_vec.dot(x.transpose)
+        x.dot(@weight_vec)
       end
       # Predict class labels for samples.

data/lib/svmkit/linear_model/logistic_regression.rb CHANGED Viewed

@@ -74,7 +74,7 @@ module SVMKit
         end
         # Initialize some variables.
         n_samples, n_features = samples.shape
-        rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
+        rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
         # Start optimization.
         @params[:max_iter].times do |t|

data/lib/svmkit/linear_model/svc.rb CHANGED Viewed

@@ -70,7 +70,7 @@ module SVMKit
         end
         # Initialize some variables.
         n_samples, n_features = samples.shape
-        rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
+        rand_ids = [*0...n_samples].shuffle(random: @rng)
         weight_vec = Numo::DFloat.zeros(n_features)
         # Start optimization.
         @params[:max_iter].times do |t|

data/lib/svmkit/model_selection/cross_validation.rb ADDED Viewed

@@ -0,0 +1,82 @@
+require 'svmkit/base/splitter'
+module SVMKit
+  # This module consists of the classes for model validation techniques.
+  module ModelSelection
+    # CrossValidation is a class that evaluates a given classifier with cross-validation method.
+    #
+    # @example
+    #   svc = SVMKit::LinearModel::SVC.new
+    #   kf = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 5)
+    #   cv = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf)
+    #   report = cv.perform(samples, labels)
+    #   mean_test_score = report[:test_score].inject(:+) / kf.n_splits
+    #
+    class CrossValidation
+      # Return the classifier of which performance is evaluated.
+      # @return [Classifier]
+      attr_reader :estimator
+      # Return the splitter that divides dataset.
+      # @return [Splitter]
+      attr_reader :splitter
+      # Return the flag indicating whether to calculate the score of training dataset.
+      # @return [Boolean]
+      attr_reader :return_train_score
+      # Create a new evaluator with cross-validation method.
+      #
+      # @param estimator [Classifier] The classifier of which performance is evaluated.
+      # @param splitter [Splitter] The splitter that divides dataset to training and testing dataset.
+      # @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
+      def initialize(estimator: nil, splitter: nil, return_train_score: false)
+        @estimator = estimator
+        @splitter = splitter
+        @return_train_score = return_train_score
+      end
+      # Perform the evaluation of given classifier with cross-validation method.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #   The dataset to be used to evaluate the classifier.
+      # @param y [Numo::Int32] (shape: [n_samples])
+      #   The labels to be used to evaluate the classifier.
+      # @return [Hash] The report summarizing the results of cross-validation.
+      #   * :fit_time (Array<Integer>) The calculation times, in whole seconds, of fitting the estimator for each split.
+      #   * :test_score (Array<Float>) The scores of testing dataset for each split.
+      #   * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
+      #     the return_train_score is false.
+      def perform(x, y)
+        # Initialize the report of cross validation.
+        report = {test_score: [], train_score: nil, fit_time: []}
+        report[:train_score] = [] if @return_train_score
+        # Evaluate the estimator on each split.
+        @splitter.split(x, y).each do |train_ids, test_ids|
+          # Split dataset into training and testing dataset.
+          feature_ids = !kernel_machine? || train_ids
+          train_x = x[train_ids, feature_ids]
+          train_y = y[train_ids]
+          test_x = x[test_ids, feature_ids]
+          test_y = y[test_ids]
+          # Fit the estimator.
+          start_time = Time.now.to_i
+          @estimator.fit(train_x, train_y)
+          # Calculate scores and prepare the report.
+          report[:fit_time].push(Time.now.to_i - start_time)
+          report[:test_score].push(@estimator.score(test_x, test_y))
+          report[:train_score].push(@estimator.score(train_x, train_y)) if @return_train_score
+        end
+        report
+      end
+      private
+      def kernel_machine?
+        class_name = @estimator.class.to_s
+        class_name = @estimator.params[:estimator].class.to_s if class_name.include?('Multiclass')
+        class_name.include?('KernelMachine')
+      end
+    end
+  end
+end

data/lib/svmkit/model_selection/k_fold.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module SVMKit
     class KFold
       include Base::Splitter
-      # Return the proportion of the test set to the dataset.
+      # Return the flag indicating whether to shuffle the dataset.
       # @return [Boolean]
       attr_reader :shuffle

data/lib/svmkit/model_selection/stratified_k_fold.rb CHANGED Viewed

@@ -16,7 +16,7 @@ module SVMKit
     class StratifiedKFold
       include Base::Splitter
-      # Return the proportion of the test set to the dataset.
+      # Return the flag indicating whether to shuffle the dataset.
       # @return [Boolean]
       attr_reader :shuffle
@@ -47,7 +47,7 @@ module SVMKit
       # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
       def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
         # Check the number of samples in each class.
-        unless y.bincount.to_a.all? { |n_samples| @n_splits.between?(2, n_samples) }
+        unless valid_n_splits?(y)
           raise ArgumentError,
                 'The value of n_splits must be not less than 2 and not more than the number of samples in each class.'
         end
@@ -59,6 +59,10 @@ module SVMKit
       private
+      def valid_n_splits?(y)
+        y.to_a.uniq.map { |label| y.eq(label).where.size }.all? { |n_samples| @n_splits.between?(2, n_samples) }
+      end
       def fold_sets(y, label)
         sample_ids = y.eq(label).where.to_a
         sample_ids.shuffle!(random: @rng) if @shuffle

data/lib/svmkit/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # SVMKit is an experimental library of machine learning in Ruby.
 module SVMKit
   # @!visibility private
-  VERSION = '0.2.2'.freeze
+  VERSION = '0.2.3'.freeze
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.2.3
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-01-13 00:00:00.000000000 Z
+date: 2018-01-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -112,6 +112,7 @@ files:
 - lib/svmkit/kernel_machine/kernel_svc.rb
 - lib/svmkit/linear_model/logistic_regression.rb
 - lib/svmkit/linear_model/svc.rb
+- lib/svmkit/model_selection/cross_validation.rb
 - lib/svmkit/model_selection/k_fold.rb
 - lib/svmkit/model_selection/stratified_k_fold.rb
 - lib/svmkit/multiclass/one_vs_rest_classifier.rb