svmkit 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
4
- data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
3
+ metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
4
+ data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
5
5
  SHA512:
6
- metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
7
- data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
6
+ metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
7
+ data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.1.3
2
+ - Added class for Kernel Support Vector Machine with Pegasos algorithm.
3
+ - Added module for calculating pairwise kernel functions and euclidean distances.
4
+
1
5
  # 0.1.2
2
6
  - Added the function learning a model with bias term to the PegasosSVC and LogisticRegression classes.
3
7
  - Rewrote the documentation with yard notation.
data/lib/svmkit.rb CHANGED
@@ -5,12 +5,14 @@ end
5
5
 
6
6
  require 'svmkit/version'
7
7
  require 'svmkit/utils'
8
+ require 'svmkit/pairwise_metric'
8
9
  require 'svmkit/base/base_estimator'
9
10
  require 'svmkit/base/classifier'
10
11
  require 'svmkit/base/transformer'
11
12
  require 'svmkit/kernel_approximation/rbf'
12
13
  require 'svmkit/linear_model/pegasos_svc'
13
14
  require 'svmkit/linear_model/logistic_regression'
15
+ require 'svmkit/kernel_machine/kernel_svc'
14
16
  require 'svmkit/multiclass/one_vs_rest_classifier'
15
17
  require 'svmkit/preprocessing/l2_normalizer'
16
18
  require 'svmkit/preprocessing/min_max_scaler'
@@ -40,9 +40,10 @@ module SVMKit
40
40
  #
41
41
  # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
42
42
  #
43
- # @param gamma [Float] (defaults to: 1.0) The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
45
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
43
+ # @param params [Hash] The parameters for RBF kernel approximation.
44
+ # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
45
+ # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
46
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
46
47
  def initialize(params = {})
47
48
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
48
49
  self.params[:random_seed] ||= srand
@@ -0,0 +1,128 @@
1
+ require 'svmkit/base/base_estimator'
2
+ require 'svmkit/base/classifier'
3
+
4
+ module SVMKit
5
+ # This module consists of the classes that implement kernel method-based estimators.
6
+ module KernelMachine
7
+ # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
8
+ #
9
+ # @example
10
+ # training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
11
+ # estimator =
12
+ # SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
13
+ # estimator.fit(training_kernel_matrix, training_labels)
14
+ # testing_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
15
+ # results = estimator.predict(testing_kernel_matrix)
16
+ #
17
+ # *Reference*
18
+ # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
19
+ class KernelSVC
20
+ include Base::BaseEstimator
21
+ include Base::Classifier
22
+
23
+ # @!visibility private
24
+ DEFAULT_PARAMS = {
25
+ reg_param: 1.0,
26
+ max_iter: 1000,
27
+ random_seed: nil
28
+ }.freeze
29
+
30
+ # Return the weight vector for Kernel SVC.
31
+ # @return [NMatrix] (shape: [1, n_training_samples])
32
+ attr_reader :weight_vec
33
+
34
+ # Return the random generator for performing random sampling in the Pegasos algorithm.
35
+ # @return [Random]
36
+ attr_reader :rng
37
+
38
+ # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
39
+ #
40
+ # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
41
+ #
42
+ # @param params [Hash] The parameters for Kernel SVC.
43
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
44
+ # @option params [Integer] :max_iter (1000) The maximum number of iterations.
45
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
46
+ def initialize(params = {})
47
+ self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
48
+ self.params[:random_seed] ||= srand
49
+ @weight_vec = nil
50
+ @rng = Random.new(self.params[:random_seed])
51
+ end
52
+
53
+ # Fit the model with given training data.
54
+ #
55
+ # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
56
+ # The kernel matrix of the training data to be used for fitting the model.
57
+ # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
58
+ # @return [KernelSVC] The learned classifier itself.
59
+ def fit(x, y)
60
+ # Generate binary labels
61
+ negative_label = y.uniq.sort.shift
62
+ bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
63
+ # Initialize some variables.
64
+ n_training_samples = x.shape[0]
65
+ rand_ids = []
66
+ weight_vec = NMatrix.zeros([1, n_training_samples])
67
+ # Start optimization.
68
+ params[:max_iter].times do |t|
69
+ # random sampling
70
+ rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
71
+ target_id = rand_ids.shift
72
+ # update the weight vector
73
+ func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
74
+ func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
75
+ weight_vec[target_id] += 1.0 if func < 1.0
76
+ end
77
+ # Store the learned model.
78
+ @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
79
+ self
80
+ end
81
+
82
+ # Calculate confidence scores for samples.
83
+ #
84
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
85
+ # The kernel matrix between testing samples and training samples to compute the scores.
86
+ # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
87
+ def decision_function(x)
88
+ @weight_vec.dot(x.transpose)
89
+ end
90
+
91
+ # Predict class labels for samples.
92
+ #
93
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
94
+ # The kernel matrix between testing samples and training samples to predict the labels.
95
+ # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
96
+ def predict(x)
97
+ decision_function(x).map { |v| v >= 0 ? 1 : -1 }
98
+ end
99
+
100
+ # Calculate the mean accuracy of the given testing data.
101
+ #
102
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
103
+ # The kernel matrix between testing samples and training samples.
104
+ # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
105
+ # @return [Float] Mean accuracy
106
+ def score(x, y)
107
+ p = predict(x)
108
+ n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
109
+ n_hits / y.size.to_f
110
+ end
111
+
112
+ # Dump marshal data.
113
+ # @return [Hash] The marshal data about KernelSVC.
114
+ def marshal_dump
115
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
116
+ end
117
+
118
+ # Load marshal data.
119
+ # @return [nil]
120
+ def marshal_load(obj)
121
+ self.params = obj[:params]
122
+ @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
123
+ @rng = obj[:rng]
124
+ nil
125
+ end
126
+ end
127
+ end
128
+ end
@@ -46,13 +46,14 @@ module SVMKit
46
46
  #
47
47
  # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
48
48
  #
49
- # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
50
- # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
51
- # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
49
+ # @param params [Hash] The parameters for Logistic Regression.
50
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
51
+ # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
52
+ # @option params [Float] :bias_scale (1.0) The scale of the bias term.
52
53
  # If fit_bias is true, the feature vector v becomes [v; bias_scale].
53
- # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
54
- # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
55
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
54
+ # @option params [Integer] :max_iter (100) The maximum number of iterations.
55
+ # @option params [Integer] :batch_size (50) The size of the mini batches.
56
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
56
57
  def initialize(params = {})
57
58
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
58
59
  self.params[:random_seed] ||= srand
@@ -44,12 +44,13 @@ module SVMKit
44
44
  #
45
45
  # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
46
46
  #
47
- # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
48
- # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
49
- # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
50
- # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
51
- # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
52
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
47
+ # @param params [Hash] The parameters for SVC.
48
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
49
+ # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
50
+ # @option params [Float] :bias_scale (1.0) The scale of the bias term.
51
+ # @option params [Integer] :max_iter (100) The maximum number of iterations.
52
+ # @option params [Integer] :batch_size (50) The size of the mini batches.
53
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
53
54
  def initialize(params = {})
54
55
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
55
56
  self.params[:random_seed] ||= srand
@@ -32,8 +32,9 @@ module SVMKit
32
32
  # Create a new multi-label classifier with the one-vs-rest strategy.
33
33
  #
34
34
  # @overload new(estimator: base_estimator) -> OneVsRestClassifier
35
- # @param estimator [Classifier] (defaults to: nil)
36
- # The (binary) classifier for construction a multi-label classifier.
35
+ #
36
+ # @param params [Hash] The parameters for OneVsRestClassifier.
37
+ # @option params [Classifier] :estimator (nil) The (binary) classifier for constructing a multi-label classifier.
37
38
  def initialize(params = {})
38
39
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
39
40
  @estimators = nil
@@ -0,0 +1,72 @@
1
+ module SVMKit
2
+ # Module for calculating pairwise distances, similarities, and kernels.
3
+ module PairwiseMetric
4
+ class << self
5
+ # Calculate the pairwise euclidean distances between x and y.
6
+ #
7
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
8
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
9
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
10
+ def euclidean_distance(x, y = nil)
11
+ y = x if y.nil?
12
+ sum_x_vec = (x**2).sum(1)
13
+ sum_y_vec = (y**2).sum(1)
14
+ dot_xy_mat = x.dot(y.transpose)
15
+ distance_matrix = dot_xy_mat * -2.0 +
16
+ sum_x_vec.repeat(y.shape[0], 1) +
17
+ sum_y_vec.transpose.repeat(x.shape[0], 0)
18
+ distance_matrix.abs.sqrt
19
+ end
20
+
21
+ # Calculate the rbf kernel between x and y.
22
+ #
23
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
24
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
25
+ # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
26
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
27
+ def rbf_kernel(x, y = nil, gamma = nil)
28
+ y = x if y.nil?
29
+ gamma ||= 1.0 / x.shape[1]
30
+ distance_matrix = euclidean_distance(x, y)
31
+ ((distance_matrix**2) * -gamma).exp
32
+ end
33
+
34
+ # Calculate the linear kernel between x and y.
35
+ #
36
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
37
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
38
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
39
+ def linear_kernel(x, y = nil)
40
+ y = x if y.nil?
41
+ x.dot(y.transpose)
42
+ end
43
+
44
+ # Calculate the polynomial kernel between x and y.
45
+ #
46
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
47
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
48
+ # @param degree [Integer] The parameter of polynomial kernel.
49
+ # @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
50
+ # @param coef [Integer] The parameter of polynomial kernel.
51
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
52
+ def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
53
+ y = x if y.nil?
54
+ gamma ||= 1.0 / x.shape[1]
55
+ (x.dot(y.transpose) * gamma + coef)**degree
56
+ end
57
+
58
+ # Calculate the sigmoid kernel between x and y.
59
+ #
60
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
61
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
62
+ # @param gamma [Float] The parameter of sigmoid kernel, if nil it is 1 / n_features.
63
+ # @param coef [Integer] The parameter of sigmoid kernel.
64
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
65
+ def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
66
+ y = x if y.nil?
67
+ gamma ||= 1.0 / x.shape[1]
68
+ (x.dot(y.transpose) * gamma + coef).tanh
69
+ end
70
+ end
71
+ end
72
+ end
@@ -31,7 +31,8 @@ module SVMKit
31
31
  #
32
32
  # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
33
33
  #
34
- # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
34
+ # @param params [Hash] The parameters for MinMaxScaler.
35
+ # @option params [Array<Float>] :feature_range ([0.0, 1.0]) The desired range of samples.
35
36
  def initialize(params = {})
36
37
  @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
37
38
  @min_vec = nil
@@ -1,5 +1,5 @@
1
1
  # SVMKit is an experimental library of machine learning in Ruby.
2
2
  module SVMKit
3
3
  # @!visibility private
4
- VERSION = '0.1.2'.freeze
4
+ VERSION = '0.1.3'.freeze
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-10-08 00:00:00.000000000 Z
11
+ date: 2017-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -93,9 +93,11 @@ files:
93
93
  - lib/svmkit/base/classifier.rb
94
94
  - lib/svmkit/base/transformer.rb
95
95
  - lib/svmkit/kernel_approximation/rbf.rb
96
+ - lib/svmkit/kernel_machine/kernel_svc.rb
96
97
  - lib/svmkit/linear_model/logistic_regression.rb
97
98
  - lib/svmkit/linear_model/pegasos_svc.rb
98
99
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
100
+ - lib/svmkit/pairwise_metric.rb
99
101
  - lib/svmkit/preprocessing/l2_normalizer.rb
100
102
  - lib/svmkit/preprocessing/min_max_scaler.rb
101
103
  - lib/svmkit/preprocessing/standard_scaler.rb