svmkit 0.1.2 → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
4
- data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
3
+ metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
4
+ data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
5
5
  SHA512:
6
- metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
7
- data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
6
+ metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
7
+ data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.1.3
2
+ - Added class for Kernel Support Vector Machine with Pegasos algorithm.
3
+ - Added module for calculating pairwise kernel functions and euclidean distances.
4
+
1
5
  # 0.1.2
2
6
  - Added the function learning a model with bias term to the PegasosSVC and LogisticRegression classes.
3
7
  - Rewrote the documentation with YARD notation.
data/lib/svmkit.rb CHANGED
@@ -5,12 +5,14 @@ end
5
5
 
6
6
  require 'svmkit/version'
7
7
  require 'svmkit/utils'
8
+ require 'svmkit/pairwise_metric'
8
9
  require 'svmkit/base/base_estimator'
9
10
  require 'svmkit/base/classifier'
10
11
  require 'svmkit/base/transformer'
11
12
  require 'svmkit/kernel_approximation/rbf'
12
13
  require 'svmkit/linear_model/pegasos_svc'
13
14
  require 'svmkit/linear_model/logistic_regression'
15
+ require 'svmkit/kernel_machine/kernel_svc'
14
16
  require 'svmkit/multiclass/one_vs_rest_classifier'
15
17
  require 'svmkit/preprocessing/l2_normalizer'
16
18
  require 'svmkit/preprocessing/min_max_scaler'
@@ -40,9 +40,10 @@ module SVMKit
40
40
  #
41
41
  # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
42
42
  #
43
- # @param gamma [Float] (defaults to: 1.0) The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
45
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
43
+ # @param params [Hash] The parameters for RBF kernel approximation.
44
+ # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
45
+ # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
46
+ # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
46
47
  def initialize(params = {})
47
48
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
48
49
  self.params[:random_seed] ||= srand
@@ -0,0 +1,128 @@
1
+ require 'svmkit/base/base_estimator'
2
+ require 'svmkit/base/classifier'
3
+
4
+ module SVMKit
5
+ # This module consists of the classes that implement kernel method-based estimators.
6
+ module KernelMachine
7
+ # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
8
+ #
9
+ # @example
10
+ # training_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(training_samples)
11
+ # estimator =
12
+ # SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 1000, random_seed: 1)
13
+ # estimator.fit(training_kernel_matrix, training_labels)
14
+ # testing_kernel_matrix = SVMKit::PairwiseMetric::rbf_kernel(testing_samples, training_samples)
15
+ # results = estimator.predict(testing_kernel_matrix)
16
+ #
17
+ # *Reference*
18
+ # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
19
+ class KernelSVC
20
+ include Base::BaseEstimator
21
+ include Base::Classifier
22
+
23
+ # @!visibility private
24
+ DEFAULT_PARAMS = {
25
+ reg_param: 1.0,
26
+ max_iter: 1000,
27
+ random_seed: nil
28
+ }.freeze
29
+
30
+ # Return the weight vector for Kernel SVC.
31
+ # @return [NMatrix] (shape: [1, n_training_samples])
32
+ attr_reader :weight_vec
33
+
34
+ # Return the random generator for performing random sampling in the Pegasos algorithm.
35
+ # @return [Random]
36
+ attr_reader :rng
37
+
38
+ # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
39
+ #
40
+ # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
41
+ #
42
+ # @param params [Hash] The parameters for Kernel SVC.
43
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
44
+ # @option params [Integer] :max_iter (1000) The maximum number of iterations.
45
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
46
+ def initialize(params = {})
47
+ self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
48
+ self.params[:random_seed] ||= srand
49
+ @weight_vec = nil
50
+ @rng = Random.new(self.params[:random_seed])
51
+ end
52
+
53
+ # Fit the model with given training data.
54
+ #
55
+ # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
56
+ # The kernel matrix of the training data to be used for fitting the model.
57
+ # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
58
+ # @return [KernelSVC] The learned classifier itself.
59
+ def fit(x, y)
60
+ # Generate binary labels
61
+ negative_label = y.uniq.sort.shift
62
+ bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
63
+ # Initialize some variables.
64
+ n_training_samples = x.shape[0]
65
+ rand_ids = []
66
+ weight_vec = NMatrix.zeros([1, n_training_samples])
67
+ # Start optimization.
68
+ params[:max_iter].times do |t|
69
+ # random sampling
70
+ rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
71
+ target_id = rand_ids.shift
72
+ # update the weight vector
73
+ func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
74
+ func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
75
+ weight_vec[target_id] += 1.0 if func < 1.0
76
+ end
77
+ # Store the learned model.
78
+ @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
79
+ self
80
+ end
81
+
82
+ # Calculate confidence scores for samples.
83
+ #
84
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
85
+ # The kernel matrix between testing samples and training samples to compute the scores.
86
+ # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
87
+ def decision_function(x)
88
+ @weight_vec.dot(x.transpose)
89
+ end
90
+
91
+ # Predict class labels for samples.
92
+ #
93
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
94
+ # The kernel matrix between testing samples and training samples to predict the labels.
95
+ # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
96
+ def predict(x)
97
+ decision_function(x).map { |v| v >= 0 ? 1 : -1 }
98
+ end
99
+
100
+ # Calculate the mean accuracy of the given testing data.
101
+ #
102
+ # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
103
+ # The kernel matrix between testing samples and training samples.
104
+ # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
105
+ # @return [Float] Mean accuracy
106
+ def score(x, y)
107
+ p = predict(x)
108
+ n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
109
+ n_hits / y.size.to_f
110
+ end
111
+
112
+ # Dump marshal data.
113
+ # @return [Hash] The marshal data about KernelSVC.
114
+ def marshal_dump
115
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
116
+ end
117
+
118
+ # Load marshal data.
119
+ # @return [nil]
120
+ def marshal_load(obj)
121
+ self.params = obj[:params]
122
+ @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
123
+ @rng = obj[:rng]
124
+ nil
125
+ end
126
+ end
127
+ end
128
+ end
@@ -46,13 +46,14 @@ module SVMKit
46
46
  #
47
47
  # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
48
48
  #
49
- # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
50
- # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
51
- # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
49
+ # @param params [Hash] The parameters for Logistic Regression.
50
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
51
+ # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
52
+ # @option params [Float] :bias_scale (1.0) The scale of the bias term.
52
53
  # If fit_bias is true, the feature vector v becomes [v; bias_scale].
53
- # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
54
- # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
55
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
54
+ # @option params [Integer] :max_iter (100) The maximum number of iterations.
55
+ # @option params [Integer] :batch_size (50) The size of the mini batches.
56
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
56
57
  def initialize(params = {})
57
58
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
58
59
  self.params[:random_seed] ||= srand
@@ -44,12 +44,13 @@ module SVMKit
44
44
  #
45
45
  # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
46
46
  #
47
- # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
48
- # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
49
- # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
50
- # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
51
- # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
52
- # @param random_seed [Integer] (defaults to: nil) The seed value using to initialize the random generator.
47
+ # @param params [Hash] The parameters for SVC.
48
+ # @option params [Float] :reg_param (1.0) The regularization parameter.
49
+ # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
50
+ # @option params [Float] :bias_scale (1.0) The scale of the bias term.
51
+ # @option params [Integer] :max_iter (100) The maximum number of iterations.
52
+ # @option params [Integer] :batch_size (50) The size of the mini batches.
53
+ # @option params [Integer] :random_seed (nil) The seed value used to initialize the random generator.
53
54
  def initialize(params = {})
54
55
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
55
56
  self.params[:random_seed] ||= srand
@@ -32,8 +32,9 @@ module SVMKit
32
32
  # Create a new multi-label classifier with the one-vs-rest strategy.
33
33
  #
34
34
  # @overload new(estimator: base_estimator) -> OneVsRestClassifier
35
- # @param estimator [Classifier] (defaults to: nil)
36
- # The (binary) classifier for construction a multi-label classifier.
35
+ #
36
+ # @param params [Hash] The parameters for OneVsRestClassifier.
37
+ # @option params [Classifier] :estimator (nil) The (binary) classifier for constructing a multi-label classifier.
37
38
  def initialize(params = {})
38
39
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
39
40
  @estimators = nil
@@ -0,0 +1,72 @@
1
+ module SVMKit
2
+ # Module for calculating pairwise distances, similarities, and kernels.
3
+ module PairwiseMetric
4
+ class << self
5
+ # Calculate the pairwise euclidean distances between x and y.
6
+ #
7
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
8
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
9
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
10
+ def euclidean_distance(x, y = nil)
11
+ y = x if y.nil?
12
+ sum_x_vec = (x**2).sum(1)
13
+ sum_y_vec = (y**2).sum(1)
14
+ dot_xy_mat = x.dot(y.transpose)
15
+ distance_matrix = dot_xy_mat * -2.0 +
16
+ sum_x_vec.repeat(y.shape[0], 1) +
17
+ sum_y_vec.transpose.repeat(x.shape[0], 0)
18
+ distance_matrix.abs.sqrt
19
+ end
20
+
21
+ # Calculate the rbf kernel between x and y.
22
+ #
23
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
24
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
25
+ # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
26
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
27
+ def rbf_kernel(x, y = nil, gamma = nil)
28
+ y = x if y.nil?
29
+ gamma ||= 1.0 / x.shape[1]
30
+ distance_matrix = euclidean_distance(x, y)
31
+ ((distance_matrix**2) * -gamma).exp
32
+ end
33
+
34
+ # Calculate the linear kernel between x and y.
35
+ #
36
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
37
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
38
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
39
+ def linear_kernel(x, y = nil)
40
+ y = x if y.nil?
41
+ x.dot(y.transpose)
42
+ end
43
+
44
+ # Calculate the polynomial kernel between x and y.
45
+ #
46
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
47
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
48
+ # @param degree [Integer] The parameter of polynomial kernel.
49
+ # @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
50
+ # @param coef [Integer] The parameter of polynomial kernel.
51
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
52
+ def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
53
+ y = x if y.nil?
54
+ gamma ||= 1.0 / x.shape[1]
55
+ (x.dot(y.transpose) * gamma + coef)**degree
56
+ end
57
+
58
+ # Calculate the sigmoid kernel between x and y.
59
+ #
60
+ # @param x [NMatrix] (shape: [n_samples_x, n_features])
61
+ # @param y [NMatrix] (shape: [n_samples_y, n_features])
62
+ # @param gamma [Float] The parameter of sigmoid kernel, if nil it is 1 / n_features.
63
+ # @param coef [Integer] The parameter of sigmoid kernel.
64
+ # @return [NMatrix] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
65
+ def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
66
+ y = x if y.nil?
67
+ gamma ||= 1.0 / x.shape[1]
68
+ (x.dot(y.transpose) * gamma + coef).tanh
69
+ end
70
+ end
71
+ end
72
+ end
@@ -31,7 +31,8 @@ module SVMKit
31
31
  #
32
32
  # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
33
33
  #
34
- # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
34
+ # @param params [Hash] The parameters for MinMaxScaler.
35
+ # @option params [Array<Float>] :feature_range ([0.0, 1.0]) The desired range of samples.
35
36
  def initialize(params = {})
36
37
  @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
37
38
  @min_vec = nil
@@ -1,5 +1,5 @@
1
1
  # SVMKit is an experimental library of machine learning in Ruby.
2
2
  module SVMKit
3
3
  # @!visibility private
4
- VERSION = '0.1.2'.freeze
4
+ VERSION = '0.1.3'.freeze
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-10-08 00:00:00.000000000 Z
11
+ date: 2017-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -93,9 +93,11 @@ files:
93
93
  - lib/svmkit/base/classifier.rb
94
94
  - lib/svmkit/base/transformer.rb
95
95
  - lib/svmkit/kernel_approximation/rbf.rb
96
+ - lib/svmkit/kernel_machine/kernel_svc.rb
96
97
  - lib/svmkit/linear_model/logistic_regression.rb
97
98
  - lib/svmkit/linear_model/pegasos_svc.rb
98
99
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
100
+ - lib/svmkit/pairwise_metric.rb
99
101
  - lib/svmkit/preprocessing/l2_normalizer.rb
100
102
  - lib/svmkit/preprocessing/min_max_scaler.rb
101
103
  - lib/svmkit/preprocessing/standard_scaler.rb