svmkit 0.1.1 → 0.1.2

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
- data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
+ metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
+ data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
  SHA512:
- metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
- data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41
+ metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
+ data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
data/HISTORY.md CHANGED
@@ -1,3 +1,11 @@
+ # 0.1.2
+ - Added support for learning a model with a bias term to the PegasosSVC and LogisticRegression classes.
+ - Rewrote the documentation in YARD notation.
+
+ # 0.1.1
+ - Added a class for Logistic Regression with SGD optimization.
+ - Fixed some mistakes in the documentation.
+
  # 0.1.0
  - Added basic classes.
  - Added a utility module.
@@ -5,4 +13,3 @@
  - Added a class for Support Vector Machine with the Pegasos algorithm.
  - Added a class that performs multiclass classification with the one-vs.-rest strategy.
  - Added classes for preprocessing such as min-max scaling, standardization, and L2 normalization.
-
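The bias-term entry above is the main API change in 0.1.2. A minimal sketch of how it is used, assuming the svmkit and nmatrix gems are installed; the toy matrices and parameter values here are hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

# Toy data: two roughly separable points per class (hypothetical values).
samples = NMatrix.new([4, 2], [0.0, 0.0, 0.2, 0.1, 1.0, 1.0, 0.9, 0.8])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# fit_bias and bias_scale are the options added in 0.1.2; with fit_bias: true
# each feature vector v is internally expanded to [v; bias_scale].
estimator = SVMKit::LinearModel::PegasosSVC.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 50, batch_size: 4, random_seed: 1
)
estimator.fit(samples, labels)
puts estimator.bias_term # learned intercept; stays 0.0 when fit_bias is false
```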
data/README.md CHANGED
@@ -1,5 +1,9 @@
  # SVMKit

+ [![Build Status](https://travis-ci.org/yoshoku/SVMKit.svg?branch=master)](https://travis-ci.org/yoshoku/SVMKit)
+ [![Gem Version](https://badge.fury.io/rb/svmkit.svg)](https://badge.fury.io/rb/svmkit)
+ [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
+
  SVMKit is a library for machine learning in Ruby.
  SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
  However, since SVMKit is an experimental library, only a few machine learning algorithms are implemented.
@@ -23,6 +27,7 @@ Or install it yourself as:
  ## Usage

  Training phase:
+
  ```ruby
  require 'svmkit'
  require 'libsvmloader'
@@ -46,6 +51,7 @@ File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)
  ```

  Testing phase:
+
  ```ruby
  require 'svmkit'
  require 'libsvmloader'
@@ -4,7 +4,8 @@ module SVMKit
  module Base
  # Base module for all estimators in SVMKit.
  module BaseEstimator
- # Parameters for this estimator.
+ # Return the parameters of the estimator.
+ # @return [Hash]
  attr_accessor :params
  end
  end
@@ -6,40 +6,43 @@ module SVMKit
  module KernelApproximation
  # Class for RBF kernel feature mapping.
  #
+ # @example
  #   transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
  #   new_training_samples = transformer.fit_transform(training_samples)
  #   new_testing_samples = transformer.transform(testing_samples)
  #
- # * *Refernce*:
- #   - A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
+ # *Reference*:
+ # 1. A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
  class RBF
  include Base::BaseEstimator
  include Base::Transformer

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  gamma: 1.0,
  n_components: 128,
  random_seed: nil
  }.freeze

- # The random matrix for transformation.
- attr_reader :random_mat # :nodoc:
+ # Return the random matrix for transformation.
+ # @return [NMatrix] (shape: [n_features, n_components])
+ attr_reader :random_mat

- # The random vector for transformation.
- attr_reader :random_vec # :nodoc:
+ # Return the random vector for transformation.
+ # @return [NMatrix] (shape: [1, n_components])
+ attr_reader :random_vec

- # The random generator for transformation.
- attr_reader :rng # :nodoc:
+ # Return the random generator for transformation.
+ # @return [Random]
+ attr_reader :rng

- # Creates a new transformer for mapping to RBF kernel feature space.
+ # Create a new transformer for mapping to RBF kernel feature space.
  #
- # call-seq:
- #   new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
+ # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
  #
- # * *Arguments* :
- #   - +:gamma+ (Float) (defaults to: 1.0) -- The parameter of RBF kernel: exp(-gamma * x^2)
- #   - +:n_components+ (Integer) (defaults to: 128) -- The number of dimensions of the RBF kernel feature space.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param gamma [Float] (defaults to: 1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+ # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
@@ -50,13 +53,11 @@ module SVMKit

  # Fit the model with given training data.
  #
- # call-seq:
- #   fit(x) -> RBF
+ # @overload fit(x) -> RBF
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. This method uses only the number of features of the data.
- # * *Returns* :
- #   - The learned transformer itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ #   This method uses only the number of features of the data.
+ # @return [RBF] The learned transformer itself.
  def fit(x, _y = nil)
  n_features = x.shape[1]
  params[:n_components] = 2 * n_features if params[:n_components] <= 0
@@ -65,48 +66,43 @@ module SVMKit
  @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
  NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
  )
- #@random_vec = rand_uniform([1, self.params[:n_components]]) * (2.0 * Math::PI)
  self
  end

  # Fit the model with training data, and then transform them with the learned model.
  #
- # call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- # * *Returns* :
- #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Transform the given data with the learned model.
  #
- # call-seq:
- #   transform(x) -> NMatrix
+ # @overload transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The data to be transformed with the learned model.
- # * *Returns* :
- #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+ # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
  def transform(x)
  n_samples, = x.shape
  projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
  projection.sin * ((2.0 / params[:n_components])**0.5)
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about RBF.
+ def marshal_dump
  { params: params,
  random_mat: Utils.dump_nmatrix(@random_mat),
  random_vec: Utils.dump_nmatrix(@random_vec),
  rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @random_mat = Utils.restore_nmatrix(obj[:random_mat])
  @random_vec = Utils.restore_nmatrix(obj[:random_vec])
@@ -117,13 +113,13 @@ module SVMKit
  protected

  # Generate the uniform random matrix with the given shape.
- def rand_uniform(shape) # :nodoc:
+ def rand_uniform(shape)
  rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
  NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
  end

  # Generate the normal random matrix with the given shape, mean, and standard deviation.
- def rand_normal(shape, mu = 0.0, sigma = 1.0) # :nodoc:
+ def rand_normal(shape, mu = 0.0, sigma = 1.0)
  a = rand_uniform(shape)
  b = rand_uniform(shape)
  ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
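For context, `rand_normal` above draws Gaussian values with the Box-Muller transform (for uniform variates a and b, sqrt(-2 ln a) * sin(2*pi*b) is standard normal), which supplies the random projection matrix that the Rahimi and Recht feature map needs. A minimal usage sketch with hypothetical toy data, assuming the svmkit and nmatrix gems are installed:

```ruby
require 'svmkit'
require 'nmatrix'

# Hypothetical toy samples; in practice x would come from a loader such as LibSVMLoader.
samples = NMatrix.new([3, 2], [0.0, 1.0, 0.5, 0.5, 1.0, 0.0])

# Map 2-dimensional samples into an 8-dimensional approximate RBF feature space.
transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 8, random_seed: 1)
mapped = transformer.fit_transform(samples)
p mapped.shape # => [3, 8]
```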
@@ -4,69 +4,80 @@ require 'svmkit/base/classifier'
  module SVMKit
  # This module consists of the classes that implement generalized linear models.
  module LinearModel
- # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
- # Note that the Logistic Regression of SVMKit performs as a binary classifier.
+ # LogisticRegression is a class that implements Logistic Regression
+ # with stochastic gradient descent (SGD) optimization.
+ # Note that the class performs as a binary classifier.
  #
+ # @example
  #   estimator =
  #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
  #
- # * *Reference*:
- #   - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
- #
+ # *Reference*
+ # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
  class LogisticRegression
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  reg_param: 1.0,
+ fit_bias: false,
+ bias_scale: 1.0,
  max_iter: 100,
  batch_size: 50,
  random_seed: nil
  }.freeze

- # The weight vector for Logistic Regression.
+ # Return the weight vector for Logistic Regression.
+ # @return [NMatrix] (shape: [1, n_features])
  attr_reader :weight_vec

- # The random generator for performing random sampling in the SGD optimization.
+ # Return the bias term (a.k.a. intercept) for Logistic Regression.
+ # @return [Float]
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling in the SGD optimization.
+ # @return [Random]
  attr_reader :rng

  # Create a new classifier with Logistic Regression by the SGD optimization.
  #
- # :call-seq:
- #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
+ # @overload new(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
  #
- # * *Arguments* :
- #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
- #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
- #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+ # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+ #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
+ # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+ # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
  @weight_vec = nil
+ @bias_term = 0.0
  @rng = Random.new(self.params[:random_seed])
  end

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> LogisticRegression
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
+ #   to be used for fitting the model.
+ # @return [LogisticRegression] The learned classifier itself.
  def fit(x, y)
- # Generate binary labels
+ # Generate binary labels.
  negative_label = y.uniq.sort.shift
  bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
+ # Expand feature vectors for bias term.
+ samples = x
+ samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
  # Initialize some variables.
- n_samples, n_features = x.shape
+ n_samples, n_features = samples.shape
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
- @weight_vec = NMatrix.zeros([1, n_features])
+ weight_vec = NMatrix.zeros([1, n_features])
  # Start optimization.
  params[:max_iter].times do |t|
  # random sampling
@@ -76,84 +87,75 @@ module SVMKit
  eta = 1.0 / (params[:reg_param] * (t + 1))
  mean_vec = NMatrix.zeros([1, n_features])
  subset_ids.each do |n|
- z = @weight_vec.dot(x.row(n).transpose)[0]
+ z = weight_vec.dot(samples.row(n).transpose)[0]
  coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
- mean_vec += x.row(n) * coef
+ mean_vec += samples.row(n) * coef
  end
  mean_vec *= eta / params[:batch_size]
- @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+ weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
  # scale the weight vector.
- scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
- @weight_vec *= [1.0, scaler].min
+ scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+ weight_vec *= [1.0, scaler].min
+ end
+ # Store the learned model.
+ if params[:fit_bias]
+ @weight_vec = weight_vec[0...n_features - 1]
+ @bias_term = weight_vec[n_features - 1]
+ else
+ @weight_vec = weight_vec[0...n_features]
+ @bias_term = 0.0
  end
  self
  end

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence score per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
  def decision_function(x)
- w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+ w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
  w.map { |v| 1.0 / v }
  end

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
  end

  # Predict probability for samples.
  #
- # :call-seq:
- #   predict_proba(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probailities.
- # * *Returns* :
- #   - Predicted probability per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
  def predict_proba(x)
  decision_function(x)
  end

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   score(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+ # Dump marshal data.
+ # @return [Hash] The marshal data about LogisticRegression.
+ def marshal_dump
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+ @bias_term = obj[:bias_term]
  @rng = obj[:rng]
  nil
  end
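A short sketch of the reworked LogisticRegression above: `decision_function` now adds `@bias_term` before the sigmoid, so `predict_proba` returns the estimated probability of the positive class. The toy matrices are hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

samples = NMatrix.new([4, 2], [0.0, 0.1, 0.1, 0.0, 0.9, 1.0, 1.0, 0.9])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

estimator = SVMKit::LinearModel::LogisticRegression.new(
  reg_param: 1.0, fit_bias: true, max_iter: 100, batch_size: 4, random_seed: 1
)
estimator.fit(samples, labels)
p estimator.predict_proba(samples) # sigmoid of (w.x + b) per sample
p estimator.predict(samples)       # 1 where the probability >= 0.5, otherwise -1
```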
@@ -6,140 +6,144 @@ module SVMKit
  module LinearModel
  # PegasosSVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
  #
+ # @example
  #   estimator =
  #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
  #
- # * *Reference*:
- #   - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
- #
+ # *Reference*
+ # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
  class PegasosSVC
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  reg_param: 1.0,
+ fit_bias: false,
+ bias_scale: 1.0,
  max_iter: 100,
  batch_size: 50,
  random_seed: nil
  }.freeze

- # The weight vector for SVC.
+ # Return the weight vector for SVC.
+ # @return [NMatrix] (shape: [1, n_features])
  attr_reader :weight_vec

- # The random generator for performing random sampling in the Pegasos algorithm.
+ # Return the bias term (a.k.a. intercept) for SVC.
+ # @return [Float]
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling in the Pegasos algorithm.
+ # @return [Random]
  attr_reader :rng

  # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
  #
- # :call-seq:
- #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
+ # @overload new(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
  #
- # * *Arguments* :
- #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
- #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
- #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+ # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+ # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+ # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
  @weight_vec = nil
+ @bias_term = 0.0
  @rng = Random.new(self.params[:random_seed])
  end

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> PegasosSVC
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+ # @return [PegasosSVC] The learned classifier itself.
  def fit(x, y)
  # Generate binary labels
  negative_label = y.uniq.sort.shift
  bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+ # Expand feature vectors for bias term.
+ samples = x
+ samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
  # Initialize some variables.
- n_samples, n_features = x.shape
+ n_samples, n_features = samples.shape
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
- @weight_vec = NMatrix.zeros([1, n_features])
+ weight_vec = NMatrix.zeros([1, n_features])
  # Start optimization.
  params[:max_iter].times do |t|
  # random sampling
  subset_ids = rand_ids.shift(params[:batch_size])
  rand_ids.concat(subset_ids)
  target_ids = subset_ids.map do |n|
- n if @weight_vec.dot(x.row(n).transpose) * bin_y[n] < 1
+ n if weight_vec.dot(samples.row(n).transpose) * bin_y[n] < 1
  end
  n_subsamples = target_ids.size
  next if n_subsamples.zero?
  # update the weight vector.
  eta = 1.0 / (params[:reg_param] * (t + 1))
  mean_vec = NMatrix.zeros([1, n_features])
- target_ids.each { |n| mean_vec += x.row(n) * bin_y[n] }
+ target_ids.each { |n| mean_vec += samples.row(n) * bin_y[n] }
  mean_vec *= eta / n_subsamples
- @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+ weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
  # scale the weight vector.
- scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
- @weight_vec *= [1.0, scaler].min
+ scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+ weight_vec *= [1.0, scaler].min
+ end
+ # Store the learned model.
+ if params[:fit_bias]
+ @weight_vec = weight_vec[0...n_features - 1]
+ @bias_term = weight_vec[n_features - 1]
+ else
+ @weight_vec = weight_vec[0...n_features]
+ @bias_term = 0.0
  end
  self
  end

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence score per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
  def decision_function(x)
- @weight_vec.dot(x.transpose)
+ @weight_vec.dot(x.transpose) + @bias_term
  end

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  decision_function(x).map { |v| v >= 0 ? 1 : -1 }
  end

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   score(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+ # Dump marshal data.
+ # @return [Hash] The marshal data about PegasosSVC.
+ def marshal_dump
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+ @bias_term = obj[:bias_term]
  @rng = obj[:rng]
  nil
  end
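Since `marshal_dump` and `marshal_load` above now carry `bias_term`, a classifier trained with a bias survives serialization. A round-trip sketch with hypothetical toy data, following the README's Marshal-based persistence pattern:

```ruby
require 'svmkit'
require 'nmatrix'

svc = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, fit_bias: true, random_seed: 1)
svc.fit(NMatrix.new([4, 2], [0.0, 0.0, 0.1, 0.2, 0.9, 1.0, 1.0, 0.8]),
        NMatrix.new([1, 4], [-1, -1, 1, 1]))

# bias_term now travels with params, weight_vec, and rng in the dumped hash.
File.open('svc.dat', 'wb') { |f| f.write(Marshal.dump(svc)) }
restored = Marshal.load(File.binread('svc.dat'))
puts restored.bias_term == svc.bias_term # => true
```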
@@ -6,33 +6,34 @@ module SVMKit
  module Multiclass
  # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
  #
+ # @example
  #   base_estimator =
  #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
- #
  class OneVsRestClassifier
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  estimator: nil
  }.freeze

- # The set of estimators.
+ # Return the set of estimators.
+ # @return [Array<Classifier>]
  attr_reader :estimators

- # The class labels.
+ # Return the class labels.
+ # @return [NMatrix] (shape: [1, n_classes])
  attr_reader :classes

  # Create a new multi-label classifier with the one-vs-rest strategy.
  #
- # :call-seq:
- #   new(estimator: base_estimator) -> OneVsRestClassifier
- #
- # * *Arguments* :
- #   - +:estimator+ (Classifier) (defaults to: nil) -- The (binary) classifier for construction a multi-label classifier.
+ # @overload new(estimator: base_estimator) -> OneVsRestClassifier
+ # @param estimator [Classifier] (defaults to: nil)
+ #   The (binary) classifier for constructing a multi-label classifier.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  @estimators = nil
@@ -41,14 +42,9 @@ module SVMKit

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> OneVsRestClassifier
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+ # @return [OneVsRestClassifier] The learned classifier itself.
  def fit(x, y)
  @classes = y.uniq.sort
  @estimators = @classes.map do |label|
@@ -60,13 +56,8 @@ module SVMKit

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [n_samples, n_classes]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence scores per sample for each class.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
  n_samples, = x.shape
  n_classes = @classes.size
@@ -78,13 +69,8 @@ module SVMKit

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  n_samples, = x.shape
  decision_values = decision_function(x)
@@ -94,29 +80,26 @@ module SVMKit

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   predict(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about OneVsRestClassifier.
+ def marshal_dump
  { params: params,
  classes: @classes,
  estimators: @estimators.map { |e| Marshal.dump(e) } }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @classes = obj[:classes]
  @estimators = obj[:estimators].map { |e| Marshal.load(e) }
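A sketch of the one-vs-rest wrapper above on a three-class toy problem: one binary PegasosSVC is trained per class label, and prediction picks the class with the highest confidence score. The data and parameter values are hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

samples = NMatrix.new([6, 2], [0.0, 0.0, 0.1, 0.1, 1.0, 0.0, 1.1, 0.1, 0.0, 1.0, 0.1, 1.1])
labels  = NMatrix.new([1, 6], [0, 0, 1, 1, 2, 2])

base = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 2, random_seed: 1)
ovr = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base)
ovr.fit(samples, labels)
p ovr.predict(samples) # one label per sample, picked by the highest confidence score
```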
@@ -6,32 +6,30 @@ module SVMKit
  module Preprocessing
  # Normalize samples to unit L2-norm.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::L2Normalizer.new
  #   new_samples = normalizer.fit_transform(samples)
  class L2Normalizer
  include Base::BaseEstimator
  include Base::Transformer

- # The vector consists of norms of each sample.
+ # Return the vector consisting of the L2-norm of each sample.
+ # @return [NMatrix] (shape: [1, n_samples])
  attr_reader :norm_vec # :nodoc:

  # Create a new normalizer for normalizing to unit L2-norm.
  #
- # :call-seq:
- #   new() -> L2Normalizer
+ # @overload new() -> L2Normalizer
  def initialize(_params = {})
  @norm_vec = nil
  end

- # Calculate L2 norms of each sample.
+ # Calculate L2-norms of each sample.
  #
- # :call-seq:
- #   fit(x) -> L2Normalizer
+ # @overload fit(x) -> L2Normalizer
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms.
- # * *Returns* :
- #   - L2Normalizer
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+ # @return [L2Normalizer]
  def fit(x, _y = nil)
  n_samples, = x.shape
  @norm_vec = NMatrix.new([1, n_samples],
@@ -39,15 +37,12 @@ module SVMKit
  self
  end

- # Calculate L2 norms of each sample, and then normalize samples to unit L2-norm.
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms.
- # * *Returns* :
- #   - The normalized samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+ # @return [NMatrix] The normalized samples.
  def fit_transform(x, _y = nil)
  fit(x)
  x / @norm_vec.transpose.repeat(x.shape[1], 1)
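A sketch of the normalizer above: each row is divided by its own L2-norm, so every sample ends up with unit length. The toy matrix is hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

samples = NMatrix.new([2, 2], [3.0, 4.0, 1.0, 0.0])
normalizer = SVMKit::Preprocessing::L2Normalizer.new
p normalizer.fit_transform(samples) # rows become [0.6, 0.8] and [1.0, 0.0]
```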
@@ -6,6 +6,7 @@ module SVMKit
  module Preprocessing
  # Normalize samples by scaling each feature to a given range.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
  #   new_training_samples = normalizer.fit_transform(training_samples)
  #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,23 +14,24 @@ module SVMKit
  include Base::BaseEstimator
  include Base::Transformer

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  feature_range: [0.0, 1.0]
  }.freeze

- # The vector consists of the minimum value for each feature.
- attr_reader :min_vec # :nodoc:
+ # Return the vector consisting of the minimum value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :min_vec

- # The vector consists of the maximum value for each feature.
- attr_reader :max_vec # :nodoc:
+ # Return the vector consisting of the maximum value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :max_vec

  # Creates a new normalizer for scaling each feature to a given range.
  #
- # call-seq:
- #   new(feature_range: [0.0, 1.0]) -> MinMaxScaler
+ # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
  #
- # * *Arguments* :
- #   - +:feature_range+ (Array) (defaults to: [0.0, 1.0]) -- The desired range of samples.
+ # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
  def initialize(params = {})
  @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  @min_vec = nil
@@ -38,13 +40,10 @@ module SVMKit

  # Calculate the minimum and maximum value of each feature for scaling.
  #
- # :call-seq:
- #   fit(x) -> MinMaxScaler
+ # @overload fit(x) -> MinMaxScaler
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values.
- # * *Returns* :
- #   - MinMaxScaler
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+ # @return [MinMaxScaler]
  def fit(x, _y = nil)
  @min_vec = x.min(0)
  @max_vec = x.max(0)
@@ -53,26 +52,18 @@ module SVMKit

  # Calculate the minimum and maximum values, and then normalize samples to feature_range.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+ # @return [NMatrix] The scaled samples.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Perform scaling of the given samples according to feature_range.
  #
- # call-seq:
- #   transform(x) -> NMatrix
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+ # @return [NMatrix] The scaled samples.
  def transform(x)
  n_samples, = x.shape
  dif_vec = @max_vec - @min_vec
@@ -80,15 +71,17 @@ module SVMKit
  nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about MinMaxScaler.
+ def marshal_dump
  { params: @params,
  min_vec: Utils.dump_nmatrix(@min_vec),
  max_vec: Utils.dump_nmatrix(@max_vec) }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  @params = obj[:params]
  @min_vec = Utils.restore_nmatrix(obj[:min_vec])
  @max_vec = Utils.restore_nmatrix(obj[:max_vec])
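A sketch of the scaler above: transform maps each feature through (x - min) / (max - min) and then stretches the result to feature_range. The toy column is hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

samples = NMatrix.new([3, 1], [0.0, 5.0, 10.0])
scaler = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
p scaler.fit_transform(samples) # the column is rescaled to 0.0, 0.5, 1.0
```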
@@ -6,6 +6,7 @@ module SVMKit
  module Preprocessing
  # Normalize samples by centering and scaling to unit variance.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::StandardScaler.new
  #   new_training_samples = normalizer.fit_transform(training_samples)
  #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,16 +14,17 @@ module SVMKit
  include Base::BaseEstimator
  include Base::Transformer

- # The vector consists of the mean value for each feature.
- attr_reader :mean_vec # :nodoc:
+ # Return the vector consisting of the mean value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :mean_vec

- # The vector consists of the standard deviation for each feature.
- attr_reader :std_vec # :nodoc:
+ # Return the vector consisting of the standard deviation for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :std_vec

  # Create a new normalizer for centering and scaling to unit variance.
  #
- # :call-seq:
- #   new() -> StandardScaler
+ # @overload new() -> StandardScaler
  def initialize(_params = {})
  @mean_vec = nil
  @std_vec = nil
@@ -30,13 +32,11 @@ module SVMKit

  # Calculate the mean value and standard deviation of each feature for scaling.
  #
- # :call-seq:
- #   fit(x) -> StandardScaler
+ # @overload fit(x) -> StandardScaler
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations.
- # * *Returns* :
- #   - StandardScaler
+ # @param x [NMatrix] (shape: [n_samples, n_features])
+ #   The samples to calculate the mean values and standard deviations.
+ # @return [StandardScaler]
  def fit(x, _y = nil)
  @mean_vec = x.mean(0)
  @std_vec = x.std(0)
@@ -45,39 +45,34 @@ module SVMKit

  # Calculate the mean values and standard deviations, and then normalize samples using them.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features])
+ #   The samples to calculate the mean values and standard deviations.
+ # @return [NMatrix] The scaled samples.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Perform standardization of the given samples.
  #
- # call-seq:
- #   transform(x) -> NMatrix
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+ # @return [NMatrix] The scaled samples.
  def transform(x)
  n_samples, = x.shape
  (x - @mean_vec.repeat(n_samples, 0)) / @std_vec.repeat(n_samples, 0)
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about StandardScaler.
+ def marshal_dump
  { mean_vec: Utils.dump_nmatrix(@mean_vec),
  std_vec: Utils.dump_nmatrix(@std_vec) }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  @mean_vec = Utils.restore_nmatrix(obj[:mean_vec])
  @std_vec = Utils.restore_nmatrix(obj[:std_vec])
  nil
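The same pattern for the standard scaler above, which removes each feature's mean and divides by its standard deviation. The toy column is hypothetical:

```ruby
require 'svmkit'
require 'nmatrix'

samples = NMatrix.new([3, 1], [1.0, 2.0, 3.0])
scaler = SVMKit::Preprocessing::StandardScaler.new
p scaler.fit_transform(samples) # mean 2.0 removed, then divided by the feature's std
```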
@@ -3,13 +3,9 @@ module SVMKit
  module Utils
  class << self
  # Dump an NMatrix object to a Ruby Hash.
- # # call-seq:
- #   dump_nmatrix(mat) -> Hash
  #
- # * *Arguments* :
- #   - +mat+ -- An NMatrix object converted to a Ruby Hash.
- # * *Returns* :
- #   - A Ruby Hash containing matrix information.
+ # @param mat [NMatrix] The NMatrix object to be converted to a Ruby Hash.
+ # @return [Hash] A Ruby Hash containing matrix information.
  def dump_nmatrix(mat)
  return nil if mat.class != NMatrix
  { shape: mat.shape, array: mat.to_flat_a, dtype: mat.dtype, stype: mat.stype }
@@ -17,13 +13,8 @@ module SVMKit

  # Return the results of converting the dumped data into an NMatrix object.
  #
- # call-seq:
- #   restore_nmatrix(dumped_mat) -> NMatrix
- #
- # * *Arguments* :
- #   - +dumpted_mat+ -- A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method.
- # * *Returns* :
- #   - An NMatrix object restored from the given Hash.
+ # @param dmp [Hash] A Ruby Hash created with the SVMKit::Utils.dump_nmatrix method.
+ # @return [NMatrix] An NMatrix object restored from the given Hash.
  def restore_nmatrix(dmp = {})
  return nil unless dmp.class == Hash && %i[shape array dtype stype].all?(&dmp.method(:has_key?))
  NMatrix.new(dmp[:shape], dmp[:array], dtype: dmp[:dtype], stype: dmp[:stype])
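A round-trip sketch of the helpers above: `dump_nmatrix` flattens an NMatrix into a plain Hash (which Marshal can serialize portably), and `restore_nmatrix` rebuilds it:

```ruby
require 'svmkit'
require 'nmatrix'

mat = NMatrix.new([2, 2], [1.0, 2.0, 3.0, 4.0])
dumped = SVMKit::Utils.dump_nmatrix(mat)
# dumped is a plain Hash, e.g. { shape: [2, 2], array: [1.0, 2.0, 3.0, 4.0], dtype: :float64, stype: :dense }
restored = SVMKit::Utils.restore_nmatrix(dumped)
p restored.to_flat_a == mat.to_flat_a # => true
```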
@@ -1,3 +1,5 @@
+ # SVMKit is an experimental library for machine learning in Ruby.
  module SVMKit
- VERSION = '0.1.1'.freeze
+ # @!visibility private
+ VERSION = '0.1.2'.freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.1.1
+ version: 0.1.2
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2017-10-03 00:00:00.000000000 Z
+ date: 2017-10-08 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: bundler