svmkit 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 1d3f4f2c398faa6b9e936ec813dac3023d9d1728
- data.tar.gz: 6fe4f9b6ac5a656fb1a3f49662997d0ebbdd8d4b
+ metadata.gz: c2538f07704c57e4dc90caa85411d0795ac23763
+ data.tar.gz: 3815cf50ee5978cca24d944c6e7f577216dae0e3
  SHA512:
- metadata.gz: fb1ac8798124f25cdd4dd0738dd856c8bae7e87aacac260a8ea8b1fb7388e3a966045f2382e48f241292111312eb7cb6cd69035010ee1487645f725f364ee16b
- data.tar.gz: 99bdb17d5a2d2825e904ce2e788e31d100e1850b1d9bfc32f7e7cc48ba1b13da59b667d1ed117a6768d852d5c15d3a4c3132994bc13350315c8b07016bcbcd41
+ metadata.gz: e448dd5f8fddb4a2a805b1dcddb7ab9c53d7c3db3460760b3165940d0ab93ae82ba1b0fec089e7a1d6651154b5f0437f3d4400531cc11017fd16f9e2029e2611
+ data.tar.gz: 1416d8c3ea1f55abd1fb269bdaf86f80faaa31be298d0ed1349f6b708d05e64545bf0ad4c3865c7ced26057441ff0999dd82ca77eae54209190527b87ba4ec27
data/HISTORY.md CHANGED
@@ -1,3 +1,11 @@
+ # 0.1.2
+ - Added the ability to learn a model with a bias term to the PegasosSVC and LogisticRegression classes.
+ - Rewrote the documentation with YARD notation.
+
+ # 0.1.1
+ - Added a class for Logistic Regression with SGD optimization.
+ - Fixed some mistakes in the documentation.
+
  # 0.1.0
  - Added basic classes.
  - Added a utility module.
@@ -5,4 +13,3 @@
  - Added a class for Support Vector Machine with the Pegasos algorithm.
  - Added a class that performs multiclass classification with the one-vs.-rest strategy.
  - Added classes for preprocessing such as min-max scaling, standardization, and L2 normalization.
-
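
The bias-term support noted for 0.1.2 is exposed through two new constructor options, `fit_bias` and `bias_scale`, both visible in the source diffs further down. A minimal sketch of how they are expected to be passed, using hypothetical toy data:

```ruby
require 'svmkit'

# Hypothetical toy data: four 2-dimensional samples and their labels.
samples = NMatrix.new([4, 2], [0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0])
labels  = NMatrix.new([1, 4], [-1, -1, 1, 1])

# fit_bias defaults to false, so 0.1.1 behavior is unchanged unless requested.
estimator = SVMKit::LinearModel::PegasosSVC.new(
  reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
  max_iter: 100, batch_size: 4, random_seed: 1
)
estimator.fit(samples, labels)
estimator.bias_term # => the learned intercept (a Float)
```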
data/README.md CHANGED
@@ -1,5 +1,9 @@
  # SVMKit

+ [![Build Status](https://travis-ci.org/yoshoku/SVMKit.svg?branch=master)](https://travis-ci.org/yoshoku/SVMKit)
+ [![Gem Version](https://badge.fury.io/rb/svmkit.svg)](https://badge.fury.io/rb/svmkit)
+ [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/SVMKit/blob/master/LICENSE.txt)
+
  SVMKit is a library for machine learning in Ruby.
  SVMKit implements machine learning algorithms with an interface similar to Scikit-Learn in Python.
  However, since SVMKit is an experimental library, only a few machine learning algorithms are implemented.
@@ -23,6 +27,7 @@ Or install it yourself as:
  ## Usage

  Training phase:
+
  ```ruby
  require 'svmkit'
  require 'libsvmloader'
@@ -46,6 +51,7 @@ File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)
  ```

  Testing phase:
+
  ```ruby
  require 'svmkit'
  require 'libsvmloader'
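
Both usage hunks above are truncated after their opening `require` lines. For context, here is a hedged reconstruction of the training phase using only classes that appear elsewhere in this diff; the data file name is a placeholder and the `LibSVMLoader.load_libsvm_file` call is assumed from the libsvmloader gem:

```ruby
require 'svmkit'
require 'libsvmloader'

# Load LIBSVM-format training data (placeholder file name).
samples, labels = LibSVMLoader.load_libsvm_file('training_data.t', stype: :dense)

# Scale features, then train a multiclass classifier built from binary SVCs.
normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
normalized = normalizer.fit_transform(samples)

base_estimator = SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, random_seed: 1)
classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
classifier.fit(normalized, labels)

# The surviving hunk context above shows the trained model being marshaled to disk.
File.open('trained_classifier.dat', 'wb') { |f| f.write(Marshal.dump(classifier)) }
```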
@@ -4,7 +4,8 @@ module SVMKit
  module Base
  # Base module for all estimators in SVMKit.
  module BaseEstimator
- # Parameters for this estimator.
+ # Return the parameters of the estimator.
+ # @return [Hash]
  attr_accessor :params
  end
  end
@@ -6,40 +6,43 @@ module SVMKit
  module KernelApproximation
  # Class for RBF kernel feature mapping.
  #
+ # @example
  #   transformer = SVMKit::KernelApproximation::RBF.new(gamma: 1.0, n_components: 128, random_seed: 1)
  #   new_training_samples = transformer.fit_transform(training_samples)
  #   new_testing_samples = transformer.transform(testing_samples)
  #
- # * *Refernce*:
- #   - A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
+ # *Reference*:
+ # 1. A. Rahimi and B. Recht, "Random Features for Large-Scale Kernel Machines," Proc. NIPS'07, pp.1177--1184, 2007.
  class RBF
  include Base::BaseEstimator
  include Base::Transformer

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  gamma: 1.0,
  n_components: 128,
  random_seed: nil
  }.freeze

- # The random matrix for transformation.
- attr_reader :random_mat # :nodoc:
+ # Return the random matrix for transformation.
+ # @return [NMatrix] (shape: [n_features, n_components])
+ attr_reader :random_mat

- # The random vector for transformation.
- attr_reader :random_vec # :nodoc:
+ # Return the random vector for transformation.
+ # @return [NMatrix] (shape: [1, n_components])
+ attr_reader :random_vec

- # The random generator for transformation.
- attr_reader :rng # :nodoc:
+ # Return the random generator for transformation.
+ # @return [Random]
+ attr_reader :rng

- # Creates a new transformer for mapping to RBF kernel feature space.
+ # Create a new transformer for mapping to RBF kernel feature space.
  #
- # call-seq:
- #   new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
+ # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
  #
- # * *Arguments* :
- #   - +:gamma+ (Float) (defaults to: 1.0) -- The parameter of RBF kernel: exp(-gamma * x^2)
- #   - +:n_components+ (Integer) (defaults to: 128) -- The number of dimensions of the RBF kernel feature space.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param gamma [Float] (defaults to: 1.0) The parameter of RBF kernel: exp(-gamma * x^2).
+ # @param n_components [Integer] (defaults to: 128) The number of dimensions of the RBF kernel feature space.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
@@ -50,13 +53,11 @@ module SVMKit

  # Fit the model with given training data.
  #
- # call-seq:
- #   fit(x) -> RBF
+ # @overload fit(x) -> RBF
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model. This method uses only the number of features of the data.
- # * *Returns* :
- #   - The learned transformer itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ #   This method uses only the number of features of the data.
+ # @return [RBF] The learned transformer itself.
  def fit(x, _y = nil)
  n_features = x.shape[1]
  params[:n_components] = 2 * n_features if params[:n_components] <= 0
@@ -65,48 +66,43 @@ module SVMKit
  @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
  NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
  )
- #@random_vec = rand_uniform([1, self.params[:n_components]]) * (2.0 * Math::PI)
  self
  end

  # Fit the model with training data, and then transform them with the learned model.
  #
- # call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- # * *Returns* :
- #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Transform the given data with the learned model.
  #
- # call-seq:
- #   transform(x) -> NMatrix
+ # @overload transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The data to be transformed with the learned model.
- # * *Returns* :
- #   - The transformed data (NMatrix, shape: [n_samples, n_components]).
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+ # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
  def transform(x)
  n_samples, = x.shape
  projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
  projection.sin * ((2.0 / params[:n_components])**0.5)
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about RBF.
+ def marshal_dump
  { params: params,
  random_mat: Utils.dump_nmatrix(@random_mat),
  random_vec: Utils.dump_nmatrix(@random_vec),
  rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @random_mat = Utils.restore_nmatrix(obj[:random_mat])
  @random_vec = Utils.restore_nmatrix(obj[:random_vec])
@@ -117,13 +113,13 @@ module SVMKit
  protected

  # Generate the uniform random matrix with the given shape.
- def rand_uniform(shape) # :nodoc:
+ def rand_uniform(shape)
  rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
  NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
  end

  # Generate the normal random matrix with the given shape, mean, and standard deviation.
- def rand_normal(shape, mu = 0.0, sigma = 1.0) # :nodoc:
+ def rand_normal(shape, mu = 0.0, sigma = 1.0)
  a = rand_uniform(shape)
  b = rand_uniform(shape)
  ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
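
The `fit`/`transform` pair above implements the random Fourier features of the Rahimi and Recht reference: `transform` computes sin(x.W + b) scaled by sqrt(2/D), and because half of the entries of `@random_vec` are set to PI/2, half of the output components are effectively cosines. A minimal plain-Ruby sketch of that mapping for a single sample (arrays stand in for NMatrix; the names are illustrative):

```ruby
# Sketch of the feature map in RBF#transform for one sample x:
# z(x)[j] = sqrt(2 / D) * sin(x . W[:, j] + b[j]).
def rbf_feature(x, w, b)
  d = b.size
  (0...d).map do |j|
    proj = x.each_index.sum { |i| x[i] * w[i][j] } + b[j]
    Math.sin(proj) * Math.sqrt(2.0 / d)
  end
end

# With b[j] = 0 for one half and PI / 2 for the other,
# sin(p + PI / 2) == cos(p), so the map yields paired sine and cosine
# features whose inner products approximate the RBF kernel.
```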
@@ -4,69 +4,80 @@ require 'svmkit/base/classifier'
  module SVMKit
  # This module consists of the classes that implement generalized linear models.
  module LinearModel
- # LogisticRegression is a class that implements Logistic Regression with stochastic gradient descent (SGD) optimization.
- # Note that the Logistic Regression of SVMKit performs as a binary classifier.
+ # LogisticRegression is a class that implements Logistic Regression
+ # with stochastic gradient descent (SGD) optimization.
+ # Note that the class performs as a binary classifier.
  #
+ # @example
  #   estimator =
  #     SVMKit::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
  #
- # * *Reference*:
- #   - S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
- #
+ # *Reference*
+ # 1. S. Shalev-Shwartz, Y. Singer, N. Srebro, and A. Cotter, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Mathematical Programming, vol. 127 (1), pp. 3--30, 2011.
  class LogisticRegression
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  reg_param: 1.0,
+ fit_bias: false,
+ bias_scale: 1.0,
  max_iter: 100,
  batch_size: 50,
  random_seed: nil
  }.freeze

- # The weight vector for Logistic Regression.
+ # Return the weight vector for Logistic Regression.
+ # @return [NMatrix] (shape: [1, n_features])
  attr_reader :weight_vec

- # The random generator for performing random sampling in the SGD optimization.
+ # Return the bias term (a.k.a. intercept) for Logistic Regression.
+ # @return [Float]
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling in the SGD optimization.
+ # @return [Random]
  attr_reader :rng

  # Create a new classifier with Logistic Regression by the SGD optimization.
  #
- # :call-seq:
- #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
+ # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisticRegression
  #
- # * *Arguments* :
- #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
- #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
- #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+ # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+ #   If fit_bias is true, the feature vector v becomes [v; bias_scale].
+ # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+ # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
  @weight_vec = nil
+ @bias_term = 0.0
  @rng = Random.new(self.params[:random_seed])
  end

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> LogisticRegression
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The categorical variables (e.g. labels) to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
+ #   to be used for fitting the model.
+ # @return [LogisticRegression] The learned classifier itself.
  def fit(x, y)
- # Generate binary labels
+ # Generate binary labels.
  negative_label = y.uniq.sort.shift
  bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
+ # Expand feature vectors for bias term.
+ samples = x
+ samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
  # Initialize some variables.
- n_samples, n_features = x.shape
+ n_samples, n_features = samples.shape
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
- @weight_vec = NMatrix.zeros([1, n_features])
+ weight_vec = NMatrix.zeros([1, n_features])
  # Start optimization.
  params[:max_iter].times do |t|
  # random sampling
@@ -76,84 +87,75 @@ module SVMKit
  eta = 1.0 / (params[:reg_param] * (t + 1))
  mean_vec = NMatrix.zeros([1, n_features])
  subset_ids.each do |n|
- z = @weight_vec.dot(x.row(n).transpose)[0]
+ z = weight_vec.dot(samples.row(n).transpose)[0]
  coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
- mean_vec += x.row(n) * coef
+ mean_vec += samples.row(n) * coef
  end
  mean_vec *= eta / params[:batch_size]
- @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+ weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
  # scale the weight vector.
- scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
- @weight_vec *= [1.0, scaler].min
+ scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+ weight_vec *= [1.0, scaler].min
+ end
+ # Store the learned model.
+ if params[:fit_bias]
+ @weight_vec = weight_vec[0...n_features - 1]
+ @bias_term = weight_vec[n_features - 1]
+ else
+ @weight_vec = weight_vec[0...n_features]
+ @bias_term = 0.0
  end
  self
  end

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence score per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
  def decision_function(x)
- w = (@weight_vec.dot(x.transpose) * -1.0).exp + 1.0
+ w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
  w.map { |v| 1.0 / v }
  end

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
  end

  # Predict probability for samples.
  #
- # :call-seq:
- #   predict_proba(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the probailities.
- # * *Returns* :
- #   - Predicted probability per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
  def predict_proba(x)
  decision_function(x)
  end

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   score(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+ # Dump marshal data.
+ # @return [Hash] The marshal data about LogisticRegression.
+ def marshal_dump
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+ @bias_term = obj[:bias_term]
  @rng = obj[:rng]
  nil
  end
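
With the bias term folded in, `decision_function` above computes the logistic sigmoid of w.x + b. A small plain-Ruby sketch of that scoring rule (arrays stand in for NMatrix rows; the numbers are illustrative):

```ruby
# Sketch of the scoring rule in LogisticRegression#decision_function:
# the confidence score is sigmoid(w . x + bias).
def sigmoid_score(weights, bias, x)
  z = weights.each_index.sum { |i| weights[i] * x[i] } + bias
  1.0 / (1.0 + Math.exp(-z))
end

sigmoid_score([0.8, -0.3], 0.1, [1.0, 2.0]) # => ~0.574, so predict 1 (>= 0.5)
```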
@@ -6,140 +6,144 @@ module SVMKit
  module LinearModel
  # PegasosSVC is a class that implements Support Vector Classifier with the Pegasos algorithm.
  #
+ # @example
  #   estimator =
  #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
  #
- # * *Reference*:
- #   - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
- #
+ # *Reference*
+ # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
  class PegasosSVC
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  reg_param: 1.0,
+ fit_bias: false,
+ bias_scale: 1.0,
  max_iter: 100,
  batch_size: 50,
  random_seed: nil
  }.freeze

- # The weight vector for SVC.
+ # Return the weight vector for SVC.
+ # @return [NMatrix] (shape: [1, n_features])
  attr_reader :weight_vec

- # The random generator for performing random sampling in the Pegasos algorithm.
+ # Return the bias term (a.k.a. intercept) for SVC.
+ # @return [Float]
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling in the Pegasos algorithm.
+ # @return [Random]
  attr_reader :rng

  # Create a new classifier with Support Vector Machine by the Pegasos algorithm.
  #
- # :call-seq:
- #   new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
+ # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> PegasosSVC
  #
- # * *Arguments* :
- #   - +:reg_param+ (Float) (defaults to: 1.0) -- The regularization parameter.
- #   - +:max_iter+ (Integer) (defaults to: 100) -- The maximum number of iterations.
- #   - +:batch_size+ (Integer) (defaults to: 50) -- The size of the mini batches.
- #   - +:random_seed+ (Integer) (defaults to: nil) -- The seed value using to initialize the random generator.
+ # @param reg_param [Float] (defaults to: 1.0) The regularization parameter.
+ # @param fit_bias [Boolean] (defaults to: false) The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] (defaults to: 1.0) The scale of the bias term.
+ # @param max_iter [Integer] (defaults to: 100) The maximum number of iterations.
+ # @param batch_size [Integer] (defaults to: 50) The size of the mini batches.
+ # @param random_seed [Integer] (defaults to: nil) The seed value used to initialize the random generator.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  self.params[:random_seed] ||= srand
  @weight_vec = nil
+ @bias_term = 0.0
  @rng = Random.new(self.params[:random_seed])
  end

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> PegasosSVC
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+ # @return [PegasosSVC] The learned classifier itself.
  def fit(x, y)
  # Generate binary labels
  negative_label = y.uniq.sort.shift
  bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
+ # Expand feature vectors for bias term.
+ samples = x
+ samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
  # Initialize some variables.
- n_samples, n_features = x.shape
+ n_samples, n_features = samples.shape
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
- @weight_vec = NMatrix.zeros([1, n_features])
+ weight_vec = NMatrix.zeros([1, n_features])
  # Start optimization.
  params[:max_iter].times do |t|
  # random sampling
  subset_ids = rand_ids.shift(params[:batch_size])
  rand_ids.concat(subset_ids)
  target_ids = subset_ids.map do |n|
- n if @weight_vec.dot(x.row(n).transpose) * bin_y[n] < 1
+ n if weight_vec.dot(samples.row(n).transpose) * bin_y[n] < 1
  end
  n_subsamples = target_ids.size
  next if n_subsamples.zero?
  # update the weight vector.
  eta = 1.0 / (params[:reg_param] * (t + 1))
  mean_vec = NMatrix.zeros([1, n_features])
- target_ids.each { |n| mean_vec += x.row(n) * bin_y[n] }
+ target_ids.each { |n| mean_vec += samples.row(n) * bin_y[n] }
  mean_vec *= eta / n_subsamples
- @weight_vec = @weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
+ weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
  # scale the weight vector.
- scaler = (1.0 / params[:reg_param]**0.5) / @weight_vec.norm2
- @weight_vec *= [1.0, scaler].min
+ scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
+ weight_vec *= [1.0, scaler].min
+ end
+ # Store the learned model.
+ if params[:fit_bias]
+ @weight_vec = weight_vec[0...n_features - 1]
+ @bias_term = weight_vec[n_features - 1]
+ else
+ @weight_vec = weight_vec[0...n_features]
+ @bias_term = 0.0
  end
  self
  end

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence score per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
  def decision_function(x)
- @weight_vec.dot(x.transpose)
+ @weight_vec.dot(x.transpose) + @bias_term
  end

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  decision_function(x).map { |v| v >= 0 ? 1 : -1 }
  end

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   score(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
+ # Dump marshal data.
+ # @return [Hash] The marshal data about PegasosSVC.
+ def marshal_dump
+ { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
+ @bias_term = obj[:bias_term]
  @rng = obj[:rng]
  nil
  end
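
The loop in `fit` above follows the mini-batch Pegasos recipe from the reference: a decaying step size eta = 1/(lambda * (t + 1)), a subgradient built only from margin-violating samples, and a projection of the weights onto the ball of radius 1/sqrt(lambda). A hedged plain-Ruby sketch of a single step (arrays of Floats stand in for NMatrix objects):

```ruby
# One Pegasos step over a mini-batch; labels are +1/-1.
def pegasos_step(w, batch, labels, reg_param, t)
  eta = 1.0 / (reg_param * (t + 1))
  # Keep only samples violating the margin: y * (w . x) < 1.
  violators = batch.each_index.select do |n|
    labels[n] * w.each_index.sum { |i| w[i] * batch[n][i] } < 1
  end
  return w if violators.empty?
  # Subgradient step on the hinge loss plus the L2 regularizer.
  mean = Array.new(w.size, 0.0)
  violators.each { |n| w.each_index { |i| mean[i] += batch[n][i] * labels[n] } }
  w = w.each_index.map { |i| w[i] * (1.0 - eta * reg_param) + eta * mean[i] / violators.size }
  # Project onto the ball of radius 1 / sqrt(reg_param).
  norm = Math.sqrt(w.sum { |v| v * v })
  scale = norm.zero? ? 1.0 : [1.0, (1.0 / Math.sqrt(reg_param)) / norm].min
  w.map { |v| v * scale }
end
```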
@@ -6,33 +6,34 @@ module SVMKit
  module Multiclass
  # OneVsRestClassifier is a class that implements One-vs-Rest (OvR) strategy for multi-label classification.
  #
+ # @example
  #   base_estimator =
  #     SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 100, batch_size: 20, random_seed: 1)
  #   estimator = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_estimator)
  #   estimator.fit(training_samples, training_labels)
  #   results = estimator.predict(testing_samples)
- #
  class OneVsRestClassifier
  include Base::BaseEstimator
  include Base::Classifier

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  estimator: nil
  }.freeze

- # The set of estimators.
+ # Return the set of estimators.
+ # @return [Array<Classifier>]
  attr_reader :estimators

- # The class labels.
+ # Return the class labels.
+ # @return [NMatrix] (shape: [1, n_classes])
  attr_reader :classes

  # Create a new multi-label classifier with the one-vs-rest strategy.
  #
- # :call-seq:
- #   new(estimator: base_estimator) -> OneVsRestClassifier
- #
- # * *Arguments* :
- #   - +:estimator+ (Classifier) (defaults to: nil) -- The (binary) classifier for construction a multi-label classifier.
+ # @overload new(estimator: base_estimator) -> OneVsRestClassifier
+ # @param estimator [Classifier] (defaults to: nil)
+ #   The (binary) classifier for constructing a multi-label classifier.
  def initialize(params = {})
  self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  @estimators = nil
@@ -41,14 +42,9 @@ module SVMKit

  # Fit the model with given training data.
  #
- # :call-seq:
- #   fit(x, y) -> OneVsRestClassifier
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The training data to be used for fitting the model.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- The labels to be used for fitting the model.
- # * *Returns* :
- #   - The learned classifier itself.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [NMatrix] (shape: [1, n_samples]) The labels to be used for fitting the model.
+ # @return [OneVsRestClassifier] The learned classifier itself.
  def fit(x, y)
  @classes = y.uniq.sort
  @estimators = @classes.map do |label|
@@ -60,13 +56,8 @@ module SVMKit

  # Calculate confidence scores for samples.
  #
- # :call-seq:
- #   decision_function(x) -> NMatrix, shape: [n_samples, n_classes]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to compute the scores.
- # * *Returns* :
- #   - Confidence scores per sample for each class.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
+ # @return [NMatrix] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
  n_samples, = x.shape
  n_classes = @classes.size
@@ -78,13 +69,8 @@ module SVMKit

  # Predict class labels for samples.
  #
- # :call-seq:
- #   predict(x) -> NMatrix, shape: [1, n_samples]
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to predict the labels.
- # * *Returns* :
- #   - Predicted class label per sample.
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
+ # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
  def predict(x)
  n_samples, = x.shape
  decision_values = decision_function(x)
@@ -94,29 +80,26 @@ module SVMKit

  # Calculate the mean accuracy of the given testing data.
  #
- # :call-seq:
- #   predict(x, y) -> Float
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- Testing data.
- #   - +y+ (NMatrix, shape: [1, n_samples]) -- True labels for testing data.
- # * *Returns* :
- #   - Mean accuracy
+ # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
+ # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
+ # @return [Float] Mean accuracy.
  def score(x, y)
  p = predict(x)
  n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
  n_hits / y.size.to_f
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about OneVsRestClassifier.
+ def marshal_dump
  { params: params,
  classes: @classes,
  estimators: @estimators.map { |e| Marshal.dump(e) } }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  self.params = obj[:params]
  @classes = obj[:classes]
  @estimators = obj[:estimators].map { |e| Marshal.load(e) }
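
`fit` above trains one binary estimator per class label (relabeling that class as positive and the rest as negative), and `predict` picks the class whose estimator reports the highest confidence. A tiny sketch of the prediction side, assuming per-sample scoring for brevity (`estimators` and `classes` as exposed by the readers above):

```ruby
# One-vs-rest prediction for a single sample: score it with every
# per-class binary estimator and return the argmax class.
def ovr_predict(sample, estimators, classes)
  scores = estimators.map { |estimator| estimator.decision_function(sample) }
  best_index = scores.each_with_index.max_by { |score, _i| score }.last
  classes[best_index]
end
```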
@@ -6,32 +6,30 @@ module SVMKit
  module Preprocessing
  # Normalize samples to unit L2-norm.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::L2Normalizer.new
  #   new_samples = normalizer.fit_transform(samples)
  class L2Normalizer
  include Base::BaseEstimator
  include Base::Transformer

- # The vector consists of norms of each sample.
+ # Return the vector consisting of the L2-norm of each sample.
+ # @return [NMatrix] (shape: [1, n_samples])
  attr_reader :norm_vec # :nodoc:

  # Create a new normalizer for normalizing to unit L2-norm.
  #
- # :call-seq:
- #   new() -> L2Normalizer
+ # @overload new() -> L2Normalizer
  def initialize(_params = {})
  @norm_vec = nil
  end

- # Calculate L2 norms of each sample.
+ # Calculate L2-norms of each sample.
  #
- # :call-seq:
- #   fit(x) -> L2Normalizer
+ # @overload fit(x) -> L2Normalizer
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms.
- # * *Returns* :
- #   - L2Normalizer
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+ # @return [L2Normalizer]
  def fit(x, _y = nil)
  n_samples, = x.shape
  @norm_vec = NMatrix.new([1, n_samples],
@@ -39,15 +37,12 @@ module SVMKit
  self
  end

- # Calculate L2 norms of each sample, and then normalize samples to unit L2-norm.
+ # Calculate L2-norms of each sample, and then normalize samples to unit L2-norm.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate L2-norms.
- # * *Returns* :
- #   - The normalized samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
+ # @return [NMatrix] The normalized samples.
  def fit_transform(x, _y = nil)
  fit(x)
  x / @norm_vec.transpose.repeat(x.shape[1], 1)
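
`fit_transform` above divides each sample by its own L2 norm, so every row ends up with unit length. A one-method sketch of the same rule for a single sample (plain arrays):

```ruby
# Scale one sample to unit L2-norm: x / ||x||.
def l2_normalize(x)
  norm = Math.sqrt(x.sum { |v| v * v })
  x.map { |v| v / norm }
end

l2_normalize([3.0, 4.0]) # => [0.6, 0.8] (norm 5.0 becomes 1.0)
```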
@@ -6,6 +6,7 @@ module SVMKit
  module Preprocessing
  # Normalize samples by scaling each feature to a given range.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::MinMaxScaler.new(feature_range: [0.0, 1.0])
  #   new_training_samples = normalizer.fit_transform(training_samples)
  #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,23 +14,24 @@ module SVMKit
  include Base::BaseEstimator
  include Base::Transformer

- DEFAULT_PARAMS = { # :nodoc:
+ # @!visibility private
+ DEFAULT_PARAMS = {
  feature_range: [0.0, 1.0]
  }.freeze

- # The vector consists of the minimum value for each feature.
- attr_reader :min_vec # :nodoc:
+ # Return the vector consisting of the minimum value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :min_vec

- # The vector consists of the maximum value for each feature.
- attr_reader :max_vec # :nodoc:
+ # Return the vector consisting of the maximum value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :max_vec

  # Creates a new normalizer for scaling each feature to a given range.
  #
- # call-seq:
- #   new(feature_range: [0.0, 1.0]) -> MinMaxScaler
+ # @overload new(feature_range: [0.0, 1.0]) -> MinMaxScaler
  #
- # * *Arguments* :
- #   - +:feature_range+ (Array) (defaults to: [0.0, 1.0]) -- The desired range of samples.
+ # @param feature_range [Array] (defaults to: [0.0, 1.0]) The desired range of samples.
  def initialize(params = {})
  @params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
  @min_vec = nil
@@ -38,13 +40,10 @@ module SVMKit

  # Calculate the minimum and maximum value of each feature for scaling.
  #
- # :call-seq:
- #   fit(x) -> MinMaxScaler
+ # @overload fit(x) -> MinMaxScaler
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values.
- # * *Returns* :
- #   - MinMaxScaler
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+ # @return [MinMaxScaler]
  def fit(x, _y = nil)
  @min_vec = x.min(0)
  @max_vec = x.max(0)
@@ -53,26 +52,18 @@ module SVMKit

  # Calculate the minimum and maximum values, and then normalize samples to feature_range.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the minimum and maximum values.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
+ # @return [NMatrix] The scaled samples.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Perform scaling of the given samples according to feature_range.
  #
- # call-seq:
- #   transform(x) -> NMatrix
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+ # @return [NMatrix] The scaled samples.
  def transform(x)
  n_samples, = x.shape
  dif_vec = @max_vec - @min_vec
@@ -80,15 +71,17 @@ module SVMKit
  nx * (@params[:feature_range][1] - @params[:feature_range][0]) + @params[:feature_range][0]
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about MinMaxScaler.
+ def marshal_dump
  { params: @params,
  min_vec: Utils.dump_nmatrix(@min_vec),
  max_vec: Utils.dump_nmatrix(@max_vec) }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  @params = obj[:params]
  @min_vec = Utils.restore_nmatrix(obj[:min_vec])
  @max_vec = Utils.restore_nmatrix(obj[:max_vec])
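
`transform` above first maps each value to [0, 1] via (x - min) / (max - min), then stretches it into the requested `feature_range`. A scalar sketch of that per-feature rule:

```ruby
# Min-max scaling for one value of one feature:
# nx = (x - min) / (max - min), then nx * (hi - lo) + lo.
def min_max_scale(value, min, max, range = [0.0, 1.0])
  lo, hi = range
  (value - min) / (max - min) * (hi - lo) + lo
end

min_max_scale(5.0, 0.0, 10.0)              # => 0.5
min_max_scale(5.0, 0.0, 10.0, [-1.0, 1.0]) # => 0.0
```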
@@ -6,6 +6,7 @@ module SVMKit
  module Preprocessing
  # Normalize samples by centering and scaling to unit variance.
  #
+ # @example
  #   normalizer = SVMKit::Preprocessing::StandardScaler.new
  #   new_training_samples = normalizer.fit_transform(training_samples)
  #   new_testing_samples = normalizer.transform(testing_samples)
@@ -13,16 +14,17 @@ module SVMKit
  include Base::BaseEstimator
  include Base::Transformer

- # The vector consists of the mean value for each feature.
- attr_reader :mean_vec # :nodoc:
+ # Return the vector consisting of the mean value for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :mean_vec

- # The vector consists of the standard deviation for each feature.
- attr_reader :std_vec # :nodoc:
+ # Return the vector consisting of the standard deviation for each feature.
+ # @return [NMatrix] (shape: [1, n_features])
+ attr_reader :std_vec

  # Create a new normalizer for centering and scaling to unit variance.
  #
- # :call-seq:
- #   new() -> StandardScaler
+ # @overload new() -> StandardScaler
  def initialize(_params = {})
  @mean_vec = nil
  @std_vec = nil
@@ -30,13 +32,11 @@ module SVMKit

  # Calculate the mean value and standard deviation of each feature for scaling.
  #
- # :call-seq:
- #   fit(x) -> StandardScaler
+ # @overload fit(x) -> StandardScaler
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations.
- # * *Returns* :
- #   - StandardScaler
+ # @param x [NMatrix] (shape: [n_samples, n_features])
+ #   The samples to calculate the mean values and standard deviations.
+ # @return [StandardScaler]
  def fit(x, _y = nil)
  @mean_vec = x.mean(0)
  @std_vec = x.std(0)
@@ -45,39 +45,34 @@ module SVMKit

  # Calculate the mean values and standard deviations, and then normalize samples using them.
  #
- # :call-seq:
- #   fit_transform(x) -> NMatrix
+ # @overload fit_transform(x) -> NMatrix
  #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to calculate the mean values and standard deviations.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features])
+ #   The samples to calculate the mean values and standard deviations.
+ # @return [NMatrix] The scaled samples.
  def fit_transform(x, _y = nil)
  fit(x).transform(x)
  end

  # Perform standardization of the given samples.
  #
- # call-seq:
- #   transform(x) -> NMatrix
- #
- # * *Arguments* :
- #   - +x+ (NMatrix, shape: [n_samples, n_features]) -- The samples to be scaled.
- # * *Returns* :
- #   - The scaled samples (NMatrix)
+ # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to be scaled.
+ # @return [NMatrix] The scaled samples.
  def transform(x)
  n_samples, = x.shape
  (x - @mean_vec.repeat(n_samples, 0)) / @std_vec.repeat(n_samples, 0)
  end

- # Serializes object through Marshal#dump.
- def marshal_dump # :nodoc:
+ # Dump marshal data.
+ # @return [Hash] The marshal data about StandardScaler.
+ def marshal_dump
  { mean_vec: Utils.dump_nmatrix(@mean_vec),
  std_vec: Utils.dump_nmatrix(@std_vec) }
  end

- # Deserialize object through Marshal#load.
- def marshal_load(obj) # :nodoc:
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
  @mean_vec = Utils.restore_nmatrix(obj[:mean_vec])
  @std_vec = Utils.restore_nmatrix(obj[:std_vec])
  nil
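
`transform` above is the usual z-score: subtract the per-feature mean learned in `fit`, then divide by the per-feature standard deviation. A scalar sketch:

```ruby
# Standardize one value of one feature: (x - mean) / std.
def standardize(value, mean, std)
  (value - mean) / std
end

standardize(12.0, 10.0, 2.0) # => 1.0 (one standard deviation above the mean)
```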
@@ -3,13 +3,9 @@ module SVMKit
  module Utils
  class << self
  # Dump an NMatrix object converted to a Ruby Hash.
- # # call-seq:
- #   dump_nmatrix(mat) -> Hash
  #
- # * *Arguments* :
- #   - +mat+ -- An NMatrix object converted to a Ruby Hash.
- # * *Returns* :
- #   - A Ruby Hash containing matrix information.
+ # @param mat [NMatrix] An NMatrix object to be converted to a Ruby Hash.
+ # @return [Hash] A Ruby Hash containing matrix information.
  def dump_nmatrix(mat)
  return nil if mat.class != NMatrix
  { shape: mat.shape, array: mat.to_flat_a, dtype: mat.dtype, stype: mat.stype }
@@ -17,13 +13,8 @@ module SVMKit

  # Return the results of converting the dumped data into an NMatrix object.
  #
- # call-seq:
- #   restore_nmatrix(dumped_mat) -> NMatrix
- #
- # * *Arguments* :
- #   - +dumpted_mat+ -- A Ruby Hash about NMatrix object created with SVMKit::Utils.dump_nmatrix method.
- # * *Returns* :
- #   - An NMatrix object restored from the given Hash.
+ # @param dmp [Hash] A Ruby Hash describing an NMatrix object, created with the SVMKit::Utils.dump_nmatrix method.
+ # @return [NMatrix] An NMatrix object restored from the given Hash.
  def restore_nmatrix(dmp = {})
  return nil unless dmp.class == Hash && %i[shape array dtype stype].all?(&dmp.method(:has_key?))
  NMatrix.new(dmp[:shape], dmp[:array], dtype: dmp[:dtype], stype: dmp[:stype])
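
The two helpers above form a round trip: `dump_nmatrix` captures shape, flat data, dtype, and stype in a Hash, and `restore_nmatrix` rebuilds an equivalent matrix from it. A hedged round-trip sketch, assuming the nmatrix gem is available:

```ruby
require 'nmatrix'
require 'svmkit'

mat = NMatrix.new([2, 2], [1.0, 2.0, 3.0, 4.0], dtype: :float64, stype: :dense)
dumped = SVMKit::Utils.dump_nmatrix(mat)
# => { shape: [2, 2], array: [1.0, 2.0, 3.0, 4.0], dtype: :float64, stype: :dense }
restored = SVMKit::Utils.restore_nmatrix(dumped)
restored.to_flat_a == mat.to_flat_a # => true
```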
@@ -1,3 +1,5 @@
+ # SVMKit is an experimental library of machine learning in Ruby.
  module SVMKit
- VERSION = '0.1.1'.freeze
+ # @!visibility private
+ VERSION = '0.1.2'.freeze
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-10-03 00:00:00.000000000 Z
11
+ date: 2017-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler