svmkit 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
4
- data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
3
+ metadata.gz: 4a53bee5e11b90721544b873d144b149b38aafe1
4
+ data.tar.gz: f1ded6552e6cbdd8af3c29c4d8d403d3c8a62128
5
5
  SHA512:
6
- metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
7
- data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
6
+ metadata.gz: c3e3073f1afd4470cc21e1241d1f3666bbfefcd871700f711cfe377bb04c490f2f3ff10bc4d8ef764e05e0015faf09aef114e45ad0affde35a18641b064ed389
7
+ data.tar.gz: 90144eea5e5f848dffb1325cd4576f27dbf61e5032917493e4957f5acba96489cc2b556f88a2078f5f2d9b5d2842c32454e6e56c003bfd2e36f6d5263cefc4c6
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.2.0
2
+ - Migrated the linear algebra library to Numo::NArray.
3
+ - Added module for loading and saving libsvm format file.
4
+
1
5
  # 0.1.3
2
6
  - Added class for Kernel Support Vector Machine with Pegasos algorithm.
3
7
  - Added module for calculating pairwise kernel functions and Euclidean distances.
data/README.md CHANGED
@@ -30,9 +30,8 @@ Training phase:
30
30
 
31
31
  ```ruby
32
32
  require 'svmkit'
33
- require 'libsvmloader'
34
33
 
35
- samples, labels = LibSVMLoader.load_libsvm_file('pendigits', stype: :dense)
34
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
36
35
 
37
36
  normalizer = SVMKit::Preprocessing::MinMaxScaler.new
38
37
  normalized = normalizer.fit_transform(samples)
@@ -41,7 +40,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
41
40
  transformed = transformer.fit_transform(normalized)
42
41
 
43
42
  base_classifier =
44
- SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
43
+ SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
45
44
  classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
46
45
  classifier.fit(transformed, labels)
47
46
 
@@ -54,9 +53,8 @@ Testing phase:
54
53
 
55
54
  ```ruby
56
55
  require 'svmkit'
57
- require 'libsvmloader'
58
56
 
59
- samples, labels = LibSVMLoader.load_libsvm_file('pendigits.t', stype: :dense)
57
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits.t')
60
58
 
61
59
  normalizer = Marshal.load(File.binread('trained_normalizer.dat'))
62
60
  transformer = Marshal.load(File.binread('trained_transformer.dat'))
data/lib/svmkit.rb CHANGED
@@ -1,16 +1,14 @@
1
- begin
2
- require 'nmatrix/nmatrix'
3
- rescue LoadError
4
- end
1
+
2
+ require 'numo/narray'
5
3
 
6
4
  require 'svmkit/version'
7
- require 'svmkit/utils'
8
5
  require 'svmkit/pairwise_metric'
6
+ require 'svmkit/dataset'
9
7
  require 'svmkit/base/base_estimator'
10
8
  require 'svmkit/base/classifier'
11
9
  require 'svmkit/base/transformer'
12
10
  require 'svmkit/kernel_approximation/rbf'
13
- require 'svmkit/linear_model/pegasos_svc'
11
+ require 'svmkit/linear_model/svc'
14
12
  require 'svmkit/linear_model/logistic_regression'
15
13
  require 'svmkit/kernel_machine/kernel_svc'
16
14
  require 'svmkit/multiclass/one_vs_rest_classifier'
@@ -0,0 +1,90 @@
1
module SVMKit
  # Module for loading and saving a dataset file in the libsvm text format,
  # where every line is "<label> <index>:<value> <index>:<value> ...".
  module Dataset
    class << self
      # Load a dataset with the libsvm file format into Numo::NArray.
      #
      # Blank (whitespace-only) lines are skipped. Every row of the returned
      # matrix has the same width: the largest column index found in the file
      # plus one (after converting indices to zero-based).
      #
      # @param filename [String] A path to a dataset file.
      # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
      #
      # @return [Array<Numo::NArray>]
      #   Returns array containing the (n_samples x n_features) matrix for feature vectors
      #   and (n_samples) vector for labels or target values.
      # @raise [ArgumentError] If a feature index is smaller than the first valid column index.
      def load_libsvm_file(filename, zero_based: false)
        ftvecs = []
        labels = []
        max_idx = -1 # -1 means "no feature seen yet", which yields zero columns.
        File.foreach(filename) do |line|
          next if line.strip.empty?

          label, ftvec, line_max_idx = parse_libsvm_line(line, zero_based)
          labels.push(label)
          ftvecs.push(ftvec)
          max_idx = line_max_idx if line_max_idx > max_idx
        end
        # Indices are zero-based here, so the number of columns is max index + 1.
        [convert_to_matrix(ftvecs, max_idx + 1), Numo::NArray.asarray(labels)]
      end

      # Dump the dataset with the libsvm file format.
      #
      # Only the first min(data rows, labels size) samples are written, and
      # features whose value equals zero are omitted from each line.
      #
      # @param data [Numo::NArray] (shape: [n_samples, n_features]) matrix consisting of feature vectors.
      # @param labels [Numo::NArray] (shape: [n_samples]) matrix consisting of labels or target values.
      # @param filename [String] A path to the output libsvm file.
      # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
      def dump_libsvm_file(data, labels, filename, zero_based: false)
        n_samples = [data.shape[0], labels.shape[0]].min
        label_type = detect_dtype(labels)
        value_type = detect_dtype(data)
        File.open(filename, 'w') do |file|
          n_samples.times do |n|
            file.puts(dump_libsvm_line(labels[n], data[n, true],
                                       label_type, value_type, zero_based))
          end
        end
      end

      private

      # Convert a numeric token to Integer when it is written as a plain
      # integer (e.g. "3", "-1"), and to Float otherwise.
      def parse_number(token)
        token.to_i.to_s == token ? token.to_i : token.to_f
      end

      # Split one libsvm line into its label, its list of [zero-based index, value]
      # pairs, and the largest zero-based index on the line (-1 when the line
      # carries no features).
      def parse_libsvm_line(line, zero_based)
        tokens = line.split
        label = parse_number(tokens.shift)
        offset = zero_based == false ? 1 : 0
        ftvec = tokens.map do |el|
          idx, val = el.split(':')
          idx = idx.to_i - offset
          # A negative index would silently address columns from the end of the row.
          if idx < 0
            raise ArgumentError,
                  "feature index #{idx + offset} is out of range (zero_based: #{zero_based.inspect})"
          end
          [idx, parse_number(val)]
        end
        [label, ftvec, ftvec.map(&:first).max || -1]
      end

      # Expand the sparse [index, value] lists into dense rows of n_features
      # columns (missing entries are 0) and wrap them with Numo::NArray.asarray.
      def convert_to_matrix(data, n_features)
        mat = data.map do |ft|
          vec = Array.new(n_features, 0)
          ft.each { |idx, val| vec[idx] = val }
          vec
        end
        Numo::NArray.asarray(mat)
      end

      # Choose the format directive used to print values, based on the class
      # name reported by Numo::NArray.array_type: '%d' for the integer types,
      # '%.10g' for SFloat/DFloat, and '%s' for anything else.
      def detect_dtype(data)
        case Numo::NArray.array_type(data).to_s
        when 'Numo::Int8', 'Numo::Int16', 'Numo::Int32', 'Numo::Int64',
             'Numo::UInt8', 'Numo::UInt16', 'Numo::UInt32', 'Numo::UInt64'
          '%d'
        when 'Numo::SFloat', 'Numo::DFloat'
          '%.10g'
        else
          '%s'
        end
      end

      # Format one sample as a libsvm line: the label followed by
      # "index:value" for every feature whose value is not zero.
      def dump_libsvm_line(label, ftvec, label_type, value_type, zero_based)
        offset = zero_based == false ? 1 : 0
        fields = [format(label_type.to_s, label)]
        ftvec.to_a.each_with_index do |val, n|
          fields << format("%d:#{value_type}", n + offset, val) if val != 0.0
        end
        fields.join(' ')
      end
    end
  end
end
@@ -17,19 +17,12 @@ module SVMKit
17
17
  include Base::BaseEstimator
18
18
  include Base::Transformer
19
19
 
20
- # @!visibility private
21
- DEFAULT_PARAMS = {
22
- gamma: 1.0,
23
- n_components: 128,
24
- random_seed: nil
25
- }.freeze
26
-
27
20
  # Return the random matrix for transformation.
28
- # @return [NMatrix] (shape: [n_features, n_components])
21
+ # @return [Numo::DFloat] (shape: [n_features, n_components])
29
22
  attr_reader :random_mat
30
23
 
31
24
  # Return the random vector for transformation.
32
- # @return [NMatrix] (shape: [1, n_components])
25
+ # @return [Numo::DFloat] (shape: [n_components])
33
26
  attr_reader :random_vec
34
27
 
35
28
  # Return the random generator for transformation.
@@ -38,14 +31,14 @@ module SVMKit
38
31
 
39
32
  # Create a new transformer for mapping to RBF kernel feature space.
40
33
  #
41
- # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
42
- #
43
- # @param params [Hash] The parameters for RBF kernel approximation.
44
- # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
45
- # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
46
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
47
- def initialize(params = {})
48
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
34
+ # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
35
+ # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
36
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
37
+ def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
38
+ self.params = {}
39
+ self.params[:gamma] = gamma
40
+ self.params[:n_components] = n_components
41
+ self.params[:random_seed] = random_seed
49
42
  self.params[:random_seed] ||= srand
50
43
  @rng = Random.new(self.params[:random_seed])
51
44
  @random_mat = nil
@@ -56,7 +49,7 @@ module SVMKit
56
49
  #
57
50
  # @overload fit(x) -> RBF
58
51
  #
59
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
52
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
60
53
  # This method uses only the number of features of the data.
61
54
  # @return [RBF] The learned transformer itself.
62
55
  def fit(x, _y = nil)
@@ -64,40 +57,40 @@ module SVMKit
64
57
  params[:n_components] = 2 * n_features if params[:n_components] <= 0
65
58
  @random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5
66
59
  n_half_components = params[:n_components] / 2
67
- @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
68
- NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
60
+ @random_vec = Numo::DFloat.zeros(params[:n_components] - n_half_components).concatenate(
61
+ Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
69
62
  )
70
63
  self
71
64
  end
72
65
 
73
66
  # Fit the model with training data, and then transform them with the learned model.
74
67
  #
75
- # @overload fit_transform(x) -> NMatrix
68
+ # @overload fit_transform(x) -> Numo::DFloat
76
69
  #
77
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
78
- # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
71
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
79
72
  def fit_transform(x, _y = nil)
80
73
  fit(x).transform(x)
81
74
  end
82
75
 
83
76
  # Transform the given data with the learned model.
84
77
  #
85
- # @overload transform(x) -> NMatrix
78
+ # @overload transform(x) -> Numo::DFloat
86
79
  #
87
- # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
88
- # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
80
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
81
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
89
82
  def transform(x)
90
83
  n_samples, = x.shape
91
- projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
92
- projection.sin * ((2.0 / params[:n_components])**0.5)
84
+ projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
85
+ Numo::NMath.sin(projection) * ((2.0 / params[:n_components])**0.5)
93
86
  end
94
87
 
95
88
  # Dump marshal data.
96
89
  # @return [Hash] The marshal data about RBF.
97
90
  def marshal_dump
98
91
  { params: params,
99
- random_mat: Utils.dump_nmatrix(@random_mat),
100
- random_vec: Utils.dump_nmatrix(@random_vec),
92
+ random_mat: @random_mat,
93
+ random_vec: @random_vec,
101
94
  rng: @rng }
102
95
  end
103
96
 
@@ -105,8 +98,8 @@ module SVMKit
105
98
  # @return [nil]
106
99
  def marshal_load(obj)
107
100
  self.params = obj[:params]
108
- @random_mat = Utils.restore_nmatrix(obj[:random_mat])
109
- @random_vec = Utils.restore_nmatrix(obj[:random_vec])
101
+ @random_mat = obj[:random_mat]
102
+ @random_vec = obj[:random_vec]
110
103
  @rng = obj[:rng]
111
104
  nil
112
105
  end
@@ -115,15 +108,15 @@ module SVMKit
115
108
 
116
109
  # Generate the uniform random matrix with the given shape.
117
110
  def rand_uniform(shape)
118
- rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
119
- NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
111
+ rnd_vals = Array.new(shape.inject(:*)) { @rng.rand }
112
+ Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
120
113
  end
121
114
 
122
115
  # Generate the normal random matrix with the given shape, mean, and standard deviation.
123
116
  def rand_normal(shape, mu = 0.0, sigma = 1.0)
124
117
  a = rand_uniform(shape)
125
118
  b = rand_uniform(shape)
126
- ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
119
+ (Numo::NMath.sqrt(Numo::NMath.log(a) * -2.0) * Numo::NMath.sin(b * 2.0 * Math::PI)) * sigma + mu
127
120
  end
128
121
  end
129
122
  end
@@ -2,7 +2,7 @@ require 'svmkit/base/base_estimator'
2
2
  require 'svmkit/base/classifier'
3
3
 
4
4
  module SVMKit
5
- # This module consists of the classes that implement generalized linear models.
5
+ # This module consists of the classes that implement kernel method-based estimators.
6
6
  module KernelMachine
7
7
  # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
8
8
  #
@@ -20,15 +20,8 @@ module SVMKit
20
20
  include Base::BaseEstimator
21
21
  include Base::Classifier
22
22
 
23
- # @!visibility private
24
- DEFAULT_PARAMS = {
25
- reg_param: 1.0,
26
- max_iter: 1000,
27
- random_seed: nil
28
- }.freeze
29
-
30
23
  # Return the weight vector for Kernel SVC.
31
- # @return [NMatrix] (shape: [1, n_trainig_sample])
24
+ # @return [Numo::DFloat] (shape: [n_training_samples])
32
25
  attr_reader :weight_vec
33
26
 
34
27
  # Return the random generator for performing random sampling in the Pegasos algorithm.
@@ -37,14 +30,14 @@ module SVMKit
37
30
 
38
31
  # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
39
32
  #
40
- # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
41
- #
42
- # @param params [Hash] The parameters for Kernel SVC.
43
- # @option params [Float] :reg_param (1.0) The regularization parameter.
44
- # @option params [Integer] :max_iter (1000) The maximum number of iterations.
45
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
46
- def initialize(params = {})
47
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
33
+ # @param reg_param [Float] The regularization parameter.
34
+ # @param max_iter [Integer] The maximum number of iterations.
35
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
36
+ def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
37
+ self.params = {}
38
+ self.params[:reg_param] = reg_param
39
+ self.params[:max_iter] = max_iter
40
+ self.params[:random_seed] = random_seed
48
41
  self.params[:random_seed] ||= srand
49
42
  @weight_vec = nil
50
43
  @rng = Random.new(self.params[:random_seed])
@@ -52,74 +45,74 @@ module SVMKit
52
45
 
53
46
  # Fit the model with given training data.
54
47
  #
55
- # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
48
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
56
49
  # The kernel matrix of the training data to be used for fitting the model.
57
- # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
50
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
58
51
  # @return [KernelSVC] The learned classifier itself.
59
52
  def fit(x, y)
60
53
  # Generate binary labels
61
- negative_label = y.uniq.sort.shift
62
- bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
54
+ negative_label = y.to_a.uniq.sort.shift
55
+ bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
63
56
  # Initialize some variables.
64
57
  n_training_samples = x.shape[0]
65
58
  rand_ids = []
66
- weight_vec = NMatrix.zeros([1, n_training_samples])
59
+ weight_vec = Numo::DFloat.zeros(n_training_samples)
67
60
  # Start optimization.
68
61
  params[:max_iter].times do |t|
69
62
  # random sampling
70
63
  rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
71
64
  target_id = rand_ids.shift
72
65
  # update the weight vector
73
- func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
66
+ func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
74
67
  func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
75
68
  weight_vec[target_id] += 1.0 if func < 1.0
76
69
  end
77
70
  # Store the learned model.
78
- @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
71
+ @weight_vec = weight_vec * Numo::DFloat.asarray(bin_y)
79
72
  self
80
73
  end
81
74
 
82
75
  # Calculate confidence scores for samples.
83
76
  #
84
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
77
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
85
78
  # The kernel matrix between testing samples and training samples to compute the scores.
86
- # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
79
+ # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
87
80
  def decision_function(x)
88
81
  @weight_vec.dot(x.transpose)
89
82
  end
90
83
 
91
84
  # Predict class labels for samples.
92
85
  #
93
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
86
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
94
87
  # The kernel matrix between testing samples and training samples to predict the labels.
95
- # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
88
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
96
89
  def predict(x)
97
- decision_function(x).map { |v| v >= 0 ? 1 : -1 }
90
+ Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
98
91
  end
99
92
 
100
93
  # Calculate the mean accuracy of the given testing data.
101
94
  #
102
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
95
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
103
96
  # The kernel matrix between testing samples and training samples.
104
- # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
97
+ # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
105
98
  # @return [Float] Mean accuracy
106
99
  def score(x, y)
107
100
  p = predict(x)
108
- n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
101
+ n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
109
102
  n_hits / y.size.to_f
110
103
  end
111
104
 
112
105
  # Dump marshal data.
113
106
  # @return [Hash] The marshal data about KernelSVC.
114
107
  def marshal_dump
115
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
108
+ { params: params, weight_vec: @weight_vec, rng: @rng }
116
109
  end
117
110
 
118
111
  # Load marshal data.
119
112
  # @return [nil]
120
113
  def marshal_load(obj)
121
114
  self.params = obj[:params]
122
- @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
115
+ @weight_vec = obj[:weight_vec]
123
116
  @rng = obj[:rng]
124
117
  nil
125
118
  end
@@ -31,7 +31,7 @@ module SVMKit
31
31
  }.freeze
32
32
 
33
33
  # Return the weight vector for Logistic Regression.
34
- # @return [NMatrix] (shape: [1, n_features])
34
+ # @return [Numo::DFloat] (shape: [n_features])
35
35
  attr_reader :weight_vec
36
36
 
37
37
  # Return the bias term (a.k.a. intercept) for Logistic Regression.
@@ -44,18 +44,21 @@ module SVMKit
44
44
 
45
45
  # Create a new classifier with Logistic Regression by the SGD optimization.
46
46
  #
47
- # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
48
- #
49
- # @param params [Hash] The parameters for Logistic Regression.
50
- # @option params [Float] :reg_param (1.0) The regularization parameter.
51
- # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
52
- # @option params [Float] :bias_scale (1.0) The scale of the bias term.
47
+ # @param reg_param [Float] The regularization parameter.
48
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
49
+ # @param bias_scale [Float] The scale of the bias term.
53
50
  # If fit_bias is true, the feature vector v becomes [v; bias_scale].
54
- # @option params [Integer] :max_iter (100) The maximum number of iterations.
55
- # @option params [Integer] :batch_size (50) The size of the mini batches.
56
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
57
- def initialize(params = {})
58
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
51
+ # @param max_iter [Integer] The maximum number of iterations.
52
+ # @param batch_size [Integer] The size of the mini batches.
53
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
54
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
55
+ self.params = {}
56
+ self.params[:reg_param] = reg_param
57
+ self.params[:fit_bias] = fit_bias
58
+ self.params[:bias_scale] = bias_scale
59
+ self.params[:max_iter] = max_iter
60
+ self.params[:batch_size] = batch_size
61
+ self.params[:random_seed] = random_seed
59
62
  self.params[:random_seed] ||= srand
60
63
  @weight_vec = nil
61
64
  @bias_term = 0.0
@@ -64,21 +67,25 @@ module SVMKit
64
67
 
65
68
  # Fit the model with given training data.
66
69
  #
67
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
68
- # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
71
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
69
72
  # to be used for fitting the model.
70
73
  # @return [LogisticRegression] The learned classifier itself.
71
74
  def fit(x, y)
72
75
  # Generate binary labels.
73
- negative_label = y.uniq.sort.shift
74
- bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
76
+ negative_label = y.to_a.uniq.sort.shift
77
+ bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
75
78
  # Expand feature vectors for bias term.
76
79
  samples = x
77
- samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
80
+ if params[:fit_bias]
81
+ samples = Numo::NArray.hstack(
82
+ [samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
83
+ )
84
+ end
78
85
  # Initialize some variables.
79
86
  n_samples, n_features = samples.shape
80
87
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
81
- weight_vec = NMatrix.zeros([1, n_features])
88
+ weight_vec = Numo::DFloat.zeros(n_features)
82
89
  # Start optimization.
83
90
  params[:max_iter].times do |t|
84
91
  # random sampling
@@ -86,16 +93,17 @@ module SVMKit
86
93
  rand_ids.concat(subset_ids)
87
94
  # update the weight vector.
88
95
  eta = 1.0 / (params[:reg_param] * (t + 1))
89
- mean_vec = NMatrix.zeros([1, n_features])
96
+ mean_vec = Numo::DFloat.zeros(n_features)
90
97
  subset_ids.each do |n|
91
- z = weight_vec.dot(samples.row(n).transpose)[0]
98
+ z = weight_vec.dot(samples[n, true])
92
99
  coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
93
- mean_vec += samples.row(n) * coef
100
+ mean_vec += samples[n, true] * coef
94
101
  end
95
102
  mean_vec *= eta / params[:batch_size]
96
103
  weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
97
104
  # scale the weight vector.
98
- scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
105
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
106
+ scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
99
107
  weight_vec *= [1.0, scaler].min
100
108
  end
101
109
  # Store the learned model.
@@ -111,51 +119,51 @@ module SVMKit
111
119
 
112
120
  # Calculate confidence scores for samples.
113
121
  #
114
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
115
- # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
122
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
123
+ # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
116
124
  def decision_function(x)
117
- w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
125
+ w = Numo::NMath.exp(((@weight_vec.dot(x.transpose) + @bias_term) * -1.0)) + 1.0
118
126
  w.map { |v| 1.0 / v }
119
127
  end
120
128
 
121
129
  # Predict class labels for samples.
122
130
  #
123
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
124
- # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
131
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
132
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
125
133
  def predict(x)
126
- decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
134
+ Numo::Int32.cast(decision_function(x).map { |v| v >= 0.5 ? 1 : -1 })
127
135
  end
128
136
 
129
137
  # Predict probability for samples.
130
138
  #
131
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probailities.
132
- # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
139
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
140
+ # @return [Numo::DFloat] (shape: [n_samples]) Predicted probability per sample.
133
141
  def predict_proba(x)
134
142
  decision_function(x)
135
143
  end
136
144
 
137
145
  # Calculate the mean accuracy of the given testing data.
138
146
  #
139
- # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
140
- # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
147
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
148
+ # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
141
149
  # @return [Float] Mean accuracy
142
150
  def score(x, y)
143
151
  p = predict(x)
144
- n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
152
+ n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
145
153
  n_hits / y.size.to_f
146
154
  end
147
155
 
148
156
  # Dump marshal data.
149
157
  # @return [Hash] The marshal data about LogisticRegression.
150
158
  def marshal_dump
151
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
159
+ { params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
152
160
  end
153
161
 
154
162
  # Load marshal data.
155
163
  # @return [nil]
156
164
  def marshal_load(obj)
157
165
  self.params = obj[:params]
158
- @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
166
+ @weight_vec = obj[:weight_vec]
159
167
  @bias_term = obj[:bias_term]
160
168
  @rng = obj[:rng]
161
169
  nil