svmkit 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6a6941fcd2f0f465de1d6a3b6f658e7ee0fdc8fb
4
- data.tar.gz: b9dc50c6fa8e3d7470adf89ffc950b2ae63db4e1
3
+ metadata.gz: 4a53bee5e11b90721544b873d144b149b38aafe1
4
+ data.tar.gz: f1ded6552e6cbdd8af3c29c4d8d403d3c8a62128
5
5
  SHA512:
6
- metadata.gz: 4795995b936f2902dc50e19dc30c46fdb2a1b6a732869a311efa791da8ec09305f6ea6dbfd9aab7da8c934465c8eebe9c45dcaab57090b09f0cc20c1ccacff77
7
- data.tar.gz: b8afdc306dba4a10922e63756bb6d18731e54a4a5f04293a478b7c897b6a58622c9f88eb6bdb47837fa7114ab80d879e6e1ea3e993a5937f291d69e5d72f1699
6
+ metadata.gz: c3e3073f1afd4470cc21e1241d1f3666bbfefcd871700f711cfe377bb04c490f2f3ff10bc4d8ef764e05e0015faf09aef114e45ad0affde35a18641b064ed389
7
+ data.tar.gz: 90144eea5e5f848dffb1325cd4576f27dbf61e5032917493e4957f5acba96489cc2b556f88a2078f5f2d9b5d2842c32454e6e56c003bfd2e36f6d5263cefc4c6
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.2.0
2
+ - Migrated the linear algebra library to Numo::NArray.
3
+ - Added module for loading and saving libsvm format files.
4
+
1
5
  # 0.1.3
2
6
  - Added class for Kernel Support Vector Machine with Pegasos algorithm.
3
7
  - Added module for calculating pairwise kernel functions and euclidean distances.
data/README.md CHANGED
@@ -30,9 +30,8 @@ Training phase:
30
30
 
31
31
  ```ruby
32
32
  require 'svmkit'
33
- require 'libsvmloader'
34
33
 
35
- samples, labels = LibSVMLoader.load_libsvm_file('pendigits', stype: :dense)
34
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits')
36
35
 
37
36
  normalizer = SVMKit::Preprocessing::MinMaxScaler.new
38
37
  normalized = normalizer.fit_transform(samples)
@@ -41,7 +40,7 @@ transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 102
41
40
  transformed = transformer.fit_transform(normalized)
42
41
 
43
42
  base_classifier =
44
- SVMKit::LinearModel::PegasosSVC.new(reg_param: 1.0, max_iter: 50, batch_size: 20, random_seed: 1)
43
+ SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
45
44
  classifier = SVMKit::Multiclass::OneVsRestClassifier.new(estimator: base_classifier)
46
45
  classifier.fit(transformed, labels)
47
46
 
@@ -54,9 +53,8 @@ Testing phase:
54
53
 
55
54
  ```ruby
56
55
  require 'svmkit'
57
- require 'libsvmloader'
58
56
 
59
- samples, labels = LibSVMLoader.load_libsvm_file('pendigits.t', stype: :dense)
57
+ samples, labels = SVMKit::Dataset.load_libsvm_file('pendigits.t')
60
58
 
61
59
  normalizer = Marshal.load(File.binread('trained_normalizer.dat'))
62
60
  transformer = Marshal.load(File.binread('trained_transformer.dat'))
data/lib/svmkit.rb CHANGED
@@ -1,16 +1,14 @@
1
- begin
2
- require 'nmatrix/nmatrix'
3
- rescue LoadError
4
- end
1
+
2
+ require 'numo/narray'
5
3
 
6
4
  require 'svmkit/version'
7
- require 'svmkit/utils'
8
5
  require 'svmkit/pairwise_metric'
6
+ require 'svmkit/dataset'
9
7
  require 'svmkit/base/base_estimator'
10
8
  require 'svmkit/base/classifier'
11
9
  require 'svmkit/base/transformer'
12
10
  require 'svmkit/kernel_approximation/rbf'
13
- require 'svmkit/linear_model/pegasos_svc'
11
+ require 'svmkit/linear_model/svc'
14
12
  require 'svmkit/linear_model/logistic_regression'
15
13
  require 'svmkit/kernel_machine/kernel_svc'
16
14
  require 'svmkit/multiclass/one_vs_rest_classifier'
@@ -0,0 +1,90 @@
1
+ module SVMKit
2
+ # Module for loading and saving a dataset file.
3
+ module Dataset
4
+ class << self
5
+ # Load a dataset with the libsvm file format into Numo::NArray.
6
+ #
7
+ # @param filename [String] A path to a dataset file.
8
+ # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
9
+ #
10
+ # @return [Array<Numo::NArray>]
11
+ # Returns array containing the (n_samples x n_features) matrix for feature vectors
12
+ # and (n_samples) vector for labels or target values.
13
+ def load_libsvm_file(filename, zero_based: false)
14
+ ftvecs = []
15
+ labels = []
16
+ n_features = 0
17
+ File.read(filename).split("\n").each do |line|
18
+ label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
19
+ labels.push(label)
20
+ ftvecs.push(ftvec)
21
+ n_features = [n_features, max_idx].max
22
+ end
23
+ [convert_to_matrix(ftvecs, n_features), Numo::NArray.asarray(labels)]
24
+ end
25
+
26
+ # Dump the dataset with the libsvm file format.
27
+ #
28
+ # @param data [Numo::NArray] (shape: [n_samples, n_features]) matrix consisting of feature vectors.
29
+ # @param labels [Numo::NArray] (shape: [n_samples]) vector consisting of labels or target values.
30
+ # @param filename [String] A path to the output libsvm file.
31
+ # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
32
+ def dump_libsvm_file(data, labels, filename, zero_based: false)
33
+ n_samples = [data.shape[0], labels.shape[0]].min
34
+ label_type = detect_dtype(labels)
35
+ value_type = detect_dtype(data)
36
+ File.open(filename, 'w') do |file|
37
+ n_samples.times do |n|
38
+ file.puts(dump_libsvm_line(labels[n], data[n, true],
39
+ label_type, value_type, zero_based))
40
+ end
41
+ end
42
+ end
43
+
44
+ private
45
+
46
+ def parse_libsvm_line(line, zero_based)
47
+ tokens = line.split
48
+ label = tokens.shift
49
+ label = label.to_i.to_s == label ? label.to_i : label.to_f
50
+ ftvec = tokens.map do |el|
51
+ idx, val = el.split(':')
52
+ idx = idx.to_i - (zero_based == false ? 1 : 0)
53
+ val = val.to_i.to_s == val ? val.to_i : val.to_f
54
+ [idx, val]
55
+ end
56
+ max_idx = ftvec.map { |el| el[0] }.max
57
+ max_idx ||= 0
58
+ [label, ftvec, max_idx]
59
+ end
60
+
61
+ def convert_to_matrix(data, n_features)
62
+ mat = []
63
+ data.each do |ft|
64
+ vec = Array.new(n_features) { 0 }
65
+ ft.each { |el| vec[el[0]] = el[1] }
66
+ mat.push(vec)
67
+ end
68
+ Numo::NArray.asarray(mat)
69
+ end
70
+
71
+ def detect_dtype(data)
72
+ arr_type_str = Numo::NArray.array_type(data).to_s
73
+ type = '%s'
74
+ type = '%d' if ['Numo::Int8', 'Numo::Int16', 'Numo::Int32', 'Numo::Int64'].include?(arr_type_str)
75
+ type = '%d' if ['Numo::UInt8', 'Numo::UInt16', 'Numo::UInt32', 'Numo::UInt64'].include?(arr_type_str)
76
+ type = '%.10g' if ['Numo::SFloat', 'Numo::DFloat'].include?(arr_type_str)
77
+ type
78
+ end
79
+
80
+ def dump_libsvm_line(label, ftvec, label_type, value_type, zero_based)
81
+ line = format(label_type.to_s, label)
82
+ ftvec.to_a.each_with_index do |val, n|
83
+ idx = n + (zero_based == false ? 1 : 0)
84
+ line += format(" %d:#{value_type}", idx, val) if val != 0.0
85
+ end
86
+ line
87
+ end
88
+ end
89
+ end
90
+ end
@@ -17,19 +17,12 @@ module SVMKit
17
17
  include Base::BaseEstimator
18
18
  include Base::Transformer
19
19
 
20
- # @!visibility private
21
- DEFAULT_PARAMS = {
22
- gamma: 1.0,
23
- n_components: 128,
24
- random_seed: nil
25
- }.freeze
26
-
27
20
  # Return the random matrix for transformation.
28
- # @return [NMatrix] (shape: [n_features, n_components])
21
+ # @return [Numo::DFloat] (shape: [n_features, n_components])
29
22
  attr_reader :random_mat
30
23
 
31
24
  # Return the random vector for transformation.
32
- # @return [NMatrix] (shape: [1, n_components])
25
+ # @return [Numo::DFloat] (shape: [n_components])
33
26
  attr_reader :random_vec
34
27
 
35
28
  # Return the random generator for transformation.
@@ -38,14 +31,14 @@ module SVMKit
38
31
 
39
32
  # Create a new transformer for mapping to RBF kernel feature space.
40
33
  #
41
- # @overload new(gamma: 1.0, n_components: 128, random_seed: 1) -> RBF
42
- #
43
- # @param params [Hash] The parameters for RBF kernel approximation.
44
- # @option params [Float] :gamma (1.0) The parameter of RBF kernel: exp(-gamma * x^2).
45
- # @option params [Integer] :n_components (128) The number of dimensions of the RBF kernel feature space.
46
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
47
- def initialize(params = {})
48
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
34
+ # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
35
+ # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
36
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
37
+ def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
38
+ self.params = {}
39
+ self.params[:gamma] = gamma
40
+ self.params[:n_components] = n_components
41
+ self.params[:random_seed] = random_seed
49
42
  self.params[:random_seed] ||= srand
50
43
  @rng = Random.new(self.params[:random_seed])
51
44
  @random_mat = nil
@@ -56,7 +49,7 @@ module SVMKit
56
49
  #
57
50
  # @overload fit(x) -> RBF
58
51
  #
59
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
52
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
60
53
  # This method uses only the number of features of the data.
61
54
  # @return [RBF] The learned transformer itself.
62
55
  def fit(x, _y = nil)
@@ -64,40 +57,40 @@ module SVMKit
64
57
  params[:n_components] = 2 * n_features if params[:n_components] <= 0
65
58
  @random_mat = rand_normal([n_features, params[:n_components]]) * (2.0 * params[:gamma])**0.5
66
59
  n_half_components = params[:n_components] / 2
67
- @random_vec = NMatrix.zeros([1, params[:n_components] - n_half_components]).hconcat(
68
- NMatrix.ones([1, n_half_components]) * (0.5 * Math::PI)
60
+ @random_vec = Numo::DFloat.zeros(params[:n_components] - n_half_components).concatenate(
61
+ Numo::DFloat.ones(n_half_components) * (0.5 * Math::PI)
69
62
  )
70
63
  self
71
64
  end
72
65
 
73
66
  # Fit the model with training data, and then transform them with the learned model.
74
67
  #
75
- # @overload fit_transform(x) -> NMatrix
68
+ # @overload fit_transform(x) -> Numo::DFloat
76
69
  #
77
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
78
- # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
71
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
79
72
  def fit_transform(x, _y = nil)
80
73
  fit(x).transform(x)
81
74
  end
82
75
 
83
76
  # Transform the given data with the learned model.
84
77
  #
85
- # @overload transform(x) -> NMatrix
78
+ # @overload transform(x) -> Numo::DFloat
86
79
  #
87
- # @param x [NMatrix] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
88
- # @return [NMatrix] (shape: [n_samples, n_components]) The transformed data.
80
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
81
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
89
82
  def transform(x)
90
83
  n_samples, = x.shape
91
- projection = x.dot(@random_mat) + @random_vec.repeat(n_samples, 0)
92
- projection.sin * ((2.0 / params[:n_components])**0.5)
84
+ projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
85
+ Numo::NMath.sin(projection) * ((2.0 / params[:n_components])**0.5)
93
86
  end
94
87
 
95
88
  # Dump marshal data.
96
89
  # @return [Hash] The marshal data about RBF.
97
90
  def marshal_dump
98
91
  { params: params,
99
- random_mat: Utils.dump_nmatrix(@random_mat),
100
- random_vec: Utils.dump_nmatrix(@random_vec),
92
+ random_mat: @random_mat,
93
+ random_vec: @random_vec,
101
94
  rng: @rng }
102
95
  end
103
96
 
@@ -105,8 +98,8 @@ module SVMKit
105
98
  # @return [nil]
106
99
  def marshal_load(obj)
107
100
  self.params = obj[:params]
108
- @random_mat = Utils.restore_nmatrix(obj[:random_mat])
109
- @random_vec = Utils.restore_nmatrix(obj[:random_vec])
101
+ @random_mat = obj[:random_mat]
102
+ @random_vec = obj[:random_vec]
110
103
  @rng = obj[:rng]
111
104
  nil
112
105
  end
@@ -115,15 +108,15 @@ module SVMKit
115
108
 
116
109
  # Generate the uniform random matrix with the given shape.
117
110
  def rand_uniform(shape)
118
- rnd_vals = Array.new(NMatrix.size(shape)) { @rng.rand }
119
- NMatrix.new(shape, rnd_vals, dtype: :float64, stype: :dense)
111
+ rnd_vals = Array.new(shape.inject(:*)) { @rng.rand }
112
+ Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
120
113
  end
121
114
 
122
115
  # Generate the normal random matrix with the given shape, mean, and standard deviation.
123
116
  def rand_normal(shape, mu = 0.0, sigma = 1.0)
124
117
  a = rand_uniform(shape)
125
118
  b = rand_uniform(shape)
126
- ((a.log * -2.0).sqrt * (b * 2.0 * Math::PI).sin) * sigma + mu
119
+ (Numo::NMath.sqrt(Numo::NMath.log(a) * -2.0) * Numo::NMath.sin(b * 2.0 * Math::PI)) * sigma + mu
127
120
  end
128
121
  end
129
122
  end
@@ -2,7 +2,7 @@ require 'svmkit/base/base_estimator'
2
2
  require 'svmkit/base/classifier'
3
3
 
4
4
  module SVMKit
5
- # This module consists of the classes that implement generalized linear models.
5
+ # This module consists of the classes that implement kernel method-based estimators.
6
6
  module KernelMachine
7
7
  # KernelSVC is a class that implements (Nonlinear) Kernel Support Vector Classifier with the Pegasos algorithm.
8
8
  #
@@ -20,15 +20,8 @@ module SVMKit
20
20
  include Base::BaseEstimator
21
21
  include Base::Classifier
22
22
 
23
- # @!visibility private
24
- DEFAULT_PARAMS = {
25
- reg_param: 1.0,
26
- max_iter: 1000,
27
- random_seed: nil
28
- }.freeze
29
-
30
23
  # Return the weight vector for Kernel SVC.
31
- # @return [NMatrix] (shape: [1, n_trainig_sample])
24
+ # @return [Numo::DFloat] (shape: [n_training_samples])
32
25
  attr_reader :weight_vec
33
26
 
34
27
  # Return the random generator for performing random sampling in the Pegasos algorithm.
@@ -37,14 +30,14 @@ module SVMKit
37
30
 
38
31
  # Create a new classifier with Kernel Support Vector Machine by the Pegasos algorithm.
39
32
  #
40
- # @overload new(reg_param: 1.0, max_iter: 1000, random_seed: 1) -> KernelSVC
41
- #
42
- # @param params [Hash] The parameters for Kernel SVC.
43
- # @option params [Float] :reg_param (1.0) The regularization parameter.
44
- # @option params [Integer] :max_iter (1000) The maximum number of iterations.
45
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
46
- def initialize(params = {})
47
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
33
+ # @param reg_param [Float] The regularization parameter.
34
+ # @param max_iter [Integer] The maximum number of iterations.
35
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
36
+ def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
37
+ self.params = {}
38
+ self.params[:reg_param] = reg_param
39
+ self.params[:max_iter] = max_iter
40
+ self.params[:random_seed] = random_seed
48
41
  self.params[:random_seed] ||= srand
49
42
  @weight_vec = nil
50
43
  @rng = Random.new(self.params[:random_seed])
@@ -52,74 +45,74 @@ module SVMKit
52
45
 
53
46
  # Fit the model with given training data.
54
47
  #
55
- # @param x [NMatrix] (shape: [n_training_samples, n_training_samples])
48
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
56
49
  # The kernel matrix of the training data to be used for fitting the model.
57
- # @param y [NMatrix] (shape: [1, n_training_samples]) The labels to be used for fitting the model.
50
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
58
51
  # @return [KernelSVC] The learned classifier itself.
59
52
  def fit(x, y)
60
53
  # Generate binary labels
61
- negative_label = y.uniq.sort.shift
62
- bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : -1 }
54
+ negative_label = y.to_a.uniq.sort.shift
55
+ bin_y = y.to_a.map { |l| l != negative_label ? 1 : -1 }
63
56
  # Initialize some variables.
64
57
  n_training_samples = x.shape[0]
65
58
  rand_ids = []
66
- weight_vec = NMatrix.zeros([1, n_training_samples])
59
+ weight_vec = Numo::DFloat.zeros(n_training_samples)
67
60
  # Start optimization.
68
61
  params[:max_iter].times do |t|
69
62
  # random sampling
70
63
  rand_ids = [*0...n_training_samples].shuffle(random: @rng) if rand_ids.empty?
71
64
  target_id = rand_ids.shift
72
65
  # update the weight vector
73
- func = (weight_vec * bin_y[target_id]).dot(x.row(target_id).transpose).to_f
66
+ func = (weight_vec * bin_y[target_id]).dot(x[target_id, true].transpose).to_f
74
67
  func *= bin_y[target_id] / (params[:reg_param] * (t + 1))
75
68
  weight_vec[target_id] += 1.0 if func < 1.0
76
69
  end
77
70
  # Store the learned model.
78
- @weight_vec = weight_vec * NMatrix.new([1, n_training_samples], bin_y)
71
+ @weight_vec = weight_vec * Numo::DFloat.asarray(bin_y)
79
72
  self
80
73
  end
81
74
 
82
75
  # Calculate confidence scores for samples.
83
76
  #
84
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
77
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
85
78
  # The kernel matrix between testing samples and training samples to compute the scores.
86
- # @return [NMatrix] (shape: [1, n_testing_samples]) Confidence score per sample.
79
+ # @return [Numo::DFloat] (shape: [n_testing_samples]) Confidence score per sample.
87
80
  def decision_function(x)
88
81
  @weight_vec.dot(x.transpose)
89
82
  end
90
83
 
91
84
  # Predict class labels for samples.
92
85
  #
93
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
86
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
94
87
  # The kernel matrix between testing samples and training samples to predict the labels.
95
- # @return [NMatrix] (shape: [1, n_testing_samples]) Predicted class label per sample.
88
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
96
89
  def predict(x)
97
- decision_function(x).map { |v| v >= 0 ? 1 : -1 }
90
+ Numo::Int32.cast(decision_function(x).map { |v| v >= 0 ? 1 : -1 })
98
91
  end
99
92
 
100
93
  # Calculate the mean accuracy of the given testing data.
101
94
  #
102
- # @param x [NMatrix] (shape: [n_testing_samples, n_training_samples])
95
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
103
96
  # The kernel matrix between testing samples and training samples.
104
- # @param y [NMatrix] (shape: [1, n_testing_samples]) True labels for testing data.
97
+ # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
105
98
  # @return [Float] Mean accuracy
106
99
  def score(x, y)
107
100
  p = predict(x)
108
- n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
101
+ n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
109
102
  n_hits / y.size.to_f
110
103
  end
111
104
 
112
105
  # Dump marshal data.
113
106
  # @return [Hash] The marshal data about KernelSVC.
114
107
  def marshal_dump
115
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), rng: @rng }
108
+ { params: params, weight_vec: @weight_vec, rng: @rng }
116
109
  end
117
110
 
118
111
  # Load marshal data.
119
112
  # @return [nil]
120
113
  def marshal_load(obj)
121
114
  self.params = obj[:params]
122
- @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
115
+ @weight_vec = obj[:weight_vec]
123
116
  @rng = obj[:rng]
124
117
  nil
125
118
  end
@@ -31,7 +31,7 @@ module SVMKit
31
31
  }.freeze
32
32
 
33
33
  # Return the weight vector for Logistic Regression.
34
- # @return [NMatrix] (shape: [1, n_features])
34
+ # @return [Numo::DFloat] (shape: [n_features])
35
35
  attr_reader :weight_vec
36
36
 
37
37
  # Return the bias term (a.k.a. intercept) for Logistic Regression.
@@ -44,18 +44,21 @@ module SVMKit
44
44
 
45
45
  # Create a new classifier with Logistic Regression by the SGD optimization.
46
46
  #
47
- # @overload new(reg_param: 1.0, max_iter: 100, batch_size: 50, random_seed: 1) -> LogisiticRegression
48
- #
49
- # @param params [Hash] The parameters for Logistic Regression.
50
- # @option params [Float] :reg_param (1.0) The regularization parameter.
51
- # @option params [Boolean] :fit_bias (false) The flag indicating whether to fit the bias term.
52
- # @option params [Float] :bias_scale (1.0) The scale of the bias term.
47
+ # @param reg_param [Float] The regularization parameter.
48
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
49
+ # @param bias_scale [Float] The scale of the bias term.
53
50
  # If fit_bias is true, the feature vector v becomes [v; bias_scale].
54
- # @option params [Integer] :max_iter (100) The maximum number of iterations.
55
- # @option params [Integer] :batch_size (50) The size of the mini batches.
56
- # @option params [Integer] :random_seed (nil) The seed value using to initialize the random generator.
57
- def initialize(params = {})
58
- self.params = DEFAULT_PARAMS.merge(Hash[params.map { |k, v| [k.to_sym, v] }])
51
+ # @param max_iter [Integer] The maximum number of iterations.
52
+ # @param batch_size [Integer] The size of the mini batches.
53
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
54
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 100, batch_size: 50, random_seed: nil)
55
+ self.params = {}
56
+ self.params[:reg_param] = reg_param
57
+ self.params[:fit_bias] = fit_bias
58
+ self.params[:bias_scale] = bias_scale
59
+ self.params[:max_iter] = max_iter
60
+ self.params[:batch_size] = batch_size
61
+ self.params[:random_seed] = random_seed
59
62
  self.params[:random_seed] ||= srand
60
63
  @weight_vec = nil
61
64
  @bias_term = 0.0
@@ -64,21 +67,25 @@ module SVMKit
64
67
 
65
68
  # Fit the model with given training data.
66
69
  #
67
- # @param x [NMatrix] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
68
- # @param y [NMatrix] (shape: [1, n_samples]) The categorical variables (e.g. labels)
70
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
71
+ # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
69
72
  # to be used for fitting the model.
70
73
  # @return [LogisticRegression] The learned classifier itself.
71
74
  def fit(x, y)
72
75
  # Generate binary labels.
73
- negative_label = y.uniq.sort.shift
74
- bin_y = y.to_flat_a.map { |l| l != negative_label ? 1 : 0 }
76
+ negative_label = y.to_a.uniq.sort.shift
77
+ bin_y = y.to_a.map { |l| l != negative_label ? 1 : 0 }
75
78
  # Expand feature vectors for bias term.
76
79
  samples = x
77
- samples = samples.hconcat(NMatrix.ones([x.shape[0], 1]) * params[:bias_scale]) if params[:fit_bias]
80
+ if params[:fit_bias]
81
+ samples = Numo::NArray.hstack(
82
+ [samples, Numo::DFloat.ones([x.shape[0], 1]) * params[:bias_scale]]
83
+ )
84
+ end
78
85
  # Initialize some variables.
79
86
  n_samples, n_features = samples.shape
80
87
  rand_ids = [*0..n_samples - 1].shuffle(random: @rng)
81
- weight_vec = NMatrix.zeros([1, n_features])
88
+ weight_vec = Numo::DFloat.zeros(n_features)
82
89
  # Start optimization.
83
90
  params[:max_iter].times do |t|
84
91
  # random sampling
@@ -86,16 +93,17 @@ module SVMKit
86
93
  rand_ids.concat(subset_ids)
87
94
  # update the weight vector.
88
95
  eta = 1.0 / (params[:reg_param] * (t + 1))
89
- mean_vec = NMatrix.zeros([1, n_features])
96
+ mean_vec = Numo::DFloat.zeros(n_features)
90
97
  subset_ids.each do |n|
91
- z = weight_vec.dot(samples.row(n).transpose)[0]
98
+ z = weight_vec.dot(samples[n, true])
92
99
  coef = bin_y[n] / (1.0 + Math.exp(bin_y[n] * z))
93
- mean_vec += samples.row(n) * coef
100
+ mean_vec += samples[n, true] * coef
94
101
  end
95
102
  mean_vec *= eta / params[:batch_size]
96
103
  weight_vec = weight_vec * (1.0 - eta * params[:reg_param]) + mean_vec
97
104
  # scale the weight vector.
98
- scaler = (1.0 / params[:reg_param]**0.5) / weight_vec.norm2
105
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
106
+ scaler = (1.0 / params[:reg_param]**0.5) / (norm + 1.0e-12)
99
107
  weight_vec *= [1.0, scaler].min
100
108
  end
101
109
  # Store the learned model.
@@ -111,51 +119,51 @@ module SVMKit
111
119
 
112
120
  # Calculate confidence scores for samples.
113
121
  #
114
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to compute the scores.
115
- # @return [NMatrix] (shape: [1, n_samples]) Confidence score per sample.
122
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
123
+ # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
116
124
  def decision_function(x)
117
- w = ((@weight_vec.dot(x.transpose) + @bias_term) * -1.0).exp + 1.0
125
+ w = Numo::NMath.exp(((@weight_vec.dot(x.transpose) + @bias_term) * -1.0)) + 1.0
118
126
  w.map { |v| 1.0 / v }
119
127
  end
120
128
 
121
129
  # Predict class labels for samples.
122
130
  #
123
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the labels.
124
- # @return [NMatrix] (shape: [1, n_samples]) Predicted class label per sample.
131
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
132
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
125
133
  def predict(x)
126
- decision_function(x).map { |v| v >= 0.5 ? 1 : -1 }
134
+ Numo::Int32.cast(decision_function(x).map { |v| v >= 0.5 ? 1 : -1 })
127
135
  end
128
136
 
129
137
  # Predict probability for samples.
130
138
  #
131
- # @param x [NMatrix] (shape: [n_samples, n_features]) The samples to predict the probailities.
132
- # @return [NMatrix] (shape: [1, n_samples]) Predicted probability per sample.
139
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
140
+ # @return [Numo::DFloat] (shape: [n_samples]) Predicted probability per sample.
133
141
  def predict_proba(x)
134
142
  decision_function(x)
135
143
  end
136
144
 
137
145
  # Calculate the mean accuracy of the given testing data.
138
146
  #
139
- # @param x [NMatrix] (shape: [n_samples, n_features]) Testing data.
140
- # @param y [NMatrix] (shape: [1, n_samples]) True labels for testing data.
147
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
148
+ # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
141
149
  # @return [Float] Mean accuracy
142
150
  def score(x, y)
143
151
  p = predict(x)
144
- n_hits = (y.to_flat_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
152
+ n_hits = (y.to_a.map.with_index { |l, n| l == p[n] ? 1 : 0 }).inject(:+)
145
153
  n_hits / y.size.to_f
146
154
  end
147
155
 
148
156
  # Dump marshal data.
149
157
  # @return [Hash] The marshal data about LogisticRegression.
150
158
  def marshal_dump
151
- { params: params, weight_vec: Utils.dump_nmatrix(@weight_vec), bias_term: @bias_term, rng: @rng }
159
+ { params: params, weight_vec: @weight_vec, bias_term: @bias_term, rng: @rng }
152
160
  end
153
161
 
154
162
  # Load marshal data.
155
163
  # @return [nil]
156
164
  def marshal_load(obj)
157
165
  self.params = obj[:params]
158
- @weight_vec = Utils.restore_nmatrix(obj[:weight_vec])
166
+ @weight_vec = obj[:weight_vec]
159
167
  @bias_term = obj[:bias_term]
160
168
  @rng = obj[:rng]
161
169
  nil