rumale 0.22.4 → 0.22.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4936b7c7b0ed920383f88743f8eba2e827d586dae471e40a6974dd1fe19342fe
4
- data.tar.gz: 5a33c242b3cd881b0003db5e5f2d77905d0571442eb7494a64dff08262ce0c14
3
+ metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
4
+ data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
5
5
  SHA512:
6
- metadata.gz: b45a243c247610d918eeb6cfbb31c461e5773b5404c989fe7e0b8758e0482d165ea1e0cf1d61642d71233458821e1b92e45eb6ff0d0fcb11080c6c1e9692ef91
7
- data.tar.gz: feddfc807995b08e753b1ad635901f2db8e806e300478a1f6bdb24a5bf1123cb7fbd0ee402da92ddcdd079a8ad653eec4224e22be9d2c6609ea73ea84bc47ca1
6
+ metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
7
+ data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ # 0.22.5
2
+ - Add transformer class for calculating kernel matrix.
3
+ - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
4
+ - Add classifier class based on Ridge regression.
5
+ - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
6
+ - Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
7
+ - Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
8
+
1
9
  # 0.22.4
2
10
  - Add classifier and regressor classes for voting ensemble method.
3
11
  - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
data/lib/rumale.rb CHANGED
@@ -35,6 +35,7 @@ require 'rumale/kernel_machine/kernel_svc'
35
35
  require 'rumale/kernel_machine/kernel_pca'
36
36
  require 'rumale/kernel_machine/kernel_fda'
37
37
  require 'rumale/kernel_machine/kernel_ridge'
38
+ require 'rumale/kernel_machine/kernel_ridge_classifier'
38
39
  require 'rumale/multiclass/one_vs_rest_classifier'
39
40
  require 'rumale/nearest_neighbors/vp_tree'
40
41
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -103,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
103
104
  require 'rumale/preprocessing/ordinal_encoder'
104
105
  require 'rumale/preprocessing/binarizer'
105
106
  require 'rumale/preprocessing/polynomial_features'
107
+ require 'rumale/preprocessing/kernel_calculator'
106
108
  require 'rumale/model_selection/k_fold'
107
109
  require 'rumale/model_selection/group_k_fold'
108
110
  require 'rumale/model_selection/stratified_k_fold'
@@ -12,22 +12,26 @@ module Rumale
12
12
  # Load a dataset with the libsvm file format into Numo::NArray.
13
13
  #
14
14
  # @param filename [String] A path to a dataset file.
15
+ # @param n_features [Integer/Nil] The number of features of data to load.
16
+ # If nil is given, it will be detected automatically from given file.
15
17
  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
16
18
  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
17
19
  #
18
20
  # @return [Array<Numo::NArray>]
19
21
  # Returns array containing the (n_samples x n_features) matrix for feature vectors
20
22
  # and (n_samples) vector for labels or target values.
21
- def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
23
+ def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
22
24
  ftvecs = []
23
25
  labels = []
24
- n_features = 0
26
+ n_features_detected = 0
25
27
  CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
26
28
  label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
27
29
  labels.push(label)
28
30
  ftvecs.push(ftvec)
29
- n_features = max_idx if n_features < max_idx
31
+ n_features_detected = max_idx if n_features_detected < max_idx
30
32
  end
33
+ n_features ||= n_features_detected
34
+ n_features = [n_features, n_features_detected].max
31
35
  [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
32
36
  end
33
37
 
@@ -11,7 +11,7 @@ module Rumale
11
11
  # @example
12
12
  # require 'numo/linalg/autoloader'
13
13
  #
14
- # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
15
15
  # new_training_samples = transformer.fit_transform(training_samples)
16
16
  # new_testing_samples = transformer.transform(testing_samples)
17
17
  #
@@ -39,12 +39,15 @@ module Rumale
39
39
 
40
40
  # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
41
  #
42
- # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
- # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
42
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
43
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
44
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
45
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
46
+ # @param n_components [Integer] The number of dimensions of the kernel feature space.
45
47
  # @param random_seed [Integer] The seed value used to initialize the random generator.
46
- def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
- check_params_numeric(gamma: gamma, n_components: n_components)
48
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
49
+ check_params_string(kernel: kernel)
50
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
48
51
  check_params_numeric_or_nil(random_seed: random_seed)
49
52
  @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
53
  @params[:random_seed] ||= srand
@@ -56,7 +59,7 @@ module Rumale
56
59
 
57
60
  # Fit the model with given training data.
58
61
  #
59
- # @overload fit(x) -> RBF
62
+ # @overload fit(x) -> Nystroem
60
63
  # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
64
  # @return [Nystroem] The learned transformer itself.
62
65
  def fit(x, _y = nil)
@@ -73,7 +76,7 @@ module Rumale
73
76
  @components = x[@component_indices, true]
74
77
 
75
78
  # calculate normalizing factor.
76
- kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
79
+ kernel_mat = kernel_mat(@components)
77
80
  eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
81
  la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
82
  u = eig_vecs.reverse(1)
@@ -98,9 +101,26 @@ module Rumale
98
101
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
102
  def transform(x)
100
103
  x = check_convert_sample_array(x)
101
- z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
104
+ z = kernel_mat(x, @components)
102
105
  z.dot(@normalizer)
103
106
  end
107
+
108
+ private
109
+
110
+ def kernel_mat(x, y = nil)
111
+ case @params[:kernel]
112
+ when 'rbf'
113
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
114
+ when 'poly'
115
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
116
+ when 'sigmoid'
117
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
118
+ when 'linear'
119
+ Rumale::PairwiseMetric.linear_kernel(x, y)
120
+ else
121
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
122
+ end
123
+ end
104
124
  end
105
125
  end
106
126
  end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_binarizer'
6
+
7
+ module Rumale
8
+ module KernelMachine
9
+ # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
10
+ # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
11
+ #
12
+ # @example
13
+ # require 'numo/linalg/autoloader'
14
+ # require 'rumale'
15
+ #
16
+ # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
17
+ # kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
18
+ # kridge.fit(kernel_mat_train, training_labels)
19
+ #
20
+ # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
21
+ # results = kridge.predict(kernel_mat_test)
22
+ class KernelRidgeClassifier
23
+ include Base::BaseEstimator
24
+ include Base::Classifier
25
+
26
+ # Return the class labels.
27
+ # @return [Numo::Int32] (size: n_classes)
28
+ attr_reader :classes
29
+
30
+ # Return the weight vector.
31
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
32
+ attr_reader :weight_vec
33
+
34
+ # Create a new classifier based on kernel ridge regression.
35
+ #
36
+ # @param reg_param [Float/Numo::DFloat] The regularization parameter.
37
+ def initialize(reg_param: 1.0)
38
+ @params = {}
39
+ @params[:reg_param] = reg_param
40
+ @classes = nil
41
+ @weight_vec = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
47
+ # The kernel matrix of the training data to be used for fitting the model.
48
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
49
+ # @return [KernelRidgeClassifier] The learned classifier itself.
50
+ def fit(x, y)
51
+ x = check_convert_sample_array(x)
52
+ y = check_convert_label_array(y)
53
+ check_sample_label_size(x, y)
54
+ raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
55
+ raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
56
+
57
+ @encoder = Rumale::Preprocessing::LabelBinarizer.new
58
+ y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
59
+ @classes = Numo::NArray[*@encoder.classes]
60
+
61
+ n_samples = x.shape[0]
62
+ reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
63
+ @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
64
+
65
+ self
66
+ end
67
+
68
+ # Calculate confidence scores for samples.
69
+ #
70
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
71
+ # The kernel matrix between testing samples and training samples to predict values.
72
+ # @return [Numo::DFloat] (shape: [n_samples, n_classes]) The confidence score per sample.
73
+ def decision_function(x)
74
+ x = check_convert_sample_array(x)
75
+ x.dot(@weight_vec)
76
+ end
77
+
78
+ # Predict class labels for samples.
79
+ #
80
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
81
+ # The kernel matrix between testing samples and training samples to predict the labels.
82
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
83
+ def predict(x)
84
+ x = check_convert_sample_array(x)
85
+ scores = decision_function(x)
86
+ n_samples, n_classes = scores.shape
87
+ label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
88
+ @classes[label_ids].dup
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # KernelCalculator is a class that calculates the kernel matrix with training data.
10
+ #
11
+ # @example
12
+ # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
13
+ # regressor = Rumale::KernelMachine::KernelRidge.new
14
+ # pipeline = Rumale::Pipeline::Pipeline.new(
15
+ # steps: { trs: transformer, est: regressor }
16
+ # )
17
+ # pipeline.fit(x_train, y_train)
18
+ # results = pipeline.predict(x_test)
19
+ class KernelCalculator
20
+ include Base::BaseEstimator
21
+ include Base::Transformer
22
+
23
+ # Returns the training data for calculating kernel matrix.
24
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
25
+ attr_reader :components
26
+
27
+ # Create a new transformer that transforms feature vectors into a kernel matrix.
28
+ #
29
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
30
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
31
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
32
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
33
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
34
+ check_params_string(kernel: kernel)
35
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree)
36
+ @params = {}
37
+ @params[:kernel] = kernel
38
+ @params[:gamma] = gamma
39
+ @params[:degree] = degree
40
+ @params[:coef] = coef
41
+ @components = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @overload fit(x) -> KernelCalculator
47
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
48
+ # @return [KernelCalculator] The learned transformer itself.
49
+ def fit(x, _y = nil)
50
+ x = check_convert_sample_array(x)
51
+ @components = x.dup
52
+ self
53
+ end
54
+
55
+ # Fit the model with training data, and then transform them with the learned model.
56
+ #
57
+ # @overload fit_transform(x) -> Numo::DFloat
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
59
+ # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
60
+ def fit_transform(x, y = nil)
61
+ x = check_convert_sample_array(x)
62
+ fit(x, y).transform(x)
63
+ end
64
+
65
+ # Transform the given data with the learned model.
66
+ #
67
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
68
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
69
+ def transform(x)
70
+ x = check_convert_sample_array(x)
71
+ kernel_mat(x, @components)
72
+ end
73
+
74
+ private
75
+
76
+ def kernel_mat(x, y)
77
+ case @params[:kernel]
78
+ when 'rbf'
79
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
80
+ when 'poly'
81
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
82
+ when 'sigmoid'
83
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
84
+ when 'linear'
85
+ Rumale::PairwiseMetric.linear_kernel(x, y)
86
+ else
87
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.4'
6
+ VERSION = '0.22.5'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.4
4
+ version: 0.22.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-22 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -139,6 +139,7 @@ files:
139
139
  - lib/rumale/kernel_machine/kernel_fda.rb
140
140
  - lib/rumale/kernel_machine/kernel_pca.rb
141
141
  - lib/rumale/kernel_machine/kernel_ridge.rb
142
+ - lib/rumale/kernel_machine/kernel_ridge_classifier.rb
142
143
  - lib/rumale/kernel_machine/kernel_svc.rb
143
144
  - lib/rumale/linear_model/base_sgd.rb
144
145
  - lib/rumale/linear_model/elastic_net.rb
@@ -183,6 +184,7 @@ files:
183
184
  - lib/rumale/pipeline/pipeline.rb
184
185
  - lib/rumale/preprocessing/bin_discretizer.rb
185
186
  - lib/rumale/preprocessing/binarizer.rb
187
+ - lib/rumale/preprocessing/kernel_calculator.rb
186
188
  - lib/rumale/preprocessing/l1_normalizer.rb
187
189
  - lib/rumale/preprocessing/l2_normalizer.rb
188
190
  - lib/rumale/preprocessing/label_binarizer.rb