rumale 0.22.4 → 0.22.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4936b7c7b0ed920383f88743f8eba2e827d586dae471e40a6974dd1fe19342fe
4
- data.tar.gz: 5a33c242b3cd881b0003db5e5f2d77905d0571442eb7494a64dff08262ce0c14
3
+ metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
4
+ data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
5
5
  SHA512:
6
- metadata.gz: b45a243c247610d918eeb6cfbb31c461e5773b5404c989fe7e0b8758e0482d165ea1e0cf1d61642d71233458821e1b92e45eb6ff0d0fcb11080c6c1e9692ef91
7
- data.tar.gz: feddfc807995b08e753b1ad635901f2db8e806e300478a1f6bdb24a5bf1123cb7fbd0ee402da92ddcdd079a8ad653eec4224e22be9d2c6609ea73ea84bc47ca1
6
+ metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
7
+ data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ # 0.22.5
2
+ - Add transformer class for calculating kernel matrix.
3
+ - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
4
+ - Add classifier class based on Ridge regression.
5
+ - [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
6
+ - Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
7
+ - Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
8
+
1
9
  # 0.22.4
2
10
  - Add classifier and regressor classes for voting ensemble method.
3
11
  - [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
data/lib/rumale.rb CHANGED
@@ -35,6 +35,7 @@ require 'rumale/kernel_machine/kernel_svc'
35
35
  require 'rumale/kernel_machine/kernel_pca'
36
36
  require 'rumale/kernel_machine/kernel_fda'
37
37
  require 'rumale/kernel_machine/kernel_ridge'
38
+ require 'rumale/kernel_machine/kernel_ridge_classifier'
38
39
  require 'rumale/multiclass/one_vs_rest_classifier'
39
40
  require 'rumale/nearest_neighbors/vp_tree'
40
41
  require 'rumale/nearest_neighbors/k_neighbors_classifier'
@@ -103,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
103
104
  require 'rumale/preprocessing/ordinal_encoder'
104
105
  require 'rumale/preprocessing/binarizer'
105
106
  require 'rumale/preprocessing/polynomial_features'
107
+ require 'rumale/preprocessing/kernel_calculator'
106
108
  require 'rumale/model_selection/k_fold'
107
109
  require 'rumale/model_selection/group_k_fold'
108
110
  require 'rumale/model_selection/stratified_k_fold'
@@ -12,22 +12,26 @@ module Rumale
12
12
  # Load a dataset with the libsvm file format into Numo::NArray.
13
13
  #
14
14
  # @param filename [String] A path to a dataset file.
15
+ # @param n_features [Integer/Nil] The number of features of data to load.
16
+ # If nil is given, it will be detected automatically from given file.
15
17
  # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
16
18
  # @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
17
19
  #
18
20
  # @return [Array<Numo::NArray>]
19
21
  # Returns array containing the (n_samples x n_features) matrix for feature vectors
20
22
  # and (n_samples) vector for labels or target values.
21
- def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
23
+ def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
22
24
  ftvecs = []
23
25
  labels = []
24
- n_features = 0
26
+ n_features_detected = 0
25
27
  CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
26
28
  label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
27
29
  labels.push(label)
28
30
  ftvecs.push(ftvec)
29
- n_features = max_idx if n_features < max_idx
31
+ n_features_detected = max_idx if n_features_detected < max_idx
30
32
  end
33
+ n_features ||= n_features_detected
34
+ n_features = [n_features, n_features_detected].max
31
35
  [convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
32
36
  end
33
37
 
@@ -11,7 +11,7 @@ module Rumale
11
11
  # @example
12
12
  # require 'numo/linalg/autoloader'
13
13
  #
14
- # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
15
15
  # new_training_samples = transformer.fit_transform(training_samples)
16
16
  # new_testing_samples = transformer.transform(testing_samples)
17
17
  #
@@ -39,12 +39,15 @@ module Rumale
39
39
 
40
40
  # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
41
  #
42
- # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
- # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
- # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
42
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
43
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
44
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
45
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
46
+ # @param n_components [Integer] The number of dimensions of the kernel feature space.
45
47
  # @param random_seed [Integer] The seed value used to initialize the random generator.
46
- def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
- check_params_numeric(gamma: gamma, n_components: n_components)
48
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
49
+ check_params_string(kernel: kernel)
50
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
48
51
  check_params_numeric_or_nil(random_seed: random_seed)
49
52
  @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
53
  @params[:random_seed] ||= srand
@@ -56,7 +59,7 @@ module Rumale
56
59
 
57
60
  # Fit the model with given training data.
58
61
  #
59
- # @overload fit(x) -> RBF
62
+ # @overload fit(x) -> Nystroem
60
63
  # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
64
  # @return [Nystroem] The learned transformer itself.
62
65
  def fit(x, _y = nil)
@@ -73,7 +76,7 @@ module Rumale
73
76
  @components = x[@component_indices, true]
74
77
 
75
78
  # calculate normalizing factor.
76
- kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
79
+ kernel_mat = kernel_mat(@components)
77
80
  eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
81
  la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
82
  u = eig_vecs.reverse(1)
@@ -98,9 +101,26 @@ module Rumale
98
101
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
102
  def transform(x)
100
103
  x = check_convert_sample_array(x)
101
- z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
104
+ z = kernel_mat(x, @components)
102
105
  z.dot(@normalizer)
103
106
  end
107
+
108
+ private
109
+
110
+ def kernel_mat(x, y = nil)
111
+ case @params[:kernel]
112
+ when 'rbf'
113
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
114
+ when 'poly'
115
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
116
+ when 'sigmoid'
117
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
118
+ when 'linear'
119
+ Rumale::PairwiseMetric.linear_kernel(x, y)
120
+ else
121
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
122
+ end
123
+ end
104
124
  end
105
125
  end
106
126
  end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/classifier'
5
+ require 'rumale/preprocessing/label_binarizer'
6
+
7
+ module Rumale
8
+ module KernelMachine
9
+ # KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
10
+ # It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
11
+ #
12
+ # @example
13
+ # require 'numo/linalg/autoloader'
14
+ # require 'rumale'
15
+ #
16
+ # kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
17
+ # kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
18
+ # kridge.fit(kernel_mat_train, training_labels)
19
+ #
20
+ # kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
21
+ # results = kridge.predict(kernel_mat_test)
22
+ class KernelRidgeClassifier
23
+ include Base::BaseEstimator
24
+ include Base::Classifier
25
+
26
+ # Return the class labels.
27
+ # @return [Numo::Int32] (size: n_classes)
28
+ attr_reader :classes
29
+
30
+ # Return the weight vector.
31
+ # @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
32
+ attr_reader :weight_vec
33
+
34
+ # Create a new classifier with kernel ridge regression.
35
+ #
36
+ # @param reg_param [Float/Numo::DFloat] The regularization parameter.
37
+ def initialize(reg_param: 1.0)
38
+ @params = {}
39
+ @params[:reg_param] = reg_param
40
+ @classes = nil
41
+ @weight_vec = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
47
+ # The kernel matrix of the training data to be used for fitting the model.
48
+ # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
49
+ # @return [KernelRidgeClassifier] The learned classifier itself.
50
+ def fit(x, y)
51
+ x = check_convert_sample_array(x)
52
+ y = check_convert_label_array(y)
53
+ check_sample_label_size(x, y)
54
+ raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
55
+ raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
56
+
57
+ @encoder = Rumale::Preprocessing::LabelBinarizer.new
58
+ y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
59
+ @classes = Numo::NArray[*@encoder.classes]
60
+
61
+ n_samples = x.shape[0]
62
+ reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
63
+ @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
64
+
65
+ self
66
+ end
67
+
68
+ # Calculate confidence scores for samples.
69
+ #
70
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
71
+ # The kernel matrix between testing samples and training samples to predict values.
72
+ # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) The confidence score per sample.
73
+ def decision_function(x)
74
+ x = check_convert_sample_array(x)
75
+ x.dot(@weight_vec)
76
+ end
77
+
78
+ # Predict class labels for samples.
79
+ #
80
+ # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
81
+ # The kernel matrix between testing samples and training samples to predict the labels.
82
+ # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
83
+ def predict(x)
84
+ x = check_convert_sample_array(x)
85
+ scores = decision_function(x)
86
+ n_samples, n_classes = scores.shape
87
+ label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
88
+ @classes[label_ids].dup
89
+ end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Preprocessing
9
+ # KernelCalculator is a class that calculates the kernel matrix with training data.
10
+ #
11
+ # @example
12
+ # transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
13
+ # regressor = Rumale::KernelMachine::KernelRidge.new
14
+ # pipeline = Rumale::Pipeline::Pipeline.new(
15
+ # steps: { trs: transformer, est: regressor }
16
+ # )
17
+ # pipeline.fit(x_train, y_train)
18
+ # results = pipeline.predict(x_test)
19
+ class KernelCalculator
20
+ include Base::BaseEstimator
21
+ include Base::Transformer
22
+
23
+ # Returns the training data for calculating kernel matrix.
24
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
25
+ attr_reader :components
26
+
27
+ # Create a new transformer that transforms feature vectors into a kernel matrix.
28
+ #
29
+ # @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
30
+ # @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
31
+ # @param degree [Integer] The degree parameter in polynomial kernel function.
32
+ # @param coef [Float] The coefficient in poly/sigmoid kernel function.
33
+ def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
34
+ check_params_string(kernel: kernel)
35
+ check_params_numeric(gamma: gamma, coef: coef, degree: degree)
36
+ @params = {}
37
+ @params[:kernel] = kernel
38
+ @params[:gamma] = gamma
39
+ @params[:degree] = degree
40
+ @params[:coef] = coef
41
+ @components = nil
42
+ end
43
+
44
+ # Fit the model with given training data.
45
+ #
46
+ # @overload fit(x) -> KernelCalculator
47
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
48
+ # @return [KernelCalculator] The learned transformer itself.
49
+ def fit(x, _y = nil)
50
+ x = check_convert_sample_array(x)
51
+ @components = x.dup
52
+ self
53
+ end
54
+
55
+ # Fit the model with training data, and then transform them with the learned model.
56
+ #
57
+ # @overload fit_transform(x) -> Numo::DFloat
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
59
+ # @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
60
+ def fit_transform(x, y = nil)
61
+ x = check_convert_sample_array(x)
62
+ fit(x, y).transform(x)
63
+ end
64
+
65
+ # Transform the given data with the learned model.
66
+ #
67
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
68
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
69
+ def transform(x)
70
+ x = check_convert_sample_array(x)
71
+ kernel_mat(x, @components)
72
+ end
73
+
74
+ private
75
+
76
+ def kernel_mat(x, y)
77
+ case @params[:kernel]
78
+ when 'rbf'
79
+ Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
80
+ when 'poly'
81
+ Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
82
+ when 'sigmoid'
83
+ Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
84
+ when 'linear'
85
+ Rumale::PairwiseMetric.linear_kernel(x, y)
86
+ else
87
+ raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
88
+ end
89
+ end
90
+ end
91
+ end
92
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.4'
6
+ VERSION = '0.22.5'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.4
4
+ version: 0.22.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-22 00:00:00.000000000 Z
11
+ date: 2021-03-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -139,6 +139,7 @@ files:
139
139
  - lib/rumale/kernel_machine/kernel_fda.rb
140
140
  - lib/rumale/kernel_machine/kernel_pca.rb
141
141
  - lib/rumale/kernel_machine/kernel_ridge.rb
142
+ - lib/rumale/kernel_machine/kernel_ridge_classifier.rb
142
143
  - lib/rumale/kernel_machine/kernel_svc.rb
143
144
  - lib/rumale/linear_model/base_sgd.rb
144
145
  - lib/rumale/linear_model/elastic_net.rb
@@ -183,6 +184,7 @@ files:
183
184
  - lib/rumale/pipeline/pipeline.rb
184
185
  - lib/rumale/preprocessing/bin_discretizer.rb
185
186
  - lib/rumale/preprocessing/binarizer.rb
187
+ - lib/rumale/preprocessing/kernel_calculator.rb
186
188
  - lib/rumale/preprocessing/l1_normalizer.rb
187
189
  - lib/rumale/preprocessing/l2_normalizer.rb
188
190
  - lib/rumale/preprocessing/label_binarizer.rb