rumale 0.22.4 → 0.22.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/rumale.rb +2 -0
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/kernel_approximation/nystroem.rb +29 -9
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
|
4
|
+
data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
|
7
|
+
data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
# 0.22.5
|
2
|
+
- Add transformer class for calculating kernel matrix.
|
3
|
+
- [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
|
4
|
+
- Add classifier class based on Ridge regression.
|
5
|
+
- [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
|
6
|
+
- Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
|
7
|
+
- Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
|
8
|
+
|
1
9
|
# 0.22.4
|
2
10
|
- Add classifier and regressor classes for voting ensemble method.
|
3
11
|
- [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
|
data/lib/rumale.rb
CHANGED
@@ -35,6 +35,7 @@ require 'rumale/kernel_machine/kernel_svc'
|
|
35
35
|
require 'rumale/kernel_machine/kernel_pca'
|
36
36
|
require 'rumale/kernel_machine/kernel_fda'
|
37
37
|
require 'rumale/kernel_machine/kernel_ridge'
|
38
|
+
require 'rumale/kernel_machine/kernel_ridge_classifier'
|
38
39
|
require 'rumale/multiclass/one_vs_rest_classifier'
|
39
40
|
require 'rumale/nearest_neighbors/vp_tree'
|
40
41
|
require 'rumale/nearest_neighbors/k_neighbors_classifier'
|
@@ -103,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
|
|
103
104
|
require 'rumale/preprocessing/ordinal_encoder'
|
104
105
|
require 'rumale/preprocessing/binarizer'
|
105
106
|
require 'rumale/preprocessing/polynomial_features'
|
107
|
+
require 'rumale/preprocessing/kernel_calculator'
|
106
108
|
require 'rumale/model_selection/k_fold'
|
107
109
|
require 'rumale/model_selection/group_k_fold'
|
108
110
|
require 'rumale/model_selection/stratified_k_fold'
|
data/lib/rumale/dataset.rb
CHANGED
@@ -12,22 +12,26 @@ module Rumale
|
|
12
12
|
# Load a dataset with the libsvm file format into Numo::NArray.
|
13
13
|
#
|
14
14
|
# @param filename [String] A path to a dataset file.
|
15
|
+
# @param n_features [Integer/Nil] The number of features of data to load.
|
16
|
+
# If nil is given, it will be detected automatically from given file.
|
15
17
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
16
18
|
# @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
|
17
19
|
#
|
18
20
|
# @return [Array<Numo::NArray>]
|
19
21
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
20
22
|
# and (n_samples) vector for labels or target values.
|
21
|
-
def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
|
23
|
+
def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
|
22
24
|
ftvecs = []
|
23
25
|
labels = []
|
24
|
-
|
26
|
+
n_features_detected = 0
|
25
27
|
CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
|
26
28
|
label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
|
27
29
|
labels.push(label)
|
28
30
|
ftvecs.push(ftvec)
|
29
|
-
|
31
|
+
n_features_detected = max_idx if n_features_detected < max_idx
|
30
32
|
end
|
33
|
+
n_features ||= n_features_detected
|
34
|
+
n_features = [n_features, n_features_detected].max
|
31
35
|
[convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
|
32
36
|
end
|
33
37
|
|
@@ -11,7 +11,7 @@ module Rumale
|
|
11
11
|
# @example
|
12
12
|
# require 'numo/linalg/autoloader'
|
13
13
|
#
|
14
|
-
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
|
15
15
|
# new_training_samples = transformer.fit_transform(training_samples)
|
16
16
|
# new_testing_samples = transformer.transform(testing_samples)
|
17
17
|
#
|
@@ -39,12 +39,15 @@ module Rumale
|
|
39
39
|
|
40
40
|
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
41
|
#
|
42
|
-
# @param kernel [String] The type of kernel
|
43
|
-
# @param gamma [Float] The parameter
|
44
|
-
# @param
|
42
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
43
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
44
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
45
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
46
|
+
# @param n_components [Integer] The number of dimensions of the kernel feature space.
|
45
47
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
-
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
-
|
48
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
|
49
|
+
check_params_string(kernel: kernel)
|
50
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
|
48
51
|
check_params_numeric_or_nil(random_seed: random_seed)
|
49
52
|
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
53
|
@params[:random_seed] ||= srand
|
@@ -56,7 +59,7 @@ module Rumale
|
|
56
59
|
|
57
60
|
# Fit the model with given training data.
|
58
61
|
#
|
59
|
-
# @overload fit(x) ->
|
62
|
+
# @overload fit(x) -> Nystroem
|
60
63
|
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
64
|
# @return [Nystroem] The learned transformer itself.
|
62
65
|
def fit(x, _y = nil)
|
@@ -73,7 +76,7 @@ module Rumale
|
|
73
76
|
@components = x[@component_indices, true]
|
74
77
|
|
75
78
|
# calculate normalizing factor.
|
76
|
-
kernel_mat =
|
79
|
+
kernel_mat = kernel_mat(@components)
|
77
80
|
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
81
|
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
82
|
u = eig_vecs.reverse(1)
|
@@ -98,9 +101,26 @@ module Rumale
|
|
98
101
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
102
|
def transform(x)
|
100
103
|
x = check_convert_sample_array(x)
|
101
|
-
z =
|
104
|
+
z = kernel_mat(x, @components)
|
102
105
|
z.dot(@normalizer)
|
103
106
|
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def kernel_mat(x, y = nil)
|
111
|
+
case @params[:kernel]
|
112
|
+
when 'rbf'
|
113
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
114
|
+
when 'poly'
|
115
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
116
|
+
when 'sigmoid'
|
117
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
118
|
+
when 'linear'
|
119
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
120
|
+
else
|
121
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
122
|
+
end
|
123
|
+
end
|
104
124
|
end
|
105
125
|
end
|
106
126
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_binarizer'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module KernelMachine
|
9
|
+
# KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
|
10
|
+
# It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale'
|
15
|
+
#
|
16
|
+
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
|
17
|
+
# kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
|
18
|
+
# kridge.fit(kernel_mat_train, training_labels)
|
19
|
+
#
|
20
|
+
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
|
21
|
+
# results = kridge.predict(kernel_mat_test)
|
22
|
+
class KernelRidgeClassifier
|
23
|
+
include Base::BaseEstimator
|
24
|
+
include Base::Classifier
|
25
|
+
|
26
|
+
# Return the class labels.
|
27
|
+
# @return [Numo::Int32] (size: n_classes)
|
28
|
+
attr_reader :classes
|
29
|
+
|
30
|
+
# Return the weight vector.
|
31
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
|
32
|
+
attr_reader :weight_vec
|
33
|
+
|
34
|
+
# Create a new classifier with kernel ridge regression.
|
35
|
+
#
|
36
|
+
# @param reg_param [Float/Numo::DFloat] The regularization parameter.
|
37
|
+
def initialize(reg_param: 1.0)
|
38
|
+
@params = {}
|
39
|
+
@params[:reg_param] = reg_param
|
40
|
+
@classes = nil
|
41
|
+
@weight_vec = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
# Fit the model with given training data.
|
45
|
+
#
|
46
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
|
47
|
+
# The kernel matrix of the training data to be used for fitting the model.
|
48
|
+
# @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
|
49
|
+
# @return [KernelRidgeClassifier] The learned classifier itself.
|
50
|
+
def fit(x, y)
|
51
|
+
x = check_convert_sample_array(x)
|
52
|
+
y = check_convert_label_array(y)
|
53
|
+
check_sample_label_size(x, y)
|
54
|
+
raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
|
55
|
+
raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
56
|
+
|
57
|
+
@encoder = Rumale::Preprocessing::LabelBinarizer.new
|
58
|
+
y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
|
59
|
+
@classes = Numo::NArray[*@encoder.classes]
|
60
|
+
|
61
|
+
n_samples = x.shape[0]
|
62
|
+
reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
|
63
|
+
@weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
|
64
|
+
|
65
|
+
self
|
66
|
+
end
|
67
|
+
|
68
|
+
# Calculate confidence scores for samples.
|
69
|
+
#
|
70
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
71
|
+
# The kernel matrix between testing samples and training samples to predict values.
|
72
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) The confidence score per sample.
|
73
|
+
def decision_function(x)
|
74
|
+
x = check_convert_sample_array(x)
|
75
|
+
x.dot(@weight_vec)
|
76
|
+
end
|
77
|
+
|
78
|
+
# Predict class labels for samples.
|
79
|
+
#
|
80
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
81
|
+
# The kernel matrix between testing samples and training samples to predict the labels.
|
82
|
+
# @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
|
83
|
+
def predict(x)
|
84
|
+
x = check_convert_sample_array(x)
|
85
|
+
scores = decision_function(x)
|
86
|
+
n_samples, n_classes = scores.shape
|
87
|
+
label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
|
88
|
+
@classes[label_ids].dup
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Preprocessing
|
9
|
+
# KernelCalculator is a class that calculates the kernel matrix with training data.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
|
13
|
+
# regressor = Rumale::KernelMachine::KernelRidge.new
|
14
|
+
# pipeline = Rumale::Pipeline::Pipeline.new(
|
15
|
+
# steps: { trs: transformer, est: regressor }
|
16
|
+
# )
|
17
|
+
# pipeline.fit(x_train, y_train)
|
18
|
+
# results = pipeline.predict(x_test)
|
19
|
+
class KernelCalculator
|
20
|
+
include Base::BaseEstimator
|
21
|
+
include Base::Transformer
|
22
|
+
|
23
|
+
# Returns the training data for calculating kernel matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Create a new transformer that transforms feature vectors into a kernel matrix.
|
28
|
+
#
|
29
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
30
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
31
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
32
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
33
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
|
34
|
+
check_params_string(kernel: kernel)
|
35
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree)
|
36
|
+
@params = {}
|
37
|
+
@params[:kernel] = kernel
|
38
|
+
@params[:gamma] = gamma
|
39
|
+
@params[:degree] = degree
|
40
|
+
@params[:coef] = coef
|
41
|
+
@components = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
# Fit the model with given training data.
|
45
|
+
#
|
46
|
+
# @overload fit(x) -> KernelCalculator
|
47
|
+
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
48
|
+
# @return [KernelCalculator] The learned transformer itself.
|
49
|
+
def fit(x, _y = nil)
|
50
|
+
x = check_convert_sample_array(x)
|
51
|
+
@components = x.dup
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Fit the model with training data, and then transform them with the learned model.
|
56
|
+
#
|
57
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
58
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
59
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
|
60
|
+
def fit_transform(x, y = nil)
|
61
|
+
x = check_convert_sample_array(x)
|
62
|
+
fit(x, y).transform(x)
|
63
|
+
end
|
64
|
+
|
65
|
+
# Transform the given data with the learned model.
|
66
|
+
#
|
67
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
|
68
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
|
69
|
+
def transform(x)
|
70
|
+
x = check_convert_sample_array(x)
|
71
|
+
kernel_mat(x, @components)
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def kernel_mat(x, y)
|
77
|
+
case @params[:kernel]
|
78
|
+
when 'rbf'
|
79
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
80
|
+
when 'poly'
|
81
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
82
|
+
when 'sigmoid'
|
83
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
84
|
+
when 'linear'
|
85
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
86
|
+
else
|
87
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.
|
4
|
+
version: 0.22.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- lib/rumale/kernel_machine/kernel_fda.rb
|
140
140
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
141
141
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|
142
|
+
- lib/rumale/kernel_machine/kernel_ridge_classifier.rb
|
142
143
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
143
144
|
- lib/rumale/linear_model/base_sgd.rb
|
144
145
|
- lib/rumale/linear_model/elastic_net.rb
|
@@ -183,6 +184,7 @@ files:
|
|
183
184
|
- lib/rumale/pipeline/pipeline.rb
|
184
185
|
- lib/rumale/preprocessing/bin_discretizer.rb
|
185
186
|
- lib/rumale/preprocessing/binarizer.rb
|
187
|
+
- lib/rumale/preprocessing/kernel_calculator.rb
|
186
188
|
- lib/rumale/preprocessing/l1_normalizer.rb
|
187
189
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
188
190
|
- lib/rumale/preprocessing/label_binarizer.rb
|