rumale 0.22.4 → 0.22.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/rumale.rb +2 -0
- data/lib/rumale/dataset.rb +7 -3
- data/lib/rumale/kernel_approximation/nystroem.rb +29 -9
- data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +92 -0
- data/lib/rumale/preprocessing/kernel_calculator.rb +92 -0
- data/lib/rumale/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '058078489d3ff66d67432e1418ae786292c263e05e75b6703fb5a7e65e88bd46'
|
4
|
+
data.tar.gz: bd7ed9b223e0cd0074ffdd3e521b01c195f82909013c93f4736ab338d5920c96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 79ce4715a503b1b5a618526832adad5912daac72af3e8f1892ff2df14b7695e546419d6f85e5ea735abd2ea06da649a763f96c82b95eee75341934fa65fce93e
|
7
|
+
data.tar.gz: 5948583ec6c5ca10b320e09c447f9cc1e244dd3bfbf95800338dba2eb7e1b46a342d44020fa24c26a7161786feb63c82b0677654e4aa087c311337a606880a22
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
# 0.22.5
|
2
|
+
- Add transformer class for calculating kernel matrix.
|
3
|
+
- [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
|
4
|
+
- Add classifier class based on Ridge regression.
|
5
|
+
- [KernelRidgeClassifier](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidgeClassifier.html)
|
6
|
+
- Add supported kernel functions to [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html).
|
7
|
+
- Add parameter for specifying the number of features to [load_libsvm_file](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#load_libsvm_file-class_method).
|
8
|
+
|
1
9
|
# 0.22.4
|
2
10
|
- Add classifier and regressor classes for voting ensemble method.
|
3
11
|
- [VotingClassifier](https://yoshoku.github.io/rumale/doc/Rumale/Ensemble/VotingClassifier.html)
|
data/lib/rumale.rb
CHANGED
@@ -35,6 +35,7 @@ require 'rumale/kernel_machine/kernel_svc'
|
|
35
35
|
require 'rumale/kernel_machine/kernel_pca'
|
36
36
|
require 'rumale/kernel_machine/kernel_fda'
|
37
37
|
require 'rumale/kernel_machine/kernel_ridge'
|
38
|
+
require 'rumale/kernel_machine/kernel_ridge_classifier'
|
38
39
|
require 'rumale/multiclass/one_vs_rest_classifier'
|
39
40
|
require 'rumale/nearest_neighbors/vp_tree'
|
40
41
|
require 'rumale/nearest_neighbors/k_neighbors_classifier'
|
@@ -103,6 +104,7 @@ require 'rumale/preprocessing/one_hot_encoder'
|
|
103
104
|
require 'rumale/preprocessing/ordinal_encoder'
|
104
105
|
require 'rumale/preprocessing/binarizer'
|
105
106
|
require 'rumale/preprocessing/polynomial_features'
|
107
|
+
require 'rumale/preprocessing/kernel_calculator'
|
106
108
|
require 'rumale/model_selection/k_fold'
|
107
109
|
require 'rumale/model_selection/group_k_fold'
|
108
110
|
require 'rumale/model_selection/stratified_k_fold'
|
data/lib/rumale/dataset.rb
CHANGED
@@ -12,22 +12,26 @@ module Rumale
|
|
12
12
|
# Load a dataset with the libsvm file format into Numo::NArray.
|
13
13
|
#
|
14
14
|
# @param filename [String] A path to a dataset file.
|
15
|
+
# @param n_features [Integer/Nil] The number of features of data to load.
|
16
|
+
# If nil is given, it will be detected automatically from given file.
|
15
17
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
16
18
|
# @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
|
17
19
|
#
|
18
20
|
# @return [Array<Numo::NArray>]
|
19
21
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
20
22
|
# and (n_samples) vector for labels or target values.
|
21
|
-
def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
|
23
|
+
def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
|
22
24
|
ftvecs = []
|
23
25
|
labels = []
|
24
|
-
|
26
|
+
n_features_detected = 0
|
25
27
|
CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
|
26
28
|
label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
|
27
29
|
labels.push(label)
|
28
30
|
ftvecs.push(ftvec)
|
29
|
-
|
31
|
+
n_features_detected = max_idx if n_features_detected < max_idx
|
30
32
|
end
|
33
|
+
n_features ||= n_features_detected
|
34
|
+
n_features = [n_features, n_features_detected].max
|
31
35
|
[convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
|
32
36
|
end
|
33
37
|
|
@@ -11,7 +11,7 @@ module Rumale
|
|
11
11
|
# @example
|
12
12
|
# require 'numo/linalg/autoloader'
|
13
13
|
#
|
14
|
-
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(kernel: 'rbf', gamma: 1, n_components: 128, random_seed: 1)
|
15
15
|
# new_training_samples = transformer.fit_transform(training_samples)
|
16
16
|
# new_testing_samples = transformer.transform(testing_samples)
|
17
17
|
#
|
@@ -39,12 +39,15 @@ module Rumale
|
|
39
39
|
|
40
40
|
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
41
|
#
|
42
|
-
# @param kernel [String] The type of kernel
|
43
|
-
# @param gamma [Float] The parameter
|
44
|
-
# @param
|
42
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
43
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
44
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
45
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
46
|
+
# @param n_components [Integer] The number of dimensions of the kernel feature space.
|
45
47
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
-
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
-
|
48
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1, n_components: 100, random_seed: nil)
|
49
|
+
check_params_string(kernel: kernel)
|
50
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree, n_components: n_components)
|
48
51
|
check_params_numeric_or_nil(random_seed: random_seed)
|
49
52
|
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
53
|
@params[:random_seed] ||= srand
|
@@ -56,7 +59,7 @@ module Rumale
|
|
56
59
|
|
57
60
|
# Fit the model with given training data.
|
58
61
|
#
|
59
|
-
# @overload fit(x) ->
|
62
|
+
# @overload fit(x) -> Nystroem
|
60
63
|
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
64
|
# @return [Nystroem] The learned transformer itself.
|
62
65
|
def fit(x, _y = nil)
|
@@ -73,7 +76,7 @@ module Rumale
|
|
73
76
|
@components = x[@component_indices, true]
|
74
77
|
|
75
78
|
# calculate normalizing factor.
|
76
|
-
kernel_mat =
|
79
|
+
kernel_mat = kernel_mat(@components)
|
77
80
|
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
81
|
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
82
|
u = eig_vecs.reverse(1)
|
@@ -98,9 +101,26 @@ module Rumale
|
|
98
101
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
102
|
def transform(x)
|
100
103
|
x = check_convert_sample_array(x)
|
101
|
-
z =
|
104
|
+
z = kernel_mat(x, @components)
|
102
105
|
z.dot(@normalizer)
|
103
106
|
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def kernel_mat(x, y = nil)
|
111
|
+
case @params[:kernel]
|
112
|
+
when 'rbf'
|
113
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
114
|
+
when 'poly'
|
115
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
116
|
+
when 'sigmoid'
|
117
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
118
|
+
when 'linear'
|
119
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
120
|
+
else
|
121
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
122
|
+
end
|
123
|
+
end
|
104
124
|
end
|
105
125
|
end
|
106
126
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/classifier'
|
5
|
+
require 'rumale/preprocessing/label_binarizer'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module KernelMachine
|
9
|
+
# KernelRidgeClassifier is a class that implements a classifier based on kernel ridge regression.
|
10
|
+
# It learns a classifier by converting labels to target values { -1, 1 } and performing kernel ridge regression.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale'
|
15
|
+
#
|
16
|
+
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
|
17
|
+
# kridge = Rumale::KernelMachine::KernelRidgeClassifier.new(reg_param: 0.5)
|
18
|
+
# kridge.fit(kernel_mat_train, training_labels)
|
19
|
+
#
|
20
|
+
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
|
21
|
+
# results = kridge.predict(kernel_mat_test)
|
22
|
+
class KernelRidgeClassifier
|
23
|
+
include Base::BaseEstimator
|
24
|
+
include Base::Classifier
|
25
|
+
|
26
|
+
# Return the class labels.
|
27
|
+
# @return [Numo::Int32] (size: n_classes)
|
28
|
+
attr_reader :classes
|
29
|
+
|
30
|
+
# Return the weight vector.
|
31
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_classes])
|
32
|
+
attr_reader :weight_vec
|
33
|
+
|
34
|
+
# Create a new classifier based on kernel ridge regression.
|
35
|
+
#
|
36
|
+
# @param reg_param [Float/Numo::DFloat] The regularization parameter.
|
37
|
+
def initialize(reg_param: 1.0)
|
38
|
+
@params = {}
|
39
|
+
@params[:reg_param] = reg_param
|
40
|
+
@classes = nil
|
41
|
+
@weight_vec = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
# Fit the model with given training data.
|
45
|
+
#
|
46
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
|
47
|
+
# The kernel matrix of the training data to be used for fitting the model.
|
48
|
+
# @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
|
49
|
+
# @return [KernelRidgeClassifier] The learned classifier itself.
|
50
|
+
def fit(x, y)
|
51
|
+
x = check_convert_sample_array(x)
|
52
|
+
y = check_convert_label_array(y)
|
53
|
+
check_sample_label_size(x, y)
|
54
|
+
raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
|
55
|
+
raise 'KernelRidgeClassifier#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
56
|
+
|
57
|
+
@encoder = Rumale::Preprocessing::LabelBinarizer.new
|
58
|
+
y_encoded = Numo::DFloat.cast(@encoder.fit_transform(y)) * 2 - 1
|
59
|
+
@classes = Numo::NArray[*@encoder.classes]
|
60
|
+
|
61
|
+
n_samples = x.shape[0]
|
62
|
+
reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
|
63
|
+
@weight_vec = Numo::Linalg.solve(reg_kernel_mat, y_encoded, driver: 'sym')
|
64
|
+
|
65
|
+
self
|
66
|
+
end
|
67
|
+
|
68
|
+
# Calculate confidence scores for samples.
|
69
|
+
#
|
70
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
71
|
+
# The kernel matrix between testing samples and training samples to predict values.
|
72
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) The confidence score per sample.
|
73
|
+
def decision_function(x)
|
74
|
+
x = check_convert_sample_array(x)
|
75
|
+
x.dot(@weight_vec)
|
76
|
+
end
|
77
|
+
|
78
|
+
# Predict class labels for samples.
|
79
|
+
#
|
80
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
81
|
+
# The kernel matrix between testing samples and training samples to predict the labels.
|
82
|
+
# @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
|
83
|
+
def predict(x)
|
84
|
+
x = check_convert_sample_array(x)
|
85
|
+
scores = decision_function(x)
|
86
|
+
n_samples, n_classes = scores.shape
|
87
|
+
label_ids = scores.max_index(axis: 1) - Numo::Int32.new(n_samples).seq * n_classes
|
88
|
+
@classes[label_ids].dup
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Preprocessing
|
9
|
+
# KernelCalculator is a class that calculates the kernel matrix with training data.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# transformer = Rumale::Preprocessing::KernelCalculator.new(kernel: 'rbf', gamma: 0.5)
|
13
|
+
# regressor = Rumale::KernelMachine::KernelRidge.new
|
14
|
+
# pipeline = Rumale::Pipeline::Pipeline.new(
|
15
|
+
# steps: { trs: transformer, est: regressor }
|
16
|
+
# )
|
17
|
+
# pipeline.fit(x_train, y_train)
|
18
|
+
# results = pipeline.predict(x_test)
|
19
|
+
class KernelCalculator
|
20
|
+
include Base::BaseEstimator
|
21
|
+
include Base::Transformer
|
22
|
+
|
23
|
+
# Returns the training data for calculating kernel matrix.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Create a new transformer that transforms feature vectors into a kernel matrix.
|
28
|
+
#
|
29
|
+
# @param kernel [String] The type of kernel function ('rbf', 'linear', 'poly', and 'sigmoid').
|
30
|
+
# @param gamma [Float] The gamma parameter in rbf/poly/sigmoid kernel function.
|
31
|
+
# @param degree [Integer] The degree parameter in polynomial kernel function.
|
32
|
+
# @param coef [Float] The coefficient in poly/sigmoid kernel function.
|
33
|
+
def initialize(kernel: 'rbf', gamma: 1, degree: 3, coef: 1)
|
34
|
+
check_params_string(kernel: kernel)
|
35
|
+
check_params_numeric(gamma: gamma, coef: coef, degree: degree)
|
36
|
+
@params = {}
|
37
|
+
@params[:kernel] = kernel
|
38
|
+
@params[:gamma] = gamma
|
39
|
+
@params[:degree] = degree
|
40
|
+
@params[:coef] = coef
|
41
|
+
@components = nil
|
42
|
+
end
|
43
|
+
|
44
|
+
# Fit the model with given training data.
|
45
|
+
#
|
46
|
+
# @overload fit(x) -> KernelCalculator
|
47
|
+
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
48
|
+
# @return [KernelCalculator] The learned transformer itself.
|
49
|
+
def fit(x, _y = nil)
|
50
|
+
x = check_convert_sample_array(x)
|
51
|
+
@components = x.dup
|
52
|
+
self
|
53
|
+
end
|
54
|
+
|
55
|
+
# Fit the model with training data, and then transform them with the learned model.
|
56
|
+
#
|
57
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
58
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for calculating kernel matrix.
|
59
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_samples]) The calculated kernel matrix.
|
60
|
+
def fit_transform(x, y = nil)
|
61
|
+
x = check_convert_sample_array(x)
|
62
|
+
fit(x, y).transform(x)
|
63
|
+
end
|
64
|
+
|
65
|
+
# Transform the given data with the learned model.
|
66
|
+
#
|
67
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be used for calculating kernel matrix with the training data.
|
68
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The calculated kernel matrix.
|
69
|
+
def transform(x)
|
70
|
+
x = check_convert_sample_array(x)
|
71
|
+
kernel_mat(x, @components)
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def kernel_mat(x, y)
|
77
|
+
case @params[:kernel]
|
78
|
+
when 'rbf'
|
79
|
+
Rumale::PairwiseMetric.rbf_kernel(x, y, @params[:gamma])
|
80
|
+
when 'poly'
|
81
|
+
Rumale::PairwiseMetric.polynomial_kernel(x, y, @params[:degree], @params[:gamma], @params[:coef])
|
82
|
+
when 'sigmoid'
|
83
|
+
Rumale::PairwiseMetric.sigmoid_kernel(x, y, @params[:gamma], @params[:coef])
|
84
|
+
when 'linear'
|
85
|
+
Rumale::PairwiseMetric.linear_kernel(x, y)
|
86
|
+
else
|
87
|
+
raise ArgumentError, "Expect kernel parameter to be given 'rbf', 'linear', 'poly', or 'sigmoid'."
|
88
|
+
end
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.4
|
4
|
+
version: 0.22.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- lib/rumale/kernel_machine/kernel_fda.rb
|
140
140
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
141
141
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|
142
|
+
- lib/rumale/kernel_machine/kernel_ridge_classifier.rb
|
142
143
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
143
144
|
- lib/rumale/linear_model/base_sgd.rb
|
144
145
|
- lib/rumale/linear_model/elastic_net.rb
|
@@ -183,6 +184,7 @@ files:
|
|
183
184
|
- lib/rumale/pipeline/pipeline.rb
|
184
185
|
- lib/rumale/preprocessing/bin_discretizer.rb
|
185
186
|
- lib/rumale/preprocessing/binarizer.rb
|
187
|
+
- lib/rumale/preprocessing/kernel_calculator.rb
|
186
188
|
- lib/rumale/preprocessing/l1_normalizer.rb
|
187
189
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
188
190
|
- lib/rumale/preprocessing/label_binarizer.rb
|