rumale 0.13.2 → 0.13.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +2 -2
- data/lib/rumale/kernel_machine/kernel_pca.rb +115 -0
- data/lib/rumale/kernel_machine/kernel_ridge.rb +93 -0
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +2 -0
- data/rumale.gemspec +2 -2
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '088ba275c0027e5f4a816a681bac8f0ff08d9d9c'
|
4
|
+
data.tar.gz: 61f2d2e2e8a2557eb18a045cfb76cbc36d1876dd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e13dfbee846fd28b10f8f5fa04b166efad17269a14537c9dcee8ff50f56353ac740549f2e616d418907bcc83cf37ac80834c1d3c2a26da33ce0acaa632416790
|
7
|
+
data.tar.gz: 46c895ce3b5dee436d83887c2d1a028048cc934054d91482c7b3148f6f128d59c9de947ba19cd3a69aef2fc42273579a413257203a7e4ee46b66534acc71866b
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.13.3
|
2
|
+
- Add transformer class for [Kernel PCA](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelPCA.html).
|
3
|
+
- Add regressor class for [Kernel Ridge](https://yoshoku.github.io/rumale/doc/Rumale/KernelMachine/KernelRidge.html).
|
4
|
+
|
1
5
|
# 0.13.2
|
2
6
|
- Add preprocessing class for label binarization.
|
3
7
|
- Fix to use LabelBinarizer instead of OneHotEncoder.
|
data/README.md
CHANGED
@@ -11,10 +11,10 @@
|
|
11
11
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
12
12
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
13
13
|
Rumale supports Linear / Kernel Support Vector Machine,
|
14
|
-
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
14
|
+
Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
|
15
15
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
|
16
16
|
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
|
17
|
-
Mutidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
17
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
|
18
18
|
|
19
19
|
This project was formerly known as "SVMKit".
|
20
20
|
If you are using SVMKit, please install Rumale and replace `SVMKit` constants with `Rumale`.
|
@@ -0,0 +1,115 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module KernelMachine
|
8
|
+
# KernelPCA is a class that implements Kernel Principal Component Analysis.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
|
12
|
+
# kpca = Rumale::KernelMachine::KernelPCA.new(n_components: 2)
|
13
|
+
# mapped_training_samples = kpca.fit_transform(kernel_mat_train)
|
14
|
+
#
|
15
|
+
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
|
16
|
+
# mapped_test_samples = kpca.transform(kernel_mat_test)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# - B. Scholkopf, A. Smola, and K-R. Muller, "Nonlinear Component Analysis as a Kernel Eigenvalue Problem," Neural Computation, Vol. 10 (5), pp. 1299--1319, 1998.
|
20
|
+
class KernelPCA
|
21
|
+
include Base::BaseEstimator
|
22
|
+
include Base::Transformer
|
23
|
+
|
24
|
+
# Returns the eigenvalues of the centered kernel matrix.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_components])
|
26
|
+
attr_reader :lambdas
|
27
|
+
|
28
|
+
# Returns the eigenvectors of the centered kernel matrix.
|
29
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_components])
|
30
|
+
attr_reader :alphas
|
31
|
+
|
32
|
+
# Create a new transformer with Kernel PCA.
|
33
|
+
#
|
34
|
+
# @param n_components [Integer] The number of components.
|
35
|
+
def initialize(n_components: 2)
|
36
|
+
check_params_integer(n_components: n_components)
|
37
|
+
@params = {}
|
38
|
+
@params[:n_components] = n_components
|
39
|
+
@alphas = nil
|
40
|
+
@lambdas = nil
|
41
|
+
@row_mean = nil
|
42
|
+
@all_mean = nil
|
43
|
+
end
|
44
|
+
|
45
|
+
# Fit the model with given training data.
|
46
|
+
# To execute this method, Numo::Linalg must be loaded.
|
47
|
+
#
|
48
|
+
# @overload fit(x) -> KernelPCA
|
49
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
|
50
|
+
# The kernel matrix of the training data to be used for fitting the model.
|
51
|
+
# @return [KernelPCA] The learned transformer itself.
|
52
|
+
def fit(x, _y = nil)
|
53
|
+
check_sample_array(x)
|
54
|
+
raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
|
55
|
+
raise 'KernelPCA#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
56
|
+
|
57
|
+
n_samples = x.shape[0]
|
58
|
+
@row_mean = x.mean(0)
|
59
|
+
@all_mean = @row_mean.sum.fdiv(n_samples)
|
60
|
+
centered_kernel_mat = x - x.mean(1).expand_dims(1) - @row_mean + @all_mean
|
61
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(centered_kernel_mat, vals_range: (n_samples - @params[:n_components])...n_samples)
|
62
|
+
@alphas = eig_vecs.reverse(1).dup
|
63
|
+
@lambdas = eig_vals.reverse.dup
|
64
|
+
self
|
65
|
+
end
|
66
|
+
|
67
|
+
# Fit the model with training data, and then transform them with the learned model.
|
68
|
+
# To execute this method, Numo::Linalg must be loaded.
|
69
|
+
#
|
70
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
71
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_samples])
|
72
|
+
# The kernel matrix of the training data to be used for fitting the model and transformed.
|
73
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
74
|
+
def fit_transform(x, _y = nil)
|
75
|
+
check_sample_array(x)
|
76
|
+
fit(x).transform(x)
|
77
|
+
end
|
78
|
+
|
79
|
+
# Transform the given data with the learned model.
|
80
|
+
#
|
81
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
82
|
+
# The kernel matrix between testing samples and training samples to be transformed.
|
83
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_components]) The transformed data.
|
84
|
+
def transform(x)
|
85
|
+
check_sample_array(x)
|
86
|
+
col_mean = x.sum(1) / @row_mean.shape[0]
|
87
|
+
centered_kernel_mat = x - col_mean.expand_dims(1) - @row_mean + @all_mean
|
88
|
+
transform_mat = @alphas.dot((1.0 / Numo::NMath.sqrt(@lambdas)).diag)
|
89
|
+
transformed = centered_kernel_mat.dot(transform_mat)
|
90
|
+
@params[:n_components] == 1 ? transformed[true, 0].dup : transformed
|
91
|
+
end
|
92
|
+
|
93
|
+
# Dump marshal data.
|
94
|
+
# @return [Hash] The marshal data.
|
95
|
+
def marshal_dump
|
96
|
+
{ params: @params,
|
97
|
+
row_mean: @row_mean,
|
98
|
+
all_mean: @all_mean,
|
99
|
+
alphas: @alphas,
|
100
|
+
lambdas: @lambdas }
|
101
|
+
end
|
102
|
+
|
103
|
+
# Load marshal data.
|
104
|
+
# @return [nil]
|
105
|
+
def marshal_load(obj)
|
106
|
+
@params = obj[:params]
|
107
|
+
@row_mean = obj[:row_mean]
|
108
|
+
@all_mean = obj[:all_mean]
|
109
|
+
@alphas = obj[:alphas]
|
110
|
+
@lambdas = obj[:lambdas]
|
111
|
+
nil
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/regressor'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module KernelMachine
|
8
|
+
# KernelRidge is a class that implements kernel ridge regression.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# kernel_mat_train = Rumale::PairwiseMetric::rbf_kernel(training_samples)
|
12
|
+
# kridge = Rumale::KernelMachine::KernelRidge.new(reg_param: 1.0)
|
13
|
+
# kridge.fit(kernel_mat_train, training_values)
|
14
|
+
#
|
15
|
+
# kernel_mat_test = Rumale::PairwiseMetric::rbf_kernel(test_samples, training_samples)
|
16
|
+
# results = kridge.predict(kernel_mat_test)
|
17
|
+
class KernelRidge
|
18
|
+
include Base::BaseEstimator
|
19
|
+
include Base::Regressor
|
20
|
+
|
21
|
+
# Return the weight vector.
|
22
|
+
# @return [Numo::DFloat] (shape: [n_training_samples, n_outputs])
|
23
|
+
attr_reader :weight_vec
|
24
|
+
|
25
|
+
# Create a new regressor with kernel ridge regression.
|
26
|
+
#
|
27
|
+
# @param reg_param [Float/Numo::DFloat] The regularization parameter.
|
28
|
+
def initialize(reg_param: 1.0)
|
29
|
+
raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
|
30
|
+
raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1
|
31
|
+
@params = {}
|
32
|
+
@params[:reg_param] = reg_param
|
33
|
+
@weight_vec = nil
|
34
|
+
end
|
35
|
+
|
36
|
+
# Fit the model with given training data.
|
37
|
+
#
|
38
|
+
# @param x [Numo::DFloat] (shape: [n_training_samples, n_training_samples])
|
39
|
+
# The kernel matrix of the training data to be used for fitting the model.
|
40
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
|
41
|
+
# @return [KernelRidge] The learned regressor itself.
|
42
|
+
def fit(x, y)
|
43
|
+
check_sample_array(x)
|
44
|
+
check_tvalue_array(y)
|
45
|
+
check_sample_tvalue_size(x, y)
|
46
|
+
raise ArgumentError, 'Expect the kernel matrix of training data to be square.' unless x.shape[0] == x.shape[1]
|
47
|
+
raise 'KernelRidge#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
48
|
+
|
49
|
+
n_samples = x.shape[0]
|
50
|
+
|
51
|
+
if @params[:reg_param].is_a?(Float)
|
52
|
+
reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param]
|
53
|
+
@weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
|
54
|
+
else
|
55
|
+
raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]
|
56
|
+
n_outputs = y.shape[1]
|
57
|
+
@weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
|
58
|
+
n_outputs.times do |n|
|
59
|
+
reg_kernel_mat = x + Numo::DFloat.eye(n_samples) * @params[:reg_param][n]
|
60
|
+
@weight_vec[true, n] = Numo::Linalg.solve(reg_kernel_mat, y[true, n], driver: 'sym')
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
self
|
65
|
+
end
|
66
|
+
|
67
|
+
# Predict values for samples.
|
68
|
+
#
|
69
|
+
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
70
|
+
# The kernel matrix between testing samples and training samples to predict values.
|
71
|
+
# @return [Numo::DFloat] (shape: [n_testing_samples, n_outputs]) Predicted values per sample.
|
72
|
+
def predict(x)
|
73
|
+
check_sample_array(x)
|
74
|
+
x.dot(@weight_vec)
|
75
|
+
end
|
76
|
+
|
77
|
+
# Dump marshal data.
|
78
|
+
# @return [Hash] The marshal data.
|
79
|
+
def marshal_dump
|
80
|
+
{ params: @params,
|
81
|
+
weight_vec: @weight_vec }
|
82
|
+
end
|
83
|
+
|
84
|
+
# Load marshal data.
|
85
|
+
# @return [nil]
|
86
|
+
def marshal_load(obj)
|
87
|
+
@params = obj[:params]
|
88
|
+
@weight_vec = obj[:weight_vec]
|
89
|
+
nil
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
data/lib/rumale/version.rb
CHANGED
data/lib/rumale.rb
CHANGED
@@ -34,6 +34,8 @@ require 'rumale/linear_model/linear_regression'
|
|
34
34
|
require 'rumale/linear_model/ridge'
|
35
35
|
require 'rumale/linear_model/lasso'
|
36
36
|
require 'rumale/kernel_machine/kernel_svc'
|
37
|
+
require 'rumale/kernel_machine/kernel_pca'
|
38
|
+
require 'rumale/kernel_machine/kernel_ridge'
|
37
39
|
require 'rumale/polynomial_model/base_factorization_machine'
|
38
40
|
require 'rumale/polynomial_model/factorization_machine_classifier'
|
39
41
|
require 'rumale/polynomial_model/factorization_machine_regressor'
|
data/rumale.gemspec
CHANGED
@@ -17,10 +17,10 @@ Gem::Specification.new do |spec|
|
|
17
17
|
Rumale is a machine learning library in Ruby.
|
18
18
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
19
19
|
Rumale currently supports Linear / Kernel Support Vector Machine,
|
20
|
-
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
20
|
+
Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
|
21
21
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
|
22
22
|
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
|
23
|
-
Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
23
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
|
24
24
|
MSG
|
25
25
|
spec.homepage = 'https://github.com/yoshoku/rumale'
|
26
26
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.2
|
4
|
+
version: 0.13.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -126,10 +126,10 @@ description: |
|
|
126
126
|
Rumale is a machine learning library in Ruby.
|
127
127
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
128
128
|
Rumale currently supports Linear / Kernel Support Vector Machine,
|
129
|
-
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
129
|
+
Logistic Regression, Linear Regression, Ridge, Lasso, Kernel Ridge, Factorization Machine,
|
130
130
|
Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
|
131
131
|
K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, SNN, Power Iteration Clustering,
|
132
|
-
Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
|
132
|
+
Multidimensional Scaling, t-SNE, Principal Component Analysis, Kernel PCA, and Non-negative Matrix Factorization.
|
133
133
|
email:
|
134
134
|
- yoshoku@outlook.com
|
135
135
|
executables: []
|
@@ -196,6 +196,8 @@ files:
|
|
196
196
|
- lib/rumale/evaluation_measure/recall.rb
|
197
197
|
- lib/rumale/evaluation_measure/roc_auc.rb
|
198
198
|
- lib/rumale/kernel_approximation/rbf.rb
|
199
|
+
- lib/rumale/kernel_machine/kernel_pca.rb
|
200
|
+
- lib/rumale/kernel_machine/kernel_ridge.rb
|
199
201
|
- lib/rumale/kernel_machine/kernel_svc.rb
|
200
202
|
- lib/rumale/linear_model/base_linear_model.rb
|
201
203
|
- lib/rumale/linear_model/lasso.rb
|