rumale 0.22.0 → 0.22.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +10 -10
- data/lib/rumale.rb +1 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
- data/lib/rumale/linear_model/svc.rb +4 -3
- data/lib/rumale/linear_model/svr.rb +4 -3
- data/lib/rumale/metric_learning/mlkr.rb +161 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +7 -4
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
+  data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
+  data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
+# 0.22.1
+- Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
+- Refactor NeighbourhoodComponentAnalysis.
+- Update API documentation.
+
 # 0.22.0
 ## Breaking change
 - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
data/README.md
CHANGED
@@ -113,10 +113,10 @@ require 'rumale'
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
 # Define the estimator to be evaluated.
-lr = Rumale::LinearModel::LogisticRegression.new
+lr = Rumale::LinearModel::LogisticRegression.new
 
 # Define the evaluation measure, splitting strategy, and cross validation.
-ev = Rumale::EvaluationMeasure::
+ev = Rumale::EvaluationMeasure::Accuracy.new
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
 cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
 
@@ -124,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
 report = cv.perform(samples, labels)
 
 # Output result.
-
-puts
+mean_accuracy = report[:test_score].sum / kf.n_splits
+puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
 ```
 
 Execution of the above scripts result in the following.
 
 ```bash
 $ ruby cross_validation.rb
-5-CV mean
+5-CV mean accuracy: 95.4%
 ```
 
 ### Example 3. Pipeline
@@ -143,10 +143,10 @@ require 'rumale'
 # Load dataset.
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
-# Construct pipeline with kernel approximation and
-rbf = Rumale::KernelApproximation::RBF.new(gamma:
-
-pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf:
+# Construct pipeline with kernel approximation and LogisticRegression.
+rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
+pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
 
 # Define the splitting strategy and cross validation.
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -156,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_accuracy = report[:test_score].
+mean_accuracy = report[:test_score].sum / kf.n_splits
 puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
 ```
 
data/lib/rumale.rb
CHANGED
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
 require 'rumale/metric_learning/neighbourhood_component_analysis'
+require 'rumale/metric_learning/mlkr'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'
data/lib/rumale/kernel_machine/kernel_svc.rb
CHANGED
@@ -11,9 +11,10 @@ module Rumale
 # with stochastic gradient descent (SGD) optimization.
 # For multiclass classification problem, it uses one-vs-the-rest strategy.
 #
-#
-#
-#
+# @note
+#   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
+#   If you prefer execution speed, you should use Rumale::SVM::SVC.
+#   https://github.com/yoshoku/rumale-svm
 #
 # @example
 #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
data/lib/rumale/linear_model/svc.rb
CHANGED
@@ -11,9 +11,10 @@ module Rumale
 # with stochastic gradient descent optimization.
 # For multiclass classification problem, it uses one-vs-the-rest strategy.
 #
-#
-#
-#
+# @note
+#   Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
+#   If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
+#   https://github.com/yoshoku/rumale-svm
 #
 # @example
 #   estimator =
data/lib/rumale/linear_model/svr.rb
CHANGED
@@ -8,9 +8,10 @@ module Rumale
 # SVR is a class that implements Support Vector Regressor
 # with stochastic gradient descent optimization.
 #
-#
-#
-#
+# @note
+#   Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
+#   If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
+#   https://github.com/yoshoku/rumale-svm
 #
 # @example
 #   estimator =
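The @note blocks added to KernelSVC, SVC, and SVR above all point to the companion rumale-svm gem. A minimal sketch of the suggested swap, assuming the rumale-svm gem is installed and relying only on the standard fit/predict interface (constructor arguments are omitted because they are not part of this diff):

```ruby
require 'rumale'
require 'rumale/svm' # provided by the separate rumale-svm gem (assumed installed)

samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')

# Rumale::SVM::LinearSVC exposes the same classifier interface as
# Rumale::LinearModel::SVC, so it can stand in without further code changes.
estimator = Rumale::SVM::LinearSVC.new
estimator.fit(samples, labels)
predicted = estimator.predict(samples)

accuracy = Rumale::EvaluationMeasure::Accuracy.new.score(labels, predicted)
puts('training accuracy: %.3f' % accuracy)
```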
data/lib/rumale/metric_learning/mlkr.rb
ADDED
@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/decomposition/pca'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'lbfgsb'
+
+module Rumale
+  module MetricLearning
+    # MLKR is a class that implements Metric Learning for Kernel Regression.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::MLKR.new
+    #   transformer.fit(training_samples, training_target_values)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
+    class MLKR
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Returns the metric components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+
+      # Return the number of iterations run for optimization.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with MLKR.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(max_iter: max_iter, tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [MLKR] The learned transformer itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        @prototypes = x.dot(@components.transpose)
+        @values = y
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        fit(x, y).transform(x)
+      end
+
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+
+      private
+
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+        )
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+
+      def mlkr_fnc(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # predict values.
+        kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        kernel_mat[kernel_mat.diag_indices] = 0.0
+        norm = kernel_mat.sum(1)
+        norm[norm.eq(0)] = 1
+        y_pred = kernel_mat.dot(y) / norm
+        # calculate loss.
+        y_diff = y_pred - y
+        loss = (y_diff**2).sum
+        # calculate gradient.
+        weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = 8 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
+      end
+    end
+  end
+end
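For orientation, a small usage sketch of the MLKR transformer added above. The constructor keywords, fit_transform, transform, components, and n_iter are taken from the file in this diff; the toy data and the choice of n_components: 2 are made up for illustration:

```ruby
require 'rumale'

# Toy regression data: 100 samples with 10 features and a real-valued target.
x = Numo::DFloat.new(100, 10).rand_norm
y = 2.0 * x[true, 0] + x[true, 1]

# Learn a linear projection under which kernel regression predicts y well,
# then map the data into the 2-dimensional metric space.
transformer = Rumale::MetricLearning::MLKR.new(n_components: 2, init: 'pca', random_seed: 1)
z = transformer.fit_transform(x, y)

p z.shape                      # => [100, 2]
p transformer.components.shape # => [2, 10]
p transformer.n_iter           # number of L-BFGS-B iterations actually run

# New samples are projected with the learned components.
z_new = transformer.transform(Numo::DFloat.new(5, 10).rand_norm)
```

As with the other metric-learning transformers, the projected features are typically fed to a distance-based model such as a nearest-neighbor regressor.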
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb
CHANGED
@@ -2,6 +2,8 @@
 
 require 'rumale/base/base_estimator'
 require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/pairwise_metric'
 require 'lbfgsb'
 
 module Rumale
@@ -146,10 +148,11 @@ module Rumale
         mask_mat = y.expand_dims(1).eq(y)
         masked_prob_mat = prob_mat * mask_mat
         loss = n_samples - masked_prob_mat.sum
-
-
-
-
+        sum_probs = masked_prob_mat.sum(1)
+        weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
+        weight_mat += weight_mat.transpose
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = -2 * z.transpose.dot(weight_mat).dot(x)
         [loss, gradient.flatten.dup]
       end
 
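The NeighbourhoodComponentAnalysis change above only rewrites the internal L-BFGS-B objective (loss and gradient of the NCA criterion), so calling code is unaffected. A minimal usage sketch for reference, with illustrative toy data; the API shown here already existed in 0.22.0:

```ruby
require 'rumale'

# Toy classification data: two labeled Gaussian blobs in 10 dimensions.
a = Numo::DFloat.new(50, 10).rand_norm
b = Numo::DFloat.new(50, 10).rand_norm + 3.0
x = Numo::NArray.vstack([a, b])
labels = Numo::Int32.zeros(100)
labels[50...100] = 1

nca = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new(n_components: 2, random_seed: 1)
z = nca.fit_transform(x, labels)
p z.shape # => [100, 2]
```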
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.22.
+  version: 0.22.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-
+date: 2020-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -145,6 +145,7 @@ files:
 - lib/rumale/manifold/mds.rb
 - lib/rumale/manifold/tsne.rb
 - lib/rumale/metric_learning/fisher_discriminant_analysis.rb
+- lib/rumale/metric_learning/mlkr.rb
 - lib/rumale/metric_learning/neighbourhood_component_analysis.rb
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/function.rb