rumale 0.22.0 → 0.22.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +10 -10
- data/lib/rumale.rb +1 -0
- data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
- data/lib/rumale/linear_model/svc.rb +4 -3
- data/lib/rumale/linear_model/svr.rb +4 -3
- data/lib/rumale/metric_learning/mlkr.rb +161 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +7 -4
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
|
4
|
+
data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
|
7
|
+
data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.22.1
|
2
|
+
- Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
|
3
|
+
- Refactor NeighbourhoodComponentAnalysis.
|
4
|
+
- Update API documentation.
|
5
|
+
|
1
6
|
# 0.22.0
|
2
7
|
## Breaking change
|
3
8
|
- Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
|
data/README.md
CHANGED
@@ -113,10 +113,10 @@ require 'rumale'
|
|
113
113
|
samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
|
114
114
|
|
115
115
|
# Define the estimator to be evaluated.
|
116
|
-
lr = Rumale::LinearModel::LogisticRegression.new
|
116
|
+
lr = Rumale::LinearModel::LogisticRegression.new
|
117
117
|
|
118
118
|
# Define the evaluation measure, splitting strategy, and cross validation.
|
119
|
-
ev = Rumale::EvaluationMeasure::
|
119
|
+
ev = Rumale::EvaluationMeasure::Accuracy.new
|
120
120
|
kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
|
121
121
|
cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
|
122
122
|
|
@@ -124,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
|
|
124
124
|
report = cv.perform(samples, labels)
|
125
125
|
|
126
126
|
# Output result.
|
127
|
-
|
128
|
-
puts
|
127
|
+
mean_accuracy = report[:test_score].sum / kf.n_splits
|
128
|
+
puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
|
129
129
|
```
|
130
130
|
|
131
131
|
Executing the above script results in the following.
|
132
132
|
|
133
133
|
```bash
|
134
134
|
$ ruby cross_validation.rb
|
135
|
-
5-CV mean
|
135
|
+
5-CV mean accuracy: 95.4%
|
136
136
|
```
|
137
137
|
|
138
138
|
### Example 3. Pipeline
|
@@ -143,10 +143,10 @@ require 'rumale'
|
|
143
143
|
# Load dataset.
|
144
144
|
samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
|
145
145
|
|
146
|
-
# Construct pipeline with kernel approximation and
|
147
|
-
rbf = Rumale::KernelApproximation::RBF.new(gamma:
|
148
|
-
|
149
|
-
pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf:
|
146
|
+
# Construct pipeline with kernel approximation and LogisticRegression.
|
147
|
+
rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
|
148
|
+
lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
|
149
|
+
pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
|
150
150
|
|
151
151
|
# Define the splitting strategy and cross validation.
|
152
152
|
kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
|
@@ -156,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
|
|
156
156
|
report = cv.perform(samples, labels)
|
157
157
|
|
158
158
|
# Output result.
|
159
|
-
mean_accuracy = report[:test_score].
|
159
|
+
mean_accuracy = report[:test_score].sum / kf.n_splits
|
160
160
|
puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
|
161
161
|
```
|
162
162
|
|
data/lib/rumale.rb
CHANGED
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
|
|
77
77
|
require 'rumale/manifold/mds'
|
78
78
|
require 'rumale/metric_learning/fisher_discriminant_analysis'
|
79
79
|
require 'rumale/metric_learning/neighbourhood_component_analysis'
|
80
|
+
require 'rumale/metric_learning/mlkr'
|
80
81
|
require 'rumale/neural_network/adam'
|
81
82
|
require 'rumale/neural_network/base_mlp'
|
82
83
|
require 'rumale/neural_network/mlp_regressor'
|
@@ -11,9 +11,10 @@ module Rumale
|
|
11
11
|
# with stochastic gradient descent (SGD) optimization.
|
12
12
|
# For multiclass classification problem, it uses one-vs-the-rest strategy.
|
13
13
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
14
|
+
# @note
|
15
|
+
# Rumale::SVM provides kernel support vector classifier based on LIBSVM.
|
16
|
+
# If you prefer execution speed, you should use Rumale::SVM::SVC.
|
17
|
+
# https://github.com/yoshoku/rumale-svm
|
17
18
|
#
|
18
19
|
# @example
|
19
20
|
# training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
|
@@ -11,9 +11,10 @@ module Rumale
|
|
11
11
|
# with stochastic gradient descent optimization.
|
12
12
|
# For multiclass classification problem, it uses one-vs-the-rest strategy.
|
13
13
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
14
|
+
# @note
|
15
|
+
# Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
|
16
|
+
# If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
|
17
|
+
# https://github.com/yoshoku/rumale-svm
|
17
18
|
#
|
18
19
|
# @example
|
19
20
|
# estimator =
|
@@ -8,9 +8,10 @@ module Rumale
|
|
8
8
|
# SVR is a class that implements Support Vector Regressor
|
9
9
|
# with stochastic gradient descent optimization.
|
10
10
|
#
|
11
|
-
#
|
12
|
-
#
|
13
|
-
#
|
11
|
+
# @note
|
12
|
+
# Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
|
13
|
+
# If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
|
14
|
+
# https://github.com/yoshoku/rumale-svm
|
14
15
|
#
|
15
16
|
# @example
|
16
17
|
# estimator =
|
@@ -0,0 +1,161 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/decomposition/pca'
|
6
|
+
require 'rumale/pairwise_metric'
|
7
|
+
require 'rumale/utils'
|
8
|
+
require 'lbfgsb'
|
9
|
+
|
10
|
+
module Rumale
|
11
|
+
module MetricLearning
|
12
|
+
# MLKR is a class that implements Metric Learning for Kernel Regression.
|
13
|
+
#
|
14
|
+
# @example
|
15
|
+
# transformer = Rumale::MetricLearning::MLKR.new
|
16
|
+
#   transformer.fit(training_samples, training_target_values)
|
17
|
+
# low_samples = transformer.transform(testing_samples)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--619, 2007.
|
21
|
+
class MLKR
|
22
|
+
include Base::BaseEstimator
|
23
|
+
include Base::Transformer
|
24
|
+
|
25
|
+
# Returns the metric components.
|
26
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
27
|
+
attr_reader :components
|
28
|
+
|
29
|
+
# Return the number of iterations run for optimization
|
30
|
+
# @return [Integer]
|
31
|
+
attr_reader :n_iter
|
32
|
+
|
33
|
+
# Return the random generator.
|
34
|
+
# @return [Random]
|
35
|
+
attr_reader :rng
|
36
|
+
|
37
|
+
# Create a new transformer with MLKR.
|
38
|
+
#
|
39
|
+
# @param n_components [Integer] The number of components.
|
40
|
+
# @param init [String] The initialization method for components ('random' or 'pca').
|
41
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
42
|
+
# @param tol [Float] The tolerance of termination criterion.
|
43
|
+
# This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
|
44
|
+
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
45
|
+
# If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
|
46
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
47
|
+
def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
|
48
|
+
check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
|
49
|
+
check_params_numeric(max_iter: max_iter, tol: tol)
|
50
|
+
check_params_string(init: init)
|
51
|
+
check_params_boolean(verbose: verbose)
|
52
|
+
@params = {}
|
53
|
+
@params[:n_components] = n_components
|
54
|
+
@params[:init] = init
|
55
|
+
@params[:max_iter] = max_iter
|
56
|
+
@params[:tol] = tol
|
57
|
+
@params[:verbose] = verbose
|
58
|
+
@params[:random_seed] = random_seed
|
59
|
+
@params[:random_seed] ||= srand
|
60
|
+
@components = nil
|
61
|
+
@n_iter = nil
|
62
|
+
@rng = Random.new(@params[:random_seed])
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit the model with given training data.
|
66
|
+
#
|
67
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
68
|
+
# @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
|
69
|
+
# @return [MLKR] The learned transformer itself.
|
70
|
+
def fit(x, y)
|
71
|
+
x = check_convert_sample_array(x)
|
72
|
+
y = check_convert_tvalue_array(y)
|
73
|
+
check_sample_tvalue_size(x, y)
|
74
|
+
n_features = x.shape[1]
|
75
|
+
n_components = if @params[:n_components].nil?
|
76
|
+
n_features
|
77
|
+
else
|
78
|
+
[n_features, @params[:n_components]].min
|
79
|
+
end
|
80
|
+
@components, @n_iter = optimize_components(x, y, n_features, n_components)
|
81
|
+
@prototypes = x.dot(@components.transpose)
|
82
|
+
@values = y
|
83
|
+
self
|
84
|
+
end
|
85
|
+
|
86
|
+
# Fit the model with training data, and then transform them with the learned model.
|
87
|
+
#
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
89
|
+
# @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
|
90
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
91
|
+
def fit_transform(x, y)
|
92
|
+
x = check_convert_sample_array(x)
|
93
|
+
y = check_convert_tvalue_array(y)
|
94
|
+
check_sample_tvalue_size(x, y)
|
95
|
+
fit(x, y).transform(x)
|
96
|
+
end
|
97
|
+
|
98
|
+
# Transform the given data with the learned model.
|
99
|
+
#
|
100
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
101
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
102
|
+
def transform(x)
|
103
|
+
x = check_convert_sample_array(x)
|
104
|
+
x.dot(@components.transpose)
|
105
|
+
end
|
106
|
+
|
107
|
+
private
|
108
|
+
|
109
|
+
def init_components(x, n_features, n_components)
|
110
|
+
if @params[:init] == 'pca'
|
111
|
+
pca = Rumale::Decomposition::PCA.new(n_components: n_components)
|
112
|
+
pca.fit(x).components.flatten.dup
|
113
|
+
else
|
114
|
+
Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
|
115
|
+
end
|
116
|
+
end
|
117
|
+
|
118
|
+
def optimize_components(x, y, n_features, n_components)
|
119
|
+
# initialize components.
|
120
|
+
comp_init = init_components(x, n_features, n_components)
|
121
|
+
# initialize optimization results.
|
122
|
+
res = {}
|
123
|
+
res[:x] = comp_init
|
124
|
+
res[:n_iter] = 0
|
125
|
+
# perform optimization.
|
126
|
+
verbose = @params[:verbose] ? 1 : -1
|
127
|
+
res = Lbfgsb.minimize(
|
128
|
+
fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
|
129
|
+
maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
|
130
|
+
)
|
131
|
+
# return the results.
|
132
|
+
n_iter = res[:n_iter]
|
133
|
+
comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
|
134
|
+
[comps, n_iter]
|
135
|
+
end
|
136
|
+
|
137
|
+
def mlkr_fnc(w, x, y)
|
138
|
+
# initialize some variables.
|
139
|
+
n_features = x.shape[1]
|
140
|
+
n_components = w.size / n_features
|
141
|
+
# projection.
|
142
|
+
w = w.reshape(n_components, n_features)
|
143
|
+
z = x.dot(w.transpose)
|
144
|
+
# predict values.
|
145
|
+
kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
|
146
|
+
kernel_mat[kernel_mat.diag_indices] = 0.0
|
147
|
+
norm = kernel_mat.sum(1)
|
148
|
+
norm[norm.eq(0)] = 1
|
149
|
+
y_pred = kernel_mat.dot(y) / norm
|
150
|
+
# calculate loss.
|
151
|
+
y_diff = y_pred - y
|
152
|
+
loss = (y_diff**2).sum
|
153
|
+
# calculate gradient.
|
154
|
+
weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
|
155
|
+
weight_mat = weight_mat.sum(0).diag - weight_mat
|
156
|
+
gradient = 8 * z.transpose.dot(weight_mat).dot(x)
|
157
|
+
[loss, gradient.flatten.dup]
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
end
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/pairwise_metric'
|
5
7
|
require 'lbfgsb'
|
6
8
|
|
7
9
|
module Rumale
|
@@ -146,10 +148,11 @@ module Rumale
|
|
146
148
|
mask_mat = y.expand_dims(1).eq(y)
|
147
149
|
masked_prob_mat = prob_mat * mask_mat
|
148
150
|
loss = n_samples - masked_prob_mat.sum
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
151
|
+
sum_probs = masked_prob_mat.sum(1)
|
152
|
+
weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
|
153
|
+
weight_mat += weight_mat.transpose
|
154
|
+
weight_mat = weight_mat.sum(0).diag - weight_mat
|
155
|
+
gradient = -2 * z.transpose.dot(weight_mat).dot(x)
|
153
156
|
[loss, gradient.flatten.dup]
|
154
157
|
end
|
155
158
|
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.22.
|
4
|
+
version: 0.22.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -145,6 +145,7 @@ files:
|
|
145
145
|
- lib/rumale/manifold/mds.rb
|
146
146
|
- lib/rumale/manifold/tsne.rb
|
147
147
|
- lib/rumale/metric_learning/fisher_discriminant_analysis.rb
|
148
|
+
- lib/rumale/metric_learning/mlkr.rb
|
148
149
|
- lib/rumale/metric_learning/neighbourhood_component_analysis.rb
|
149
150
|
- lib/rumale/model_selection/cross_validation.rb
|
150
151
|
- lib/rumale/model_selection/function.rb
|