rumale 0.17.1 → 0.17.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -5
- data/lib/rumale.rb +1 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +106 -0
- data/lib/rumale/pipeline/pipeline.rb +0 -25
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
|
4
|
+
data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
|
7
|
+
data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.17.2
|
2
|
+
- Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
|
3
|
+
- Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class, considering that an array of hashes may be given to HashVectorizer.
|
4
|
+
|
1
5
|
# 0.17.1
|
2
6
|
- Add transformer class for [PolynomialFeatures](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/PolynomialFeatures.html)
|
3
7
|
- Add verbose and tol parameter to [FactorizationMachineClassifier](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel/FactorizationMachineClassifier.html)
|
data/README.md
CHANGED
@@ -50,22 +50,21 @@ First, let's classify simple xor data.
|
|
50
50
|
require 'rumale'
|
51
51
|
|
52
52
|
# Prepare XOR data.
|
53
|
-
|
53
|
+
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
|
54
54
|
labels = [0, 1, 1, 0]
|
55
55
|
|
56
56
|
# Train classifier with nearest neighbor rule.
|
57
57
|
estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
|
58
|
-
estimator.fit(
|
58
|
+
estimator.fit(samples, labels)
|
59
59
|
|
60
60
|
# Predict labels.
|
61
|
-
p
|
62
|
-
p estimator.predict(
|
61
|
+
p labels
|
62
|
+
p estimator.predict(samples)
|
63
63
|
```
|
64
64
|
|
65
65
|
Execution of the above script results in the following.
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
Numo::Int32#shape=[4]
|
69
68
|
[0, 1, 1, 0]
|
70
69
|
Numo::Int32#shape=[4]
|
71
70
|
[0, 1, 1, 0]
|
data/lib/rumale.rb
CHANGED
@@ -26,6 +26,7 @@ require 'rumale/optimizer/nadam'
|
|
26
26
|
require 'rumale/optimizer/yellow_fin'
|
27
27
|
require 'rumale/pipeline/pipeline'
|
28
28
|
require 'rumale/kernel_approximation/rbf'
|
29
|
+
require 'rumale/kernel_approximation/nystroem'
|
29
30
|
require 'rumale/linear_model/base_linear_model'
|
30
31
|
require 'rumale/linear_model/base_sgd'
|
31
32
|
require 'rumale/linear_model/svc'
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module KernelApproximation
|
9
|
+
# Nystroem is a class that implements feature mapping with Nystroem method.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'numo/linalg/autoloader'
|
13
|
+
#
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
15
|
+
# new_training_samples = transformer.fit_transform(training_samples)
|
16
|
+
# new_testing_samples = transformer.transform(testing_samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# 1. T. Yang, Y. Li, M. Mahdavi, R. Jin, and Z-H. Zhou, "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
|
20
|
+
class Nystroem
|
21
|
+
include Base::BaseEstimator
|
22
|
+
include Base::Transformer
|
23
|
+
|
24
|
+
# Returns the randomly sampled training data for feature mapping.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
26
|
+
attr_reader :components
|
27
|
+
|
28
|
+
# Returns the indices of the sampled training data.
|
29
|
+
# @return [Numo::Int32] (shape: [n_components])
|
30
|
+
attr_reader :component_indices
|
31
|
+
|
32
|
+
# Returns the normalizing factors.
|
33
|
+
# @return [Numo::DFloat] (shape: [n_components, n_components])
|
34
|
+
attr_reader :normalizer
|
35
|
+
|
36
|
+
# Returns the random generator for transformation.
|
37
|
+
# @return [Random]
|
38
|
+
attr_reader :rng
|
39
|
+
|
40
|
+
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
|
+
#
|
42
|
+
# @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
|
43
|
+
# @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
|
44
|
+
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
45
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
+
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
+
check_params_numeric(gamma: gamma, n_components: n_components)
|
48
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
49
|
+
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
|
+
@params[:random_seed] ||= srand
|
51
|
+
@rng = Random.new(@params[:random_seed])
|
52
|
+
@component_indices = nil
|
53
|
+
@components = nil
|
54
|
+
@normalizer = nil
|
55
|
+
end
|
56
|
+
|
57
|
+
# Fit the model with given training data.
|
58
|
+
#
|
59
|
+
# @overload fit(x) -> Nystroem
|
60
|
+
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
|
+
# @return [Nystroem] The learned transformer itself.
|
62
|
+
def fit(x, _y = nil)
|
63
|
+
x = check_convert_sample_array(x)
|
64
|
+
raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
65
|
+
|
66
|
+
# initialize some variables.
|
67
|
+
sub_rng = @rng.dup
|
68
|
+
n_samples = x.shape[0]
|
69
|
+
n_components = [1, [@params[:n_components], n_samples].min].max
|
70
|
+
|
71
|
+
# random sampling.
|
72
|
+
@component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
|
73
|
+
@components = x[@component_indices, true]
|
74
|
+
|
75
|
+
# calculate normalizing factor.
|
76
|
+
kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
|
77
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
|
+
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
|
+
u = eig_vecs.reverse(1)
|
80
|
+
@normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
|
81
|
+
|
82
|
+
self
|
83
|
+
end
|
84
|
+
|
85
|
+
# Fit the model with training data, and then transform them with the learned model.
|
86
|
+
#
|
87
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
89
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
90
|
+
def fit_transform(x, _y = nil)
|
91
|
+
x = check_convert_sample_array(x)
|
92
|
+
fit(x).transform(x)
|
93
|
+
end
|
94
|
+
|
95
|
+
# Transform the given data with the learned model.
|
96
|
+
#
|
97
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
98
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
|
+
def transform(x)
|
100
|
+
x = check_convert_sample_array(x)
|
101
|
+
z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
|
102
|
+
z.dot(@normalizer)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
@@ -40,7 +40,6 @@ module Rumale
|
|
40
40
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
41
41
|
# @return [Pipeline] The learned pipeline itself.
|
42
42
|
def fit(x, y)
|
43
|
-
x = check_convert_sample_array(x)
|
44
43
|
trans_x = apply_transforms(x, y, fit: true)
|
45
44
|
last_estimator&.fit(trans_x, y)
|
46
45
|
self
|
@@ -52,7 +51,6 @@ module Rumale
|
|
52
51
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
53
52
|
# @return [Numo::NArray] The predicted results by last estimator.
|
54
53
|
def fit_predict(x, y = nil)
|
55
|
-
x = check_convert_sample_array(x)
|
56
54
|
trans_x = apply_transforms(x, y, fit: true)
|
57
55
|
last_estimator.fit_predict(trans_x)
|
58
56
|
end
|
@@ -63,7 +61,6 @@ module Rumale
|
|
63
61
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
64
62
|
# @return [Numo::NArray] The transformed results by last estimator.
|
65
63
|
def fit_transform(x, y = nil)
|
66
|
-
x = check_convert_sample_array(x)
|
67
64
|
trans_x = apply_transforms(x, y, fit: true)
|
68
65
|
last_estimator.fit_transform(trans_x, y)
|
69
66
|
end
|
@@ -73,7 +70,6 @@ module Rumale
|
|
73
70
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
74
71
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
75
72
|
def decision_function(x)
|
76
|
-
x = check_convert_sample_array(x)
|
77
73
|
trans_x = apply_transforms(x)
|
78
74
|
last_estimator.decision_function(trans_x)
|
79
75
|
end
|
@@ -83,7 +79,6 @@ module Rumale
|
|
83
79
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
84
80
|
# @return [Numo::NArray] The predicted results by last estimator.
|
85
81
|
def predict(x)
|
86
|
-
x = check_convert_sample_array(x)
|
87
82
|
trans_x = apply_transforms(x)
|
88
83
|
last_estimator.predict(trans_x)
|
89
84
|
end
|
@@ -93,7 +88,6 @@ module Rumale
|
|
93
88
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
94
89
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
95
90
|
def predict_log_proba(x)
|
96
|
-
x = check_convert_sample_array(x)
|
97
91
|
trans_x = apply_transforms(x)
|
98
92
|
last_estimator.predict_log_proba(trans_x)
|
99
93
|
end
|
@@ -103,7 +97,6 @@ module Rumale
|
|
103
97
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
104
98
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
105
99
|
def predict_proba(x)
|
106
|
-
x = check_convert_sample_array(x)
|
107
100
|
trans_x = apply_transforms(x)
|
108
101
|
last_estimator.predict_proba(trans_x)
|
109
102
|
end
|
@@ -113,7 +106,6 @@ module Rumale
|
|
113
106
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
114
107
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
|
115
108
|
def transform(x)
|
116
|
-
x = check_convert_sample_array(x)
|
117
109
|
trans_x = apply_transforms(x)
|
118
110
|
last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
|
119
111
|
end
|
@@ -123,7 +115,6 @@ module Rumale
|
|
123
115
|
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
|
124
116
|
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
|
125
117
|
def inverse_transform(z)
|
126
|
-
z = check_convert_sample_array(z)
|
127
118
|
itrans_z = z
|
128
119
|
@steps.keys.reverse_each do |name|
|
129
120
|
transformer = @steps[name]
|
@@ -139,26 +130,10 @@ module Rumale
|
|
139
130
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
140
131
|
# @return [Float] The score of last estimator
|
141
132
|
def score(x, y)
|
142
|
-
x = check_convert_sample_array(x)
|
143
133
|
trans_x = apply_transforms(x)
|
144
134
|
last_estimator.score(trans_x, y)
|
145
135
|
end
|
146
136
|
|
147
|
-
# Dump marshal data.
|
148
|
-
# @return [Hash] The marshal data about Pipeline.
|
149
|
-
def marshal_dump
|
150
|
-
{ params: @params,
|
151
|
-
steps: @steps }
|
152
|
-
end
|
153
|
-
|
154
|
-
# Load marshal data.
|
155
|
-
# @return [nil]
|
156
|
-
def marshal_load(obj)
|
157
|
-
@params = obj[:params]
|
158
|
-
@steps = obj[:steps]
|
159
|
-
nil
|
160
|
-
end
|
161
|
-
|
162
137
|
private
|
163
138
|
|
164
139
|
def validate_steps(steps)
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.17.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01
|
11
|
+
date: 2020-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -209,6 +209,7 @@ files:
|
|
209
209
|
- lib/rumale/evaluation_measure/silhouette_score.rb
|
210
210
|
- lib/rumale/feature_extraction/feature_hasher.rb
|
211
211
|
- lib/rumale/feature_extraction/hash_vectorizer.rb
|
212
|
+
- lib/rumale/kernel_approximation/nystroem.rb
|
212
213
|
- lib/rumale/kernel_approximation/rbf.rb
|
213
214
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
214
215
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|