rumale 0.17.1 → 0.17.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +4 -5
- data/lib/rumale.rb +1 -0
- data/lib/rumale/kernel_approximation/nystroem.rb +106 -0
- data/lib/rumale/pipeline/pipeline.rb +0 -25
- data/lib/rumale/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
|
4
|
+
data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
|
7
|
+
data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.17.2
|
2
|
+
- Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
|
3
|
+
- Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class, since an array of hashes may be given to HashVectorizer.
|
4
|
+
|
1
5
|
# 0.17.1
|
2
6
|
- Add transformer class for [PolynomialFeatures](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/PolynomialFeatures.html)
|
3
7
|
- Add verbose and tol parameter to [FactorizationMachineClassifier](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel/FactorizationMachineClassifier.html)
|
data/README.md
CHANGED
@@ -50,22 +50,21 @@ First, let's classify simple xor data.
|
|
50
50
|
require 'rumale'
|
51
51
|
|
52
52
|
# Prepare XOR data.
|
53
|
-
|
53
|
+
samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
|
54
54
|
labels = [0, 1, 1, 0]
|
55
55
|
|
56
56
|
# Train classifier with nearest neighbor rule.
|
57
57
|
estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
|
58
|
-
estimator.fit(
|
58
|
+
estimator.fit(samples, labels)
|
59
59
|
|
60
60
|
# Predict labels.
|
61
|
-
p
|
62
|
-
p estimator.predict(
|
61
|
+
p labels
|
62
|
+
p estimator.predict(samples)
|
63
63
|
```
|
64
64
|
|
65
65
|
Executing the above script results in the following.
|
66
66
|
|
67
67
|
```ruby
|
68
|
-
Numo::Int32#shape=[4]
|
69
68
|
[0, 1, 1, 0]
|
70
69
|
Numo::Int32#shape=[4]
|
71
70
|
[0, 1, 1, 0]
|
data/lib/rumale.rb
CHANGED
@@ -26,6 +26,7 @@ require 'rumale/optimizer/nadam'
|
|
26
26
|
require 'rumale/optimizer/yellow_fin'
|
27
27
|
require 'rumale/pipeline/pipeline'
|
28
28
|
require 'rumale/kernel_approximation/rbf'
|
29
|
+
require 'rumale/kernel_approximation/nystroem'
|
29
30
|
require 'rumale/linear_model/base_linear_model'
|
30
31
|
require 'rumale/linear_model/base_sgd'
|
31
32
|
require 'rumale/linear_model/svc'
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module KernelApproximation
|
9
|
+
# Nystroem is a class that implements feature mapping with Nystroem method.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'numo/linalg/autoloader'
|
13
|
+
#
|
14
|
+
# transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
|
15
|
+
# new_training_samples = transformer.fit_transform(training_samples)
|
16
|
+
# new_testing_samples = transformer.transform(testing_samples)
|
17
|
+
#
|
18
|
+
# *Reference*
|
19
|
+
# 1. T. Yang, Y. Li, M. Mahdavi, R. Jin, and Z-H. Zhou, "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
|
20
|
+
class Nystroem
|
21
|
+
include Base::BaseEstimator
|
22
|
+
include Base::Transformer
|
23
|
+
|
24
|
+
# Returns the randomly sampled training data for feature mapping.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
26
|
+
attr_reader :components
|
27
|
+
|
28
|
+
# Returns the indices of the sampled training data.
|
29
|
+
# @return [Numo::Int32] (shape: [n_components])
|
30
|
+
attr_reader :component_indices
|
31
|
+
|
32
|
+
# Returns the normalizing factors.
|
33
|
+
# @return [Numo::DFloat] (shape: [n_components, n_components])
|
34
|
+
attr_reader :normalizer
|
35
|
+
|
36
|
+
# Returns the random generator for transformation.
|
37
|
+
# @return [Random]
|
38
|
+
attr_reader :rng
|
39
|
+
|
40
|
+
# Create a new transformer for mapping to kernel feature space with Nystrom method.
|
41
|
+
#
|
42
|
+
# @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
|
43
|
+
# @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
|
44
|
+
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
45
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
46
|
+
def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
|
47
|
+
check_params_numeric(gamma: gamma, n_components: n_components)
|
48
|
+
check_params_numeric_or_nil(random_seed: random_seed)
|
49
|
+
@params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
|
50
|
+
@params[:random_seed] ||= srand
|
51
|
+
@rng = Random.new(@params[:random_seed])
|
52
|
+
@component_indices = nil
|
53
|
+
@components = nil
|
54
|
+
@normalizer = nil
|
55
|
+
end
|
56
|
+
|
57
|
+
# Fit the model with given training data.
|
58
|
+
#
|
59
|
+
# @overload fit(x) -> Nystroem
|
60
|
+
# @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
61
|
+
# @return [Nystroem] The learned transformer itself.
|
62
|
+
def fit(x, _y = nil)
|
63
|
+
x = check_convert_sample_array(x)
|
64
|
+
raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
|
65
|
+
|
66
|
+
# initialize some variables.
|
67
|
+
sub_rng = @rng.dup
|
68
|
+
n_samples = x.shape[0]
|
69
|
+
n_components = [1, [@params[:n_components], n_samples].min].max
|
70
|
+
|
71
|
+
# random sampling.
|
72
|
+
@component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
|
73
|
+
@components = x[@component_indices, true]
|
74
|
+
|
75
|
+
# calculate normalizing factor.
|
76
|
+
kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
|
77
|
+
eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
|
78
|
+
la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
|
79
|
+
u = eig_vecs.reverse(1)
|
80
|
+
@normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
|
81
|
+
|
82
|
+
self
|
83
|
+
end
|
84
|
+
|
85
|
+
# Fit the model with training data, and then transform them with the learned model.
|
86
|
+
#
|
87
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
89
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
90
|
+
def fit_transform(x, _y = nil)
|
91
|
+
x = check_convert_sample_array(x)
|
92
|
+
fit(x).transform(x)
|
93
|
+
end
|
94
|
+
|
95
|
+
# Transform the given data with the learned model.
|
96
|
+
#
|
97
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
98
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
|
+
def transform(x)
|
100
|
+
x = check_convert_sample_array(x)
|
101
|
+
z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
|
102
|
+
z.dot(@normalizer)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
@@ -40,7 +40,6 @@ module Rumale
|
|
40
40
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
|
41
41
|
# @return [Pipeline] The learned pipeline itself.
|
42
42
|
def fit(x, y)
|
43
|
-
x = check_convert_sample_array(x)
|
44
43
|
trans_x = apply_transforms(x, y, fit: true)
|
45
44
|
last_estimator&.fit(trans_x, y)
|
46
45
|
self
|
@@ -52,7 +51,6 @@ module Rumale
|
|
52
51
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
53
52
|
# @return [Numo::NArray] The predicted results by last estimator.
|
54
53
|
def fit_predict(x, y = nil)
|
55
|
-
x = check_convert_sample_array(x)
|
56
54
|
trans_x = apply_transforms(x, y, fit: true)
|
57
55
|
last_estimator.fit_predict(trans_x)
|
58
56
|
end
|
@@ -63,7 +61,6 @@ module Rumale
|
|
63
61
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
|
64
62
|
# @return [Numo::NArray] The transformed samples by last estimator.
|
65
63
|
def fit_transform(x, y = nil)
|
66
|
-
x = check_convert_sample_array(x)
|
67
64
|
trans_x = apply_transforms(x, y, fit: true)
|
68
65
|
last_estimator.fit_transform(trans_x, y)
|
69
66
|
end
|
@@ -73,7 +70,6 @@ module Rumale
|
|
73
70
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
74
71
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
75
72
|
def decision_function(x)
|
76
|
-
x = check_convert_sample_array(x)
|
77
73
|
trans_x = apply_transforms(x)
|
78
74
|
last_estimator.decision_function(trans_x)
|
79
75
|
end
|
@@ -83,7 +79,6 @@ module Rumale
|
|
83
79
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
|
84
80
|
# @return [Numo::NArray] The predicted results by last estimator.
|
85
81
|
def predict(x)
|
86
|
-
x = check_convert_sample_array(x)
|
87
82
|
trans_x = apply_transforms(x)
|
88
83
|
last_estimator.predict(trans_x)
|
89
84
|
end
|
@@ -93,7 +88,6 @@ module Rumale
|
|
93
88
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
94
89
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
95
90
|
def predict_log_proba(x)
|
96
|
-
x = check_convert_sample_array(x)
|
97
91
|
trans_x = apply_transforms(x)
|
98
92
|
last_estimator.predict_log_proba(trans_x)
|
99
93
|
end
|
@@ -103,7 +97,6 @@ module Rumale
|
|
103
97
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
104
98
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
105
99
|
def predict_proba(x)
|
106
|
-
x = check_convert_sample_array(x)
|
107
100
|
trans_x = apply_transforms(x)
|
108
101
|
last_estimator.predict_proba(trans_x)
|
109
102
|
end
|
@@ -113,7 +106,6 @@ module Rumale
|
|
113
106
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
114
107
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
|
115
108
|
def transform(x)
|
116
|
-
x = check_convert_sample_array(x)
|
117
109
|
trans_x = apply_transforms(x)
|
118
110
|
last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
|
119
111
|
end
|
@@ -123,7 +115,6 @@ module Rumale
|
|
123
115
|
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
|
124
116
|
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
|
125
117
|
def inverse_transform(z)
|
126
|
-
z = check_convert_sample_array(z)
|
127
118
|
itrans_z = z
|
128
119
|
@steps.keys.reverse_each do |name|
|
129
120
|
transformer = @steps[name]
|
@@ -139,26 +130,10 @@ module Rumale
|
|
139
130
|
# @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
|
140
131
|
# @return [Float] The score of last estimator
|
141
132
|
def score(x, y)
|
142
|
-
x = check_convert_sample_array(x)
|
143
133
|
trans_x = apply_transforms(x)
|
144
134
|
last_estimator.score(trans_x, y)
|
145
135
|
end
|
146
136
|
|
147
|
-
# Dump marshal data.
|
148
|
-
# @return [Hash] The marshal data about Pipeline.
|
149
|
-
def marshal_dump
|
150
|
-
{ params: @params,
|
151
|
-
steps: @steps }
|
152
|
-
end
|
153
|
-
|
154
|
-
# Load marshal data.
|
155
|
-
# @return [nil]
|
156
|
-
def marshal_load(obj)
|
157
|
-
@params = obj[:params]
|
158
|
-
@steps = obj[:steps]
|
159
|
-
nil
|
160
|
-
end
|
161
|
-
|
162
137
|
private
|
163
138
|
|
164
139
|
def validate_steps(steps)
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.1
|
4
|
+
version: 0.17.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-01
|
11
|
+
date: 2020-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -209,6 +209,7 @@ files:
|
|
209
209
|
- lib/rumale/evaluation_measure/silhouette_score.rb
|
210
210
|
- lib/rumale/feature_extraction/feature_hasher.rb
|
211
211
|
- lib/rumale/feature_extraction/hash_vectorizer.rb
|
212
|
+
- lib/rumale/kernel_approximation/nystroem.rb
|
212
213
|
- lib/rumale/kernel_approximation/rbf.rb
|
213
214
|
- lib/rumale/kernel_machine/kernel_pca.rb
|
214
215
|
- lib/rumale/kernel_machine/kernel_ridge.rb
|