rumale 0.20.3 → 0.22.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -1
- data/.github/workflows/build.yml +23 -0
- data/.github/workflows/coverage.yml +28 -0
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/CHANGELOG.md +30 -0
- data/Gemfile +5 -4
- data/LICENSE.txt +1 -1
- data/README.md +57 -21
- data/ext/rumale/tree.c +23 -10
- data/lib/rumale.rb +4 -0
- data/lib/rumale/base/base_estimator.rb +5 -3
- data/lib/rumale/decomposition/pca.rb +1 -1
- data/lib/rumale/ensemble/stacking_classifier.rb +214 -0
- data/lib/rumale/ensemble/stacking_regressor.rb +163 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +1 -1
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +4 -3
- data/lib/rumale/linear_model/base_sgd.rb +1 -1
- data/lib/rumale/linear_model/elastic_net.rb +3 -3
- data/lib/rumale/linear_model/lasso.rb +3 -3
- data/lib/rumale/linear_model/linear_regression.rb +65 -36
- data/lib/rumale/linear_model/logistic_regression.rb +123 -35
- data/lib/rumale/linear_model/nnls.rb +137 -0
- data/lib/rumale/linear_model/ridge.rb +72 -35
- data/lib/rumale/linear_model/svc.rb +6 -5
- data/lib/rumale/linear_model/svr.rb +6 -5
- data/lib/rumale/metric_learning/mlkr.rb +161 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +18 -47
- data/lib/rumale/pairwise_metric.rb +1 -1
- data/lib/rumale/validation.rb +13 -1
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +2 -1
- metadata +24 -4
@@ -2,13 +2,15 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
require 'rumale/pairwise_metric'
|
7
|
+
require 'lbfgsb'
|
5
8
|
|
6
9
|
module Rumale
|
7
10
|
module MetricLearning
|
8
11
|
# NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
|
9
12
|
#
|
10
13
|
# @example
|
11
|
-
# require 'mopti'
|
12
14
|
# require 'rumale'
|
13
15
|
#
|
14
16
|
# transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
|
@@ -39,7 +41,9 @@ module Rumale
|
|
39
41
|
# @param init [String] The initialization method for components ('random' or 'pca').
|
40
42
|
# @param max_iter [Integer] The maximum number of iterations.
|
41
43
|
# @param tol [Float] The tolerance of termination criterion.
|
44
|
+
# This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
|
42
45
|
# @param verbose [Boolean] The flag indicating whether to output loss during iteration.
|
46
|
+
# If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
|
43
47
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
44
48
|
def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
|
45
49
|
check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
|
@@ -65,8 +69,6 @@ module Rumale
|
|
65
69
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
66
70
|
# @return [NeighbourhoodComponentAnalysis] The learned classifier itself.
|
67
71
|
def fit(x, y)
|
68
|
-
raise 'NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded.' unless enable_mopti?
|
69
|
-
|
70
72
|
x = check_convert_sample_array(x)
|
71
73
|
y = check_convert_label_array(y)
|
72
74
|
check_sample_label_size(x, y)
|
@@ -102,14 +104,6 @@ module Rumale
|
|
102
104
|
|
103
105
|
private
|
104
106
|
|
105
|
-
def enable_mopti?
|
106
|
-
if defined?(Mopti).nil?
|
107
|
-
warn('NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded. You should intall and load mopti gem in advance.')
|
108
|
-
return false
|
109
|
-
end
|
110
|
-
true
|
111
|
-
end
|
112
|
-
|
113
107
|
def init_components(x, n_features, n_components)
|
114
108
|
if @params[:init] == 'pca'
|
115
109
|
pca = Rumale::Decomposition::PCA.new(n_components: n_components)
|
@@ -127,28 +121,18 @@ module Rumale
|
|
127
121
|
res[:x] = comp_init
|
128
122
|
res[:n_iter] = 0
|
129
123
|
# perform optimization.
|
130
|
-
|
131
|
-
|
132
|
-
x_init: comp_init, args: [x, y],
|
133
|
-
|
124
|
+
verbose = @params[:verbose] ? 1 : -1
|
125
|
+
res = Lbfgsb.minimize(
|
126
|
+
fnc: method(:nca_fnc), jcb: true, x_init: comp_init, args: [x, y],
|
127
|
+
maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
|
134
128
|
)
|
135
|
-
fold = 0.0
|
136
|
-
dold = 0.0
|
137
|
-
optimizer.each do |prm|
|
138
|
-
res = prm
|
139
|
-
puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
|
140
|
-
break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
|
141
|
-
|
142
|
-
fold = res[:fnc]
|
143
|
-
dold = res[:jcb]
|
144
|
-
end
|
145
129
|
# return the results.
|
146
130
|
n_iter = res[:n_iter]
|
147
131
|
comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
|
148
132
|
[comps, n_iter]
|
149
133
|
end
|
150
134
|
|
151
|
-
def
|
135
|
+
def nca_fnc(w, x, y)
|
152
136
|
# initialize some variables.
|
153
137
|
n_samples, n_features = x.shape
|
154
138
|
n_components = w.size / n_features
|
@@ -157,32 +141,19 @@ module Rumale
|
|
157
141
|
z = x.dot(w.transpose)
|
158
142
|
# calculate probability matrix.
|
159
143
|
prob_mat = probability_matrix(z)
|
160
|
-
# calculate loss.
|
144
|
+
# calculate loss and gradient.
|
161
145
|
# NOTE:
|
162
146
|
# NCA attempts to maximize its objective function.
|
163
147
|
# For the minimization algorithm, the objective function value is subtracted from the maximum value (n_samples).
|
164
148
|
mask_mat = y.expand_dims(1).eq(y)
|
165
149
|
masked_prob_mat = prob_mat * mask_mat
|
166
|
-
n_samples - masked_prob_mat.sum
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
# projection.
|
174
|
-
w = w.reshape(n_components, n_features)
|
175
|
-
z = x.dot(w.transpose)
|
176
|
-
# calculate probability matrix.
|
177
|
-
prob_mat = probability_matrix(z)
|
178
|
-
# calculate gradient.
|
179
|
-
mask_mat = y.expand_dims(1).eq(y)
|
180
|
-
masked_prob_mat = prob_mat * mask_mat
|
181
|
-
weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
|
182
|
-
weighted_prob_mat += weighted_prob_mat.transpose
|
183
|
-
weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
|
184
|
-
gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
|
185
|
-
-gradient.flatten.dup
|
150
|
+
loss = n_samples - masked_prob_mat.sum
|
151
|
+
sum_probs = masked_prob_mat.sum(1)
|
152
|
+
weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
|
153
|
+
weight_mat += weight_mat.transpose
|
154
|
+
weight_mat = weight_mat.sum(0).diag - weight_mat
|
155
|
+
gradient = -2 * z.transpose.dot(weight_mat).dot(x)
|
156
|
+
[loss, gradient.flatten.dup]
|
186
157
|
end
|
187
158
|
|
188
159
|
def probability_matrix(z)
|
@@ -123,7 +123,7 @@ module Rumale
|
|
123
123
|
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
124
124
|
# @param coef [Integer] The parameter of polynomial kernel.
|
125
125
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
126
|
-
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
|
126
|
+
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
127
127
|
y = x if y.nil?
|
128
128
|
gamma ||= 1.0 / x.shape[1]
|
129
129
|
x = Rumale::Validation.check_convert_sample_array(x)
|
data/lib/rumale/validation.rb
CHANGED
@@ -27,6 +27,7 @@ module Rumale
|
|
27
27
|
y
|
28
28
|
end
|
29
29
|
|
30
|
+
# @deprecated Use check_convert_sample_array instead of this method.
|
30
31
|
# @!visibility private
|
31
32
|
def check_sample_array(x)
|
32
33
|
raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
|
@@ -35,6 +36,7 @@ module Rumale
|
|
35
36
|
nil
|
36
37
|
end
|
37
38
|
|
39
|
+
# @deprecated Use check_convert_label_array instead of this method.
|
38
40
|
# @!visibility private
|
39
41
|
def check_label_array(y)
|
40
42
|
raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
|
@@ -43,6 +45,7 @@ module Rumale
|
|
43
45
|
nil
|
44
46
|
end
|
45
47
|
|
48
|
+
# @deprecated Use check_convert_tvalue_array instead of this method.
|
46
49
|
# @!visibility private
|
47
50
|
def check_tvalue_array(y)
|
48
51
|
raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
|
@@ -64,52 +67,61 @@ module Rumale
|
|
64
67
|
nil
|
65
68
|
end
|
66
69
|
|
70
|
+
# TODO: Better to replace with RBS in the future.
|
67
71
|
# @!visibility private
|
68
72
|
def check_params_type(type, params = {})
|
69
73
|
params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
|
70
74
|
nil
|
71
75
|
end
|
72
76
|
|
77
|
+
# TODO: Better to replace with RBS in the future.
|
73
78
|
# @!visibility private
|
74
79
|
def check_params_type_or_nil(type, params = {})
|
75
80
|
params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
|
76
81
|
nil
|
77
82
|
end
|
78
83
|
|
84
|
+
# TODO: Better to replace with RBS in the future.
|
79
85
|
# @!visibility private
|
80
86
|
def check_params_numeric(params = {})
|
81
87
|
check_params_type(Numeric, params)
|
82
88
|
end
|
83
89
|
|
90
|
+
# TODO: Better to replace with RBS in the future.
|
84
91
|
# @!visibility private
|
85
92
|
def check_params_numeric_or_nil(params = {})
|
86
93
|
check_params_type_or_nil(Numeric, params)
|
87
94
|
end
|
88
95
|
|
96
|
+
# @deprecated Use check_params_numeric instead of this method.
|
89
97
|
# @!visibility private
|
90
98
|
def check_params_float(params = {})
|
91
99
|
check_params_type(Float, params)
|
92
100
|
end
|
93
101
|
|
102
|
+
# @deprecated Use check_params_numeric instead of this method.
|
94
103
|
# @!visibility private
|
95
104
|
def check_params_integer(params = {})
|
96
105
|
check_params_type(Integer, params)
|
97
106
|
end
|
98
107
|
|
108
|
+
# TODO: Better to replace with RBS in the future.
|
99
109
|
# @!visibility private
|
100
110
|
def check_params_string(params = {})
|
101
111
|
check_params_type(String, params)
|
102
112
|
end
|
103
113
|
|
114
|
+
# TODO: Better to replace with RBS in the future.
|
104
115
|
# @!visibility private
|
105
116
|
def check_params_boolean(params = {})
|
106
117
|
params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
|
107
118
|
nil
|
108
119
|
end
|
109
120
|
|
121
|
+
# TODO: Better to replace with RBS in the future.
|
110
122
|
# @!visibility private
|
111
123
|
def check_params_positive(params = {})
|
112
|
-
params.
|
124
|
+
params.compact.each { |k, v| raise ArgumentError, "Expect #{k} to be positive value" if v.negative? }
|
113
125
|
nil
|
114
126
|
end
|
115
127
|
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -38,11 +38,12 @@ Gem::Specification.new do |spec|
|
|
38
38
|
|
39
39
|
spec.metadata = {
|
40
40
|
'homepage_uri' => 'https://github.com/yoshoku/rumale',
|
41
|
-
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/
|
41
|
+
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md',
|
42
42
|
'source_code_uri' => 'https://github.com/yoshoku/rumale',
|
43
43
|
'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
|
44
44
|
'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
|
45
45
|
}
|
46
46
|
|
47
47
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
48
|
+
spec.add_runtime_dependency 'lbfgsb', '>=0.3.0'
|
48
49
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.20.3
|
4
|
+
version: 0.22.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: lbfgsb
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.3.0
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.3.0
|
27
41
|
description: |
|
28
42
|
Rumale is a machine learning library in Ruby.
|
29
43
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
@@ -44,6 +58,8 @@ extensions:
|
|
44
58
|
extra_rdoc_files: []
|
45
59
|
files:
|
46
60
|
- ".coveralls.yml"
|
61
|
+
- ".github/workflows/build.yml"
|
62
|
+
- ".github/workflows/coverage.yml"
|
47
63
|
- ".gitignore"
|
48
64
|
- ".rspec"
|
49
65
|
- ".rubocop.yml"
|
@@ -90,6 +106,8 @@ files:
|
|
90
106
|
- lib/rumale/ensemble/gradient_boosting_regressor.rb
|
91
107
|
- lib/rumale/ensemble/random_forest_classifier.rb
|
92
108
|
- lib/rumale/ensemble/random_forest_regressor.rb
|
109
|
+
- lib/rumale/ensemble/stacking_classifier.rb
|
110
|
+
- lib/rumale/ensemble/stacking_regressor.rb
|
93
111
|
- lib/rumale/evaluation_measure/accuracy.rb
|
94
112
|
- lib/rumale/evaluation_measure/adjusted_rand_score.rb
|
95
113
|
- lib/rumale/evaluation_measure/calinski_harabasz_score.rb
|
@@ -125,12 +143,14 @@ files:
|
|
125
143
|
- lib/rumale/linear_model/lasso.rb
|
126
144
|
- lib/rumale/linear_model/linear_regression.rb
|
127
145
|
- lib/rumale/linear_model/logistic_regression.rb
|
146
|
+
- lib/rumale/linear_model/nnls.rb
|
128
147
|
- lib/rumale/linear_model/ridge.rb
|
129
148
|
- lib/rumale/linear_model/svc.rb
|
130
149
|
- lib/rumale/linear_model/svr.rb
|
131
150
|
- lib/rumale/manifold/mds.rb
|
132
151
|
- lib/rumale/manifold/tsne.rb
|
133
152
|
- lib/rumale/metric_learning/fisher_discriminant_analysis.rb
|
153
|
+
- lib/rumale/metric_learning/mlkr.rb
|
134
154
|
- lib/rumale/metric_learning/neighbourhood_component_analysis.rb
|
135
155
|
- lib/rumale/model_selection/cross_validation.rb
|
136
156
|
- lib/rumale/model_selection/function.rb
|
@@ -190,7 +210,7 @@ licenses:
|
|
190
210
|
- BSD-2-Clause
|
191
211
|
metadata:
|
192
212
|
homepage_uri: https://github.com/yoshoku/rumale
|
193
|
-
changelog_uri: https://github.com/yoshoku/rumale/blob/
|
213
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
194
214
|
source_code_uri: https://github.com/yoshoku/rumale
|
195
215
|
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
196
216
|
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
@@ -209,7 +229,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
209
229
|
- !ruby/object:Gem::Version
|
210
230
|
version: '0'
|
211
231
|
requirements: []
|
212
|
-
rubygems_version: 3.
|
232
|
+
rubygems_version: 3.2.3
|
213
233
|
signing_key:
|
214
234
|
specification_version: 4
|
215
235
|
summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
|