rumale 0.22.0 → 0.22.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4e2f68b3182ada73537901e7bc74bddd100aff75264f9147c88d8240fb624e29
-  data.tar.gz: e2639a55fc84d1399b925f65b3a56b38f2ae3150dd15ab8556120af28d408cae
+  metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
+  data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
 SHA512:
-  metadata.gz: 91ffcbade578bbb9c6a5d87a54ebd89a2b5990eb70835e7a5549afe78541dbfeafe3af50833725bee751fa89c059484970e5add7ebf8adee3e25bc000fbe3778
-  data.tar.gz: 2ee2b1448a486581ef98561f65bc3446b2e161c89a3a12bd6cd78867350e26151bc0b350bd431902d21f6979493ab2d01a6ee81b55c1099f631aa84c84a704e6
+  metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
+  data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
+# 0.22.1
+- Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
+- Refactor NeighbourhoodComponentAnalysis.
+- Update API documentation.
+
 # 0.22.0
 ## Breaking change
 - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
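
The headline change is the new MLKR transformer. A minimal usage sketch: the toy data and parameter choices here are ours for illustration, while the constructor keywords and `fit_transform` come from the new class added further down in this diff.

```ruby
require 'rumale'

# Toy regression data (illustrative only): the target depends on just
# two of the ten features, so MLKR should learn to down-weight the rest.
x = Numo::DFloat.new(100, 10).rand_norm
y = 2.0 * x[true, 0] + x[true, 1]

# Learn the metric and project the samples into the learned space.
transformer = Rumale::MetricLearning::MLKR.new(n_components: 2, random_seed: 1)
z = transformer.fit_transform(x, y)
p z.shape # => [100, 2]
```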
data/README.md CHANGED
@@ -113,10 +113,10 @@ require 'rumale'
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
 # Define the estimator to be evaluated.
-lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new
 
 # Define the evaluation measure, splitting strategy, and cross validation.
-ev = Rumale::EvaluationMeasure::LogLoss.new
+ev = Rumale::EvaluationMeasure::Accuracy.new
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
 cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
 
@@ -124,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_logloss = report[:test_score].inject(:+) / kf.n_splits
-puts("5-CV mean log-loss: %.3f" % mean_logloss)
+mean_accuracy = report[:test_score].sum / kf.n_splits
+puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
 ```
 
 Execution of the above script results in the following.
 
 ```bash
 $ ruby cross_validation.rb
-5-CV mean log-loss: 0.355
+5-CV mean accuracy: 95.4%
 ```
 
 ### Example 3. Pipeline
@@ -143,10 +143,10 @@ require 'rumale'
 # Load dataset.
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
-# Construct pipeline with kernel approximation and SVC.
-rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
-svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
-pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
+# Construct pipeline with kernel approximation and LogisticRegression.
+rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
+pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
 
 # Define the splitting strategy and cross validation.
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -156,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
+mean_accuracy = report[:test_score].sum / kf.n_splits
 puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
 ```
 
data/lib/rumale.rb CHANGED
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
 require 'rumale/metric_learning/neighbourhood_component_analysis'
+require 'rumale/metric_learning/mlkr'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'
data/lib/rumale/kernel_machine/kernel_svc.rb CHANGED
@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent (SGD) optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::SVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
data/lib/rumale/linear_model/svc.rb CHANGED
@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
data/lib/rumale/linear_model/svr.rb CHANGED
@@ -8,9 +8,10 @@ module Rumale
     # SVR is a class that implements Support Vector Regressor
     # with stochastic gradient descent optimization.
     #
-    # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
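
All three of these doc-comment changes wrap the same pointer in a YARD `@note`. A minimal sketch of the suggested swap, assuming the interface documented in the linked rumale-svm repository (class name from the notes; default parameters for brevity):

```ruby
require 'rumale/svm' # provided by the separate rumale-svm gem

# Same fit/predict contract as Rumale::LinearModel::SVC,
# but backed by LIBLINEAR for execution speed.
svc = Rumale::SVM::LinearSVC.new
svc.fit(training_samples, training_labels)
predicted_labels = svc.predict(testing_samples)
```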
data/lib/rumale/metric_learning/mlkr.rb ADDED
@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/decomposition/pca'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'lbfgsb'
+
+module Rumale
+  module MetricLearning
+    # MLKR is a class that implements Metric Learning for Kernel Regression.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::MLKR.new
+    #   transformer.fit(training_samples, training_target_values)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
+    class MLKR
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Return the metric components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+
+      # Return the number of iterations run for optimization.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with MLKR.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
+      # @param random_seed [Integer] The seed value used to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(max_iter: max_iter, tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [MLKR] The learned transformer itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        @prototypes = x.dot(@components.transpose)
+        @values = y
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        fit(x, y).transform(x)
+      end
+
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+
+      private
+
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+        )
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+
+      def mlkr_fnc(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # predict values.
+        kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        kernel_mat[kernel_mat.diag_indices] = 0.0
+        norm = kernel_mat.sum(1)
+        norm[norm.eq(0)] = 1
+        y_pred = kernel_mat.dot(y) / norm
+        # calculate loss.
+        y_diff = y_pred - y
+        loss = (y_diff**2).sum
+        # calculate gradient.
+        weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = 8 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
+      end
+    end
+  end
+end
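
For orientation, `mlkr_fnc` above evaluates the MLKR objective from the referenced Weinberger–Tesauro paper. In the notation below (ours, not the gem's), each sample is projected as z_i = A x_i, and the zeroed diagonal of `kernel_mat` realizes the j ≠ i constraint of the leave-one-out kernel-regression prediction:

```
k_{ij} = \exp\!\left(-\lVert z_i - z_j \rVert^2\right), \qquad
\hat{y}_i = \frac{\sum_{j \ne i} k_{ij}\, y_j}{\sum_{j \ne i} k_{ij}}, \qquad
L(A) = \sum_i \left(\hat{y}_i - y_i\right)^2
```

The `norm[norm.eq(0)] = 1` guard only avoids division by zero when a sample receives no kernel mass; L-BFGS-B then minimizes L(A) over the flattened components.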
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb CHANGED
@@ -2,6 +2,8 @@
 
 require 'rumale/base/base_estimator'
 require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/pairwise_metric'
 require 'lbfgsb'
 
 module Rumale
@@ -146,10 +148,11 @@ module Rumale
         mask_mat = y.expand_dims(1).eq(y)
         masked_prob_mat = prob_mat * mask_mat
         loss = n_samples - masked_prob_mat.sum
-        weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
-        weighted_prob_mat += weighted_prob_mat.transpose
-        weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
-        gradient = -2 * z.transpose.dot(weighted_prob_mat).dot(x)
+        sum_probs = masked_prob_mat.sum(1)
+        weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
+        weight_mat += weight_mat.transpose
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = -2 * z.transpose.dot(weight_mat).dot(x)
         [loss, gradient.flatten.dup]
       end
 
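Both this refactored NCA gradient and `mlkr_fnc` in the new MLKR class finish with the same `weight_mat.sum(0).diag - weight_mat` step. That is the usual graph-Laplacian rearrangement (our gloss, not from the gem's docs): for a symmetric weight matrix W with row-sum diagonal D,

```
\sum_{i,j} W_{ij}\,(z_i - z_j)(x_i - x_j)^{\mathsf{T}} \;=\; 2\, Z^{\mathsf{T}} (D - W)\, X
```

so the pairwise sum over sample differences collapses into the three matrix products of the `z.transpose.dot(weight_mat).dot(x)` expression in both methods.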
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.22.0'
+  VERSION = '0.22.1'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.22.0
+  version: 0.22.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-11-22 00:00:00.000000000 Z
+date: 2020-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -145,6 +145,7 @@ files:
 - lib/rumale/manifold/mds.rb
 - lib/rumale/manifold/tsne.rb
 - lib/rumale/metric_learning/fisher_discriminant_analysis.rb
+- lib/rumale/metric_learning/mlkr.rb
 - lib/rumale/metric_learning/neighbourhood_component_analysis.rb
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/function.rb