rumale 0.22.0 → 0.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e2f68b3182ada73537901e7bc74bddd100aff75264f9147c88d8240fb624e29
4
- data.tar.gz: e2639a55fc84d1399b925f65b3a56b38f2ae3150dd15ab8556120af28d408cae
3
+ metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
4
+ data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
5
5
  SHA512:
6
- metadata.gz: 91ffcbade578bbb9c6a5d87a54ebd89a2b5990eb70835e7a5549afe78541dbfeafe3af50833725bee751fa89c059484970e5add7ebf8adee3e25bc000fbe3778
7
- data.tar.gz: 2ee2b1448a486581ef98561f65bc3446b2e161c89a3a12bd6cd78867350e26151bc0b350bd431902d21f6979493ab2d01a6ee81b55c1099f631aa84c84a704e6
6
+ metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
7
+ data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
@@ -1,3 +1,8 @@
1
+ # 0.22.1
2
+ - Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
3
+ - Refactor NeighbourhoodComponentAnalysis.
4
+ - Update API documentation.
5
+
1
6
  # 0.22.0
2
7
  ## Breaking change
3
8
  - Add lbfgsb.rb gem to runtime dependencies. Rumale uses lbfgsb gem for optimization.
data/README.md CHANGED
@@ -113,10 +113,10 @@ require 'rumale'
113
113
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
114
114
 
115
115
  # Define the estimator to be evaluated.
116
- lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
116
+ lr = Rumale::LinearModel::LogisticRegression.new
117
117
 
118
118
  # Define the evaluation measure, splitting strategy, and cross validation.
119
- ev = Rumale::EvaluationMeasure::LogLoss.new
119
+ ev = Rumale::EvaluationMeasure::Accuracy.new
120
120
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
121
121
  cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
122
122
 
@@ -124,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
124
124
  report = cv.perform(samples, labels)
125
125
 
126
126
  # Output result.
127
- mean_logloss = report[:test_score].inject(:+) / kf.n_splits
128
- puts("5-CV mean log-loss: %.3f" % mean_logloss)
127
+ mean_accuracy = report[:test_score].sum / kf.n_splits
128
+ puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
129
129
  ```
130
130
 
131
131
  Execution of the above script results in the following.
132
132
 
133
133
  ```bash
134
134
  $ ruby cross_validation.rb
135
- 5-CV mean log-loss: 0.355
135
+ 5-CV mean accuracy: 95.4%
136
136
  ```
137
137
 
138
138
  ### Example 3. Pipeline
@@ -143,10 +143,10 @@ require 'rumale'
143
143
  # Load dataset.
144
144
  samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
145
145
 
146
- # Construct pipeline with kernel approximation and SVC.
147
- rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
148
- svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
149
- pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
146
+ # Construct pipeline with kernel approximation and LogisticRegression.
147
+ rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
148
+ lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
149
+ pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
150
150
 
151
151
  # Define the splitting strategy and cross validation.
152
152
  kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -156,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
156
156
  report = cv.perform(samples, labels)
157
157
 
158
158
  # Output result.
159
- mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
159
+ mean_accuracy = report[:test_score].sum / kf.n_splits
160
160
  puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
161
161
  ```
162
162
 
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
77
77
  require 'rumale/manifold/mds'
78
78
  require 'rumale/metric_learning/fisher_discriminant_analysis'
79
79
  require 'rumale/metric_learning/neighbourhood_component_analysis'
80
+ require 'rumale/metric_learning/mlkr'
80
81
  require 'rumale/neural_network/adam'
81
82
  require 'rumale/neural_network/base_mlp'
82
83
  require 'rumale/neural_network/mlp_regressor'
@@ -11,9 +11,10 @@ module Rumale
11
11
  # with stochastic gradient descent (SGD) optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
- # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
15
- # If you prefer execution speed, you should use Rumale::SVM::SVC.
16
- # https://github.com/yoshoku/rumale-svm
14
+ # @note
15
+ # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
16
+ # If you prefer execution speed, you should use Rumale::SVM::SVC.
17
+ # https://github.com/yoshoku/rumale-svm
17
18
  #
18
19
  # @example
19
20
  # training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
@@ -11,9 +11,10 @@ module Rumale
11
11
  # with stochastic gradient descent optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
- # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
15
- # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
16
- # https://github.com/yoshoku/rumale-svm
14
+ # @note
15
+ # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
16
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
17
+ # https://github.com/yoshoku/rumale-svm
17
18
  #
18
19
  # @example
19
20
  # estimator =
@@ -8,9 +8,10 @@ module Rumale
8
8
  # SVR is a class that implements Support Vector Regressor
9
9
  # with stochastic gradient descent optimization.
10
10
  #
11
- # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
12
- # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
13
- # https://github.com/yoshoku/rumale-svm
11
+ # @note
12
+ # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
13
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
14
+ # https://github.com/yoshoku/rumale-svm
14
15
  #
15
16
  # @example
16
17
  # estimator =
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/decomposition/pca'
6
+ require 'rumale/pairwise_metric'
7
+ require 'rumale/utils'
8
+ require 'lbfgsb'
9
+
10
+ module Rumale
11
+ module MetricLearning
12
+ # MLKR is a class that implements Metric Learning for Kernel Regression.
13
+ #
14
+ # @example
15
+ # transformer = Rumale::MetricLearning::MLKR.new
16
+ # transformer.fit(training_samples, training_target_values)
17
+ # low_samples = transformer.transform(testing_samples)
18
+ #
19
+ # *Reference*
20
+ # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--619, 2007.
21
+ class MLKR
22
+ include Base::BaseEstimator
23
+ include Base::Transformer
24
+
25
+ # Returns the metric components.
26
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
27
+ attr_reader :components
28
+
29
+ # Return the number of iterations run for optimization
30
+ # @return [Integer]
31
+ attr_reader :n_iter
32
+
33
+ # Return the random generator.
34
+ # @return [Random]
35
+ attr_reader :rng
36
+
37
+ # Create a new transformer with MLKR.
38
+ #
39
+ # @param n_components [Integer] The number of components.
40
+ # @param init [String] The initialization method for components ('random' or 'pca').
41
+ # @param max_iter [Integer] The maximum number of iterations.
42
+ # @param tol [Float] The tolerance of termination criterion.
43
+ # This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
44
+ # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
45
+ # If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
46
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
47
+ def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
48
+ check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
49
+ check_params_numeric(max_iter: max_iter, tol: tol)
50
+ check_params_string(init: init)
51
+ check_params_boolean(verbose: verbose)
52
+ @params = {}
53
+ @params[:n_components] = n_components
54
+ @params[:init] = init
55
+ @params[:max_iter] = max_iter
56
+ @params[:tol] = tol
57
+ @params[:verbose] = verbose
58
+ @params[:random_seed] = random_seed
59
+ @params[:random_seed] ||= srand
60
+ @components = nil
61
+ @n_iter = nil
62
+ @rng = Random.new(@params[:random_seed])
63
+ end
64
+
65
+ # Fit the model with given training data.
66
+ #
67
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
68
+ # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
69
+ # @return [MLKR] The learned transformer itself.
70
+ def fit(x, y)
71
+ x = check_convert_sample_array(x)
72
+ y = check_convert_tvalue_array(y)
73
+ check_sample_tvalue_size(x, y)
74
+ n_features = x.shape[1]
75
+ n_components = if @params[:n_components].nil?
76
+ n_features
77
+ else
78
+ [n_features, @params[:n_components]].min
79
+ end
80
+ @components, @n_iter = optimize_components(x, y, n_features, n_components)
81
+ @prototypes = x.dot(@components.transpose)
82
+ @values = y
83
+ self
84
+ end
85
+
86
+ # Fit the model with training data, and then transform them with the learned model.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
89
+ # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
90
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
91
+ def fit_transform(x, y)
92
+ x = check_convert_sample_array(x)
93
+ y = check_convert_tvalue_array(y)
94
+ check_sample_tvalue_size(x, y)
95
+ fit(x, y).transform(x)
96
+ end
97
+
98
+ # Transform the given data with the learned model.
99
+ #
100
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
101
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
102
+ def transform(x)
103
+ x = check_convert_sample_array(x)
104
+ x.dot(@components.transpose)
105
+ end
106
+
107
+ private
108
+
109
+ def init_components(x, n_features, n_components)
110
+ if @params[:init] == 'pca'
111
+ pca = Rumale::Decomposition::PCA.new(n_components: n_components)
112
+ pca.fit(x).components.flatten.dup
113
+ else
114
+ Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
115
+ end
116
+ end
117
+
118
+ def optimize_components(x, y, n_features, n_components)
119
+ # initialize components.
120
+ comp_init = init_components(x, n_features, n_components)
121
+ # initialize optimization results.
122
+ res = {}
123
+ res[:x] = comp_init
124
+ res[:n_iter] = 0
125
+ # perform optimization.
126
+ verbose = @params[:verbose] ? 1 : -1
127
+ res = Lbfgsb.minimize(
128
+ fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
129
+ maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
130
+ )
131
+ # return the results.
132
+ n_iter = res[:n_iter]
133
+ comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
134
+ [comps, n_iter]
135
+ end
136
+
137
+ def mlkr_fnc(w, x, y)
138
+ # initialize some variables.
139
+ n_features = x.shape[1]
140
+ n_components = w.size / n_features
141
+ # projection.
142
+ w = w.reshape(n_components, n_features)
143
+ z = x.dot(w.transpose)
144
+ # predict values.
145
+ kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
146
+ kernel_mat[kernel_mat.diag_indices] = 0.0
147
+ norm = kernel_mat.sum(1)
148
+ norm[norm.eq(0)] = 1
149
+ y_pred = kernel_mat.dot(y) / norm
150
+ # calculate loss.
151
+ y_diff = y_pred - y
152
+ loss = (y_diff**2).sum
153
+ # calculate gradient.
154
+ weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
155
+ weight_mat = weight_mat.sum(0).diag - weight_mat
156
+ gradient = 8 * z.transpose.dot(weight_mat).dot(x)
157
+ [loss, gradient.flatten.dup]
158
+ end
159
+ end
160
+ end
161
+ end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/transformer'
5
+ require 'rumale/utils'
6
+ require 'rumale/pairwise_metric'
5
7
  require 'lbfgsb'
6
8
 
7
9
  module Rumale
@@ -146,10 +148,11 @@ module Rumale
146
148
  mask_mat = y.expand_dims(1).eq(y)
147
149
  masked_prob_mat = prob_mat * mask_mat
148
150
  loss = n_samples - masked_prob_mat.sum
149
- weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
150
- weighted_prob_mat += weighted_prob_mat.transpose
151
- weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
152
- gradient = -2 * z.transpose.dot(weighted_prob_mat).dot(x)
151
+ sum_probs = masked_prob_mat.sum(1)
152
+ weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
153
+ weight_mat += weight_mat.transpose
154
+ weight_mat = weight_mat.sum(0).diag - weight_mat
155
+ gradient = -2 * z.transpose.dot(weight_mat).dot(x)
153
156
  [loss, gradient.flatten.dup]
154
157
  end
155
158
 
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.22.0'
6
+ VERSION = '0.22.1'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.22.0
4
+ version: 0.22.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-11-22 00:00:00.000000000 Z
11
+ date: 2020-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -145,6 +145,7 @@ files:
145
145
  - lib/rumale/manifold/mds.rb
146
146
  - lib/rumale/manifold/tsne.rb
147
147
  - lib/rumale/metric_learning/fisher_discriminant_analysis.rb
148
+ - lib/rumale/metric_learning/mlkr.rb
148
149
  - lib/rumale/metric_learning/neighbourhood_component_analysis.rb
149
150
  - lib/rumale/model_selection/cross_validation.rb
150
151
  - lib/rumale/model_selection/function.rb