rumale 0.20.1 → 0.22.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0f361026cd2922a2d36846a817eee855bf0c000156ed6c756bca29d2e42d67a2
-  data.tar.gz: 016fa40aa2546824cacbc32353263cbfc9427f0ceabb7e703f99854914bb9a2e
+  metadata.gz: 475798da3815141b5337bc7803eca26978bbc98c36a2be4d681bc63f778f5840
+  data.tar.gz: 71841127edccbeea2b30c4bd8a744735933c1fcf8c7d6afa507cd2d361c8b5c8
 SHA512:
-  metadata.gz: 7a53a958db7ec8b56236018505370b9908ae81a9afc9d7c8ff0b16d83971539c1ad729b5ab350eb49ae9b90ada43a8912ed2404a37eef97a4d34dad90b1d3e9f
-  data.tar.gz: 2f2b3d48625c7120464179bc7759c01ba7de85cb0d54720665eaf1e4822f24c1870474ebc24a47cff123e44a8626b0e0fac6a7e81216c057286071770ea5ba79
+  metadata.gz: 8c90eaffa3847e3cc4f31c58e3d74f66a86e2cf0bd1c6e5aa386f9519de3984ac1c605187119e0ec01585c82cf8c06e4f4aa2f19c7b40883b784e834e2e801d8
+  data.tar.gz: 336b1afcc35e52e1c13ced74f527d54c994ff66509cf9b8b2f81dce62692078964a453df790a5bcaf8ea2bd156277d719a33958de2f57aa64d97beeafdc48d01
data/.github/workflows/build.yml ADDED
@@ -0,0 +1,23 @@
+name: build
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        ruby: [ '2.5', '2.6', '2.7' ]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Install BLAS and LAPACK
+        run: sudo apt-get install -y libopenblas-dev liblapacke-dev
+      - name: Set up Ruby ${{ matrix.ruby }}
+        uses: actions/setup-ruby@v1
+        with:
+          ruby-version: ${{ matrix.ruby }}
+      - name: Build and test with Rake
+        run: |
+          gem install bundler
+          bundle install --jobs 4 --retry 3
+          bundle exec rake
data/.rubocop.yml CHANGED
@@ -20,6 +20,9 @@ Layout/LineLength:
   Max: 145
   IgnoredPatterns: ['(\A|\s)#']
 
+Lint/ConstantDefinitionInBlock:
+  Enabled: false
+
 Lint/MissingSuper:
   Enabled: false
 
@@ -70,6 +73,9 @@ Style/StringConcatenation:
 RSpec/MultipleExpectations:
   Enabled: false
 
+RSpec/MultipleMemoizedHelpers:
+  Max: 25
+
 RSpec/NestedGroups:
   Max: 4
 
@@ -81,3 +87,6 @@ RSpec/InstanceVariable:
 
 RSpec/LeakyConstantDeclaration:
   Enabled: false
+
+Performance/Sum:
+  Enabled: false
data/CHANGELOG.md CHANGED
@@ -1,3 +1,27 @@
+# 0.22.1
+- Add transformer class for MLKR, which implements Metric Learning for Kernel Regression.
+- Refactor NeighbourhoodComponentAnalysis.
+- Update API documentation.
+
+# 0.22.0
+## Breaking change
+- Add the lbfgsb.rb gem to the runtime dependencies. Rumale uses the lbfgsb gem for optimization.
+  This eliminates the need to require the mopti gem when using [NeighbourhoodComponentAnalysis](https://yoshoku.github.io/rumale/doc/Rumale/MetricLearning/NeighbourhoodComponentAnalysis.html).
+- Add the lbfgs solver to [LogisticRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LogisticRegression.html) and make it the default solver.
+
+# 0.21.0
+## Breaking change
+- Change the default value of the max_iter argument on LinearModel estimators to 1000.
+
+# 0.20.3
+- Fix NeighbourhoodComponentAnalysis to use the automatic solver of PCA.
+- Refactor some code with RuboCop.
+- Update README.
+
+# 0.20.2
+- Add a cross-validator class for time-series data.
+  - [TimeSeriesSplit](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/TimeSeriesSplit.html)
+
 # 0.20.1
 - Add cross-validator classes that split data according to group labels.
   - [GroupKFold](https://yoshoku.github.io/rumale/doc/Rumale/ModelSelection/GroupKFold.html)
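
The 0.22.0 solver switch noted above can be exercised directly. A minimal sketch, assuming rumale >= 0.22.0 is installed; the toy data here is illustrative, not taken from the release notes:

```ruby
require 'rumale'

# Toy two-class data: two shifted Gaussian blobs.
x = Numo::DFloat.new(100, 2).rand_norm
x[50..-1, true] += 4.0
y = Numo::Int32.zeros(100)
y[50..-1] = 1

# Since 0.22.0 the default solver is 'lbfgs' (only the 'l2' penalty is supported).
lbfgs_lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0)
lbfgs_lr.fit(x, y)

# The former SGD behaviour remains available via solver: 'sgd'.
sgd_lr = Rumale::LinearModel::LogisticRegression.new(solver: 'sgd', random_seed: 1)
sgd_lr.fit(x, y)

puts 'lbfgs accuracy: %.3f' % lbfgs_lr.score(x, y)
puts 'sgd accuracy:   %.3f' % sgd_lr.score(x, y)
```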
data/Gemfile CHANGED
@@ -3,11 +3,13 @@ source 'https://rubygems.org'
 # Specify your gem's dependencies in rumale.gemspec
 gemspec
 
-gem 'coveralls', '~> 0.8'
 gem 'mmh3', '>= 1.0'
-gem 'mopti', '>= 0.1.0'
 gem 'numo-linalg', '>= 0.1.4'
 gem 'parallel', '>= 1.17.0'
 gem 'rake', '~> 12.0'
 gem 'rake-compiler', '~> 1.0'
 gem 'rspec', '~> 3.0'
+gem 'rubocop', '~> 0.91'
+gem 'rubocop-performance', '~> 1.8'
+gem 'rubocop-rspec', '~> 1.43'
+gem 'simplecov', '~> 0.19'
data/README.md CHANGED
@@ -2,8 +2,7 @@
 
 ![Rumale](https://dl.dropboxusercontent.com/s/joxruk2720ur66o/rumale_header_400.png)
 
-[![Build Status](https://travis-ci.org/yoshoku/rumale.svg?branch=master)](https://travis-ci.org/yoshoku/rumale)
-[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
+[![Build Status](https://github.com/yoshoku/rumale/workflows/build/badge.svg)](https://github.com/yoshoku/rumale/actions?query=workflow%3Abuild)
 [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
 [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
 [![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/)
@@ -114,10 +113,10 @@ require 'rumale'
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
 # Define the estimator to be evaluated.
-lr = Rumale::LinearModel::LogisticRegression.new(learning_rate: 0.00001, reg_param: 0.0001, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new
 
 # Define the evaluation measure, splitting strategy, and cross validation.
-ev = Rumale::EvaluationMeasure::LogLoss.new
+ev = Rumale::EvaluationMeasure::Accuracy.new
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
 cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, evaluator: ev)
 
@@ -125,15 +124,15 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: lr, splitter: kf, ev
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_logloss = report[:test_score].inject(:+) / kf.n_splits
-puts("5-CV mean log-loss: %.3f" % mean_logloss)
+mean_accuracy = report[:test_score].sum / kf.n_splits
+puts "5-CV mean accuracy: %.1f%%" % (100.0 * mean_accuracy)
 ```
 
 Execution of the above script results in the following.
 
 ```bash
 $ ruby cross_validation.rb
-5-CV mean log-loss: 0.355
+5-CV mean accuracy: 95.4%
 ```
 
 ### Example 3. Pipeline
@@ -144,10 +143,10 @@ require 'rumale'
 # Load dataset.
 samples, labels = Rumale::Dataset.load_libsvm_file('pendigits')
 
-# Construct pipeline with kernel approximation and SVC.
-rbf = Rumale::KernelApproximation::RBF.new(gamma: 0.0001, n_components: 800, random_seed: 1)
-svc = Rumale::LinearModel::SVC.new(reg_param: 0.0001, random_seed: 1)
-pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: svc })
+# Construct pipeline with kernel approximation and LogisticRegression.
+rbf = Rumale::KernelApproximation::RBF.new(gamma: 1e-4, n_components: 800, random_seed: 1)
+lr = Rumale::LinearModel::LogisticRegression.new(reg_param: 1e-3)
+pipeline = Rumale::Pipeline::Pipeline.new(steps: { trns: rbf, clsf: lr })
 
 # Define the splitting strategy and cross validation.
 kf = Rumale::ModelSelection::StratifiedKFold.new(n_splits: 5, shuffle: true, random_seed: 1)
@@ -157,7 +156,7 @@ cv = Rumale::ModelSelection::CrossValidation.new(estimator: pipeline, splitter:
 report = cv.perform(samples, labels)
 
 # Output result.
-mean_accuracy = report[:test_score].inject(:+) / kf.n_splits
+mean_accuracy = report[:test_score].sum / kf.n_splits
 puts("5-CV mean accuracy: %.1f %%" % (mean_accuracy * 100.0))
 ```
 
@@ -228,6 +227,10 @@ When -1 is given to n_jobs parameter, all processors are used.
 estimator = Rumale::Ensemble::RandomForestClassifier.new(n_jobs: -1, random_seed: 1)
 ```
 
+## Novelties
+
+* [Rumale SHOP](https://suzuri.jp/yoshoku)
+
 ## Contributing
 
 Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/rumale.
data/lib/rumale.rb CHANGED
@@ -77,6 +77,7 @@ require 'rumale/manifold/tsne'
 require 'rumale/manifold/mds'
 require 'rumale/metric_learning/fisher_discriminant_analysis'
 require 'rumale/metric_learning/neighbourhood_component_analysis'
+require 'rumale/metric_learning/mlkr'
 require 'rumale/neural_network/adam'
 require 'rumale/neural_network/base_mlp'
 require 'rumale/neural_network/mlp_regressor'
@@ -103,6 +104,7 @@ require 'rumale/model_selection/stratified_k_fold'
 require 'rumale/model_selection/shuffle_split'
 require 'rumale/model_selection/group_shuffle_split'
 require 'rumale/model_selection/stratified_shuffle_split'
+require 'rumale/model_selection/time_series_split'
 require 'rumale/model_selection/cross_validation'
 require 'rumale/model_selection/grid_search_cv'
 require 'rumale/model_selection/function'
@@ -51,7 +51,7 @@ module Rumale
       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be used for cluster analysis.
       #   If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
       # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
-      def fit_predict(x)
+      def fit_predict(x) # rubocop:disable Lint/UselessMethodDefinition
         super
       end
 
data/lib/rumale/evaluation_measure/roc_auc.rb CHANGED
@@ -75,9 +75,12 @@ module Rumale
         false_pos, true_pos, thresholds = binary_roc_curve(y_true, y_score, pos_label)
 
         if true_pos.size.zero? || false_pos[0] != 0 || true_pos[0] != 0
+          # NOTE: Numo::NArray#insert is not a destructive method.
+          # rubocop:disable Style/RedundantSelfAssignment
           true_pos = true_pos.insert(0, 0)
           false_pos = false_pos.insert(0, 0)
           thresholds = thresholds.insert(0, thresholds[0] + 1)
+          # rubocop:enable Style/RedundantSelfAssignment
         end
 
         tpr = true_pos / true_pos[-1].to_f
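
The NOTE above is the whole point of the disabled cop: `insert` on a Numo array returns a new array rather than mutating the receiver, so the self-assignment is not redundant. A quick sketch of that behaviour, based on Numo::NArray's Array-like `insert`:

```ruby
require 'numo/narray'

a = Numo::Int32[1, 2, 3]
b = a.insert(0, 0) # returns a new array with 0 prepended

p a # => Numo::Int32[1, 2, 3]  (receiver unchanged)
p b # => Numo::Int32[0, 1, 2, 3]
```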
data/lib/rumale/kernel_machine/kernel_svc.rb CHANGED
@@ -11,9 +11,10 @@ module Rumale
     # with stochastic gradient descent (SGD) optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::SVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides kernel support vector classifier based on LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::SVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
data/lib/rumale/linear_model/base_sgd.rb CHANGED
@@ -171,7 +171,7 @@ module Rumale
         @params[:fit_bias] = true
         @params[:reg_param] = 0.0
         @params[:l1_ratio] = 0.0
-        @params[:max_iter] = 200
+        @params[:max_iter] = 1000
         @params[:batch_size] = 50
         @params[:tol] = 0.0001
         @params[:verbose] = false
data/lib/rumale/linear_model/elastic_net.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::ElasticNet.new(reg_param: 0.1, l1_ratio: 0.5, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -59,7 +59,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, l1_ratio: 0.5, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
data/lib/rumale/linear_model/lasso.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -55,7 +55,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, bias_scale: bias_scale,
data/lib/rumale/linear_model/linear_regression.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LinearRegression.new(max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::LinearRegression.new(max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -68,7 +68,7 @@ module Rumale
       #   If solver = 'svd', this parameter is ignored.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
-                     fit_bias: true, bias_scale: 1.0, max_iter: 200, batch_size: 50, tol: 1e-4,
+                     fit_bias: true, bias_scale: 1.0, max_iter: 1000, batch_size: 50, tol: 1e-4,
                      solver: 'auto',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/logistic_regression.rb CHANGED
@@ -1,21 +1,24 @@
 # frozen_string_literal: true
 
-require 'rumale/linear_model/base_sgd'
+require 'lbfgsb'
 require 'rumale/base/classifier'
+require 'rumale/linear_model/base_sgd'
+require 'rumale/preprocessing/label_binarizer'
 
 module Rumale
   module LinearModel
-    # LogisticRegression is a class that implements Logistic Regression
-    # with stochastic gradient descent optimization.
-    # For multiclass classification problem, it uses one-vs-the-rest strategy.
+    # LogisticRegression is a class that implements Logistic Regression.
+    # In multiclass classification problems, it uses one-vs-the-rest strategy for the sgd solver
+    # and multinomial logistic regression for the lbfgs solver.
     #
-    # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides Logistic Regression based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
@@ -42,19 +45,24 @@ module Rumale
       # @return [Random]
       attr_reader :rng
 
-      # Create a new classifier with Logistic Regression by the SGD optimization.
+      # Create a new classifier with Logistic Regression.
       #
       # @param learning_rate [Float] The initial value of learning rate.
       #   The learning rate decreases as the iteration proceeds according to the equation: learning_rate / (1 + decay * t).
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param decay [Float] The smoothing parameter for decreasing learning rate as the iteration proceeds.
       #   If nil is given, the decay sets to 'reg_param * learning_rate'.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param momentum [Float] The momentum factor.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param penalty [String] The regularization type to be used ('l1', 'l2', and 'elasticnet').
+      #   If solver = 'lbfgs', only 'l2' can be selected for this parameter.
       # @param l1_ratio [Float] The elastic-net type regularization mixing parameter.
       #   If penalty set to 'l2' or 'l1', this parameter is ignored.
       #   If l1_ratio = 1, the regularization is similar to Lasso.
       #   If l1_ratio = 0, the regularization is similar to Ridge.
       #   If 0 < l1_ratio < 1, the regularization is a combination of L1 and L2.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param reg_param [Float] The regularization parameter.
       # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
       # @param bias_scale [Float] The scale of the bias term.
@@ -62,28 +70,38 @@ module Rumale
       # @param max_iter [Integer] The maximum number of epochs that indicates
       #   how many times the whole data is given to the training process.
       # @param batch_size [Integer] The size of the mini batches.
+      #   If solver = 'lbfgs', this parameter is ignored.
       # @param tol [Float] The tolerance of loss for terminating optimization.
+      #   If solver = 'lbfgs', this value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param solver [String] The algorithm for optimization. ('lbfgs' or 'sgd').
+      #   'lbfgs' uses the L-BFGS with lbfgsb.rb gem.
+      #   'sgd' uses the stochastic gradient descent optimization.
       # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
       #   If nil is given, the methods do not execute in parallel.
       #   If zero or less is given, it becomes equal to the number of processors.
-      #   This parameter is ignored if the Parallel gem is not loaded.
+      #   This parameter is ignored if the Parallel gem is not loaded or the solver is 'lbfgs'.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If solver = 'lbfgs' and true is given, 'iterate.dat' file is generated by lbfgsb.rb.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
+                     solver: 'lbfgs',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, l1_ratio: l1_ratio, bias_scale: bias_scale,
                              max_iter: max_iter, batch_size: batch_size, tol: tol)
         check_params_boolean(fit_bias: fit_bias, verbose: verbose)
-        check_params_string(penalty: penalty)
+        check_params_string(solver: solver, penalty: penalty)
         check_params_numeric_or_nil(decay: decay, n_jobs: n_jobs, random_seed: random_seed)
         check_params_positive(learning_rate: learning_rate, reg_param: reg_param,
                               bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
+        raise ArgumentError, "The 'lbfgs' solver supports only 'l2' penalties." if solver == 'lbfgs' && penalty != 'l2'
+
         super()
         @params.merge!(method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h)
+        @params[:solver] = solver == 'sgd' ? 'sgd' : 'lbfgs'
         @params[:decay] ||= @params[:reg_param] * @params[:learning_rate]
         @params[:random_seed] ||= srand
         @rng = Random.new(@params[:random_seed])
@@ -105,30 +123,10 @@ module Rumale
         check_sample_label_size(x, y)
 
         @classes = Numo::Int32[*y.to_a.uniq.sort]
-
-        if multiclass_problem?
-          n_classes = @classes.size
-          n_features = x.shape[1]
-          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
-          @bias_term = Numo::DFloat.zeros(n_classes)
-          if enable_parallel?
-            # :nocov:
-            models = parallel_map(n_classes) do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              partial_fit(x, bin_y)
-            end
-            # :nocov:
-            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
-          else
-            n_classes.times do |n|
-              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
-              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
-            end
-          end
+        if @params[:solver] == 'sgd'
+          fit_sgd(x, y)
         else
-          negative_label = @classes[0]
-          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
-          @weight_vec, @bias_term = partial_fit(x, bin_y)
+          fit_lbfgs(x, y)
         end
 
         self
@@ -182,6 +180,96 @@ module Rumale
       def multiclass_problem?
         @classes.size > 2
       end
+
+      def fit_lbfgs(base_x, base_y)
+        if multiclass_problem?
+          fnc = proc do |w, x, y, a|
+            n_features = x.shape[1]
+            n_classes = y.shape[1]
+            z = x.dot(w.reshape(n_classes, n_features).transpose)
+            # logsumexp and softmax
+            z_max = z.max(-1).expand_dims(-1).dup
+            z_max[~z_max.isfinite] = 0.0
+            lgsexp = Numo::NMath.log(Numo::NMath.exp(z - z_max).sum(-1)).expand_dims(-1) + z_max
+            t = z - lgsexp
+            sftmax = Numo::NMath.exp(t)
+            # loss and gradient
+            loss = -(y * t).sum + 0.5 * a * w.dot(w)
+            grad = (sftmax - y).transpose.dot(x).flatten.dup + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          encoder = Rumale::Preprocessing::LabelBinarizer.new
+          onehot_y = encoder.fit_transform(base_y)
+          n_classes = @classes.size
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_classes * n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, onehot_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          if fit_bias?
+            weight = res[:x].reshape(n_classes, n_features)
+            @weight_vec = weight[true, 0...-1].dup
+            @bias_term = weight[true, -1].dup
+          else
+            @weight_vec = res[:x].reshape(n_classes, n_features)
+            @bias_term = Numo::DFloat.zeros(n_classes)
+          end
+        else
+          fnc = proc do |w, x, y, a|
+            z = 1 + Numo::NMath.exp(-y * x.dot(w))
+            loss = Numo::NMath.log(z).sum + 0.5 * a * w.dot(w)
+            grad = (y / z - y).dot(x) + a * w
+            [loss, grad]
+          end
+
+          base_x = expand_feature(base_x) if fit_bias?
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(base_y.ne(negative_label)) * 2 - 1
+          n_features = base_x.shape[1]
+          w_init = Numo::DFloat.zeros(n_features)
+
+          verbose = @params[:verbose] ? 1 : -1
+          res = Lbfgsb.minimize(
+            fnc: fnc, jcb: true, x_init: w_init, args: [base_x, bin_y, @params[:reg_param]],
+            maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+          )
+
+          @weight_vec, @bias_term = split_weight(res[:x])
+        end
+      end
+
+      def fit_sgd(x, y)
+        if multiclass_problem?
+          n_classes = @classes.size
+          n_features = x.shape[1]
+          @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
+          @bias_term = Numo::DFloat.zeros(n_classes)
+          if enable_parallel?
+            # :nocov:
+            models = parallel_map(n_classes) do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              partial_fit(x, bin_y)
+            end
+            # :nocov:
+            n_classes.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
+          else
+            n_classes.times do |n|
+              bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
+              @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
+            end
+          end
+        else
+          negative_label = @classes[0]
+          bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
+          @weight_vec, @bias_term = partial_fit(x, bin_y)
+        end
+      end
     end
   end
 end
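
For orientation, the two procs above evaluate a standard L2-regularized logistic objective and its gradient. In the notation of the code (regularization strength a, labels y_i in {-1, +1} for the binary branch, one-hot rows Y for the multiclass branch), my reading of the code is:

```latex
% Binary branch of fit_lbfgs:
L(\mathbf{w}) = \sum_i \log\bigl(1 + e^{-y_i \mathbf{w}^\top \mathbf{x}_i}\bigr) + \tfrac{a}{2}\,\mathbf{w}^\top\mathbf{w},
\qquad
\nabla L(\mathbf{w}) = \sum_i \Bigl(\tfrac{y_i}{1 + e^{-y_i \mathbf{w}^\top \mathbf{x}_i}} - y_i\Bigr)\mathbf{x}_i + a\,\mathbf{w}

% Multiclass branch (W is [n_classes, n_features], S the row-wise softmax of X W^\top):
L(\mathbf{W}) = -\sum_i \sum_k Y_{ik} \log S_{ik} + \tfrac{a}{2}\,\lVert \mathbf{W} \rVert_F^2,
\qquad
\nabla L(\mathbf{W}) = (\mathbf{S} - \mathbf{Y})^\top \mathbf{X} + a\,\mathbf{W}
```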
data/lib/rumale/linear_model/ridge.rb CHANGED
@@ -10,7 +10,7 @@ module Rumale
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 500, batch_size: 20, random_seed: 1)
+    #     Rumale::LinearModel::Ridge.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
     #   estimator.fit(training_samples, training_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -70,7 +70,7 @@ module Rumale
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      reg_param: 1.0, fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      solver: 'auto',
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/svc.rb CHANGED
@@ -11,13 +11,14 @@ module Rumale
     # with stochastic gradient descent optimization.
     # For multiclass classification problem, it uses one-vs-the-rest strategy.
     #
-    # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_labels)
     #   results = estimator.predict(testing_samples)
     #
@@ -74,7 +75,7 @@ module Rumale
       def initialize(learning_rate: 0.01, decay: nil, momentum: 0.9,
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      probability: false,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
data/lib/rumale/linear_model/svr.rb CHANGED
@@ -8,13 +8,14 @@ module Rumale
     # SVR is a class that implements Support Vector Regressor
     # with stochastic gradient descent optimization.
     #
-    # Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
-    # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
-    # https://github.com/yoshoku/rumale-svm
+    # @note
+    #   Rumale::SVM provides linear and kernel support vector regressor based on LIBLINEAR and LIBSVM.
+    #   If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
+    #   https://github.com/yoshoku/rumale-svm
     #
     # @example
     #   estimator =
-    #     Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 200, batch_size: 50, random_seed: 1)
+    #     Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 50, random_seed: 1)
     #   estimator.fit(training_samples, training_target_values)
     #   results = estimator.predict(testing_samples)
     #
@@ -68,7 +69,7 @@ module Rumale
                      penalty: 'l2', reg_param: 1.0, l1_ratio: 0.5,
                      fit_bias: true, bias_scale: 1.0,
                      epsilon: 0.1,
-                     max_iter: 200, batch_size: 50, tol: 1e-4,
+                     max_iter: 1000, batch_size: 50, tol: 1e-4,
                      n_jobs: nil, verbose: false, random_seed: nil)
         check_params_numeric(learning_rate: learning_rate, momentum: momentum,
                              reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
data/lib/rumale/metric_learning/mlkr.rb ADDED
@@ -0,0 +1,161 @@
+# frozen_string_literal: true
+
+require 'rumale/base/base_estimator'
+require 'rumale/base/transformer'
+require 'rumale/decomposition/pca'
+require 'rumale/pairwise_metric'
+require 'rumale/utils'
+require 'lbfgsb'
+
+module Rumale
+  module MetricLearning
+    # MLKR is a class that implements Metric Learning for Kernel Regression.
+    #
+    # @example
+    #   transformer = Rumale::MetricLearning::MLKR.new
+    #   transformer.fit(training_samples, training_target_values)
+    #   low_samples = transformer.transform(testing_samples)
+    #
+    # *Reference*
+    # - Weinberger, K. Q. and Tesauro, G., "Metric Learning for Kernel Regression," Proc. AISTATS'07, pp. 612--629, 2007.
+    class MLKR
+      include Base::BaseEstimator
+      include Base::Transformer
+
+      # Returns the metric components.
+      # @return [Numo::DFloat] (shape: [n_components, n_features])
+      attr_reader :components
+
+      # Return the number of iterations run for optimization.
+      # @return [Integer]
+      attr_reader :n_iter
+
+      # Return the random generator.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new transformer with MLKR.
+      #
+      # @param n_components [Integer] The number of components.
+      # @param init [String] The initialization method for components ('random' or 'pca').
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
+      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
+        check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
+        check_params_numeric(max_iter: max_iter, tol: tol)
+        check_params_string(init: init)
+        check_params_boolean(verbose: verbose)
+        @params = {}
+        @params[:n_components] = n_components
+        @params[:init] = init
+        @params[:max_iter] = max_iter
+        @params[:tol] = tol
+        @params[:verbose] = verbose
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @components = nil
+        @n_iter = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [MLKR] The learned transformer itself.
+      def fit(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        n_features = x.shape[1]
+        n_components = if @params[:n_components].nil?
+                         n_features
+                       else
+                         [n_features, @params[:n_components]].min
+                       end
+        @components, @n_iter = optimize_components(x, y, n_features, n_components)
+        @prototypes = x.dot(@components.transpose)
+        @values = y
+        self
+      end
+
+      # Fit the model with training data, and then transform them with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def fit_transform(x, y)
+        x = check_convert_sample_array(x)
+        y = check_convert_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        fit(x, y).transform(x)
+      end
+
+      # Transform the given data with the learned model.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
+      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
+      def transform(x)
+        x = check_convert_sample_array(x)
+        x.dot(@components.transpose)
+      end
+
+      private
+
+      def init_components(x, n_features, n_components)
+        if @params[:init] == 'pca'
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
+          pca.fit(x).components.flatten.dup
+        else
+          Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
+        end
+      end
+
+      def optimize_components(x, y, n_features, n_components)
+        # initialize components.
+        comp_init = init_components(x, n_features, n_components)
+        # initialize optimization results.
+        res = {}
+        res[:x] = comp_init
+        res[:n_iter] = 0
+        # perform optimization.
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:mlkr_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
+        )
+        # return the results.
+        n_iter = res[:n_iter]
+        comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
+        [comps, n_iter]
+      end
+
+      def mlkr_fnc(w, x, y)
+        # initialize some variables.
+        n_features = x.shape[1]
+        n_components = w.size / n_features
+        # projection.
+        w = w.reshape(n_components, n_features)
+        z = x.dot(w.transpose)
+        # predict values.
+        kernel_mat = Numo::NMath.exp(-Rumale::PairwiseMetric.squared_error(z))
+        kernel_mat[kernel_mat.diag_indices] = 0.0
+        norm = kernel_mat.sum(1)
+        norm[norm.eq(0)] = 1
+        y_pred = kernel_mat.dot(y) / norm
+        # calculate loss.
+        y_diff = y_pred - y
+        loss = (y_diff**2).sum
+        # calculate gradient.
+        weight_mat = y_diff * y_diff.expand_dims(1) * kernel_mat
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = 8 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
+      end
+    end
+  end
+end
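
As a reading aid for mlkr_fnc (my summary of the code and the cited AISTATS'07 paper, not text from the gem): with projections z_i = A x_i, the method minimizes the leave-one-out kernel-regression squared error

```latex
\hat{y}_i = \frac{\sum_{j \ne i} k_{ij}\, y_j}{\sum_{j \ne i} k_{ij}},
\qquad
k_{ij} = \exp\bigl(-\lVert \mathbf{z}_i - \mathbf{z}_j \rVert^2\bigr),
\qquad
L(\mathbf{A}) = \sum_i \bigl(\hat{y}_i - y_i\bigr)^2
```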
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb CHANGED
@@ -2,13 +2,15 @@
 
 require 'rumale/base/base_estimator'
 require 'rumale/base/transformer'
+require 'rumale/utils'
+require 'rumale/pairwise_metric'
+require 'lbfgsb'
 
 module Rumale
   module MetricLearning
     # NeighbourhoodComponentAnalysis is a class that implements Neighbourhood Component Analysis.
     #
     # @example
-    #   require 'mopti'
     #   require 'rumale'
     #
     #   transformer = Rumale::MetricLearning::NeighbourhoodComponentAnalysis.new
@@ -39,7 +41,9 @@ module Rumale
       # @param init [String] The initialization method for components ('random' or 'pca').
       # @param max_iter [Integer] The maximum number of iterations.
       # @param tol [Float] The tolerance of termination criterion.
+      #   This value is given as tol / Lbfgsb::DBL_EPSILON to the factr argument of Lbfgsb.minimize method.
       # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
+      #   If true is given, 'iterate.dat' file is generated by lbfgsb.rb.
       # @param random_seed [Integer] The seed value using to initialize the random generator.
       def initialize(n_components: nil, init: 'random', max_iter: 100, tol: 1e-6, verbose: false, random_seed: nil)
         check_params_numeric_or_nil(n_components: n_components, random_seed: random_seed)
@@ -65,8 +69,6 @@ module Rumale
       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
       # @return [NeighbourhoodComponentAnalysis] The learned transformer itself.
       def fit(x, y)
-        raise 'NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded.' unless enable_mopti?
-
         x = check_convert_sample_array(x)
         y = check_convert_label_array(y)
         check_sample_label_size(x, y)
@@ -102,17 +104,9 @@ module Rumale
 
       private
 
-      def enable_mopti?
-        if defined?(Mopti).nil?
-          warn('NeighbourhoodComponentAnalysis#fit requires Mopti but that is not loaded. You should install and load the mopti gem in advance.')
-          return false
-        end
-        true
-      end
-
       def init_components(x, n_features, n_components)
         if @params[:init] == 'pca'
-          pca = Rumale::Decomposition::PCA.new(n_components: n_components, solver: 'evd')
+          pca = Rumale::Decomposition::PCA.new(n_components: n_components)
           pca.fit(x).components.flatten.dup
         else
           Rumale::Utils.rand_normal([n_features, n_components], @rng.dup).flatten.dup
@@ -127,28 +121,18 @@ module Rumale
         res[:x] = comp_init
         res[:n_iter] = 0
         # perform optimization.
-        optimizer = Mopti::ScaledConjugateGradient.new(
-          fnc: method(:nca_loss), jcb: method(:nca_dloss),
-          x_init: comp_init, args: [x, y],
-          max_iter: @params[:max_iter], ftol: @params[:tol]
+        verbose = @params[:verbose] ? 1 : -1
+        res = Lbfgsb.minimize(
+          fnc: method(:nca_fnc), jcb: true, x_init: comp_init, args: [x, y],
+          maxiter: @params[:max_iter], factr: @params[:tol] / Lbfgsb::DBL_EPSILON, verbose: verbose
         )
-        fold = 0.0
-        dold = 0.0
-        optimizer.each do |prm|
-          res = prm
-          puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
-          break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
-
-          fold = res[:fnc]
-          dold = res[:jcb]
-        end
         # return the results.
         n_iter = res[:n_iter]
         comps = n_components == 1 ? res[:x].dup : res[:x].reshape(n_components, n_features)
         [comps, n_iter]
       end
 
-      def nca_loss(w, x, y)
+      def nca_fnc(w, x, y)
         # initialize some variables.
         n_samples, n_features = x.shape
         n_components = w.size / n_features
@@ -157,32 +141,19 @@ module Rumale
         z = x.dot(w.transpose)
         # calculate probability matrix.
         prob_mat = probability_matrix(z)
-        # calculate loss.
+        # calculate loss and gradient.
         # NOTE:
         # NCA attempts to maximize its objective function.
         # For the minimization algorithm, the objective function value is subtracted from the maximum value (n_samples).
         mask_mat = y.expand_dims(1).eq(y)
         masked_prob_mat = prob_mat * mask_mat
-        n_samples - masked_prob_mat.sum
-      end
-
-      def nca_dloss(w, x, y)
-        # initialize some variables.
-        n_features = x.shape[1]
-        n_components = w.size / n_features
-        # projection.
-        w = w.reshape(n_components, n_features)
-        z = x.dot(w.transpose)
-        # calculate probability matrix.
-        prob_mat = probability_matrix(z)
-        # calculate gradient.
-        mask_mat = y.expand_dims(1).eq(y)
-        masked_prob_mat = prob_mat * mask_mat
-        weighted_prob_mat = masked_prob_mat - prob_mat * masked_prob_mat.sum(1).expand_dims(1)
-        weighted_prob_mat += weighted_prob_mat.transpose
-        weighted_prob_mat[weighted_prob_mat.diag_indices] = -weighted_prob_mat.sum(0)
-        gradient = 2 * z.transpose.dot(weighted_prob_mat).dot(x)
-        -gradient.flatten.dup
+        loss = n_samples - masked_prob_mat.sum
+        sum_probs = masked_prob_mat.sum(1)
+        weight_mat = (sum_probs.expand_dims(1) * prob_mat - masked_prob_mat)
+        weight_mat += weight_mat.transpose
+        weight_mat = weight_mat.sum(0).diag - weight_mat
+        gradient = -2 * z.transpose.dot(weight_mat).dot(x)
+        [loss, gradient.flatten.dup]
       end
 
       def probability_matrix(z)
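
The refactor merges the old nca_loss/nca_dloss pair into a single nca_fnc returning both value and gradient, which is what Lbfgsb.minimize expects with jcb: true. The quantity being minimized (my summary of the code, with p_ij from probability_matrix) is

```latex
L(\mathbf{A}) = N - \sum_{i=1}^{N} \sum_{j:\, y_j = y_i} p_{ij},
\qquad
p_{ij} = \frac{\exp\bigl(-\lVert \mathbf{A}\mathbf{x}_i - \mathbf{A}\mathbf{x}_j \rVert^2\bigr)}{\sum_{k \ne i} \exp\bigl(-\lVert \mathbf{A}\mathbf{x}_i - \mathbf{A}\mathbf{x}_k \rVert^2\bigr)},
\quad p_{ii} = 0
```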
data/lib/rumale/model_selection/time_series_split.rb ADDED
@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+
+require 'rumale/base/splitter'
+
+module Rumale
+  module ModelSelection
+    # TimeSeriesSplit is a class that generates the set of data indices for time series cross-validation.
+    # It is assumed that the given dataset is already ordered by time information.
+    #
+    # @example
+    #   cv = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 5)
+    #   x = Numo::DFloat.new(6, 2).rand
+    #   cv.split(x, nil).each do |train_ids, test_ids|
+    #     puts '---'
+    #     pp train_ids
+    #     pp test_ids
+    #   end
+    #
+    #   # ---
+    #   # [0]
+    #   # [1]
+    #   # ---
+    #   # [0, 1]
+    #   # [2]
+    #   # ---
+    #   # [0, 1, 2]
+    #   # [3]
+    #   # ---
+    #   # [0, 1, 2, 3]
+    #   # [4]
+    #   # ---
+    #   # [0, 1, 2, 3, 4]
+    #   # [5]
+    #
+    class TimeSeriesSplit
+      include Base::Splitter
+
+      # Return the number of splits.
+      # @return [Integer]
+      attr_reader :n_splits
+
+      # Return the maximum number of training samples in a split.
+      # @return [Integer/Nil]
+      attr_reader :max_train_size
+
+      # Create a new data splitter for time series cross-validation.
+      #
+      # @param n_splits [Integer] The number of splits.
+      # @param max_train_size [Integer/Nil] The maximum number of training samples in a split.
+      def initialize(n_splits: 5, max_train_size: nil)
+        check_params_numeric(n_splits: n_splits)
+        check_params_numeric_or_nil(max_train_size: max_train_size)
+        @n_splits = n_splits
+        @max_train_size = max_train_size
+      end
+
+      # Generate data indices for time series cross-validation.
+      #
+      # @overload split(x, y) -> Array
+      #   @param x [Numo::DFloat] (shape: [n_samples, n_features])
+      #     The dataset to be used to generate data indices for time series cross-validation.
+      #     It is expected that the data will be ordered by time information.
+      #   @param y [Numo::Int32] (shape: [n_samples])
+      #     This argument exists to unify the interface between the K-fold methods; it is not used in the method.
+      # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
+      def split(x, _y)
+        x = check_convert_sample_array(x)
+
+        n_samples = x.shape[0]
+        unless (@n_splits + 1).between?(2, n_samples)
+          raise ArgumentError,
+                'The number of folds (n_splits + 1) must be not less than 2 and not more than the number of samples.'
+        end
+
+        test_size = n_samples / (@n_splits + 1)
+        offset = test_size + n_samples % (@n_splits + 1)
+
+        Array.new(@n_splits) do |n|
+          start = offset * (n + 1)
+          train_ids = if !@max_train_size.nil? && @max_train_size < test_size
+                        Array((start - @max_train_size)...start)
+                      else
+                        Array(0...start)
+                      end
+          test_ids = Array(start...(start + test_size))
+          [train_ids, test_ids]
+        end
+      end
+    end
+  end
+end
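
TimeSeriesSplit plugs into CrossValidation like the other splitters. A minimal sketch with synthetic time-ordered data; the data, estimator choice, and variable names below are illustrative, not from the gem:

```ruby
require 'rumale'

# Synthetic time-ordered data: a noisy linear trend over 100 steps.
x = Numo::DFloat.new(100, 1).seq
y = 0.5 * x[true, 0] + Numo::DFloat.new(100).rand_norm(0, 2.0)

# Each fold trains on an initial segment and tests on the block that follows it.
splitter = Rumale::ModelSelection::TimeSeriesSplit.new(n_splits: 4)
est = Rumale::LinearModel::Ridge.new(random_seed: 1)
cv = Rumale::ModelSelection::CrossValidation.new(estimator: est, splitter: splitter)

report = cv.perform(x, y)
puts report[:test_score].inspect # one score per fold, via the regressor's default score
```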
data/lib/rumale/pipeline/pipeline.rb CHANGED
@@ -140,7 +140,7 @@ module Rumale
       def validate_steps(steps)
         steps.keys[0...-1].each do |name|
           transformer = steps[name]
-          next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
+          next if transformer.nil? || (transformer.class.method_defined?(:fit) && transformer.class.method_defined?(:transform))
 
           raise TypeError,
                 'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
data/lib/rumale/tree/base_decision_tree.rb CHANGED
@@ -75,17 +75,10 @@ module Rumale
         node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
 
         # terminate growing.
-        unless @params[:max_leaf_nodes].nil?
-          return nil if @n_leaves >= @params[:max_leaf_nodes]
-        end
-
+        return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
         return nil if n_samples < @params[:min_samples_leaf]
         return put_leaf(node, y) if n_samples == @params[:min_samples_leaf]
-
-        unless @params[:max_depth].nil?
-          return put_leaf(node, y) if depth == @params[:max_depth]
-        end
-
+        return put_leaf(node, y) if !@params[:max_depth].nil? && depth == @params[:max_depth]
         return put_leaf(node, y) if stop_growing?(y)
 
         # calculate optimal parameters.
data/lib/rumale/tree/gradient_tree_regressor.rb CHANGED
@@ -138,7 +138,7 @@ module Rumale
         nil
       end
 
-      def grow_node(depth, x, y, g, h)
+      def grow_node(depth, x, y, g, h) # rubocop:disable Metrics/AbcSize
         # initialize some variables.
         sum_g = g.sum
         sum_h = h.sum
@@ -146,17 +146,10 @@ module Rumale
         node = Node.new(depth: depth, n_samples: n_samples)
 
         # terminate growing.
-        unless @params[:max_leaf_nodes].nil?
-          return nil if @n_leaves >= @params[:max_leaf_nodes]
-        end
-
+        return nil if !@params[:max_leaf_nodes].nil? && @n_leaves >= @params[:max_leaf_nodes]
         return nil if n_samples < @params[:min_samples_leaf]
         return put_leaf(node, sum_g, sum_h) if n_samples == @params[:min_samples_leaf]
-
-        unless @params[:max_depth].nil?
-          return put_leaf(node, sum_g, sum_h) if depth == @params[:max_depth]
-        end
-
+        return put_leaf(node, sum_g, sum_h) if !@params[:max_depth].nil? && depth == @params[:max_depth]
         return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
 
         # calculate optimal parameters.
data/lib/rumale/version.rb CHANGED
@@ -3,5 +3,5 @@
 # Rumale is a machine learning library in Ruby.
 module Rumale
   # The version of Rumale you are using.
-  VERSION = '0.20.1'
+  VERSION = '0.22.1'
 end
data/rumale.gemspec CHANGED
@@ -45,4 +45,5 @@ Gem::Specification.new do |spec|
   }
 
   spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
+  spec.add_runtime_dependency 'lbfgsb', '>= 0.3.0'
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: rumale
 version: !ruby/object:Gem::Version
-  version: 0.20.1
+  version: 0.22.1
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-08-23 00:00:00.000000000 Z
+date: 2020-12-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -24,6 +24,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 0.9.1
+- !ruby/object:Gem::Dependency
+  name: lbfgsb
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.3.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 0.3.0
 description: |
   Rumale is a machine learning library in Ruby.
   Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -43,7 +57,7 @@ extensions:
 - ext/rumale/extconf.rb
 extra_rdoc_files: []
 files:
-- ".coveralls.yml"
+- ".github/workflows/build.yml"
 - ".gitignore"
 - ".rspec"
 - ".rubocop.yml"
@@ -131,6 +145,7 @@ files:
 - lib/rumale/manifold/mds.rb
 - lib/rumale/manifold/tsne.rb
 - lib/rumale/metric_learning/fisher_discriminant_analysis.rb
+- lib/rumale/metric_learning/mlkr.rb
 - lib/rumale/metric_learning/neighbourhood_component_analysis.rb
 - lib/rumale/model_selection/cross_validation.rb
 - lib/rumale/model_selection/function.rb
@@ -141,6 +156,7 @@ files:
 - lib/rumale/model_selection/shuffle_split.rb
 - lib/rumale/model_selection/stratified_k_fold.rb
 - lib/rumale/model_selection/stratified_shuffle_split.rb
+- lib/rumale/model_selection/time_series_split.rb
 - lib/rumale/multiclass/one_vs_rest_classifier.rb
 - lib/rumale/naive_bayes/base_naive_bayes.rb
 - lib/rumale/naive_bayes/bernoulli_nb.rb
@@ -208,7 +224,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.1.2
+rubygems_version: 3.1.4
 signing_key:
 specification_version: 4
 summary: Rumale is a machine learning library in Ruby. Rumale provides machine learning
data/.coveralls.yml DELETED
@@ -1 +0,0 @@
-service_name: travis-ci