rumale 0.17.1 → 0.17.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 54719fad09a9036dbdc1430323272af7f3c3e746aecac9b6199eef70bfe07856
4
- data.tar.gz: b9df7b5ff0cc4feebb053955aea4d022e72263be1288578906b67f2100b97160
3
+ metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
4
+ data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
5
5
  SHA512:
6
- metadata.gz: 42aedd744761fb61e6fcfa32643ee56b267ea163a2376f3eb308b181cd5981ceef4270e88ed1fe322e5f198d94b3b34af277f246e99f0439cfc3699c3838d76e
7
- data.tar.gz: '082dda62d97b2655413ef599185b38c88cceacdec3ceecb99aab67b222306b5478963dfa7e216daa56838c6c3a9ac65ced53e51d513ae23ddf0954fb2c5982e5'
6
+ metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
7
+ data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.17.2
2
+ - Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
3
+ - Remove array validation from the [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class, since an array of hashes may be given to HashVectorizer.
4
+
1
5
  # 0.17.1
2
6
  - Add transformer class for [PolynomialFeatures](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/PolynomialFeatures.html)
3
7
  - Add verbose and tol parameter to [FactorizationMachineClassifier](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel/FactorizationMachineClassifier.html)
data/README.md CHANGED
@@ -50,22 +50,21 @@ First, let's classify simple xor data.
50
50
  require 'rumale'
51
51
 
52
52
  # Prepare XOR data.
53
- features = [[0, 0], [0, 1], [1, 0], [1, 1]]
53
+ samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
54
54
  labels = [0, 1, 1, 0]
55
55
 
56
56
  # Train classifier with nearest neighbor rule.
57
57
  estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
58
- estimator.fit(x, y)
58
+ estimator.fit(samples, labels)
59
59
 
60
60
  # Predict labels.
61
- p y
62
- p estimator.predict(x)
61
+ p labels
62
+ p estimator.predict(samples)
63
63
  ```
64
64
 
65
65
  Execution of the above script results in the following.
66
66
 
67
67
  ```ruby
68
- Numo::Int32#shape=[4]
69
68
  [0, 1, 1, 0]
70
69
  Numo::Int32#shape=[4]
71
70
  [0, 1, 1, 0]
data/lib/rumale.rb CHANGED
@@ -26,6 +26,7 @@ require 'rumale/optimizer/nadam'
26
26
  require 'rumale/optimizer/yellow_fin'
27
27
  require 'rumale/pipeline/pipeline'
28
28
  require 'rumale/kernel_approximation/rbf'
29
+ require 'rumale/kernel_approximation/nystroem'
29
30
  require 'rumale/linear_model/base_linear_model'
30
31
  require 'rumale/linear_model/base_sgd'
31
32
  require 'rumale/linear_model/svc'
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module KernelApproximation
9
+ # Nystroem is a class that implements feature mapping with Nystroem method.
10
+ #
11
+ # @example
12
+ # require 'numo/linalg/autoloader'
13
+ #
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
15
+ # new_training_samples = transformer.fit_transform(training_samples)
16
+ # new_testing_samples = transformer.transform(testing_samples)
17
+ #
18
+ # *Reference*
19
+ # 1. T. Yang, Y. Li, M. Mahdavi, R. Jin, and Z-H. Zhou, "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
20
+ class Nystroem
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Returns the randomly sampled training data for feature mapping.
25
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
26
+ attr_reader :components
27
+
28
+ # Returns the indices of the sampled training data.
29
+ # @return [Numo::Int32] (shape: [n_components])
30
+ attr_reader :component_indices
31
+
32
+ # Returns the normalizing factors.
33
+ # @return [Numo::DFloat] (shape: [n_components, n_components])
34
+ attr_reader :normalizer
35
+
36
+ # Return the random generator for transformation.
37
+ # @return [Random]
38
+ attr_reader :rng
39
+
40
+ # Create a new transformer for mapping to kernel feature space with Nystrom method.
41
+ #
42
+ # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
+ # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
+ # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
45
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
46
+ def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
+ check_params_numeric(gamma: gamma, n_components: n_components)
48
+ check_params_numeric_or_nil(random_seed: random_seed)
49
+ @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
+ @params[:random_seed] ||= srand
51
+ @rng = Random.new(@params[:random_seed])
52
+ @component_indices = nil
53
+ @components = nil
54
+ @normalizer = nil
55
+ end
56
+
57
+ # Fit the model with given training data.
58
+ #
59
+ # @overload fit(x) -> Nystroem
60
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
+ # @return [Nystroem] The learned transformer itself.
62
+ def fit(x, _y = nil)
63
+ x = check_convert_sample_array(x)
64
+ raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
65
+
66
+ # initialize some variables.
67
+ sub_rng = @rng.dup
68
+ n_samples = x.shape[0]
69
+ n_components = [1, [@params[:n_components], n_samples].min].max
70
+
71
+ # random sampling.
72
+ @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
73
+ @components = x[@component_indices, true]
74
+
75
+ # calculate normalizing factor.
76
+ kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
77
+ eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
+ la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
+ u = eig_vecs.reverse(1)
80
+ @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
81
+
82
+ self
83
+ end
84
+
85
+ # Fit the model with training data, and then transform them with the learned model.
86
+ #
87
+ # @overload fit_transform(x) -> Numo::DFloat
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
90
+ def fit_transform(x, _y = nil)
91
+ x = check_convert_sample_array(x)
92
+ fit(x).transform(x)
93
+ end
94
+
95
+ # Transform the given data with the learned model.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
98
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
+ def transform(x)
100
+ x = check_convert_sample_array(x)
101
+ z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
102
+ z.dot(@normalizer)
103
+ end
104
+ end
105
+ end
106
+ end
@@ -40,7 +40,6 @@ module Rumale
40
40
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
41
  # @return [Pipeline] The learned pipeline itself.
42
42
  def fit(x, y)
43
- x = check_convert_sample_array(x)
44
43
  trans_x = apply_transforms(x, y, fit: true)
45
44
  last_estimator&.fit(trans_x, y)
46
45
  self
@@ -52,7 +51,6 @@ module Rumale
52
51
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
53
52
  # @return [Numo::NArray] The predicted results by last estimator.
54
53
  def fit_predict(x, y = nil)
55
- x = check_convert_sample_array(x)
56
54
  trans_x = apply_transforms(x, y, fit: true)
57
55
  last_estimator.fit_predict(trans_x)
58
56
  end
@@ -63,7 +61,6 @@ module Rumale
63
61
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
64
62
  # @return [Numo::NArray] The predicted results by last estimator.
65
63
  def fit_transform(x, y = nil)
66
- x = check_convert_sample_array(x)
67
64
  trans_x = apply_transforms(x, y, fit: true)
68
65
  last_estimator.fit_transform(trans_x, y)
69
66
  end
@@ -73,7 +70,6 @@ module Rumale
73
70
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
74
71
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
75
72
  def decision_function(x)
76
- x = check_convert_sample_array(x)
77
73
  trans_x = apply_transforms(x)
78
74
  last_estimator.decision_function(trans_x)
79
75
  end
@@ -83,7 +79,6 @@ module Rumale
83
79
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
84
80
  # @return [Numo::NArray] The predicted results by last estimator.
85
81
  def predict(x)
86
- x = check_convert_sample_array(x)
87
82
  trans_x = apply_transforms(x)
88
83
  last_estimator.predict(trans_x)
89
84
  end
@@ -93,7 +88,6 @@ module Rumale
93
88
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
94
89
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
95
90
  def predict_log_proba(x)
96
- x = check_convert_sample_array(x)
97
91
  trans_x = apply_transforms(x)
98
92
  last_estimator.predict_log_proba(trans_x)
99
93
  end
@@ -103,7 +97,6 @@ module Rumale
103
97
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
104
98
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
105
99
  def predict_proba(x)
106
- x = check_convert_sample_array(x)
107
100
  trans_x = apply_transforms(x)
108
101
  last_estimator.predict_proba(trans_x)
109
102
  end
@@ -113,7 +106,6 @@ module Rumale
113
106
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
114
107
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
115
108
  def transform(x)
116
- x = check_convert_sample_array(x)
117
109
  trans_x = apply_transforms(x)
118
110
  last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
119
111
  end
@@ -123,7 +115,6 @@ module Rumale
123
115
  # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
124
116
  # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
125
117
  def inverse_transform(z)
126
- z = check_convert_sample_array(z)
127
118
  itrans_z = z
128
119
  @steps.keys.reverse_each do |name|
129
120
  transformer = @steps[name]
@@ -139,26 +130,10 @@ module Rumale
139
130
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
140
131
  # @return [Float] The score of last estimator
141
132
  def score(x, y)
142
- x = check_convert_sample_array(x)
143
133
  trans_x = apply_transforms(x)
144
134
  last_estimator.score(trans_x, y)
145
135
  end
146
136
 
147
- # Dump marshal data.
148
- # @return [Hash] The marshal data about Pipeline.
149
- def marshal_dump
150
- { params: @params,
151
- steps: @steps }
152
- end
153
-
154
- # Load marshal data.
155
- # @return [nil]
156
- def marshal_load(obj)
157
- @params = obj[:params]
158
- @steps = obj[:steps]
159
- nil
160
- end
161
-
162
137
  private
163
138
 
164
139
  def validate_steps(steps)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.17.1'
6
+ VERSION = '0.17.2'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.17.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-26 00:00:00.000000000 Z
11
+ date: 2020-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -209,6 +209,7 @@ files:
209
209
  - lib/rumale/evaluation_measure/silhouette_score.rb
210
210
  - lib/rumale/feature_extraction/feature_hasher.rb
211
211
  - lib/rumale/feature_extraction/hash_vectorizer.rb
212
+ - lib/rumale/kernel_approximation/nystroem.rb
212
213
  - lib/rumale/kernel_approximation/rbf.rb
213
214
  - lib/rumale/kernel_machine/kernel_pca.rb
214
215
  - lib/rumale/kernel_machine/kernel_ridge.rb