rumale 0.17.1 → 0.17.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 54719fad09a9036dbdc1430323272af7f3c3e746aecac9b6199eef70bfe07856
4
- data.tar.gz: b9df7b5ff0cc4feebb053955aea4d022e72263be1288578906b67f2100b97160
3
+ metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
4
+ data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
5
5
  SHA512:
6
- metadata.gz: 42aedd744761fb61e6fcfa32643ee56b267ea163a2376f3eb308b181cd5981ceef4270e88ed1fe322e5f198d94b3b34af277f246e99f0439cfc3699c3838d76e
7
- data.tar.gz: '082dda62d97b2655413ef599185b38c88cceacdec3ceecb99aab67b222306b5478963dfa7e216daa56838c6c3a9ac65ced53e51d513ae23ddf0954fb2c5982e5'
6
+ metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
7
+ data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.17.2
2
+ - Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
3
+ - Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class so that an array of hashes can be given to HashVectorizer.
4
+
1
5
  # 0.17.1
2
6
  - Add transformer class for [PolynomialFeatures](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/PolynomialFeatures.html)
3
7
  - Add verbose and tol parameter to [FactorizationMachineClassifier](https://yoshoku.github.io/rumale/doc/Rumale/PolynomialModel/FactorizationMachineClassifier.html)
data/README.md CHANGED
@@ -50,22 +50,21 @@ First, let's classify simple xor data.
50
50
  require 'rumale'
51
51
 
52
52
  # Prepare XOR data.
53
- features = [[0, 0], [0, 1], [1, 0], [1, 1]]
53
+ samples = [[0, 0], [0, 1], [1, 0], [1, 1]]
54
54
  labels = [0, 1, 1, 0]
55
55
 
56
56
  # Train classifier with nearest neighbor rule.
57
57
  estimator = Rumale::NearestNeighbors::KNeighborsClassifier.new(n_neighbors: 1)
58
- estimator.fit(x, y)
58
+ estimator.fit(samples, labels)
59
59
 
60
60
  # Predict labels.
61
- p y
62
- p estimator.predict(x)
61
+ p labels
62
+ p estimator.predict(samples)
63
63
  ```
64
64
 
65
65
  Executing the above script results in the following.
66
66
 
67
67
  ```ruby
68
- Numo::Int32#shape=[4]
69
68
  [0, 1, 1, 0]
70
69
  Numo::Int32#shape=[4]
71
70
  [0, 1, 1, 0]
data/lib/rumale.rb CHANGED
@@ -26,6 +26,7 @@ require 'rumale/optimizer/nadam'
26
26
  require 'rumale/optimizer/yellow_fin'
27
27
  require 'rumale/pipeline/pipeline'
28
28
  require 'rumale/kernel_approximation/rbf'
29
+ require 'rumale/kernel_approximation/nystroem'
29
30
  require 'rumale/linear_model/base_linear_model'
30
31
  require 'rumale/linear_model/base_sgd'
31
32
  require 'rumale/linear_model/svc'
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module KernelApproximation
9
+ # Nystroem is a class that implements feature mapping with Nystroem method.
10
+ #
11
+ # @example
12
+ # require 'numo/linalg/autoloader'
13
+ #
14
+ # transformer = Rumale::KernelApproximation::Nystroem.new(gamma: 1, n_components: 128, random_seed: 1)
15
+ # new_training_samples = transformer.fit_transform(training_samples)
16
+ # new_testing_samples = transformer.transform(testing_samples)
17
+ #
18
+ # *Reference*
19
+ # 1. T. Yang, Y. Li, M. Mahdavi, R. Jin, and Z-H. Zhou, "Nystrom Method vs Random Fourier Features: A Theoretical and Empirical Comparison," Advances in NIPS'12, Vol. 1, pp. 476--484, 2012.
20
+ class Nystroem
21
+ include Base::BaseEstimator
22
+ include Base::Transformer
23
+
24
+ # Returns the randomly sampled training data for feature mapping.
25
+ # @return [Numo::DFloat] (shape: [n_components, n_features])
26
+ attr_reader :components
27
+
28
+ # Returns the indices of the sampled training data.
29
+ # @return [Numo::Int32] (shape: [n_components])
30
+ attr_reader :component_indices
31
+
32
+ # Returns the normalizing factors.
33
+ # @return [Numo::DFloat] (shape: [n_components, n_components])
34
+ attr_reader :normalizer
35
+
36
+ # Returns the random generator for transformation.
37
+ # @return [Random]
38
+ attr_reader :rng
39
+
40
+ # Create a new transformer for mapping to kernel feature space with Nystroem method.
41
+ #
42
+ # @param kernel [String] The type of kernel. This parameter is ignored in the current implementation.
43
+ # @param gamma [Float] The parameter of RBF kernel: exp(-gamma * x^2).
44
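+ # @param n_components [Integer] The number of training samples randomly selected as components, which is also the number of dimensions of the mapped feature space. It is capped at the number of training samples.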
45
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
46
+ def initialize(kernel: 'rbf', gamma: 1, n_components: 100, random_seed: nil)
47
+ check_params_numeric(gamma: gamma, n_components: n_components)
48
+ check_params_numeric_or_nil(random_seed: random_seed)
49
+ @params = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
50
+ @params[:random_seed] ||= srand
51
+ @rng = Random.new(@params[:random_seed])
52
+ @component_indices = nil
53
+ @components = nil
54
+ @normalizer = nil
55
+ end
56
+
57
+ # Fit the model with given training data.
58
+ #
59
+ # @overload fit(x) -> Nystroem
60
+ # @param x [Numo::NArray] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
61
+ # @return [Nystroem] The learned transformer itself.
62
+ def fit(x, _y = nil)
63
+ x = check_convert_sample_array(x)
64
+ raise 'Nystroem#fit requires Numo::Linalg but that is not loaded.' unless enable_linalg?
65
+
66
+ # initialize some variables.
67
+ sub_rng = @rng.dup
68
+ n_samples = x.shape[0]
69
+ n_components = [1, [@params[:n_components], n_samples].min].max
70
+
71
+ # random sampling.
72
+ @component_indices = Numo::Int32.cast([*0...n_samples].shuffle(random: sub_rng)[0...n_components])
73
+ @components = x[@component_indices, true]
74
+
75
+ # calculate normalizing factor.
76
+ kernel_mat = Rumale::PairwiseMetric.rbf_kernel(@components, nil, @params[:gamma])
77
+ eig_vals, eig_vecs = Numo::Linalg.eigh(kernel_mat)
78
+ la = eig_vals.class.maximum(eig_vals.reverse, 1e-12)
79
+ u = eig_vecs.reverse(1)
80
+ @normalizer = u.dot((1.0 / Numo::NMath.sqrt(la)).diag)
81
+
82
+ self
83
+ end
84
+
85
+ # Fit the model with training data, and then transform them with the learned model.
86
+ #
87
+ # @overload fit_transform(x) -> Numo::DFloat
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
89
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
90
+ def fit_transform(x, _y = nil)
91
+ x = check_convert_sample_array(x)
92
+ fit(x).transform(x)
93
+ end
94
+
95
+ # Transform the given data with the learned model.
96
+ #
97
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
98
+ # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
99
+ def transform(x)
100
+ x = check_convert_sample_array(x)
101
+ z = Rumale::PairwiseMetric.rbf_kernel(x, @components, @params[:gamma])
102
+ z.dot(@normalizer)
103
+ end
104
+ end
105
+ end
106
+ end
@@ -40,7 +40,6 @@ module Rumale
40
40
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the model.
41
41
  # @return [Pipeline] The learned pipeline itself.
42
42
  def fit(x, y)
43
- x = check_convert_sample_array(x)
44
43
  trans_x = apply_transforms(x, y, fit: true)
45
44
  last_estimator&.fit(trans_x, y)
46
45
  self
@@ -52,7 +51,6 @@ module Rumale
52
51
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
53
52
  # @return [Numo::NArray] The predicted results by last estimator.
54
53
  def fit_predict(x, y = nil)
55
- x = check_convert_sample_array(x)
56
54
  trans_x = apply_transforms(x, y, fit: true)
57
55
  last_estimator.fit_predict(trans_x)
58
56
  end
@@ -63,7 +61,6 @@ module Rumale
63
61
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs], default: nil) The target values or labels to be used for fitting the model.
64
62
  # @return [Numo::NArray] The predicted results by last estimator.
65
63
  def fit_transform(x, y = nil)
66
- x = check_convert_sample_array(x)
67
64
  trans_x = apply_transforms(x, y, fit: true)
68
65
  last_estimator.fit_transform(trans_x, y)
69
66
  end
@@ -73,7 +70,6 @@ module Rumale
73
70
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
74
71
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
75
72
  def decision_function(x)
76
- x = check_convert_sample_array(x)
77
73
  trans_x = apply_transforms(x)
78
74
  last_estimator.decision_function(trans_x)
79
75
  end
@@ -83,7 +79,6 @@ module Rumale
83
79
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to obtain prediction result.
84
80
  # @return [Numo::NArray] The predicted results by last estimator.
85
81
  def predict(x)
86
- x = check_convert_sample_array(x)
87
82
  trans_x = apply_transforms(x)
88
83
  last_estimator.predict(trans_x)
89
84
  end
@@ -93,7 +88,6 @@ module Rumale
93
88
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
94
89
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
95
90
  def predict_log_proba(x)
96
- x = check_convert_sample_array(x)
97
91
  trans_x = apply_transforms(x)
98
92
  last_estimator.predict_log_proba(trans_x)
99
93
  end
@@ -103,7 +97,6 @@ module Rumale
103
97
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
104
98
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
105
99
  def predict_proba(x)
106
- x = check_convert_sample_array(x)
107
100
  trans_x = apply_transforms(x)
108
101
  last_estimator.predict_proba(trans_x)
109
102
  end
@@ -113,7 +106,6 @@ module Rumale
113
106
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
114
107
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples.
115
108
  def transform(x)
116
- x = check_convert_sample_array(x)
117
109
  trans_x = apply_transforms(x)
118
110
  last_estimator.nil? ? trans_x : last_estimator.transform(trans_x)
119
111
  end
@@ -123,7 +115,6 @@ module Rumale
123
115
  # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The transformed samples to be restored into original space.
124
116
  # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored samples.
125
117
  def inverse_transform(z)
126
- z = check_convert_sample_array(z)
127
118
  itrans_z = z
128
119
  @steps.keys.reverse_each do |name|
129
120
  transformer = @steps[name]
@@ -139,26 +130,10 @@ module Rumale
139
130
  # @param y [Numo::NArray] (shape: [n_samples, n_outputs]) True target values or labels for testing data.
140
131
  # @return [Float] The score of last estimator
141
132
  def score(x, y)
142
- x = check_convert_sample_array(x)
143
133
  trans_x = apply_transforms(x)
144
134
  last_estimator.score(trans_x, y)
145
135
  end
146
136
 
147
- # Dump marshal data.
148
- # @return [Hash] The marshal data about Pipeline.
149
- def marshal_dump
150
- { params: @params,
151
- steps: @steps }
152
- end
153
-
154
- # Load marshal data.
155
- # @return [nil]
156
- def marshal_load(obj)
157
- @params = obj[:params]
158
- @steps = obj[:steps]
159
- nil
160
- end
161
-
162
137
  private
163
138
 
164
139
  def validate_steps(steps)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.17.1'
6
+ VERSION = '0.17.2'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.1
4
+ version: 0.17.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-01-26 00:00:00.000000000 Z
11
+ date: 2020-02-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -209,6 +209,7 @@ files:
209
209
  - lib/rumale/evaluation_measure/silhouette_score.rb
210
210
  - lib/rumale/feature_extraction/feature_hasher.rb
211
211
  - lib/rumale/feature_extraction/hash_vectorizer.rb
212
+ - lib/rumale/kernel_approximation/nystroem.rb
212
213
  - lib/rumale/kernel_approximation/rbf.rb
213
214
  - lib/rumale/kernel_machine/kernel_pca.rb
214
215
  - lib/rumale/kernel_machine/kernel_ridge.rb