rumale 0.13.7 → 0.13.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6b7030e096e10df1a95d79aefe6d275c0ca16406
4
- data.tar.gz: 85f6b809e41ecb4743df7e07b99bc3fcf13710e1
3
+ metadata.gz: 581437e401cec76201212dbd1aa1e38ff6142eef
4
+ data.tar.gz: 42ce62c892dcee05d41a5857a28c80627da95833
5
5
  SHA512:
6
- metadata.gz: efb25c2ea461c3ceb9b8d500a4ef77dd4db6e4a2c21400009e8fd940bd28dab6d4a725a320a5e3cc1a1b7676626d03b9c2ef73c9c02246c29e122461680488d7
7
- data.tar.gz: 264c10852a7eb01ddb075c87969625f2cb82eb0bfa3050e21bf41f355d8213808c3684a63a0e7e95480965e0737ae76a85babee60b0a638274c31bc1741774b9
6
+ metadata.gz: 36d7bb281d676827b40f3382292a414b474a83c153e7260f5ceb8af2b7664d621ae0adb3ece7475f35e83debd6f6b1e031c804bf76260ec58be6aca315b0d431
7
+ data.tar.gz: 367c273c90a4685913eb48dde5cb1d109e871e87ac1b2aab76d06b565a283996ea25ece3aa9030b7df64348902f8636c2ee3580a19ac533224fbbe62a495e45c
@@ -1,8 +1,13 @@
1
+ # 0.13.8
2
+ - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating an artificial dataset with Gaussian blobs.
3
+ - Add documents about Rumale::SVM.
4
+ - Refactor specs.
5
+
1
6
  # 0.13.7
2
7
  - Add some evaluator classes for clustering.
3
- - SilhouetteScore
4
- - CalinskiHarabaszScore
5
- - DaviesBouldinScore
8
+ - [SilhouetteScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/SilhouetteScore.html)
9
+ - [CalinskiHarabaszScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/CalinskiHarabaszScore.html)
10
+ - [DaviesBouldinScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/DaviesBouldinScore.html)
6
11
 
7
12
  # 0.13.6
8
13
  - Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'csv'
4
4
  require 'rumale/validation'
5
+ require 'rumale/utils'
6
+ require 'rumale/preprocessing/min_max_scaler'
5
7
 
6
8
  module Rumale
7
9
  # Module for loading and saving a dataset file.
@@ -126,6 +128,60 @@ module Rumale
126
128
  [x, y]
127
129
  end
128
130
 
131
+ # Generate Gaussian blobs.
132
+ #
133
+ # @param n_samples [Integer] The total number of samples.
134
+ # @param n_features [Integer] The number of features.
135
+ # If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
136
+ # @param centers [Integer/Numo::DFloat/Nil] The number of cluster centroids or the fixed cluster centroids.
137
+ # If nil is given, the number of cluster centroids is set to 3.
138
+ # @param cluster_std [Float] The standard deviation of the clusters.
139
+ # @param center_box [Array] The bounding box for each cluster centroid.
140
+ # If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
141
+ # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
142
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
143
+ def make_blobs(n_samples = 1000, n_features = 2,
144
+ centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
145
+ Rumale::Validation.check_params_integer(n_samples: n_samples, n_features: n_features)
146
+ Rumale::Validation.check_params_float(cluster_std: cluster_std)
147
+ Rumale::Validation.check_params_type(Array, center_box: center_box)
148
+ Rumale::Validation.check_params_boolean(shuffle: shuffle)
149
+ Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
150
+ # initialize rng.
151
+ rs = random_seed
152
+ rs ||= srand
153
+ rng = Random.new(rs)
154
+ # initialize centers.
155
+ if centers.is_a?(Numo::DFloat)
156
+ n_centers = centers.shape[0]
157
+ n_features = centers.shape[1]
158
+ else
159
+ n_centers = centers.is_a?(Integer) ? centers : 3
160
+ center_min = center_box.first
161
+ center_max = center_box.last
162
+ centers = Rumale::Utils.rand_uniform([n_centers, n_features], rng)
163
+ normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [center_min, center_max])
164
+ centers = normalizer.fit_transform(centers)
165
+ end
166
+ # generate blobs.
167
+ sz_cluster = [n_samples / n_centers] * n_centers
168
+ (n_samples % n_centers).times { |n| sz_cluster[n] += 1 }
169
+ x = Rumale::Utils.rand_normal([sz_cluster[0], n_features], rng, 0.0, cluster_std) + centers[0, true]
170
+ y = Numo::Int32.zeros(sz_cluster[0])
171
+ (1...n_centers).each do |n|
172
+ c = Rumale::Utils.rand_normal([sz_cluster[n], n_features], rng, 0.0, cluster_std) + centers[n, true]
173
+ x = Numo::DFloat.vstack([x, c])
174
+ y = y.concatenate(Numo::Int32.zeros(sz_cluster[n]) + n)
175
+ end
176
+ # shuffle data.
177
+ if shuffle
178
+ rand_ids = [*0...n_samples].shuffle(random: rng.dup)
179
+ x = x[rand_ids, true].dup
180
+ y = y[rand_ids].dup
181
+ end
182
+ [x, y]
183
+ end
184
+
129
185
  private
130
186
 
131
187
  def parse_libsvm_line(line, zero_based)
@@ -11,6 +11,10 @@ module Rumale
11
11
  # with stochastic gradient descent (SGD) optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
+ # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
15
+ # If you prefer execution speed, you should use Rumale::SVM::SVC.
16
+ # https://github.com/yoshoku/rumale-svm
17
+ #
14
18
  # @example
15
19
  # training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
16
20
  # estimator =
@@ -9,6 +9,10 @@ module Rumale
9
9
  # with mini-batch stochastic gradient descent optimization.
10
10
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
11
11
  #
12
+ # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
13
+ # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
14
+ # https://github.com/yoshoku/rumale-svm
15
+ #
12
16
  # @example
13
17
  # estimator =
14
18
  # Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -11,6 +11,10 @@ module Rumale
11
11
  # with mini-batch stochastic gradient descent optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
+ # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
15
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
16
+ # https://github.com/yoshoku/rumale-svm
17
+ #
14
18
  # @example
15
19
  # estimator =
16
20
  # Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -8,6 +8,10 @@ module Rumale
8
8
  # SVR is a class that implements Support Vector Regressor
9
9
  # with mini-batch stochastic gradient descent optimization.
10
10
  #
11
+ # Rumale::SVM provides linear and kernel support vector regressors based on LIBLINEAR and LIBSVM.
12
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
13
+ # https://github.com/yoshoku/rumale-svm
14
+ #
11
15
  # @example
12
16
  # estimator =
13
17
  # Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.7'
6
+ VERSION = '0.13.8'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.7
4
+ version: 0.13.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-02 00:00:00.000000000 Z
11
+ date: 2019-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray