rumale 0.13.7 → 0.13.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6b7030e096e10df1a95d79aefe6d275c0ca16406
4
- data.tar.gz: 85f6b809e41ecb4743df7e07b99bc3fcf13710e1
3
+ metadata.gz: 581437e401cec76201212dbd1aa1e38ff6142eef
4
+ data.tar.gz: 42ce62c892dcee05d41a5857a28c80627da95833
5
5
  SHA512:
6
- metadata.gz: efb25c2ea461c3ceb9b8d500a4ef77dd4db6e4a2c21400009e8fd940bd28dab6d4a725a320a5e3cc1a1b7676626d03b9c2ef73c9c02246c29e122461680488d7
7
- data.tar.gz: 264c10852a7eb01ddb075c87969625f2cb82eb0bfa3050e21bf41f355d8213808c3684a63a0e7e95480965e0737ae76a85babee60b0a638274c31bc1741774b9
6
+ metadata.gz: 36d7bb281d676827b40f3382292a414b474a83c153e7260f5ceb8af2b7664d621ae0adb3ece7475f35e83debd6f6b1e031c804bf76260ec58be6aca315b0d431
7
+ data.tar.gz: 367c273c90a4685913eb48dde5cb1d109e871e87ac1b2aab76d06b565a283996ea25ece3aa9030b7df64348902f8636c2ee3580a19ac533224fbbe62a495e45c
@@ -1,8 +1,13 @@
1
+ # 0.13.8
2
+ - Add [module function](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html#make_blobs-class_method) for generating an artificial dataset with Gaussian blobs.
3
+ - Add documents about Rumale::SVM.
4
+ - Refactor specs.
5
+
1
6
  # 0.13.7
2
7
  - Add some evaluator classes for clustering.
3
- - SilhouetteScore
4
- - CalinskiHarabaszScore
5
- - DaviesBouldinScore
8
+ - [SilhouetteScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/SilhouetteScore.html)
9
+ - [CalinskiHarabaszScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/CalinskiHarabaszScore.html)
10
+ - [DaviesBouldinScore](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure/DaviesBouldinScore.html)
6
11
 
7
12
  # 0.13.6
8
13
  - Add transformer class for [FastICA](https://yoshoku.github.io/rumale/doc/Rumale/Decomposition/FastICA.html).
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'csv'
4
4
  require 'rumale/validation'
5
+ require 'rumale/utils'
6
+ require 'rumale/preprocessing/min_max_scaler'
5
7
 
6
8
  module Rumale
7
9
  # Module for loading and saving a dataset file.
@@ -126,6 +128,60 @@ module Rumale
126
128
  [x, y]
127
129
  end
128
130
 
131
+ # Generate Gaussian blobs.
132
+ #
133
+ # @param n_samples [Integer] The total number of samples.
134
+ # @param n_features [Integer] The number of features.
135
+ # If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
136
+ # @param centers [Integer/Numo::DFloat/Nil] The number of cluster centroids or the fixed cluster centroids.
137
+ # If nil is given, the number of cluster centroids is set to 3.
138
+ # @param cluster_std [Float] The standard deviation of the clusters.
139
+ # @param center_box [Array] The bounding box for each cluster centroid.
140
+ # If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
141
+ # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
142
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
143
+ def make_blobs(n_samples = 1000, n_features = 2,
144
+ centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
145
+ Rumale::Validation.check_params_integer(n_samples: n_samples, n_features: n_features)
146
+ Rumale::Validation.check_params_float(cluster_std: cluster_std)
147
+ Rumale::Validation.check_params_type(Array, center_box: center_box)
148
+ Rumale::Validation.check_params_boolean(shuffle: shuffle)
149
+ Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
150
+ # initialize rng.
151
+ rs = random_seed
152
+ rs ||= srand
153
+ rng = Random.new(rs)
154
+ # initialize centers.
155
+ if centers.is_a?(Numo::DFloat)
156
+ n_centers = centers.shape[0]
157
+ n_features = centers.shape[1]
158
+ else
159
+ n_centers = centers.is_a?(Integer) ? centers : 3
160
+ center_min = center_box.first
161
+ center_max = center_box.last
162
+ centers = Rumale::Utils.rand_uniform([n_centers, n_features], rng)
163
+ normalizer = Rumale::Preprocessing::MinMaxScaler.new(feature_range: [center_min, center_max])
164
+ centers = normalizer.fit_transform(centers)
165
+ end
166
+ # generate blobs.
167
+ sz_cluster = [n_samples / n_centers] * n_centers
168
+ (n_samples % n_centers).times { |n| sz_cluster[n] += 1 }
169
+ x = Rumale::Utils.rand_normal([sz_cluster[0], n_features], rng, 0.0, cluster_std) + centers[0, true]
170
+ y = Numo::Int32.zeros(sz_cluster[0])
171
+ (1...n_centers).each do |n|
172
+ c = Rumale::Utils.rand_normal([sz_cluster[n], n_features], rng, 0.0, cluster_std) + centers[n, true]
173
+ x = Numo::DFloat.vstack([x, c])
174
+ y = y.concatenate(Numo::Int32.zeros(sz_cluster[n]) + n)
175
+ end
176
+ # shuffle data.
177
+ if shuffle
178
+ rand_ids = [*0...n_samples].shuffle(random: rng.dup)
179
+ x = x[rand_ids, true].dup
180
+ y = y[rand_ids].dup
181
+ end
182
+ [x, y]
183
+ end
184
+
129
185
  private
130
186
 
131
187
  def parse_libsvm_line(line, zero_based)
@@ -11,6 +11,10 @@ module Rumale
11
11
  # with stochastic gradient descent (SGD) optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
+ # Rumale::SVM provides kernel support vector classifier based on LIBSVM.
15
+ # If you prefer execution speed, you should use Rumale::SVM::SVC.
16
+ # https://github.com/yoshoku/rumale-svm
17
+ #
14
18
  # @example
15
19
  # training_kernel_matrix = Rumale::PairwiseMetric::rbf_kernel(training_samples)
16
20
  # estimator =
@@ -9,6 +9,10 @@ module Rumale
9
9
  # with mini-batch stochastic gradient descent optimization.
10
10
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
11
11
  #
12
+ # Rumale::SVM provides Logistic Regression based on LIBLINEAR.
13
+ # If you prefer execution speed, you should use Rumale::SVM::LogisticRegression.
14
+ # https://github.com/yoshoku/rumale-svm
15
+ #
12
16
  # @example
13
17
  # estimator =
14
18
  # Rumale::LinearModel::LogisticRegression.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -11,6 +11,10 @@ module Rumale
11
11
  # with mini-batch stochastic gradient descent optimization.
12
12
  # For multiclass classification problem, it uses one-vs-the-rest strategy.
13
13
  #
14
+ # Rumale::SVM provides linear support vector classifier based on LIBLINEAR.
15
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVC.
16
+ # https://github.com/yoshoku/rumale-svm
17
+ #
14
18
  # @example
15
19
  # estimator =
16
20
  # Rumale::LinearModel::SVC.new(reg_param: 1.0, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -8,6 +8,10 @@ module Rumale
8
8
  # SVR is a class that implements Support Vector Regressor
9
9
  # with mini-batch stochastic gradient descent optimization.
10
10
  #
11
+ # Rumale::SVM provides linear and kernel support vector regressors based on LIBLINEAR and LIBSVM.
12
+ # If you prefer execution speed, you should use Rumale::SVM::LinearSVR.
13
+ # https://github.com/yoshoku/rumale-svm
14
+ #
11
15
  # @example
12
16
  # estimator =
13
17
  # Rumale::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.7'
6
+ VERSION = '0.13.8'
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.7
4
+ version: 0.13.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-11-02 00:00:00.000000000 Z
11
+ date: 2019-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray