rumale-core 0.24.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +36 -0
- data/lib/rumale/base/classifier.rb +36 -0
- data/lib/rumale/base/cluster_analyzer.rb +34 -0
- data/lib/rumale/base/estimator.rb +58 -0
- data/lib/rumale/base/evaluator.rb +15 -0
- data/lib/rumale/base/regressor.rb +44 -0
- data/lib/rumale/base/splitter.rb +19 -0
- data/lib/rumale/base/transformer.rb +20 -0
- data/lib/rumale/core/version.rb +10 -0
- data/lib/rumale/core.rb +19 -0
- data/lib/rumale/dataset.rb +233 -0
- data/lib/rumale/pairwise_metric.rb +130 -0
- data/lib/rumale/probabilistic_output.rb +116 -0
- data/lib/rumale/utils.rb +69 -0
- data/lib/rumale/validation.rb +39 -0
- metadata +81 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 20730446c51b9a32802a495391fac8b0876f312a9c4dec81d32f710ca4df1bfc
|
4
|
+
data.tar.gz: 1f1e142622c7cb40d6604333d4598bf1504262695847d371456a429d2051831a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fd8d3655d13753a5cea108ba7e140df5e3fd1cad0814521ea6c867d0c9165a60ecd7b53d26b7684d9b834e9b0c5daf2e4164a9c93b0af01fcf303c45b8bf908f
|
7
|
+
data.tar.gz: 272f966cd88623339ad6d6811fe270901b5be42ea88094c17275599a18e6ce27e2d942de5bdc6b82048df41a4c826b1df0e7d88d09f1a7a444471e96ab077727
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Rumale::Core
|
2
|
+
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/rumale-core.svg)](https://badge.fury.io/rb/rumale-core)
|
4
|
+
[![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/main/rumale-core/LICENSE.txt)
|
5
|
+
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://yoshoku.github.io/rumale/doc/Rumale/Base.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::Core provides base classes and utility functions for implementing
|
9
|
+
machine learning algorithms with the Rumale interface.
|
10
|
+
|
11
|
+
## Installation
|
12
|
+
|
13
|
+
Add this line to your application's Gemfile:
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
gem 'rumale-core'
|
17
|
+
```
|
18
|
+
|
19
|
+
And then execute:
|
20
|
+
|
21
|
+
$ bundle install
|
22
|
+
|
23
|
+
Or install it yourself as:
|
24
|
+
|
25
|
+
$ gem install rumale-core
|
26
|
+
|
27
|
+
## Documentation
|
28
|
+
|
29
|
+
- [Rumale API Documentation - Base](https://yoshoku.github.io/rumale/doc/Rumale/Base.html)
|
30
|
+
- [Rumale API Documentation - Dataset](https://yoshoku.github.io/rumale/doc/Rumale/Dataset.html)
|
31
|
+
- [Rumale API Documentation - PairwiseMetric](https://yoshoku.github.io/rumale/doc/Rumale/PairwiseMetric.html)
|
32
|
+
- [Rumale API Documentation - ProbabilisticOutput](https://yoshoku.github.io/rumale/doc/Rumale/ProbabilisticOutput.html)
|
33
|
+
|
34
|
+
## License
|
35
|
+
|
36
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require 'rumale/validation'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module Base
|
9
|
+
# Module for all classifiers in Rumale.
|
10
|
+
module Classifier
|
11
|
+
# An abstract method for fitting a model.
|
12
|
+
def fit
|
13
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
14
|
+
end
|
15
|
+
|
16
|
+
# An abstract method for predicting labels.
|
17
|
+
def predict
|
18
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
19
|
+
end
|
20
|
+
|
21
|
+
# Calculate the mean accuracy of the given testing data.
|
22
|
+
#
|
23
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
24
|
+
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
25
|
+
# @return [Float] Mean accuracy
|
26
|
+
def score(x, y)
|
27
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
28
|
+
y = ::Rumale::Validation.check_convert_label_array(y)
|
29
|
+
::Rumale::Validation.check_sample_size(x, y)
|
30
|
+
|
31
|
+
predicted = predict(x)
|
32
|
+
(y.to_a.map.with_index { |label, n| label == predicted[n] ? 1 : 0 }).sum.fdiv(y.size)
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module Base
|
7
|
+
# Module for all clustering algorithms in Rumale.
|
8
|
+
module ClusterAnalyzer
|
9
|
+
# An abstract method for analyzing clusters and predicting cluster indices.
|
10
|
+
def fit_predict
|
11
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
12
|
+
end
|
13
|
+
|
14
|
+
# Calculate purity of clustering result.
|
15
|
+
#
|
16
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
17
|
+
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
18
|
+
# @return [Float] Purity
|
19
|
+
def score(x, y)
|
20
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
21
|
+
y = ::Rumale::Validation.check_convert_label_array(y)
|
22
|
+
::Rumale::Validation.check_sample_size(x, y)
|
23
|
+
|
24
|
+
predicted = fit_predict(x)
|
25
|
+
cluster_ids = predicted.to_a.uniq
|
26
|
+
class_ids = y.to_a.uniq
|
27
|
+
cluster_ids.sum do |k|
|
28
|
+
pr_sample_ids = predicted.eq(k).where.to_a
|
29
|
+
class_ids.map { |j| (pr_sample_ids & y.eq(j).where.to_a).size }.max
|
30
|
+
end.fdiv(y.size)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
# This module consists of basic mix-in classes.
|
7
|
+
module Base
|
8
|
+
# Base class for all estimators in Rumale.
|
9
|
+
class Estimator
|
10
|
+
# Return parameters about an estimator.
|
11
|
+
# @return [Hash]
|
12
|
+
attr_reader :params
|
13
|
+
|
14
|
+
private
|
15
|
+
|
16
|
+
# Check whether Numo::Linalg is loaded and recent enough to be used.
#
# @param warning [Boolean] Whether to print a message with Kernel#warn when the check fails.
# @return [Boolean] true if Numo::Linalg is defined and its VERSION is 0.1.4 or later,
#   false otherwise.
def enable_linalg?(warning: true)
  unless defined?(Numo::Linalg)
    if warning
      warn('If you want to use features that depend on Numo::Linalg, ' \
           'you should install and load Numo::Linalg in advance.')
    end
    return false
  end

  # Compare as versions rather than as strings: '0.1.10' < '0.1.4' is true
  # lexicographically, which would wrongly reject newer releases.
  if Gem::Version.new(Numo::Linalg::VERSION) < Gem::Version.new('0.1.4')
    if warning
      warn('The loaded Numo::Linalg does not implement the methods required by Rumale. ' \
           'Please load Numo::Linalg version 0.1.4 or later.')
    end
    return false
  end

  true
end
|
33
|
+
|
34
|
+
def enable_parallel?(warning: true)
|
35
|
+
return false if @params[:n_jobs].nil?
|
36
|
+
|
37
|
+
unless defined?(Parallel)
|
38
|
+
if warning
|
39
|
+
warn('If you want to use parallel option, ' \
|
40
|
+
'you should install and load Parallel in advance.')
|
41
|
+
end
|
42
|
+
return false
|
43
|
+
end
|
44
|
+
true
|
45
|
+
end
|
46
|
+
|
47
|
+
def n_processes
|
48
|
+
return 1 unless enable_parallel?(warning: false)
|
49
|
+
|
50
|
+
@params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
|
51
|
+
end
|
52
|
+
|
53
|
+
def parallel_map(n_outputs, &block)
|
54
|
+
Parallel.map(Array.new(n_outputs) { |v| v }, in_processes: n_processes, &block)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module Base
|
7
|
+
# Module for all evaluation measures in Rumale.
|
8
|
+
module Evaluator
|
9
|
+
# An abstract method for evaluation of model.
|
10
|
+
def score
|
11
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module Base
|
7
|
+
# Module for all regressors in Rumale.
|
8
|
+
module Regressor
|
9
|
+
# An abstract method for fitting a model.
|
10
|
+
def fit
|
11
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
12
|
+
end
|
13
|
+
|
14
|
+
# An abstract method for predicting target values.
|
15
|
+
def predict
|
16
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
17
|
+
end
|
18
|
+
|
19
|
+
# Calculate the coefficient of determination for the given testing data.
|
20
|
+
#
|
21
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
22
|
+
# @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
|
23
|
+
# @return [Float] Coefficient of determination
|
24
|
+
def score(x, y)
|
25
|
+
x = ::Rumale::Validation.check_convert_sample_array(x)
|
26
|
+
y = ::Rumale::Validation.check_convert_target_value_array(y)
|
27
|
+
::Rumale::Validation.check_sample_size(x, y)
|
28
|
+
|
29
|
+
predicted = predict(x)
|
30
|
+
n_samples, n_outputs = y.shape
|
31
|
+
numerator = ((y - predicted)**2).sum(axis: 0)
|
32
|
+
yt_mean = y.sum(axis: 0) / n_samples
|
33
|
+
denominator = ((y - yt_mean)**2).sum(axis: 0)
|
34
|
+
if n_outputs.nil?
|
35
|
+
denominator.zero? ? 0.0 : 1.0 - numerator.fdiv(denominator)
|
36
|
+
else
|
37
|
+
scores = 1.0 - numerator / denominator
|
38
|
+
scores[denominator.eq(0)] = 0.0
|
39
|
+
scores.sum.fdiv(scores.size)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module Base
|
7
|
+
# Module for all validation methods in Rumale.
|
8
|
+
module Splitter
|
9
|
+
# Return the number of splits.
|
10
|
+
# @return [Integer]
|
11
|
+
attr_reader :n_splits
|
12
|
+
|
13
|
+
# An abstract method for splitting dataset.
|
14
|
+
def split
|
15
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
module Base
|
7
|
+
# Module for all transformers in Rumale.
|
8
|
+
module Transformer
|
9
|
+
# An abstract method for fitting a model.
|
10
|
+
def fit
|
11
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
12
|
+
end
|
13
|
+
|
14
|
+
# An abstract method for fitting a model and transforming given data.
|
15
|
+
def fit_transform
|
16
|
+
raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
data/lib/rumale/core.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'core/version'
|
6
|
+
|
7
|
+
require_relative 'base/estimator'
|
8
|
+
require_relative 'base/classifier'
|
9
|
+
require_relative 'base/cluster_analyzer'
|
10
|
+
require_relative 'base/evaluator'
|
11
|
+
require_relative 'base/regressor'
|
12
|
+
require_relative 'base/splitter'
|
13
|
+
require_relative 'base/transformer'
|
14
|
+
|
15
|
+
require_relative 'dataset'
|
16
|
+
require_relative 'pairwise_metric'
|
17
|
+
require_relative 'probabilistic_output'
|
18
|
+
require_relative 'utils'
|
19
|
+
require_relative 'validation'
|
@@ -0,0 +1,233 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'numo/narray'
|
5
|
+
require 'rumale/utils'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
# Module for loading and saving a dataset file.
|
9
|
+
module Dataset # rubocop:disable Metrics/ModuleLength
|
10
|
+
class << self
|
11
|
+
# Load a dataset with the libsvm file format into Numo::NArray.
|
12
|
+
#
|
13
|
+
# @param filename [String] A path to a dataset file.
|
14
|
+
# @param n_features [Integer/Nil] The number of features of data to load.
|
15
|
+
# If nil is given, it will be detected automatically from given file.
|
16
|
+
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
17
|
+
# @param dtype [Numo::NArray] Data type of Numo::NArray for features to be loaded.
|
18
|
+
#
|
19
|
+
# @return [Array<Numo::NArray>]
|
20
|
+
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
21
|
+
# and (n_samples) vector for labels or target values.
|
22
|
+
def load_libsvm_file(filename, n_features: nil, zero_based: false, dtype: Numo::DFloat)
|
23
|
+
ftvecs = []
|
24
|
+
labels = []
|
25
|
+
n_features_detected = 0
|
26
|
+
CSV.foreach(filename, col_sep: "\s", headers: false) do |line|
|
27
|
+
label, ftvec, max_idx = parse_libsvm_line(line, zero_based)
|
28
|
+
labels.push(label)
|
29
|
+
ftvecs.push(ftvec)
|
30
|
+
n_features_detected = max_idx if n_features_detected < max_idx
|
31
|
+
end
|
32
|
+
n_features ||= n_features_detected
|
33
|
+
n_features = [n_features, n_features_detected].max
|
34
|
+
[convert_to_matrix(ftvecs, n_features, dtype), Numo::NArray.asarray(labels)]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Dump the dataset with the libsvm file format.
|
38
|
+
#
|
39
|
+
# @param data [Numo::NArray] (shape: [n_samples, n_features]) matrix consisting of feature vectors.
|
40
|
+
# @param labels [Numo::NArray] (shape: [n_samples]) matrix consisting of labels or target values.
|
41
|
+
# @param filename [String] A path to the output libsvm file.
|
42
|
+
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
43
|
+
def dump_libsvm_file(data, labels, filename, zero_based: false)
|
44
|
+
n_samples = [data.shape[0], labels.shape[0]].min
|
45
|
+
single_label = labels.shape[1].nil?
|
46
|
+
label_type = detect_dtype(labels)
|
47
|
+
value_type = detect_dtype(data)
|
48
|
+
File.open(filename, 'w') do |file|
|
49
|
+
n_samples.times do |n|
|
50
|
+
label = single_label ? labels[n] : labels[n, true].to_a
|
51
|
+
file.puts(dump_libsvm_line(label, data[n, true],
|
52
|
+
label_type, value_type, zero_based))
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
# Generate a two-dimensional data set consisting of an inner circle and an outer circle.
|
58
|
+
#
|
59
|
+
# @param n_samples [Integer] The number of samples.
|
60
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset
|
61
|
+
# @param noise [Float] The standard deviation of Gaussian noise added to the data.
|
62
|
+
# If nil is given, no noise is added.
|
63
|
+
# @param factor [Float] The scale factor between inner and outer circles. The interval of factor is (0, 1).
|
64
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
65
|
+
def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
|
66
|
+
# initialize some variables.
|
67
|
+
rs = random_seed
|
68
|
+
rs ||= srand
|
69
|
+
rng = Random.new(rs)
|
70
|
+
n_samples_out = n_samples.fdiv(2).to_i
|
71
|
+
n_samples_in = n_samples - n_samples_out
|
72
|
+
# make two circles.
|
73
|
+
linsp_out = Numo::DFloat.linspace(0, 2 * Math::PI, n_samples_out)
|
74
|
+
linsp_in = Numo::DFloat.linspace(0, 2 * Math::PI, n_samples_in)
|
75
|
+
circle_out = Numo::DFloat[Numo::NMath.cos(linsp_out), Numo::NMath.sin(linsp_out)].transpose
|
76
|
+
circle_in = Numo::DFloat[Numo::NMath.cos(linsp_in), Numo::NMath.sin(linsp_in)].transpose
|
77
|
+
x = Numo::DFloat.vstack([circle_out, factor * circle_in])
|
78
|
+
y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
|
79
|
+
# shuffle data indices.
|
80
|
+
if shuffle
|
81
|
+
rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
|
82
|
+
x = x[rand_ids, true].dup
|
83
|
+
y = y[rand_ids].dup
|
84
|
+
end
|
85
|
+
# add gaussian noise.
|
86
|
+
x += ::Rumale::Utils.rand_normal(x.shape, rng.dup, 0.0, noise) unless noise.nil?
|
87
|
+
[x, y]
|
88
|
+
end
|
89
|
+
|
90
|
+
# Generate a two-dimensional data set consisting of two half circles shifted.
|
91
|
+
#
|
92
|
+
# @param n_samples [Integer] The number of samples.
|
93
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset
|
94
|
+
# @param noise [Float] The standard deviation of Gaussian noise added to the data.
|
95
|
+
# If nil is given, no noise is added.
|
96
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
97
|
+
def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
|
98
|
+
# initialize some variables.
|
99
|
+
rs = random_seed
|
100
|
+
rs ||= srand
|
101
|
+
rng = Random.new(rs)
|
102
|
+
n_samples_out = n_samples.fdiv(2).to_i
|
103
|
+
n_samples_in = n_samples - n_samples_out
|
104
|
+
# make two half circles.
|
105
|
+
linsp_out = Numo::DFloat.linspace(0, Math::PI, n_samples_out)
|
106
|
+
linsp_in = Numo::DFloat.linspace(0, Math::PI, n_samples_in)
|
107
|
+
circle_out = Numo::DFloat[Numo::NMath.cos(linsp_out), Numo::NMath.sin(linsp_out)].transpose
|
108
|
+
circle_in = Numo::DFloat[1 - Numo::NMath.cos(linsp_in), 1 - Numo::NMath.sin(linsp_in) - 0.5].transpose
|
109
|
+
x = Numo::DFloat.vstack([circle_out, circle_in])
|
110
|
+
y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])
|
111
|
+
# shuffle data indices.
|
112
|
+
if shuffle
|
113
|
+
rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
|
114
|
+
x = x[rand_ids, true].dup
|
115
|
+
y = y[rand_ids].dup
|
116
|
+
end
|
117
|
+
# add gaussian noise.
|
118
|
+
x += ::Rumale::Utils.rand_normal(x.shape, rng.dup, 0.0, noise) unless noise.nil?
|
119
|
+
[x, y]
|
120
|
+
end
|
121
|
+
|
122
|
+
# Generate Gaussian blobs.
|
123
|
+
#
|
124
|
+
# @param n_samples [Integer] The total number of samples.
|
125
|
+
# @param n_features [Integer] The number of features.
|
126
|
+
# If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
|
127
|
+
# @param centers [Integer/Numo::DFloat/Nil] The number of cluster centroids or the fixed cluster centroids.
|
128
|
+
# If nil is given, the number of cluster centroids is set to 3.
|
129
|
+
# @param cluster_std [Float] The standard deviation of the clusters.
|
130
|
+
# @param center_box [Array] The bounding box for each cluster centroid.
|
131
|
+
# If "centers" parameter is given as a Numo::DFloat array, this parameter is ignored.
|
132
|
+
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset
|
133
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
134
|
+
def make_blobs(n_samples = 1000, n_features = 2,
|
135
|
+
centers: nil, cluster_std: 1.0, center_box: [-10, 10], shuffle: true, random_seed: nil)
|
136
|
+
# initialize rng.
|
137
|
+
rs = random_seed
|
138
|
+
rs ||= srand
|
139
|
+
rng = Random.new(rs)
|
140
|
+
# initialize centers.
|
141
|
+
if centers.is_a?(Numo::DFloat)
|
142
|
+
n_centers = centers.shape[0]
|
143
|
+
n_features = centers.shape[1]
|
144
|
+
else
|
145
|
+
n_centers = centers.is_a?(Integer) ? centers : 3
|
146
|
+
center_min = center_box.first
|
147
|
+
center_max = center_box.last
|
148
|
+
centers = ::Rumale::Utils.rand_uniform([n_centers, n_features], rng)
|
149
|
+
min_vec = centers.min(0)
|
150
|
+
dif_vec = centers.max(0) - min_vec
|
151
|
+
dif_vec[dif_vec.eq(0)] = 1.0
|
152
|
+
centers = ((centers - min_vec.tile(n_centers,
|
153
|
+
1)) / dif_vec.tile(n_centers, 1)) * (center_max - center_min) + center_min
|
154
|
+
end
|
155
|
+
# generate blobs.
|
156
|
+
sz_cluster = [n_samples / n_centers] * n_centers
|
157
|
+
(n_samples % n_centers).times { |n| sz_cluster[n] += 1 }
|
158
|
+
x = ::Rumale::Utils.rand_normal([sz_cluster[0], n_features], rng, 0.0, cluster_std) + centers[0, true]
|
159
|
+
y = Numo::Int32.zeros(sz_cluster[0])
|
160
|
+
(1...n_centers).each do |n|
|
161
|
+
c = ::Rumale::Utils.rand_normal([sz_cluster[n], n_features], rng, 0.0, cluster_std) + centers[n, true]
|
162
|
+
x = Numo::DFloat.vstack([x, c])
|
163
|
+
y = y.concatenate(Numo::Int32.zeros(sz_cluster[n]) + n)
|
164
|
+
end
|
165
|
+
# shuffle data.
|
166
|
+
if shuffle
|
167
|
+
rand_ids = Array(0...n_samples).shuffle(random: rng.dup)
|
168
|
+
x = x[rand_ids, true].dup
|
169
|
+
y = y[rand_ids].dup
|
170
|
+
end
|
171
|
+
[x, y]
|
172
|
+
end
|
173
|
+
|
174
|
+
private
|
175
|
+
|
176
|
+
def parse_libsvm_line(line, zero_based)
|
177
|
+
label = parse_label(line.shift)
|
178
|
+
adj_idx = zero_based == false ? 1 : 0
|
179
|
+
max_idx = -1
|
180
|
+
ftvec = []
|
181
|
+
while (el = line.shift)
|
182
|
+
idx, val = el.split(':')
|
183
|
+
idx = idx.to_i - adj_idx
|
184
|
+
val = val.to_i.to_s == val ? val.to_i : val.to_f
|
185
|
+
max_idx = idx if max_idx < idx
|
186
|
+
ftvec.push([idx, val])
|
187
|
+
end
|
188
|
+
[label, ftvec, max_idx]
|
189
|
+
end
|
190
|
+
|
191
|
+
def parse_label(label)
|
192
|
+
lbl_arr = label.split(',').map { |lbl| lbl.to_i.to_s == lbl ? lbl.to_i : lbl.to_f }
|
193
|
+
lbl_arr.size > 1 ? lbl_arr : lbl_arr[0]
|
194
|
+
end
|
195
|
+
|
196
|
+
def convert_to_matrix(data, n_features, dtype)
|
197
|
+
mat = []
|
198
|
+
data.each do |ft|
|
199
|
+
vec = Array.new(n_features) { 0 }
|
200
|
+
ft.each { |el| vec[el[0]] = el[1] }
|
201
|
+
mat.push(vec)
|
202
|
+
end
|
203
|
+
dtype.asarray(mat)
|
204
|
+
end
|
205
|
+
|
206
|
+
def detect_dtype(data)
|
207
|
+
arr_type_str = Numo::NArray.array_type(data).to_s
|
208
|
+
type = '%s'
|
209
|
+
type = '%d' if ['Numo::Int8', 'Numo::Int16', 'Numo::Int32', 'Numo::Int64'].include?(arr_type_str)
|
210
|
+
type = '%d' if ['Numo::UInt8', 'Numo::UInt16', 'Numo::UInt32', 'Numo::UInt64'].include?(arr_type_str)
|
211
|
+
type = '%.10g' if ['Numo::SFloat', 'Numo::DFloat'].include?(arr_type_str)
|
212
|
+
type
|
213
|
+
end
|
214
|
+
|
215
|
+
def dump_libsvm_line(label, ftvec, label_type, value_type, zero_based)
|
216
|
+
line = dump_label(label, label_type.to_s)
|
217
|
+
ftvec.to_a.each_with_index do |val, n|
|
218
|
+
idx = n + (zero_based == false ? 1 : 0)
|
219
|
+
line += format(" %d:#{value_type}", idx, val) if val != 0
|
220
|
+
end
|
221
|
+
line
|
222
|
+
end
|
223
|
+
|
224
|
+
def dump_label(label, label_type_str)
|
225
|
+
if label.is_a?(Array)
|
226
|
+
label.map { |lbl| format(label_type_str, lbl) }.join(',')
|
227
|
+
else
|
228
|
+
format(label_type_str, label)
|
229
|
+
end
|
230
|
+
end
|
231
|
+
end
|
232
|
+
end
|
233
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
# Module for calculating pairwise distances, similarities, and kernels.
|
7
|
+
module PairwiseMetric
|
8
|
+
module_function
|
9
|
+
|
10
|
+
# Calculate the pairwise euclidean distances between x and y.
|
11
|
+
#
|
12
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
13
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
14
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
15
|
+
def euclidean_distance(x, y = nil)
|
16
|
+
y = x if y.nil?
|
17
|
+
Numo::NMath.sqrt(squared_error(x, y).abs)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Calculate the pairwise manhattan distances between x and y.
|
21
|
+
#
|
22
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
23
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
24
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
25
|
+
def manhattan_distance(x, y = nil)
|
26
|
+
y = x if y.nil?
|
27
|
+
n_samples_x = x.shape[0]
|
28
|
+
n_samples_y = y.shape[0]
|
29
|
+
distance_mat = Numo::DFloat.zeros(n_samples_x, n_samples_y)
|
30
|
+
n_samples_x.times do |n|
|
31
|
+
distance_mat[n, true] = (y - x[n, true]).abs.sum(axis: 1)
|
32
|
+
end
|
33
|
+
distance_mat
|
34
|
+
end
|
35
|
+
|
36
|
+
# Calculate the pairwise squared errors between x and y.
|
37
|
+
#
|
38
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
39
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
40
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
41
|
+
# Calculate the pairwise squared errors between the rows of x and the rows of y,
# using the expansion |a - b|^2 = |a|^2 - 2 * a.b + |b|^2.
#
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
#   If nil is given, the errors are calculated between the rows of x itself.
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
def squared_error(x, y = nil)
  sum_x_vec = (x**2).sum(axis: 1).expand_dims(1)
  if y.nil?
    # Self-comparison: reuse the squared norms of x instead of computing them twice.
    y = x
    sum_y_vec = sum_x_vec.transpose
  else
    sum_y_vec = (y**2).sum(axis: 1).expand_dims(1).transpose
  end
  err_mat = -2 * x.dot(y.transpose)
  err_mat += sum_x_vec
  err_mat += sum_y_vec
  # Clamp at zero; presumably this absorbs small negative values caused by
  # floating-point round-off in the expansion above.
  err_mat.class.maximum(err_mat, 0)
end
|
50
|
+
|
51
|
+
# Calculate the pairwise cosine similarities between x and y.
|
52
|
+
#
|
53
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
54
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
55
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
56
|
+
def cosine_similarity(x, y = nil)
|
57
|
+
x_norm = Numo::NMath.sqrt((x**2).sum(axis: 1))
|
58
|
+
x_norm[x_norm.eq(0)] = 1
|
59
|
+
x /= x_norm.expand_dims(1)
|
60
|
+
if y.nil?
|
61
|
+
x.dot(x.transpose)
|
62
|
+
else
|
63
|
+
y_norm = Numo::NMath.sqrt((y**2).sum(axis: 1))
|
64
|
+
y_norm[y_norm.eq(0)] = 1
|
65
|
+
y /= y_norm.expand_dims(1)
|
66
|
+
x.dot(y.transpose)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
# Calculate the pairwise cosine distances between x and y.
|
71
|
+
#
|
72
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
73
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
74
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
75
|
+
def cosine_distance(x, y = nil)
|
76
|
+
dist_mat = 1 - cosine_similarity(x, y)
|
77
|
+
dist_mat[dist_mat.diag_indices] = 0 if y.nil?
|
78
|
+
dist_mat.clip(0, 2)
|
79
|
+
end
|
80
|
+
|
81
|
+
# Calculate the rbf kernel between x and y.
|
82
|
+
#
|
83
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
84
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
85
|
+
# @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
|
86
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
87
|
+
def rbf_kernel(x, y = nil, gamma = nil)
|
88
|
+
y = x if y.nil?
|
89
|
+
gamma ||= 1.0 / x.shape[1]
|
90
|
+
Numo::NMath.exp(-gamma * squared_error(x, y))
|
91
|
+
end
|
92
|
+
|
93
|
+
# Calculate the linear kernel between x and y.
|
94
|
+
#
|
95
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
96
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
97
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
98
|
+
def linear_kernel(x, y = nil)
|
99
|
+
y = x if y.nil?
|
100
|
+
x.dot(y.transpose)
|
101
|
+
end
|
102
|
+
|
103
|
+
# Calculate the polynomial kernel between x and y.
|
104
|
+
#
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
106
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
107
|
+
# @param degree [Integer] The parameter of polynomial kernel.
|
108
|
+
# @param gamma [Float] The parameter of polynomial kernel, if nil it is 1 / n_features.
|
109
|
+
# @param coef [Integer] The parameter of polynomial kernel.
|
110
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
111
|
+
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1) # rubocop:disable Metrics/ParameterLists
|
112
|
+
y = x if y.nil?
|
113
|
+
gamma ||= 1.0 / x.shape[1]
|
114
|
+
(x.dot(y.transpose) * gamma + coef)**degree
|
115
|
+
end
|
116
|
+
|
117
|
+
# Calculate the sigmoid kernel between x and y.
|
118
|
+
#
|
119
|
+
# @param x [Numo::DFloat] (shape: [n_samples_x, n_features])
|
120
|
+
# @param y [Numo::DFloat] (shape: [n_samples_y, n_features])
|
121
|
+
# @param gamma [Float] The parameter of sigmoid kernel, if nil it is 1 / n_features.
|
122
|
+
# @param coef [Integer] The parameter of sigmoid kernel.
|
123
|
+
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
124
|
+
def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
|
125
|
+
y = x if y.nil?
|
126
|
+
gamma ||= 1.0 / x.shape[1]
|
127
|
+
Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
|
128
|
+
end
|
129
|
+
end
|
130
|
+
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
|
6
|
+
# Module for calculating posterior class probabilities with SVM outputs.
|
7
|
+
# This module is used for internal processes.
|
8
|
+
#
|
9
|
+
# @example
|
10
|
+
# estimator = Rumale::LinearModel::SVC.new
|
11
|
+
# estimator.fit(x, bin_y)
|
12
|
+
# df = estimator.decision_function(x)
|
13
|
+
# params = Rumale::ProbabilisticOutput.fit_sigmoid(df, bin_y)
|
14
|
+
# probs = 1 / (Numo::NMath.exp(params[0] * df + params[1]) + 1)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - Platt, J C., "Probabilistic Outputs for Support Vector Machines and Comparisons to Regularized Likelihood Methods," Adv. Large Margin Classifiers, pp. 61--74, 2000.
|
18
|
+
# - Lin, H-T., Lin, C-J., and Weng, R C., "A Note on Platt's Probabilistic Outputs for Support Vector Machines," J. Machine Learning, Vol. 63 (3), pp. 267--276, 2007.
|
19
|
+
module ProbabilisticOutput
|
20
|
+
class << self
|
21
|
+
# Fit the probabilistic model for binary SVM outputs.
|
22
|
+
#
|
23
|
+
# @param df [Numo::DFloat] (shape: [n_samples]) The outputs of decision function to be used for fitting the model.
|
24
|
+
# @param bin_y [Numo::Int32] (shape: [n_samples]) The binary labels to be used for fitting the model.
|
25
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
26
|
+
# @param min_step [Float] The minimum step of Newton's method.
|
27
|
+
# @param sigma [Float] The parameter that prevents the Hessian matrix from becoming singular.
|
28
|
+
# @return [Numo::DFloat] (shape: 2) The parameters of the model.
|
29
|
+
def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
|
30
|
+
# Initialize some variables.
|
31
|
+
n_samples = bin_y.size
|
32
|
+
negative_label = bin_y.to_a.uniq.min
|
33
|
+
pos = bin_y.ne(negative_label)
|
34
|
+
neg = bin_y.eq(negative_label)
|
35
|
+
n_pos_samples = pos.count
|
36
|
+
n_neg_samples = neg.count
|
37
|
+
target_probs = Numo::DFloat.zeros(n_samples)
|
38
|
+
target_probs[pos] = (n_pos_samples + 1) / (n_pos_samples + 2.0)
|
39
|
+
target_probs[neg] = 1 / (n_neg_samples + 2.0)
|
40
|
+
alpha = 0.0
|
41
|
+
beta = Math.log((n_neg_samples + 1) / (n_pos_samples + 1.0))
|
42
|
+
err = error_function(target_probs, df, alpha, beta)
|
43
|
+
# Optimize parameters for class porbability calculation.
|
44
|
+
old_grad_vec = Numo::DFloat.zeros(2)
|
45
|
+
max_iter.times do
|
46
|
+
# Calculate gradient and hessian matrix.
|
47
|
+
probs = predicted_probs(df, alpha, beta)
|
48
|
+
grad_vec = gradient(target_probs, probs, df)
|
49
|
+
hess_mat = hessian_matrix(probs, df, sigma)
|
50
|
+
break if grad_vec.abs.lt(1e-5).count == 2
|
51
|
+
break if (old_grad_vec - grad_vec).abs.sum < 1e-5
|
52
|
+
|
53
|
+
old_grad_vec = grad_vec
|
54
|
+
# Calculate Newton directions.
|
55
|
+
dirs_vec = directions(grad_vec, hess_mat)
|
56
|
+
grad_dir = grad_vec.dot(dirs_vec)
|
57
|
+
stepsize = 2.0
|
58
|
+
while stepsize >= min_step
|
59
|
+
stepsize *= 0.5
|
60
|
+
new_alpha = alpha + stepsize * dirs_vec[0]
|
61
|
+
new_beta = beta + stepsize * dirs_vec[1]
|
62
|
+
new_err = error_function(target_probs, df, new_alpha, new_beta)
|
63
|
+
next unless new_err < err + 0.0001 * stepsize * grad_dir
|
64
|
+
|
65
|
+
alpha = new_alpha
|
66
|
+
beta = new_beta
|
67
|
+
err = new_err
|
68
|
+
break
|
69
|
+
end
|
70
|
+
end
|
71
|
+
Numo::DFloat[alpha, beta]
|
72
|
+
end
|
73
|
+
|
74
|
+
private
|
75
|
+
|
76
|
+
def error_function(target_probs, df, alpha, beta)
|
77
|
+
fn = alpha * df + beta
|
78
|
+
pos = fn.ge(0.0)
|
79
|
+
neg = fn.lt(0.0)
|
80
|
+
err = 0.0
|
81
|
+
err += (target_probs[pos] * fn[pos] + Numo::NMath.log(1 + Numo::NMath.exp(-fn[pos]))).sum if pos.count.positive?
|
82
|
+
err += ((target_probs[neg] - 1) * fn[neg] + Numo::NMath.log(1 + Numo::NMath.exp(fn[neg]))).sum if neg.count.positive?
|
83
|
+
err
|
84
|
+
end
|
85
|
+
|
86
|
+
def predicted_probs(df, alpha, beta)
|
87
|
+
fn = alpha * df + beta
|
88
|
+
pos = fn.ge(0.0)
|
89
|
+
neg = fn.lt(0.0)
|
90
|
+
probs = Numo::DFloat.zeros(df.shape[0])
|
91
|
+
probs[pos] = Numo::NMath.exp(-fn[pos]) / (1 + Numo::NMath.exp(-fn[pos])) if pos.count.positive?
|
92
|
+
probs[neg] = 1 / (1 + Numo::NMath.exp(fn[neg])) if neg.count.positive?
|
93
|
+
probs
|
94
|
+
end
|
95
|
+
|
96
|
+
def gradient(target_probs, probs, df)
|
97
|
+
sub = target_probs - probs
|
98
|
+
Numo::DFloat[(df * sub).sum, sub.sum]
|
99
|
+
end
|
100
|
+
|
101
|
+
def hessian_matrix(probs, df, sigma)
|
102
|
+
sub = probs * (1 - probs)
|
103
|
+
h11 = (df**2 * sub).sum + sigma
|
104
|
+
h22 = sub.sum + sigma
|
105
|
+
h21 = (df * sub).sum
|
106
|
+
Numo::DFloat[[h11, h21], [h21, h22]]
|
107
|
+
end
|
108
|
+
|
109
|
+
def directions(grad_vec, hess_mat)
|
110
|
+
det = hess_mat[0, 0] * hess_mat[1, 1] - hess_mat[0, 1] * hess_mat[1, 0]
|
111
|
+
inv_hess_mat = Numo::DFloat[[hess_mat[1, 1], -hess_mat[0, 1]], [-hess_mat[1, 0], hess_mat[0, 0]]] / det
|
112
|
+
-inv_hess_mat.dot(grad_vec)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
data/lib/rumale/utils.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
module Rumale
  # @!visibility private
  module Utils
    module_function

    # @!visibility private
    # Draws indices at random by roulette-wheel selection, where probs[i] is the weight of index i.
    #
    # @param size [Integer] The number of indices to draw.
    # @param probs [#each_with_index] The selection probability of each index.
    # @param rng [#rand, nil] The random number generator; a new Random is created when nil.
    # @return [Array<Integer>] The drawn indices.
    def choice_ids(size, probs, rng = nil)
      rng ||= Random.new
      Array.new(size) do
        target = rng.rand
        picked = nil
        fallback = 0
        probs.each_with_index do |p, idx|
          if target <= p
            picked = idx
            break
          end
          # Remember the last index that can legitimately be selected.
          fallback = idx if p > 0
          target -= p
        end
        # The wheel can be exhausted without a hit when rounding makes the probabilities
        # sum to slightly less than the drawn value. In that case the leftover mass belongs
        # to the last index with positive probability, not to index 0.
        picked || fallback
      end
    end

    # @!visibility private
    # Generates an array of random numbers drawn with rng.rand.
    #
    # @param shape [Array<Integer>, Integer] A two-element array for a matrix, or an integer for a vector.
    # @param rng [#rand, nil] The random number generator; a new Random is created when nil.
    # @return [Numo::DFloat]
    def rand_uniform(shape, rng = nil)
      rng ||= Random.new
      if shape.is_a?(Array)
        rnd_vals = Array.new(shape.inject(:*)) { rng.rand }
        Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
      else
        Numo::DFloat.asarray(Array.new(shape) { rng.rand })
      end
    end

    # @!visibility private
    # Generates an array of normally distributed random numbers from two uniform arrays
    # (Box-Muller transform), scaled by sigma and shifted by mu.
    #
    # @param shape [Array<Integer>, Integer] The shape passed to rand_uniform.
    # @param rng [#rand, nil] The random number generator; a new Random is created when nil.
    # @param mu [Float] The value added to the generated numbers.
    # @param sigma [Float] The value multiplied to the generated numbers.
    # @return [Numo::DFloat]
    def rand_normal(shape, rng = nil, mu = 0.0, sigma = 1.0)
      rng ||= Random.new
      a = rand_uniform(shape, rng)
      b = rand_uniform(shape, rng)
      # NOTE(review): Random#rand can return exactly 0.0, for which log is not finite -- confirm whether this needs a guard.
      (Numo::NMath.sqrt(Numo::NMath.log(a) * -2.0) * Numo::NMath.sin(b * 2.0 * Math::PI)) * sigma + mu
    end

    # @!visibility private
    # Converts labels into a one-hot matrix whose columns follow the sorted unique labels.
    #
    # @param labels [Numo::NArray, Array] The labels to be binarized.
    # @return [Numo::Int32] (shape: [n_samples, n_classes])
    def binarize_labels(labels)
      labels = labels.to_a if labels.is_a?(Numo::NArray)
      classes = labels.uniq.sort
      n_classes = classes.size
      n_samples = labels.size
      # Look up each column through a hash instead of scanning classes for every sample.
      column_of = classes.each_with_index.to_h
      binarized = Numo::Int32.zeros(n_samples, n_classes)
      labels.each_with_index { |el, idx| binarized[idx, column_of[el]] = 1 }
      binarized
    end

    # @!visibility private
    # Divides each row of x by its norm.
    #
    # @param x [Numo::NArray] The 2-D array whose rows are normalized.
    # @param norm [String] The norm type: 'l2' or 'l1'.
    # @raise [ArgumentError] When norm is neither 'l2' nor 'l1'.
    # @return [Numo::NArray]
    def normalize(x, norm)
      norm_vec = case norm
                 when 'l2'
                   Numo::NMath.sqrt((x**2).sum(axis: 1))
                 when 'l1'
                   x.abs.sum(axis: 1)
                 else
                   raise ArgumentError, 'given an unsupported norm type'
                 end
      # Rows with zero norm are divided by 1, i.e. left unchanged, to avoid division by zero.
      norm_vec[norm_vec.eq(0)] = 1
      x / norm_vec.expand_dims(1)
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rumale
  # @!visibility private
  module Validation
    module_function

    # @!visibility private
    # Casts x to Numo::DFloat when it is not one already and checks that it is 2-D.
    #
    # @param x [Object] The sample array.
    # @raise [ArgumentError] When the array is not 2-D.
    # @return [Numo::DFloat]
    def check_convert_sample_array(x)
      x = Numo::DFloat.cast(x) unless x.is_a?(Numo::DFloat)
      raise ArgumentError, 'the sample array is expected to be 2-D array' unless x.ndim == 2

      x
    end

    # @!visibility private
    # Casts y to Numo::Int32 when it is not one already and checks that it is 1-D.
    #
    # @param y [Object] The label array.
    # @raise [ArgumentError] When the array is not 1-D.
    # @return [Numo::Int32]
    def check_convert_label_array(y)
      y = Numo::Int32.cast(y) unless y.is_a?(Numo::Int32)
      raise ArgumentError, 'the label array is expected to be 1-D array' unless y.ndim == 1

      y
    end

    # @!visibility private
    # Casts y to Numo::DFloat when it is not one already and checks that it is 1-D or 2-D.
    #
    # @param y [Object] The target value array.
    # @raise [ArgumentError] When the array is neither 1-D nor 2-D.
    # @return [Numo::DFloat]
    def check_convert_target_value_array(y)
      y = Numo::DFloat.cast(y) unless y.is_a?(Numo::DFloat)
      raise ArgumentError, 'the target value array is expected to be 1-D or 2-D array' unless y.ndim == 1 || y.ndim == 2

      y
    end

    # @!visibility private
    # Checks that x and y have the same size along the first axis.
    #
    # @param x [#shape] The sample array.
    # @param y [#shape] The label or target value array.
    # @raise [ArgumentError] When the first elements of the two shapes differ.
    # @return [nil]
    def check_sample_size(x, y)
      return if x.shape[0] == y.shape[0]

      raise ArgumentError, 'the sample array and label or target value array are expected to have the same number of samples'
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-core
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: numo-narray
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.9.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.9.1
|
27
|
+
description: |
|
28
|
+
Rumale::Core provides base classes and utility functions for implementing
|
29
|
+
machine learning algorithm with Rumale interface.
|
30
|
+
email:
|
31
|
+
- yoshoku@outlook.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- LICENSE.txt
|
37
|
+
- README.md
|
38
|
+
- lib/rumale/base/classifier.rb
|
39
|
+
- lib/rumale/base/cluster_analyzer.rb
|
40
|
+
- lib/rumale/base/estimator.rb
|
41
|
+
- lib/rumale/base/evaluator.rb
|
42
|
+
- lib/rumale/base/regressor.rb
|
43
|
+
- lib/rumale/base/splitter.rb
|
44
|
+
- lib/rumale/base/transformer.rb
|
45
|
+
- lib/rumale/core.rb
|
46
|
+
- lib/rumale/core/version.rb
|
47
|
+
- lib/rumale/dataset.rb
|
48
|
+
- lib/rumale/pairwise_metric.rb
|
49
|
+
- lib/rumale/probabilistic_output.rb
|
50
|
+
- lib/rumale/utils.rb
|
51
|
+
- lib/rumale/validation.rb
|
52
|
+
homepage: https://github.com/yoshoku/rumale
|
53
|
+
licenses:
|
54
|
+
- BSD-3-Clause
|
55
|
+
metadata:
|
56
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
57
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-core
|
58
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
59
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
60
|
+
rubygems_mfa_required: 'true'
|
61
|
+
post_install_message:
|
62
|
+
rdoc_options: []
|
63
|
+
require_paths:
|
64
|
+
- lib
|
65
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubygems_version: 3.3.26
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: Rumale::Core provides base classes and utility functions for implementing
|
80
|
+
machine learning algorithm with Rumale interface.
|
81
|
+
test_files: []
|