rumale 0.13.1 → 0.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/clustering/gaussian_mixture.rb +3 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +3 -3
- data/lib/rumale/preprocessing/label_binarizer.rb +104 -0
- data/lib/rumale/preprocessing/one_hot_encoder.rb +26 -9
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +8 -0
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 948ea0c8e1c7d41704f0259ecd75dc2c4dd3e10f
|
4
|
+
data.tar.gz: 66adfaeb23d85aafc8cdea65bdda628ed1b481a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c424d21b6c49e55606e26d946ea6df05fd5f860914ba016a8c32da82a63865e74be24ce1f82e14cb787352979960aedaae953093854756df81c6ef57079f7ed5
|
7
|
+
data.tar.gz: 04ba83211d4a296fda4f109f92439b76e494ee2ac9321a9d46cf1c5cd4d07a5736e86c9217d180438ea360b0f48564ef03d4dfad5caa8e2c9586e096d703adbc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.13.2
|
2
|
+
- Add preprocessing class for label binarization.
|
3
|
+
- Fix to use LabelBinarizer instead of OneHotEncoder.
|
4
|
+
- Fix bug that OneHotEncoder leaves elements related to values that do not occur in training data.
|
5
|
+
|
1
6
|
# 0.13.1
|
2
7
|
- Add class for Shared Nearest Neighbor clustering.
|
3
8
|
- Add function for calculation of manhattan distance to Rumale::PairwiseMetric.
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
[](https://coveralls.io/github/yoshoku/rumale?branch=master)
|
7
7
|
[](https://badge.fury.io/rb/rumale)
|
8
8
|
[](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
|
9
|
-
[](https://
|
9
|
+
[](https://yoshoku.github.io/rumale/doc/)
|
10
10
|
|
11
11
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
12
12
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
data/lib/rumale.rb
CHANGED
@@ -71,6 +71,7 @@ require 'rumale/preprocessing/min_max_scaler'
|
|
71
71
|
require 'rumale/preprocessing/max_abs_scaler'
|
72
72
|
require 'rumale/preprocessing/standard_scaler'
|
73
73
|
require 'rumale/preprocessing/bin_discretizer'
|
74
|
+
require 'rumale/preprocessing/label_binarizer'
|
74
75
|
require 'rumale/preprocessing/label_encoder'
|
75
76
|
require 'rumale/preprocessing/one_hot_encoder'
|
76
77
|
require 'rumale/preprocessing/ordinal_encoder'
|
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/cluster_analyzer'
|
5
|
+
require 'rumale/preprocessing/label_binarizer'
|
5
6
|
require 'rumale/pairwise_metric'
|
6
7
|
|
7
8
|
module Rumale
|
@@ -136,8 +137,8 @@ module Rumale
|
|
136
137
|
n_clusters: @params[:n_clusters], init: @params[:init], max_iter: 0, random_seed: @params[:random_seed]
|
137
138
|
)
|
138
139
|
cluster_ids = kmeans.fit_predict(x)
|
139
|
-
encoder = Rumale::Preprocessing::
|
140
|
-
encoder.fit_transform(cluster_ids)
|
140
|
+
encoder = Rumale::Preprocessing::LabelBinarizer.new
|
141
|
+
Numo::DFloat.cast(encoder.fit_transform(cluster_ids))
|
141
142
|
end
|
142
143
|
|
143
144
|
def calc_memberships(x, weights, means, diag_cov)
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'rumale/base/evaluator'
|
4
|
-
require 'rumale/preprocessing/
|
4
|
+
require 'rumale/preprocessing/label_binarizer'
|
5
5
|
|
6
6
|
module Rumale
|
7
7
|
module EvaluationMeasure
|
@@ -33,8 +33,8 @@ module Rumale
|
|
33
33
|
bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
|
34
34
|
-(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
|
35
35
|
else
|
36
|
-
encoder = Rumale::Preprocessing::
|
37
|
-
encoded_y_true = encoder.fit_transform(y_true)
|
36
|
+
encoder = Rumale::Preprocessing::LabelBinarizer.new
|
37
|
+
encoded_y_true = Numo::DFloat.cast(encoder.fit_transform(y_true))
|
38
38
|
clipped_p /= clipped_p.sum(1).expand_dims(1)
|
39
39
|
-(encoded_y_true * Numo::NMath.log(clipped_p)).sum(1)
|
40
40
|
end
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Encode labels to binary labels with one-vs-all scheme.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# encoder = Rumale::Preprocessing::LabelBinarizer.new
|
12
|
+
# label = [0, -1, 3, 3, 1, 1]
|
13
|
+
# p encoder.fit_transform(label)
|
14
|
+
# # Numo::Int32#shape=[6,4]
|
15
|
+
# # [[0, 1, 0, 0],
|
16
|
+
# # [1, 0, 0, 0],
|
17
|
+
# # [0, 0, 0, 1],
|
18
|
+
# # [0, 0, 0, 1],
|
19
|
+
# # [0, 0, 1, 0],
|
20
|
+
# # [0, 0, 1, 0]]
|
21
|
+
class LabelBinarizer
|
22
|
+
include Base::BaseEstimator
|
23
|
+
include Base::Transformer
|
24
|
+
|
25
|
+
# Return the class labels.
|
26
|
+
# @return [Array] (size: [n_classes])
|
27
|
+
attr_reader :classes
|
28
|
+
|
29
|
+
# Create a new encoder for binarizing labels with one-vs-all scheme.
|
30
|
+
#
|
31
|
+
# @param neg_label [Integer] The value represents negative label.
|
32
|
+
# @param pos_label [Integer] The value represents positive label.
|
33
|
+
def initialize(neg_label: 0, pos_label: 1)
|
34
|
+
check_params_integer(neg_label: neg_label, pos_label: pos_label)
|
35
|
+
@params = {}
|
36
|
+
@params[:neg_label] = neg_label
|
37
|
+
@params[:pos_label] = pos_label
|
38
|
+
@classes = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
# Fit encoder to labels.
|
42
|
+
#
|
43
|
+
# @overload fit(y) -> LabelBinarizer
|
44
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
45
|
+
# @return [LabelBinarizer]
|
46
|
+
def fit(y, _not_used = nil)
|
47
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
48
|
+
check_params_type(Array, y: y)
|
49
|
+
@classes = y.uniq.sort
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
# Fit encoder to labels, then return binarized labels.
|
54
|
+
#
|
55
|
+
# @overload fit_transform(y) -> Numo::Int32
|
56
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
57
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
58
|
+
def fit_transform(y, _not_used = nil)
|
59
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
60
|
+
check_params_type(Array, y: y)
|
61
|
+
fit(y).transform(y)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Encode labels.
|
65
|
+
#
|
66
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to be encoded.
|
67
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
68
|
+
def transform(y)
|
69
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
70
|
+
check_params_type(Array, y: y)
|
71
|
+
n_classes = @classes.size
|
72
|
+
n_samples = y.size
|
73
|
+
codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
|
74
|
+
n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
|
75
|
+
codes
|
76
|
+
end
|
77
|
+
|
78
|
+
# Decode binarized labels.
|
79
|
+
#
|
80
|
+
# @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
|
81
|
+
# @return [Array] (shape: [n_samples]) The decoded labels.
|
82
|
+
def inverse_transform(x)
|
83
|
+
check_params_type(Numo::Int32, x: x)
|
84
|
+
n_samples = x.shape[0]
|
85
|
+
Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
|
86
|
+
end
|
87
|
+
|
88
|
+
# Dump marshal data.
|
89
|
+
# @return [Hash] The marshal data about LabelBinarizer.
|
90
|
+
def marshal_dump
|
91
|
+
{ params: @params,
|
92
|
+
classes: @classes }
|
93
|
+
end
|
94
|
+
|
95
|
+
# Load marshal data.
|
96
|
+
# @return [nil]
|
97
|
+
def marshal_load(obj)
|
98
|
+
@params = obj[:params]
|
99
|
+
@classes = obj[:classes]
|
100
|
+
nil
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
@@ -27,6 +27,10 @@ module Rumale
|
|
27
27
|
# @return [Numo::Int32] (shape: [n_features])
|
28
28
|
attr_reader :n_values
|
29
29
|
|
30
|
+
# Return the indices for feature values that actually occur in the training set.
|
31
|
+
# @return [Numo::Int32]
|
32
|
+
attr_reader :active_features
|
33
|
+
|
30
34
|
# Return the indices to feature ranges.
|
31
35
|
# @return [Numo::Int32] (shape: [n_features + 1])
|
32
36
|
attr_reader :feature_indices
|
@@ -35,19 +39,21 @@ module Rumale
|
|
35
39
|
def initialize
|
36
40
|
@params = {}
|
37
41
|
@n_values = nil
|
42
|
+
@active_features = nil
|
38
43
|
@feature_indices = nil
|
39
44
|
end
|
40
45
|
|
41
46
|
# Fit one-hot-encoder to samples.
|
42
47
|
#
|
43
48
|
# @overload fit(x) -> OneHotEncoder
|
44
|
-
#
|
45
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
49
|
+
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
46
50
|
# @return [OneHotEncoder]
|
47
51
|
def fit(x, _y = nil)
|
48
52
|
check_params_type(Numo::Int32, x: x)
|
53
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
49
54
|
@n_values = x.max(0) + 1
|
50
55
|
@feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
|
56
|
+
@active_features = encode(x, @feature_indices).sum(0).ne(0).where
|
51
57
|
self
|
52
58
|
end
|
53
59
|
|
@@ -59,6 +65,7 @@ module Rumale
|
|
59
65
|
# @return [Numo::DFloat] The one-hot-vectors.
|
60
66
|
def fit_transform(x, _y = nil)
|
61
67
|
check_params_type(Numo::Int32, x: x)
|
68
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
62
69
|
fit(x).transform(x)
|
63
70
|
end
|
64
71
|
|
@@ -68,13 +75,9 @@ module Rumale
|
|
68
75
|
# @return [Numo::DFloat] The one-hot-vectors.
|
69
76
|
def transform(x)
|
70
77
|
check_params_type(Numo::Int32, x: x)
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
|
75
|
-
codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
|
76
|
-
row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
|
77
|
-
codes
|
78
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
79
|
+
codes = encode(x, @feature_indices)
|
80
|
+
codes[true, @active_features].dup
|
78
81
|
end
|
79
82
|
|
80
83
|
# Dump marshal data.
|
@@ -82,6 +85,7 @@ module Rumale
|
|
82
85
|
def marshal_dump
|
83
86
|
{ params: @params,
|
84
87
|
n_values: @n_values,
|
88
|
+
active_features: @active_features,
|
85
89
|
feature_indices: @feature_indices }
|
86
90
|
end
|
87
91
|
|
@@ -90,9 +94,22 @@ module Rumale
|
|
90
94
|
def marshal_load(obj)
|
91
95
|
@params = obj[:params]
|
92
96
|
@n_values = obj[:n_values]
|
97
|
+
@active_features = obj[:active_features]
|
93
98
|
@feature_indices = obj[:feature_indices]
|
94
99
|
nil
|
95
100
|
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
def encode(x, indices)
|
105
|
+
n_samples, n_features = x.shape
|
106
|
+
n_features = 1 if n_features.nil?
|
107
|
+
col_indices = (x + indices[0...-1]).flatten.to_a
|
108
|
+
row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
|
109
|
+
codes = Numo::DFloat.zeros(n_samples, indices[-1])
|
110
|
+
row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
|
111
|
+
codes
|
112
|
+
end
|
96
113
|
end
|
97
114
|
end
|
98
115
|
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -33,6 +33,14 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.require_paths = ['lib']
|
34
34
|
spec.extensions = ['ext/rumale/extconf.rb']
|
35
35
|
|
36
|
+
spec.metadata = {
|
37
|
+
'homepage_uri' => 'https://github.com/yoshoku/rumale',
|
38
|
+
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md',
|
39
|
+
'source_code_uri' => 'https://github.com/yoshoku/rumale',
|
40
|
+
'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
|
41
|
+
'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
|
42
|
+
}
|
43
|
+
|
36
44
|
spec.required_ruby_version = '>= 2.3'
|
37
45
|
|
38
46
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.
|
4
|
+
version: 0.13.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -229,6 +229,7 @@ files:
|
|
229
229
|
- lib/rumale/polynomial_model/factorization_machine_regressor.rb
|
230
230
|
- lib/rumale/preprocessing/bin_discretizer.rb
|
231
231
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
232
|
+
- lib/rumale/preprocessing/label_binarizer.rb
|
232
233
|
- lib/rumale/preprocessing/label_encoder.rb
|
233
234
|
- lib/rumale/preprocessing/max_abs_scaler.rb
|
234
235
|
- lib/rumale/preprocessing/min_max_scaler.rb
|
@@ -251,7 +252,12 @@ files:
|
|
251
252
|
homepage: https://github.com/yoshoku/rumale
|
252
253
|
licenses:
|
253
254
|
- BSD-2-Clause
|
254
|
-
metadata:
|
255
|
+
metadata:
|
256
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
257
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md
|
258
|
+
source_code_uri: https://github.com/yoshoku/rumale
|
259
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
260
|
+
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
255
261
|
post_install_message:
|
256
262
|
rdoc_options: []
|
257
263
|
require_paths:
|