rumale 0.13.1 → 0.13.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/clustering/gaussian_mixture.rb +3 -2
- data/lib/rumale/evaluation_measure/log_loss.rb +3 -3
- data/lib/rumale/preprocessing/label_binarizer.rb +104 -0
- data/lib/rumale/preprocessing/one_hot_encoder.rb +26 -9
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +8 -0
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 948ea0c8e1c7d41704f0259ecd75dc2c4dd3e10f
|
4
|
+
data.tar.gz: 66adfaeb23d85aafc8cdea65bdda628ed1b481a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c424d21b6c49e55606e26d946ea6df05fd5f860914ba016a8c32da82a63865e74be24ce1f82e14cb787352979960aedaae953093854756df81c6ef57079f7ed5
|
7
|
+
data.tar.gz: 04ba83211d4a296fda4f109f92439b76e494ee2ac9321a9d46cf1c5cd4d07a5736e86c9217d180438ea360b0f48564ef03d4dfad5caa8e2c9586e096d703adbc
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.13.2
|
2
|
+
- Add preprocessing class for label binarization.
|
3
|
+
- Fix GaussianMixture and LogLoss to use LabelBinarizer instead of OneHotEncoder.
|
4
|
+
- Fix bug that OneHotEncoder leaves elements related to values that do not occur in training data.
|
5
|
+
|
1
6
|
# 0.13.1
|
2
7
|
- Add class for Shared Nearest Neighbor clustering.
|
3
8
|
- Add function for calculation of manhattan distance to Rumale::PairwiseMetric.
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
|
7
7
|
[![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
|
8
8
|
[![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
|
9
|
-
[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://
|
9
|
+
[![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://yoshoku.github.io/rumale/doc/)
|
10
10
|
|
11
11
|
Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
|
12
12
|
Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
data/lib/rumale.rb
CHANGED
@@ -71,6 +71,7 @@ require 'rumale/preprocessing/min_max_scaler'
|
|
71
71
|
require 'rumale/preprocessing/max_abs_scaler'
|
72
72
|
require 'rumale/preprocessing/standard_scaler'
|
73
73
|
require 'rumale/preprocessing/bin_discretizer'
|
74
|
+
require 'rumale/preprocessing/label_binarizer'
|
74
75
|
require 'rumale/preprocessing/label_encoder'
|
75
76
|
require 'rumale/preprocessing/one_hot_encoder'
|
76
77
|
require 'rumale/preprocessing/ordinal_encoder'
|
data/lib/rumale/clustering/gaussian_mixture.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'rumale/base/base_estimator'
|
4
4
|
require 'rumale/base/cluster_analyzer'
|
5
|
+
require 'rumale/preprocessing/label_binarizer'
|
5
6
|
require 'rumale/pairwise_metric'
|
6
7
|
|
7
8
|
module Rumale
|
@@ -136,8 +137,8 @@ module Rumale
|
|
136
137
|
n_clusters: @params[:n_clusters], init: @params[:init], max_iter: 0, random_seed: @params[:random_seed]
|
137
138
|
)
|
138
139
|
cluster_ids = kmeans.fit_predict(x)
|
139
|
-
encoder = Rumale::Preprocessing::OneHotEncoder.new
|
140
|
-
encoder.fit_transform(cluster_ids)
|
140
|
+
encoder = Rumale::Preprocessing::LabelBinarizer.new
|
141
|
+
Numo::DFloat.cast(encoder.fit_transform(cluster_ids))
|
141
142
|
end
|
142
143
|
|
143
144
|
def calc_memberships(x, weights, means, diag_cov)
|
data/lib/rumale/evaluation_measure/log_loss.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'rumale/base/evaluator'
|
4
|
-
require 'rumale/preprocessing/one_hot_encoder'
|
4
|
+
require 'rumale/preprocessing/label_binarizer'
|
5
5
|
|
6
6
|
module Rumale
|
7
7
|
module EvaluationMeasure
|
@@ -33,8 +33,8 @@ module Rumale
|
|
33
33
|
bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
|
34
34
|
-(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
|
35
35
|
else
|
36
|
-
encoder = Rumale::Preprocessing::OneHotEncoder.new
|
37
|
-
encoded_y_true = encoder.fit_transform(y_true)
|
36
|
+
encoder = Rumale::Preprocessing::LabelBinarizer.new
|
37
|
+
encoded_y_true = Numo::DFloat.cast(encoder.fit_transform(y_true))
|
38
38
|
clipped_p /= clipped_p.sum(1).expand_dims(1)
|
39
39
|
-(encoded_y_true * Numo::NMath.log(clipped_p)).sum(1)
|
40
40
|
end
|
data/lib/rumale/preprocessing/label_binarizer.rb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/base_estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Preprocessing
|
8
|
+
# Encode labels to binary labels with one-vs-all scheme.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# encoder = Rumale::Preprocessing::LabelBinarizer.new
|
12
|
+
# label = [0, -1, 3, 3, 1, 1]
|
13
|
+
# p encoder.fit_transform(label)
|
14
|
+
# # Numo::Int32#shape=[6,4]
|
15
|
+
# # [[0, 1, 0, 0],
|
16
|
+
# # [1, 0, 0, 0],
|
17
|
+
# # [0, 0, 0, 1],
|
18
|
+
# # [0, 0, 0, 1],
|
19
|
+
# # [0, 0, 1, 0],
|
20
|
+
# # [0, 0, 1, 0]]
|
21
|
+
class LabelBinarizer
|
22
|
+
include Base::BaseEstimator
|
23
|
+
include Base::Transformer
|
24
|
+
|
25
|
+
# Return the class labels.
|
26
|
+
# @return [Array] (size: [n_classes])
|
27
|
+
attr_reader :classes
|
28
|
+
|
29
|
+
# Create a new encoder for binarizing labels with one-vs-all scheme.
|
30
|
+
#
|
31
|
+
# @param neg_label [Integer] The value that represents the negative label.
|
32
|
+
# @param pos_label [Integer] The value that represents the positive label.
|
33
|
+
def initialize(neg_label: 0, pos_label: 1)
|
34
|
+
check_params_integer(neg_label: neg_label, pos_label: pos_label)
|
35
|
+
@params = {}
|
36
|
+
@params[:neg_label] = neg_label
|
37
|
+
@params[:pos_label] = pos_label
|
38
|
+
@classes = nil
|
39
|
+
end
|
40
|
+
|
41
|
+
# Fit encoder to labels.
|
42
|
+
#
|
43
|
+
# @overload fit(y) -> LabelBinarizer
|
44
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
45
|
+
# @return [LabelBinarizer]
|
46
|
+
def fit(y, _not_used = nil)
|
47
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
48
|
+
check_params_type(Array, y: y)
|
49
|
+
@classes = y.uniq.sort
|
50
|
+
self
|
51
|
+
end
|
52
|
+
|
53
|
+
# Fit encoder to labels, then return binarized labels.
|
54
|
+
#
|
55
|
+
# @overload fit_transform(y) -> Numo::Int32
|
56
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
|
57
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
58
|
+
def fit_transform(y, _not_used = nil)
|
59
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
60
|
+
check_params_type(Array, y: y)
|
61
|
+
fit(y).transform(y)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Encode labels.
|
65
|
+
#
|
66
|
+
# @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to be encoded.
|
67
|
+
# @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
|
68
|
+
def transform(y)
|
69
|
+
y = y.to_a if y.is_a?(Numo::NArray)
|
70
|
+
check_params_type(Array, y: y)
|
71
|
+
n_classes = @classes.size
|
72
|
+
n_samples = y.size
|
73
|
+
codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
|
74
|
+
n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
|
75
|
+
codes
|
76
|
+
end
|
77
|
+
|
78
|
+
# Decode binarized labels.
|
79
|
+
#
|
80
|
+
# @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
|
81
|
+
# @return [Array] (shape: [n_samples]) The decoded labels.
|
82
|
+
def inverse_transform(x)
|
83
|
+
check_params_type(Numo::Int32, x: x)
|
84
|
+
n_samples = x.shape[0]
|
85
|
+
Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
|
86
|
+
end
|
87
|
+
|
88
|
+
# Dump marshal data.
|
89
|
+
# @return [Hash] The marshal data about LabelBinarizer.
|
90
|
+
def marshal_dump
|
91
|
+
{ params: @params,
|
92
|
+
classes: @classes }
|
93
|
+
end
|
94
|
+
|
95
|
+
# Load marshal data.
|
96
|
+
# @return [nil]
|
97
|
+
def marshal_load(obj)
|
98
|
+
@params = obj[:params]
|
99
|
+
@classes = obj[:classes]
|
100
|
+
nil
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
data/lib/rumale/preprocessing/one_hot_encoder.rb
CHANGED
@@ -27,6 +27,10 @@ module Rumale
|
|
27
27
|
# @return [Numo::Int32] (shape: [n_features])
|
28
28
|
attr_reader :n_values
|
29
29
|
|
30
|
+
# Return the indices for feature values that actually occur in the training set.
|
31
|
+
# @return [Numo::Int32]
|
32
|
+
attr_reader :active_features
|
33
|
+
|
30
34
|
# Return the indices to feature ranges.
|
31
35
|
# @return [Numo::Int32] (shape: [n_features + 1])
|
32
36
|
attr_reader :feature_indices
|
@@ -35,19 +39,21 @@ module Rumale
|
|
35
39
|
def initialize
|
36
40
|
@params = {}
|
37
41
|
@n_values = nil
|
42
|
+
@active_features = nil
|
38
43
|
@feature_indices = nil
|
39
44
|
end
|
40
45
|
|
41
46
|
# Fit one-hot-encoder to samples.
|
42
47
|
#
|
43
48
|
# @overload fit(x) -> OneHotEncoder
|
44
|
-
#
|
45
|
-
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
49
|
+
# @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
|
46
50
|
# @return [OneHotEncoder]
|
47
51
|
def fit(x, _y = nil)
|
48
52
|
check_params_type(Numo::Int32, x: x)
|
53
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
49
54
|
@n_values = x.max(0) + 1
|
50
55
|
@feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
|
56
|
+
@active_features = encode(x, @feature_indices).sum(0).ne(0).where
|
51
57
|
self
|
52
58
|
end
|
53
59
|
|
@@ -59,6 +65,7 @@ module Rumale
|
|
59
65
|
# @return [Numo::DFloat] The one-hot-vectors.
|
60
66
|
def fit_transform(x, _y = nil)
|
61
67
|
check_params_type(Numo::Int32, x: x)
|
68
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
62
69
|
fit(x).transform(x)
|
63
70
|
end
|
64
71
|
|
@@ -68,13 +75,9 @@ module Rumale
|
|
68
75
|
# @return [Numo::DFloat] The one-hot-vectors.
|
69
76
|
def transform(x)
|
70
77
|
check_params_type(Numo::Int32, x: x)
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
|
75
|
-
codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
|
76
|
-
row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
|
77
|
-
codes
|
78
|
+
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
79
|
+
codes = encode(x, @feature_indices)
|
80
|
+
codes[true, @active_features].dup
|
78
81
|
end
|
79
82
|
|
80
83
|
# Dump marshal data.
|
@@ -82,6 +85,7 @@ module Rumale
|
|
82
85
|
def marshal_dump
|
83
86
|
{ params: @params,
|
84
87
|
n_values: @n_values,
|
88
|
+
active_features: @active_features,
|
85
89
|
feature_indices: @feature_indices }
|
86
90
|
end
|
87
91
|
|
@@ -90,9 +94,22 @@ module Rumale
|
|
90
94
|
def marshal_load(obj)
|
91
95
|
@params = obj[:params]
|
92
96
|
@n_values = obj[:n_values]
|
97
|
+
@active_features = obj[:active_features]
|
93
98
|
@feature_indices = obj[:feature_indices]
|
94
99
|
nil
|
95
100
|
end
|
101
|
+
|
102
|
+
private
|
103
|
+
|
104
|
+
def encode(x, indices)
|
105
|
+
n_samples, n_features = x.shape
|
106
|
+
n_features = 1 if n_features.nil?
|
107
|
+
col_indices = (x + indices[0...-1]).flatten.to_a
|
108
|
+
row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
|
109
|
+
codes = Numo::DFloat.zeros(n_samples, indices[-1])
|
110
|
+
row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
|
111
|
+
codes
|
112
|
+
end
|
96
113
|
end
|
97
114
|
end
|
98
115
|
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -33,6 +33,14 @@ Gem::Specification.new do |spec|
|
|
33
33
|
spec.require_paths = ['lib']
|
34
34
|
spec.extensions = ['ext/rumale/extconf.rb']
|
35
35
|
|
36
|
+
spec.metadata = {
|
37
|
+
'homepage_uri' => 'https://github.com/yoshoku/rumale',
|
38
|
+
'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md',
|
39
|
+
'source_code_uri' => 'https://github.com/yoshoku/rumale',
|
40
|
+
'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
|
41
|
+
'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
|
42
|
+
}
|
43
|
+
|
36
44
|
spec.required_ruby_version = '>= 2.3'
|
37
45
|
|
38
46
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.1
|
4
|
+
version: 0.13.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-09-
|
11
|
+
date: 2019-09-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -229,6 +229,7 @@ files:
|
|
229
229
|
- lib/rumale/polynomial_model/factorization_machine_regressor.rb
|
230
230
|
- lib/rumale/preprocessing/bin_discretizer.rb
|
231
231
|
- lib/rumale/preprocessing/l2_normalizer.rb
|
232
|
+
- lib/rumale/preprocessing/label_binarizer.rb
|
232
233
|
- lib/rumale/preprocessing/label_encoder.rb
|
233
234
|
- lib/rumale/preprocessing/max_abs_scaler.rb
|
234
235
|
- lib/rumale/preprocessing/min_max_scaler.rb
|
@@ -251,7 +252,12 @@ files:
|
|
251
252
|
homepage: https://github.com/yoshoku/rumale
|
252
253
|
licenses:
|
253
254
|
- BSD-2-Clause
|
254
|
-
metadata:
|
255
|
+
metadata:
|
256
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
257
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md
|
258
|
+
source_code_uri: https://github.com/yoshoku/rumale
|
259
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
260
|
+
bug_tracker_uri: https://github.com/yoshoku/rumale/issues
|
255
261
|
post_install_message:
|
256
262
|
rdoc_options: []
|
257
263
|
require_paths:
|