rumale 0.13.1 → 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce88d7170fd676377227427a0be90f8bdb1a9c97
4
- data.tar.gz: 04f0d07e6d098768eda726fc82f864420678e427
3
+ metadata.gz: 948ea0c8e1c7d41704f0259ecd75dc2c4dd3e10f
4
+ data.tar.gz: 66adfaeb23d85aafc8cdea65bdda628ed1b481a8
5
5
  SHA512:
6
- metadata.gz: 203444f0e7d833946f67c2ee922e02a48b7174c20eac84480e190f8749e150e0c5ed18e3d7b7d30480e565483b5a5b51d1990cced7e09b5db027d8c508fa4313
7
- data.tar.gz: e608c97fc0d29c018c778f9cc96cd53b0edff927c5631bd3b0cb606ee93f4e8c647ed2c76e7835b49f1933c0e5aeccb1ffbda4fe9aec59a2689f7bde4a28e103
6
+ metadata.gz: c424d21b6c49e55606e26d946ea6df05fd5f860914ba016a8c32da82a63865e74be24ce1f82e14cb787352979960aedaae953093854756df81c6ef57079f7ed5
7
+ data.tar.gz: 04ba83211d4a296fda4f109f92439b76e494ee2ac9321a9d46cf1c5cd4d07a5736e86c9217d180438ea360b0f48564ef03d4dfad5caa8e2c9586e096d703adbc
@@ -1,3 +1,8 @@
1
+ # 0.13.2
2
+ - Add preprocessing class for label binarization.
3
+ - Fix to use LabelBinarizer instead of OneHotEncoder.
4
+ - Fix bug that OneHotEncoder leaves elements related to values that do not occur in training data.
5
+
1
6
  # 0.13.1
2
7
  - Add class for Shared Nearest Neighbor clustering.
3
8
  - Add function for calculation of manhattan distance to Rumale::PairwiseMetric.
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.13.1)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://yoshoku.github.io/rumale/doc/)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -71,6 +71,7 @@ require 'rumale/preprocessing/min_max_scaler'
71
71
  require 'rumale/preprocessing/max_abs_scaler'
72
72
  require 'rumale/preprocessing/standard_scaler'
73
73
  require 'rumale/preprocessing/bin_discretizer'
74
+ require 'rumale/preprocessing/label_binarizer'
74
75
  require 'rumale/preprocessing/label_encoder'
75
76
  require 'rumale/preprocessing/one_hot_encoder'
76
77
  require 'rumale/preprocessing/ordinal_encoder'
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/preprocessing/label_binarizer'
5
6
  require 'rumale/pairwise_metric'
6
7
 
7
8
  module Rumale
@@ -136,8 +137,8 @@ module Rumale
136
137
  n_clusters: @params[:n_clusters], init: @params[:init], max_iter: 0, random_seed: @params[:random_seed]
137
138
  )
138
139
  cluster_ids = kmeans.fit_predict(x)
139
- encoder = Rumale::Preprocessing::OneHotEncoder.new
140
- encoder.fit_transform(cluster_ids)
140
+ encoder = Rumale::Preprocessing::LabelBinarizer.new
141
+ Numo::DFloat.cast(encoder.fit_transform(cluster_ids))
141
142
  end
142
143
 
143
144
  def calc_memberships(x, weights, means, diag_cov)
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rumale/base/evaluator'
4
- require 'rumale/preprocessing/one_hot_encoder'
4
+ require 'rumale/preprocessing/label_binarizer'
5
5
 
6
6
  module Rumale
7
7
  module EvaluationMeasure
@@ -33,8 +33,8 @@ module Rumale
33
33
  bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
34
34
  -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
35
35
  else
36
- encoder = Rumale::Preprocessing::OneHotEncoder.new
37
- encoded_y_true = encoder.fit_transform(y_true)
36
+ encoder = Rumale::Preprocessing::LabelBinarizer.new
37
+ encoded_y_true = Numo::DFloat.cast(encoder.fit_transform(y_true))
38
38
  clipped_p /= clipped_p.sum(1).expand_dims(1)
39
39
  -(encoded_y_true * Numo::NMath.log(clipped_p)).sum(1)
40
40
  end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to binary labels with one-vs-all scheme.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::LabelBinarizer.new
12
+ # label = [0, -1, 3, 3, 1, 1]
13
+ # p encoder.fit_transform(label)
14
+ # # Numo::Int32#shape=[6,4]
15
+ # # [[0, 1, 0, 0],
16
+ # # [1, 0, 0, 0],
17
+ # # [0, 0, 0, 1],
18
+ # # [0, 0, 0, 1],
19
+ # # [0, 0, 1, 0],
20
+ # # [0, 0, 1, 0]]
21
+ class LabelBinarizer
22
+ include Base::BaseEstimator
23
+ include Base::Transformer
24
+
25
+ # Return the class labels.
26
+ # @return [Array] (size: [n_classes])
27
+ attr_reader :classes
28
+
29
+ # Create a new encoder for binarizing labels with one-vs-all scheme.
30
+ #
31
+ # @param neg_label [Integer] The value represents negative label.
32
+ # @param pos_label [Integer] The value represents positive label.
33
+ def initialize(neg_label: 0, pos_label: 1)
34
+ check_params_integer(neg_label: neg_label, pos_label: pos_label)
35
+ @params = {}
36
+ @params[:neg_label] = neg_label
37
+ @params[:pos_label] = pos_label
38
+ @classes = nil
39
+ end
40
+
41
+ # Fit encoder to labels.
42
+ #
43
+ # @overload fit(y) -> LabelBinarizer
44
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
45
+ # @return [LabelBinarizer]
46
+ def fit(y, _not_used = nil)
47
+ y = y.to_a if y.is_a?(Numo::NArray)
48
+ check_params_type(Array, y: y)
49
+ @classes = y.uniq.sort
50
+ self
51
+ end
52
+
53
+ # Fit encoder to labels, then return binarized labels.
54
+ #
55
+ # @overload fit_transform(y) -> Numo::Int32
56
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
57
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
58
+ def fit_transform(y, _not_used = nil)
59
+ y = y.to_a if y.is_a?(Numo::NArray)
60
+ check_params_type(Array, y: y)
61
+ fit(y).transform(y)
62
+ end
63
+
64
+ # Encode labels.
65
+ #
66
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to be encoded.
67
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
68
+ def transform(y)
69
+ y = y.to_a if y.is_a?(Numo::NArray)
70
+ check_params_type(Array, y: y)
71
+ n_classes = @classes.size
72
+ n_samples = y.size
73
+ codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
74
+ n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
75
+ codes
76
+ end
77
+
78
+ # Decode binarized labels.
79
+ #
80
+ # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
81
+ # @return [Array] (shape: [n_samples]) The decoded labels.
82
+ def inverse_transform(x)
83
+ check_params_type(Numo::Int32, x: x)
84
+ n_samples = x.shape[0]
85
+ Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
86
+ end
87
+
88
+ # Dump marshal data.
89
+ # @return [Hash] The marshal data about LabelBinarizer.
90
+ def marshal_dump
91
+ { params: @params,
92
+ classes: @classes }
93
+ end
94
+
95
+ # Load marshal data.
96
+ # @return [nil]
97
+ def marshal_load(obj)
98
+ @params = obj[:params]
99
+ @classes = obj[:classes]
100
+ nil
101
+ end
102
+ end
103
+ end
104
+ end
@@ -27,6 +27,10 @@ module Rumale
27
27
  # @return [Numo::Int32] (shape: [n_features])
28
28
  attr_reader :n_values
29
29
 
30
+ # Return the indices for feature values that actually occur in the training set.
31
+ # @return [Numo::Int32]
32
+ attr_reader :active_features
33
+
30
34
  # Return the indices to feature ranges.
31
35
  # @return [Numo::Int32] (shape: [n_features + 1])
32
36
  attr_reader :feature_indices
@@ -35,19 +39,21 @@ module Rumale
35
39
  def initialize
36
40
  @params = {}
37
41
  @n_values = nil
42
+ @active_features = nil
38
43
  @feature_indices = nil
39
44
  end
40
45
 
41
46
  # Fit one-hot-encoder to samples.
42
47
  #
43
48
  # @overload fit(x) -> OneHotEncoder
44
- #
45
- # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
49
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
46
50
  # @return [OneHotEncoder]
47
51
  def fit(x, _y = nil)
48
52
  check_params_type(Numo::Int32, x: x)
53
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
49
54
  @n_values = x.max(0) + 1
50
55
  @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
56
+ @active_features = encode(x, @feature_indices).sum(0).ne(0).where
51
57
  self
52
58
  end
53
59
 
@@ -59,6 +65,7 @@ module Rumale
59
65
  # @return [Numo::DFloat] The one-hot-vectors.
60
66
  def fit_transform(x, _y = nil)
61
67
  check_params_type(Numo::Int32, x: x)
68
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
62
69
  fit(x).transform(x)
63
70
  end
64
71
 
@@ -68,13 +75,9 @@ module Rumale
68
75
  # @return [Numo::DFloat] The one-hot-vectors.
69
76
  def transform(x)
70
77
  check_params_type(Numo::Int32, x: x)
71
- n_samples, n_features = x.shape
72
- n_features = 1 if n_features.nil?
73
- column_indices = (x + @feature_indices[0...-1]).flatten.to_a
74
- row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
75
- codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
76
- row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
77
- codes
78
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
79
+ codes = encode(x, @feature_indices)
80
+ codes[true, @active_features].dup
78
81
  end
79
82
 
80
83
  # Dump marshal data.
@@ -82,6 +85,7 @@ module Rumale
82
85
  def marshal_dump
83
86
  { params: @params,
84
87
  n_values: @n_values,
88
+ active_features: @active_features,
85
89
  feature_indices: @feature_indices }
86
90
  end
87
91
 
@@ -90,9 +94,22 @@ module Rumale
90
94
  def marshal_load(obj)
91
95
  @params = obj[:params]
92
96
  @n_values = obj[:n_values]
97
+ @active_features = obj[:active_features]
93
98
  @feature_indices = obj[:feature_indices]
94
99
  nil
95
100
  end
101
+
102
+ private
103
+
104
+ def encode(x, indices)
105
+ n_samples, n_features = x.shape
106
+ n_features = 1 if n_features.nil?
107
+ col_indices = (x + indices[0...-1]).flatten.to_a
108
+ row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
109
+ codes = Numo::DFloat.zeros(n_samples, indices[-1])
110
+ row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
111
+ codes
112
+ end
96
113
  end
97
114
  end
98
115
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.1'
6
+ VERSION = '0.13.2'
7
7
  end
@@ -33,6 +33,14 @@ Gem::Specification.new do |spec|
33
33
  spec.require_paths = ['lib']
34
34
  spec.extensions = ['ext/rumale/extconf.rb']
35
35
 
36
+ spec.metadata = {
37
+ 'homepage_uri' => 'https://github.com/yoshoku/rumale',
38
+ 'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md',
39
+ 'source_code_uri' => 'https://github.com/yoshoku/rumale',
40
+ 'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
41
+ 'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
42
+ }
43
+
36
44
  spec.required_ruby_version = '>= 2.3'
37
45
 
38
46
  spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.1
4
+ version: 0.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-01 00:00:00.000000000 Z
11
+ date: 2019-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -229,6 +229,7 @@ files:
229
229
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
230
230
  - lib/rumale/preprocessing/bin_discretizer.rb
231
231
  - lib/rumale/preprocessing/l2_normalizer.rb
232
+ - lib/rumale/preprocessing/label_binarizer.rb
232
233
  - lib/rumale/preprocessing/label_encoder.rb
233
234
  - lib/rumale/preprocessing/max_abs_scaler.rb
234
235
  - lib/rumale/preprocessing/min_max_scaler.rb
@@ -251,7 +252,12 @@ files:
251
252
  homepage: https://github.com/yoshoku/rumale
252
253
  licenses:
253
254
  - BSD-2-Clause
254
- metadata: {}
255
+ metadata:
256
+ homepage_uri: https://github.com/yoshoku/rumale
257
+ changelog_uri: https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md
258
+ source_code_uri: https://github.com/yoshoku/rumale
259
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
260
+ bug_tracker_uri: https://github.com/yoshoku/rumale/issues
255
261
  post_install_message:
256
262
  rdoc_options: []
257
263
  require_paths: