rumale 0.13.1 → 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ce88d7170fd676377227427a0be90f8bdb1a9c97
4
- data.tar.gz: 04f0d07e6d098768eda726fc82f864420678e427
3
+ metadata.gz: 948ea0c8e1c7d41704f0259ecd75dc2c4dd3e10f
4
+ data.tar.gz: 66adfaeb23d85aafc8cdea65bdda628ed1b481a8
5
5
  SHA512:
6
- metadata.gz: 203444f0e7d833946f67c2ee922e02a48b7174c20eac84480e190f8749e150e0c5ed18e3d7b7d30480e565483b5a5b51d1990cced7e09b5db027d8c508fa4313
7
- data.tar.gz: e608c97fc0d29c018c778f9cc96cd53b0edff927c5631bd3b0cb606ee93f4e8c647ed2c76e7835b49f1933c0e5aeccb1ffbda4fe9aec59a2689f7bde4a28e103
6
+ metadata.gz: c424d21b6c49e55606e26d946ea6df05fd5f860914ba016a8c32da82a63865e74be24ce1f82e14cb787352979960aedaae953093854756df81c6ef57079f7ed5
7
+ data.tar.gz: 04ba83211d4a296fda4f109f92439b76e494ee2ac9321a9d46cf1c5cd4d07a5736e86c9217d180438ea360b0f48564ef03d4dfad5caa8e2c9586e096d703adbc
@@ -1,3 +1,8 @@
1
+ # 0.13.2
2
+ - Add preprocessing class for label binarization.
3
+ - Fix to use LabelBinarizer instead of OneHotEncoder.
4
+ - Fix bug where OneHotEncoder leaves elements related to values that do not occur in the training data.
5
+
1
6
  # 0.13.1
2
7
  - Add class for Shared Nearest Neighbor clustering.
3
8
  - Add function for calculation of manhattan distance to Rumale::PairwiseMetric.
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.13.1)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://yoshoku.github.io/rumale/doc/)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -71,6 +71,7 @@ require 'rumale/preprocessing/min_max_scaler'
71
71
  require 'rumale/preprocessing/max_abs_scaler'
72
72
  require 'rumale/preprocessing/standard_scaler'
73
73
  require 'rumale/preprocessing/bin_discretizer'
74
+ require 'rumale/preprocessing/label_binarizer'
74
75
  require 'rumale/preprocessing/label_encoder'
75
76
  require 'rumale/preprocessing/one_hot_encoder'
76
77
  require 'rumale/preprocessing/ordinal_encoder'
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'rumale/base/base_estimator'
4
4
  require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/preprocessing/label_binarizer'
5
6
  require 'rumale/pairwise_metric'
6
7
 
7
8
  module Rumale
@@ -136,8 +137,8 @@ module Rumale
136
137
  n_clusters: @params[:n_clusters], init: @params[:init], max_iter: 0, random_seed: @params[:random_seed]
137
138
  )
138
139
  cluster_ids = kmeans.fit_predict(x)
139
- encoder = Rumale::Preprocessing::OneHotEncoder.new
140
- encoder.fit_transform(cluster_ids)
140
+ encoder = Rumale::Preprocessing::LabelBinarizer.new
141
+ Numo::DFloat.cast(encoder.fit_transform(cluster_ids))
141
142
  end
142
143
 
143
144
  def calc_memberships(x, weights, means, diag_cov)
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rumale/base/evaluator'
4
- require 'rumale/preprocessing/one_hot_encoder'
4
+ require 'rumale/preprocessing/label_binarizer'
5
5
 
6
6
  module Rumale
7
7
  module EvaluationMeasure
@@ -33,8 +33,8 @@ module Rumale
33
33
  bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
34
34
  -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
35
35
  else
36
- encoder = Rumale::Preprocessing::OneHotEncoder.new
37
- encoded_y_true = encoder.fit_transform(y_true)
36
+ encoder = Rumale::Preprocessing::LabelBinarizer.new
37
+ encoded_y_true = Numo::DFloat.cast(encoder.fit_transform(y_true))
38
38
  clipped_p /= clipped_p.sum(1).expand_dims(1)
39
39
  -(encoded_y_true * Numo::NMath.log(clipped_p)).sum(1)
40
40
  end
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/transformer'
5
+
6
+ module Rumale
7
+ module Preprocessing
8
+ # Encode labels to binary labels with one-vs-all scheme.
9
+ #
10
+ # @example
11
+ # encoder = Rumale::Preprocessing::LabelBinarizer.new
12
+ # label = [0, -1, 3, 3, 1, 1]
13
+ # p encoder.fit_transform(label)
14
+ # # Numo::Int32#shape=[6,4]
15
+ # # [[0, 1, 0, 0],
16
+ # # [1, 0, 0, 0],
17
+ # # [0, 0, 0, 1],
18
+ # # [0, 0, 0, 1],
19
+ # # [0, 0, 1, 0],
20
+ # # [0, 0, 1, 0]]
21
+ class LabelBinarizer
22
+ include Base::BaseEstimator
23
+ include Base::Transformer
24
+
25
+ # Return the class labels.
26
+ # @return [Array] (size: [n_classes])
27
+ attr_reader :classes
28
+
29
+ # Create a new encoder for binarizing labels with one-vs-all scheme.
30
+ #
31
+ # @param neg_label [Integer] The value that represents the negative label.
32
+ # @param pos_label [Integer] The value that represents the positive label.
33
+ def initialize(neg_label: 0, pos_label: 1)
34
+ check_params_integer(neg_label: neg_label, pos_label: pos_label)
35
+ @params = {}
36
+ @params[:neg_label] = neg_label
37
+ @params[:pos_label] = pos_label
38
+ @classes = nil
39
+ end
40
+
41
+ # Fit encoder to labels.
42
+ #
43
+ # @overload fit(y) -> LabelBinarizer
44
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
45
+ # @return [LabelBinarizer]
46
+ def fit(y, _not_used = nil)
47
+ y = y.to_a if y.is_a?(Numo::NArray)
48
+ check_params_type(Array, y: y)
49
+ @classes = y.uniq.sort
50
+ self
51
+ end
52
+
53
+ # Fit encoder to labels, then return binarized labels.
54
+ #
55
+ # @overload fit_transform(y) -> Numo::Int32
56
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to fit encoder.
57
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
58
+ def fit_transform(y, _not_used = nil)
59
+ y = y.to_a if y.is_a?(Numo::NArray)
60
+ check_params_type(Array, y: y)
61
+ fit(y).transform(y)
62
+ end
63
+
64
+ # Encode labels.
65
+ #
66
+ # @param y [Numo::NArray/Array] (shape: [n_samples]) The labels to be encoded.
67
+ # @return [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels.
68
+ def transform(y)
69
+ y = y.to_a if y.is_a?(Numo::NArray)
70
+ check_params_type(Array, y: y)
71
+ n_classes = @classes.size
72
+ n_samples = y.size
73
+ codes = Numo::Int32.zeros(n_samples, n_classes) + @params[:neg_label]
74
+ n_samples.times { |n| codes[n, @classes.index(y[n])] = @params[:pos_label] }
75
+ codes
76
+ end
77
+
78
+ # Decode binarized labels.
79
+ #
80
+ # @param x [Numo::Int32] (shape: [n_samples, n_classes]) The binarized labels to be decoded.
81
+ # @return [Array] (shape: [n_samples]) The decoded labels.
82
+ def inverse_transform(x)
83
+ check_params_type(Numo::Int32, x: x)
84
+ n_samples = x.shape[0]
85
+ Array.new(n_samples) { |n| @classes[x[n, true].ne(@params[:neg_label]).where[0]] }
86
+ end
87
+
88
+ # Dump marshal data.
89
+ # @return [Hash] The marshal data about LabelBinarizer.
90
+ def marshal_dump
91
+ { params: @params,
92
+ classes: @classes }
93
+ end
94
+
95
+ # Load marshal data.
96
+ # @return [nil]
97
+ def marshal_load(obj)
98
+ @params = obj[:params]
99
+ @classes = obj[:classes]
100
+ nil
101
+ end
102
+ end
103
+ end
104
+ end
@@ -27,6 +27,10 @@ module Rumale
27
27
  # @return [Numo::Int32] (shape: [n_features])
28
28
  attr_reader :n_values
29
29
 
30
+ # Return the indices for feature values that actually occur in the training set.
31
+ # @return [Numo::Int32]
32
+ attr_reader :active_features
33
+
30
34
  # Return the indices to feature ranges.
31
35
  # @return [Numo::Int32] (shape: [n_features + 1])
32
36
  attr_reader :feature_indices
@@ -35,19 +39,21 @@ module Rumale
35
39
  def initialize
36
40
  @params = {}
37
41
  @n_values = nil
42
+ @active_features = nil
38
43
  @feature_indices = nil
39
44
  end
40
45
 
41
46
  # Fit one-hot-encoder to samples.
42
47
  #
43
48
  # @overload fit(x) -> OneHotEncoder
44
- #
45
- # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
49
+ # @param x [Numo::Int32] (shape: [n_samples, n_features]) The samples to fit one-hot-encoder.
46
50
  # @return [OneHotEncoder]
47
51
  def fit(x, _y = nil)
48
52
  check_params_type(Numo::Int32, x: x)
53
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
49
54
  @n_values = x.max(0) + 1
50
55
  @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
56
+ @active_features = encode(x, @feature_indices).sum(0).ne(0).where
51
57
  self
52
58
  end
53
59
 
@@ -59,6 +65,7 @@ module Rumale
59
65
  # @return [Numo::DFloat] The one-hot-vectors.
60
66
  def fit_transform(x, _y = nil)
61
67
  check_params_type(Numo::Int32, x: x)
68
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
62
69
  fit(x).transform(x)
63
70
  end
64
71
 
@@ -68,13 +75,9 @@ module Rumale
68
75
  # @return [Numo::DFloat] The one-hot-vectors.
69
76
  def transform(x)
70
77
  check_params_type(Numo::Int32, x: x)
71
- n_samples, n_features = x.shape
72
- n_features = 1 if n_features.nil?
73
- column_indices = (x + @feature_indices[0...-1]).flatten.to_a
74
- row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
75
- codes = Numo::DFloat.zeros(n_samples, @feature_indices[-1])
76
- row_indices.zip(column_indices).each { |r, c| codes[r, c] = 1.0 }
77
- codes
78
+ raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
79
+ codes = encode(x, @feature_indices)
80
+ codes[true, @active_features].dup
78
81
  end
79
82
 
80
83
  # Dump marshal data.
@@ -82,6 +85,7 @@ module Rumale
82
85
  def marshal_dump
83
86
  { params: @params,
84
87
  n_values: @n_values,
88
+ active_features: @active_features,
85
89
  feature_indices: @feature_indices }
86
90
  end
87
91
 
@@ -90,9 +94,22 @@ module Rumale
90
94
  def marshal_load(obj)
91
95
  @params = obj[:params]
92
96
  @n_values = obj[:n_values]
97
+ @active_features = obj[:active_features]
93
98
  @feature_indices = obj[:feature_indices]
94
99
  nil
95
100
  end
101
+
102
+ private
103
+
104
+ def encode(x, indices)
105
+ n_samples, n_features = x.shape
106
+ n_features = 1 if n_features.nil?
107
+ col_indices = (x + indices[0...-1]).flatten.to_a
108
+ row_indices = Numo::Int32.new(n_samples).seq.repeat(n_features).to_a
109
+ codes = Numo::DFloat.zeros(n_samples, indices[-1])
110
+ row_indices.zip(col_indices).each { |r, c| codes[r, c] = 1.0 }
111
+ codes
112
+ end
96
113
  end
97
114
  end
98
115
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.13.1'
6
+ VERSION = '0.13.2'
7
7
  end
@@ -33,6 +33,14 @@ Gem::Specification.new do |spec|
33
33
  spec.require_paths = ['lib']
34
34
  spec.extensions = ['ext/rumale/extconf.rb']
35
35
 
36
+ spec.metadata = {
37
+ 'homepage_uri' => 'https://github.com/yoshoku/rumale',
38
+ 'changelog_uri' => 'https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md',
39
+ 'source_code_uri' => 'https://github.com/yoshoku/rumale',
40
+ 'documentation_uri' => 'https://yoshoku.github.io/rumale/doc/',
41
+ 'bug_tracker_uri' => 'https://github.com/yoshoku/rumale/issues'
42
+ }
43
+
36
44
  spec.required_ruby_version = '>= 2.3'
37
45
 
38
46
  spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.1
4
+ version: 0.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-09-01 00:00:00.000000000 Z
11
+ date: 2019-09-06 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -229,6 +229,7 @@ files:
229
229
  - lib/rumale/polynomial_model/factorization_machine_regressor.rb
230
230
  - lib/rumale/preprocessing/bin_discretizer.rb
231
231
  - lib/rumale/preprocessing/l2_normalizer.rb
232
+ - lib/rumale/preprocessing/label_binarizer.rb
232
233
  - lib/rumale/preprocessing/label_encoder.rb
233
234
  - lib/rumale/preprocessing/max_abs_scaler.rb
234
235
  - lib/rumale/preprocessing/min_max_scaler.rb
@@ -251,7 +252,12 @@ files:
251
252
  homepage: https://github.com/yoshoku/rumale
252
253
  licenses:
253
254
  - BSD-2-Clause
254
- metadata: {}
255
+ metadata:
256
+ homepage_uri: https://github.com/yoshoku/rumale
257
+ changelog_uri: https://github.com/yoshoku/rumale/blob/master/CHANGELOG.md
258
+ source_code_uri: https://github.com/yoshoku/rumale
259
+ documentation_uri: https://yoshoku.github.io/rumale/doc/
260
+ bug_tracker_uri: https://github.com/yoshoku/rumale/issues
255
261
  post_install_message:
256
262
  rdoc_options: []
257
263
  require_paths: