rumale 0.12.0 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f662b1bf4abdb9aba9c978362094d80f59fcb390
4
- data.tar.gz: eb5087ce4b4f2dfdc8e789c340139dd7d36693e0
3
+ metadata.gz: e069b026a9236e69924051af75689ba292632b50
4
+ data.tar.gz: b98e3260e947f66936a7149a8b09eb24bb5219fe
5
5
  SHA512:
6
- metadata.gz: 8418aa3932962b135c3a9725262e84b741825ed2491e98dba508e04ea4104d7abe0a9938d6248d9405bc7d9793d1f128a7e37c89804a79367626a47d3fa6a773
7
- data.tar.gz: 40fff97c335d5720eaf1c90b45ed39531c61827777797a3a4bbc5c8b5f4b8df9b3814290686c959f9f7a9726966187a064b521850139124c9be2c64189d1d29f
6
+ metadata.gz: 2ff9dac0d7d633064fc3f4b5127a639a4617357c219fc20ad38c79048981c555fdddebc3f8d16047798bdfc658292e98cbc04d1f26733bcebf4a6412ca84f3ab
7
+ data.tar.gz: 9ad31638ea47d527a0af109a8cda2dcb44d0296ce31a88509833e4f8a116f49601dbf6353606fca080ed378425f5ce759c440791e8eccd90743ed65b14b1ef86
data/.gitignore CHANGED
@@ -13,6 +13,7 @@
13
13
 
14
14
  *.swp
15
15
  *.bundle
16
+ tags
16
17
  .DS_Store
17
18
  .ruby-version
18
19
  /spec/dump_dbl.t
@@ -1,7 +1,5 @@
1
1
  require: rubocop-performance
2
2
 
3
- inherit_from: .rubocop_todo.yml
4
-
5
3
  AllCops:
6
4
  TargetRubyVersion: 2.3
7
5
  DisplayCopNames: true
@@ -31,7 +29,14 @@ Metrics/MethodLength:
31
29
  Metrics/AbcSize:
32
30
  Max: 60
33
31
 
32
+ Metrics/CyclomaticComplexity:
33
+ Max: 16
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 16
37
+
34
38
  Metrics/BlockLength:
39
+ Max: 40
35
40
  Exclude:
36
41
  - 'spec/**/*'
37
42
 
@@ -1,3 +1,7 @@
1
+ # 0.12.1
2
+ - Refactor kernel support vector classifier.
3
+ - Refactor random sampling on tree estimators.
4
+
1
5
  # 0.12.0
2
6
  ## Breaking changes
3
7
  - For reproducibility, Rumale changes to not repeatedly use the same random number generator in the same estimator.
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.0)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.1)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -209,7 +209,7 @@ Install OpenBLAS library.
209
209
  Mac:
210
210
 
211
211
  ```bash
212
- $ brew install openblas --with-openmp
212
+ $ brew install openblas
213
213
  ```
214
214
 
215
215
  Ubuntu:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('rumale/rumale')
@@ -129,7 +129,7 @@ module Rumale
129
129
  Tree::ExtraTreeRegressor.new(
130
130
  criterion: @params[:criterion], max_depth: @params[:max_depth],
131
131
  max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
132
- max_features: @params[:max_features], random_seed:rnd_seed
132
+ max_features: @params[:max_features], random_seed: rnd_seed
133
133
  )
134
134
  end
135
135
  end
@@ -79,45 +79,29 @@ module Rumale
79
79
 
80
80
  @classes = Numo::Int32[*y.to_a.uniq.sort]
81
81
  n_classes = @classes.size
82
- _n_samples, n_features = x.shape
82
+ n_features = x.shape[1]
83
83
 
84
84
  if n_classes > 2
85
85
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
86
86
  @prob_param = Numo::DFloat.zeros(n_classes, 2)
87
- if enable_parallel?
88
- # :nocov:
89
- models = parallel_map(n_classes) do |n|
90
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
91
- w = binary_fit(x, bin_y)
92
- p = if @params[:probability]
93
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
94
- else
95
- Numo::DFloat[1, 0]
96
- end
97
- [w, p]
98
- end
99
- # :nocov:
100
- n_classes.times { |n| @weight_vec[n, true], @prob_param[n, true] = models[n] }
101
- else
102
- n_classes.times do |n|
103
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
104
- @weight_vec[n, true] = binary_fit(x, bin_y)
105
- @prob_param[n, true] = if @params[:probability]
106
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose), bin_y)
107
- else
108
- Numo::DFloat[1, 0]
109
- end
110
- end
111
- end
87
+ models = if enable_parallel?
88
+ # :nocov:
89
+ parallel_map(n_classes) do |n|
90
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
91
+ partial_fit(x, bin_y)
92
+ end
93
+ # :nocov:
94
+ else
95
+ Array.new(n_classes) do |n|
96
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
97
+ partial_fit(x, bin_y)
98
+ end
99
+ end
100
+ models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
112
101
  else
113
102
  negative_label = y.to_a.uniq.min
114
103
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
115
- @weight_vec = binary_fit(x, bin_y)
116
- @prob_param = if @params[:probability]
117
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose), bin_y)
118
- else
119
- Numo::DFloat[1, 0]
120
- end
104
+ @weight_vec, @prob_param = partial_fit(x, bin_y)
121
105
  end
122
106
 
123
107
  self
@@ -197,7 +181,7 @@ module Rumale
197
181
 
198
182
  private
199
183
 
200
- def binary_fit(x, bin_y)
184
+ def partial_fit(x, bin_y)
201
185
  # Initialize some variables.
202
186
  n_training_samples = x.shape[0]
203
187
  rand_ids = []
@@ -213,7 +197,13 @@ module Rumale
213
197
  func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
214
198
  weight_vec[target_id] += 1.0 if func < 1.0
215
199
  end
216
- weight_vec * bin_y
200
+ w = weight_vec * bin_y
201
+ p = if @params[:probability]
202
+ Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
203
+ else
204
+ Numo::DFloat[1, 0]
205
+ end
206
+ [w, p]
217
207
  end
218
208
  end
219
209
  end
@@ -62,14 +62,16 @@ module Rumale
62
62
 
63
63
  def build_tree(x, y)
64
64
  y = y.expand_dims(1).dup if y.shape[1].nil?
65
+ @feature_ids = Array.new(x.shape[1]) { |v| v }
65
66
  @tree = grow_node(0, x, y, impurity(y))
67
+ @feature_ids = nil
66
68
  nil
67
69
  end
68
70
 
69
- def grow_node(depth, x, y, whole_impurity)
71
+ def grow_node(depth, x, y, impurity)
70
72
  # intialize node.
71
73
  n_samples, n_features = x.shape
72
- node = Node.new(depth: depth, impurity: whole_impurity, n_samples: n_samples)
74
+ node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
73
75
 
74
76
  # terminate growing.
75
77
  unless @params[:max_leaf_nodes].nil?
@@ -87,7 +89,7 @@ module Rumale
87
89
 
88
90
  # calculate optimal parameters.
89
91
  feature_id, left_imp, right_imp, threshold, gain =
90
- rand_ids(n_features).map { |n| [n, *best_split(x[true, n], y, whole_impurity)] }.max_by(&:last)
92
+ rand_ids.map { |n| [n, *best_split(x[true, n], y, impurity)] }.max_by(&:last)
91
93
 
92
94
  return put_leaf(node, y) if gain.nil? || gain.zero?
93
95
 
@@ -112,8 +114,8 @@ module Rumale
112
114
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
113
115
  end
114
116
 
115
- def rand_ids(n)
116
- [*0...n].sample(@params[:max_features], random: @sub_rng)
117
+ def rand_ids
118
+ @feature_ids.sample(@params[:max_features], random: @sub_rng)
117
119
  end
118
120
 
119
121
  def best_split(_features, _y, _impurity)
@@ -153,7 +153,9 @@ module Rumale
153
153
  end
154
154
 
155
155
  def build_tree(x, y, g, h)
156
+ @feature_ids = Array.new(x.shape[1]) { |v| v }
156
157
  @tree = grow_node(0, x, y, g, h)
158
+ @feature_ids = nil
157
159
  nil
158
160
  end
159
161
 
@@ -179,8 +181,7 @@ module Rumale
179
181
  return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
180
182
 
181
183
  # calculate optimal parameters.
182
- feature_id, threshold, gain =
183
- rand_ids(n_features).map { |n| [n, *best_split(x[true, n], g, h, sum_g, sum_h)] }.max_by(&:last)
184
+ feature_id, threshold, gain = rand_ids.map { |n| [n, *best_split(x[true, n], g, h, sum_g, sum_h)] }.max_by(&:last)
184
185
 
185
186
  return put_leaf(node, sum_g, sum_h) if gain.nil? || gain.zero?
186
187
 
@@ -221,8 +222,8 @@ module Rumale
221
222
  find_split_params(sorted_f, sorted_g, sorted_h, sum_g, sum_h, @params[:reg_lambda])
222
223
  end
223
224
 
224
- def rand_ids(n)
225
- [*0...n].sample(@params[:max_features], random: @sub_rng)
225
+ def rand_ids
226
+ @feature_ids.sample(@params[:max_features], random: @sub_rng)
226
227
  end
227
228
  end
228
229
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.12.0'
6
+ VERSION = '0.12.1'
7
7
  end
@@ -1,3 +1,4 @@
1
+
1
2
  lib = File.expand_path('lib', __dir__)
2
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
4
  require 'rumale/version'
@@ -8,18 +9,18 @@ Gem::Specification.new do |spec|
8
9
  spec.authors = ['yoshoku']
9
10
  spec.email = ['yoshoku@outlook.com']
10
11
 
11
- spec.summary = <<MSG
12
- Rumale is a machine learning library in Ruby.
13
- Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
14
- MSG
15
- spec.description = <<MSG
16
- Rumale is a machine learning library in Ruby.
17
- Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
18
- Rumale currently supports Linear / Kernel Support Vector Machine,
19
- Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
20
- Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
21
- K-Means, DBSCAN, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
22
- MSG
12
+ spec.summary = <<~MSG
13
+ Rumale is a machine learning library in Ruby.
14
+ Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
15
+ MSG
16
+ spec.description = <<~MSG
17
+ Rumale is a machine learning library in Ruby.
18
+ Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
19
+ Rumale currently supports Linear / Kernel Support Vector Machine,
20
+ Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
21
+ Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
22
+ K-Means, DBSCAN, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
23
+ MSG
23
24
  spec.homepage = 'https://github.com/yoshoku/rumale'
24
25
  spec.license = 'BSD-2-Clause'
25
26
 
@@ -37,8 +38,8 @@ MSG
37
38
 
38
39
  spec.add_development_dependency 'bundler', '>= 1.16'
39
40
  spec.add_development_dependency 'coveralls', '~> 0.8'
41
+ spec.add_development_dependency 'parallel'
40
42
  spec.add_development_dependency 'rake', '~> 12.0'
41
43
  spec.add_development_dependency 'rake-compiler'
42
- spec.add_development_dependency 'parallel'
43
44
  spec.add_development_dependency 'rspec', '~> 3.0'
44
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-01 00:00:00.000000000 Z
11
+ date: 2019-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -53,35 +53,35 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.8'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rake
56
+ name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '12.0'
61
+ version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '12.0'
68
+ version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rake-compiler
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: parallel
84
+ name: rake-compiler
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -126,7 +126,6 @@ files:
126
126
  - ".gitignore"
127
127
  - ".rspec"
128
128
  - ".rubocop.yml"
129
- - ".rubocop_todo.yml"
130
129
  - ".travis.yml"
131
130
  - CHANGELOG.md
132
131
  - CODE_OF_CONDUCT.md
@@ -1,58 +0,0 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2018-06-10 12:21:53 +0900 using RuboCop version 0.57.1.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 2
10
- # Cop supports --auto-correct.
11
- Layout/ClosingHeredocIndentation:
12
- Exclude:
13
- - 'rumale.gemspec'
14
-
15
- # Offense count: 2
16
- # Cop supports --auto-correct.
17
- # Configuration parameters: EnforcedStyle.
18
- # SupportedStyles: auto_detection, squiggly, active_support, powerpack, unindent
19
- Layout/IndentHeredoc:
20
- Exclude:
21
- - 'rumale.gemspec'
22
-
23
- # Offense count: 1
24
- # Cop supports --auto-correct.
25
- Layout/LeadingBlankLines:
26
- Exclude:
27
- - 'rumale.gemspec'
28
-
29
- # Offense count: 1
30
- # Configuration parameters: CountComments, ExcludedMethods.
31
- Metrics/BlockLength:
32
- Max: 29
33
-
34
- # Offense count: 3
35
- Metrics/CyclomaticComplexity:
36
- Max: 12
37
-
38
- # Offense count: 3
39
- Metrics/PerceivedComplexity:
40
- Max: 13
41
-
42
- # Offense count: 1
43
- # Cop supports --auto-correct.
44
- # Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
45
- # SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
46
- Style/HashSyntax:
47
- Exclude:
48
- - 'Rakefile'
49
-
50
- # Offense count: 6
51
- # Cop supports --auto-correct.
52
- # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
53
- # SupportedStyles: single_quotes, double_quotes
54
- Style/StringLiterals:
55
- Exclude:
56
- - 'Gemfile'
57
- - 'Rakefile'
58
- - 'bin/console'