rumale 0.12.0 → 0.12.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f662b1bf4abdb9aba9c978362094d80f59fcb390
4
- data.tar.gz: eb5087ce4b4f2dfdc8e789c340139dd7d36693e0
3
+ metadata.gz: e069b026a9236e69924051af75689ba292632b50
4
+ data.tar.gz: b98e3260e947f66936a7149a8b09eb24bb5219fe
5
5
  SHA512:
6
- metadata.gz: 8418aa3932962b135c3a9725262e84b741825ed2491e98dba508e04ea4104d7abe0a9938d6248d9405bc7d9793d1f128a7e37c89804a79367626a47d3fa6a773
7
- data.tar.gz: 40fff97c335d5720eaf1c90b45ed39531c61827777797a3a4bbc5c8b5f4b8df9b3814290686c959f9f7a9726966187a064b521850139124c9be2c64189d1d29f
6
+ metadata.gz: 2ff9dac0d7d633064fc3f4b5127a639a4617357c219fc20ad38c79048981c555fdddebc3f8d16047798bdfc658292e98cbc04d1f26733bcebf4a6412ca84f3ab
7
+ data.tar.gz: 9ad31638ea47d527a0af109a8cda2dcb44d0296ce31a88509833e4f8a116f49601dbf6353606fca080ed378425f5ce759c440791e8eccd90743ed65b14b1ef86
data/.gitignore CHANGED
@@ -13,6 +13,7 @@
13
13
 
14
14
  *.swp
15
15
  *.bundle
16
+ tags
16
17
  .DS_Store
17
18
  .ruby-version
18
19
  /spec/dump_dbl.t
@@ -1,7 +1,5 @@
1
1
  require: rubocop-performance
2
2
 
3
- inherit_from: .rubocop_todo.yml
4
-
5
3
  AllCops:
6
4
  TargetRubyVersion: 2.3
7
5
  DisplayCopNames: true
@@ -31,7 +29,14 @@ Metrics/MethodLength:
31
29
  Metrics/AbcSize:
32
30
  Max: 60
33
31
 
32
+ Metrics/CyclomaticComplexity:
33
+ Max: 16
34
+
35
+ Metrics/PerceivedComplexity:
36
+ Max: 16
37
+
34
38
  Metrics/BlockLength:
39
+ Max: 40
35
40
  Exclude:
36
41
  - 'spec/**/*'
37
42
 
@@ -1,3 +1,7 @@
1
+ # 0.12.1
2
+ - Refactor kernel support vector classifier.
3
+ - Refactor random sampling on tree estimators.
4
+
1
5
  # 0.12.0
2
6
  ## Breaking changes
3
7
  - For reproducibility, Rumale no longer reuses the same random number generator repeatedly within the same estimator.
data/README.md CHANGED
@@ -6,7 +6,7 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.0)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.1)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
@@ -209,7 +209,7 @@ Install OpenBLAS library.
209
209
  Mac:
210
210
 
211
211
  ```bash
212
- $ brew install openblas --with-openmp
212
+ $ brew install openblas
213
213
  ```
214
214
 
215
215
  Ubuntu:
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'mkmf'
2
4
 
3
5
  create_makefile('rumale/rumale')
@@ -129,7 +129,7 @@ module Rumale
129
129
  Tree::ExtraTreeRegressor.new(
130
130
  criterion: @params[:criterion], max_depth: @params[:max_depth],
131
131
  max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
132
- max_features: @params[:max_features], random_seed:rnd_seed
132
+ max_features: @params[:max_features], random_seed: rnd_seed
133
133
  )
134
134
  end
135
135
  end
@@ -79,45 +79,29 @@ module Rumale
79
79
 
80
80
  @classes = Numo::Int32[*y.to_a.uniq.sort]
81
81
  n_classes = @classes.size
82
- _n_samples, n_features = x.shape
82
+ n_features = x.shape[1]
83
83
 
84
84
  if n_classes > 2
85
85
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
86
86
  @prob_param = Numo::DFloat.zeros(n_classes, 2)
87
- if enable_parallel?
88
- # :nocov:
89
- models = parallel_map(n_classes) do |n|
90
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
91
- w = binary_fit(x, bin_y)
92
- p = if @params[:probability]
93
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
94
- else
95
- Numo::DFloat[1, 0]
96
- end
97
- [w, p]
98
- end
99
- # :nocov:
100
- n_classes.times { |n| @weight_vec[n, true], @prob_param[n, true] = models[n] }
101
- else
102
- n_classes.times do |n|
103
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
104
- @weight_vec[n, true] = binary_fit(x, bin_y)
105
- @prob_param[n, true] = if @params[:probability]
106
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose), bin_y)
107
- else
108
- Numo::DFloat[1, 0]
109
- end
110
- end
111
- end
87
+ models = if enable_parallel?
88
+ # :nocov:
89
+ parallel_map(n_classes) do |n|
90
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
91
+ partial_fit(x, bin_y)
92
+ end
93
+ # :nocov:
94
+ else
95
+ Array.new(n_classes) do |n|
96
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
97
+ partial_fit(x, bin_y)
98
+ end
99
+ end
100
+ models.each_with_index { |model, n| @weight_vec[n, true], @prob_param[n, true] = model }
112
101
  else
113
102
  negative_label = y.to_a.uniq.min
114
103
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
115
- @weight_vec = binary_fit(x, bin_y)
116
- @prob_param = if @params[:probability]
117
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose), bin_y)
118
- else
119
- Numo::DFloat[1, 0]
120
- end
104
+ @weight_vec, @prob_param = partial_fit(x, bin_y)
121
105
  end
122
106
 
123
107
  self
@@ -197,7 +181,7 @@ module Rumale
197
181
 
198
182
  private
199
183
 
200
- def binary_fit(x, bin_y)
184
+ def partial_fit(x, bin_y)
201
185
  # Initialize some variables.
202
186
  n_training_samples = x.shape[0]
203
187
  rand_ids = []
@@ -213,7 +197,13 @@ module Rumale
213
197
  func *= bin_y[target_id] / (@params[:reg_param] * (t + 1))
214
198
  weight_vec[target_id] += 1.0 if func < 1.0
215
199
  end
216
- weight_vec * bin_y
200
+ w = weight_vec * bin_y
201
+ p = if @params[:probability]
202
+ Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w), bin_y)
203
+ else
204
+ Numo::DFloat[1, 0]
205
+ end
206
+ [w, p]
217
207
  end
218
208
  end
219
209
  end
@@ -62,14 +62,16 @@ module Rumale
62
62
 
63
63
  def build_tree(x, y)
64
64
  y = y.expand_dims(1).dup if y.shape[1].nil?
65
+ @feature_ids = Array.new(x.shape[1]) { |v| v }
65
66
  @tree = grow_node(0, x, y, impurity(y))
67
+ @feature_ids = nil
66
68
  nil
67
69
  end
68
70
 
69
- def grow_node(depth, x, y, whole_impurity)
71
+ def grow_node(depth, x, y, impurity)
70
72
  # initialize node.
71
73
  n_samples, n_features = x.shape
72
- node = Node.new(depth: depth, impurity: whole_impurity, n_samples: n_samples)
74
+ node = Node.new(depth: depth, impurity: impurity, n_samples: n_samples)
73
75
 
74
76
  # terminate growing.
75
77
  unless @params[:max_leaf_nodes].nil?
@@ -87,7 +89,7 @@ module Rumale
87
89
 
88
90
  # calculate optimal parameters.
89
91
  feature_id, left_imp, right_imp, threshold, gain =
90
- rand_ids(n_features).map { |n| [n, *best_split(x[true, n], y, whole_impurity)] }.max_by(&:last)
92
+ rand_ids.map { |n| [n, *best_split(x[true, n], y, impurity)] }.max_by(&:last)
91
93
 
92
94
  return put_leaf(node, y) if gain.nil? || gain.zero?
93
95
 
@@ -112,8 +114,8 @@ module Rumale
112
114
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
113
115
  end
114
116
 
115
- def rand_ids(n)
116
- [*0...n].sample(@params[:max_features], random: @sub_rng)
117
+ def rand_ids
118
+ @feature_ids.sample(@params[:max_features], random: @sub_rng)
117
119
  end
118
120
 
119
121
  def best_split(_features, _y, _impurity)
@@ -153,7 +153,9 @@ module Rumale
153
153
  end
154
154
 
155
155
  def build_tree(x, y, g, h)
156
+ @feature_ids = Array.new(x.shape[1]) { |v| v }
156
157
  @tree = grow_node(0, x, y, g, h)
158
+ @feature_ids = nil
157
159
  nil
158
160
  end
159
161
 
@@ -179,8 +181,7 @@ module Rumale
179
181
  return put_leaf(node, sum_g, sum_h) if stop_growing?(y)
180
182
 
181
183
  # calculate optimal parameters.
182
- feature_id, threshold, gain =
183
- rand_ids(n_features).map { |n| [n, *best_split(x[true, n], g, h, sum_g, sum_h)] }.max_by(&:last)
184
+ feature_id, threshold, gain = rand_ids.map { |n| [n, *best_split(x[true, n], g, h, sum_g, sum_h)] }.max_by(&:last)
184
185
 
185
186
  return put_leaf(node, sum_g, sum_h) if gain.nil? || gain.zero?
186
187
 
@@ -221,8 +222,8 @@ module Rumale
221
222
  find_split_params(sorted_f, sorted_g, sorted_h, sum_g, sum_h, @params[:reg_lambda])
222
223
  end
223
224
 
224
- def rand_ids(n)
225
- [*0...n].sample(@params[:max_features], random: @sub_rng)
225
+ def rand_ids
226
+ @feature_ids.sample(@params[:max_features], random: @sub_rng)
226
227
  end
227
228
  end
228
229
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.12.0'
6
+ VERSION = '0.12.1'
7
7
  end
@@ -1,3 +1,4 @@
1
+
1
2
  lib = File.expand_path('lib', __dir__)
2
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
4
  require 'rumale/version'
@@ -8,18 +9,18 @@ Gem::Specification.new do |spec|
8
9
  spec.authors = ['yoshoku']
9
10
  spec.email = ['yoshoku@outlook.com']
10
11
 
11
- spec.summary = <<MSG
12
- Rumale is a machine learning library in Ruby.
13
- Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
14
- MSG
15
- spec.description = <<MSG
16
- Rumale is a machine learning library in Ruby.
17
- Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
18
- Rumale currently supports Linear / Kernel Support Vector Machine,
19
- Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
20
- Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
21
- K-Means, DBSCAN, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
22
- MSG
12
+ spec.summary = <<~MSG
13
+ Rumale is a machine learning library in Ruby.
14
+ Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
15
+ MSG
16
+ spec.description = <<~MSG
17
+ Rumale is a machine learning library in Ruby.
18
+ Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
19
+ Rumale currently supports Linear / Kernel Support Vector Machine,
20
+ Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
21
+ Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
22
+ K-Means, DBSCAN, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
23
+ MSG
23
24
  spec.homepage = 'https://github.com/yoshoku/rumale'
24
25
  spec.license = 'BSD-2-Clause'
25
26
 
@@ -37,8 +38,8 @@ MSG
37
38
 
38
39
  spec.add_development_dependency 'bundler', '>= 1.16'
39
40
  spec.add_development_dependency 'coveralls', '~> 0.8'
41
+ spec.add_development_dependency 'parallel'
40
42
  spec.add_development_dependency 'rake', '~> 12.0'
41
43
  spec.add_development_dependency 'rake-compiler'
42
- spec.add_development_dependency 'parallel'
43
44
  spec.add_development_dependency 'rspec', '~> 3.0'
44
45
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.12.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-06-01 00:00:00.000000000 Z
11
+ date: 2019-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -53,35 +53,35 @@ dependencies:
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0.8'
55
55
  - !ruby/object:Gem::Dependency
56
- name: rake
56
+ name: parallel
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '12.0'
61
+ version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '12.0'
68
+ version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: rake-compiler
70
+ name: rake
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: '12.0'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: '12.0'
83
83
  - !ruby/object:Gem::Dependency
84
- name: parallel
84
+ name: rake-compiler
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
@@ -126,7 +126,6 @@ files:
126
126
  - ".gitignore"
127
127
  - ".rspec"
128
128
  - ".rubocop.yml"
129
- - ".rubocop_todo.yml"
130
129
  - ".travis.yml"
131
130
  - CHANGELOG.md
132
131
  - CODE_OF_CONDUCT.md
@@ -1,58 +0,0 @@
1
- # This configuration was generated by
2
- # `rubocop --auto-gen-config`
3
- # on 2018-06-10 12:21:53 +0900 using RuboCop version 0.57.1.
4
- # The point is for the user to remove these configuration records
5
- # one by one as the offenses are removed from the code base.
6
- # Note that changes in the inspected code, or installation of new
7
- # versions of RuboCop, may require this file to be generated again.
8
-
9
- # Offense count: 2
10
- # Cop supports --auto-correct.
11
- Layout/ClosingHeredocIndentation:
12
- Exclude:
13
- - 'rumale.gemspec'
14
-
15
- # Offense count: 2
16
- # Cop supports --auto-correct.
17
- # Configuration parameters: EnforcedStyle.
18
- # SupportedStyles: auto_detection, squiggly, active_support, powerpack, unindent
19
- Layout/IndentHeredoc:
20
- Exclude:
21
- - 'rumale.gemspec'
22
-
23
- # Offense count: 1
24
- # Cop supports --auto-correct.
25
- Layout/LeadingBlankLines:
26
- Exclude:
27
- - 'rumale.gemspec'
28
-
29
- # Offense count: 1
30
- # Configuration parameters: CountComments, ExcludedMethods.
31
- Metrics/BlockLength:
32
- Max: 29
33
-
34
- # Offense count: 3
35
- Metrics/CyclomaticComplexity:
36
- Max: 12
37
-
38
- # Offense count: 3
39
- Metrics/PerceivedComplexity:
40
- Max: 13
41
-
42
- # Offense count: 1
43
- # Cop supports --auto-correct.
44
- # Configuration parameters: EnforcedStyle, UseHashRocketsWithSymbolValues, PreferHashRocketsForNonAlnumEndingSymbols.
45
- # SupportedStyles: ruby19, hash_rockets, no_mixed_keys, ruby19_no_mixed_keys
46
- Style/HashSyntax:
47
- Exclude:
48
- - 'Rakefile'
49
-
50
- # Offense count: 6
51
- # Cop supports --auto-correct.
52
- # Configuration parameters: EnforcedStyle, ConsistentQuotesInMultiline.
53
- # SupportedStyles: single_quotes, double_quotes
54
- Style/StringLiterals:
55
- Exclude:
56
- - 'Gemfile'
57
- - 'Rakefile'
58
- - 'bin/console'