rumale 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -38,12 +38,17 @@ module Rumale
38
38
  # @param batch_size [Integer] The size of the mini batches.
39
39
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
40
40
  # If nil is given, Nadam is used.
41
+ # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
42
+ # If nil is given, the method does not execute in parallel.
43
+ # If zero or less is given, it becomes equal to the number of processors.
44
+ # This parameter is ignored if the Parallel gem is not loaded.
41
45
  # @param random_seed [Integer] The seed value used to initialize the random generator.
42
- def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
46
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, max_iter: 1000, batch_size: 10, optimizer: nil,
47
+ n_jobs: nil, random_seed: nil)
43
48
  check_params_float(reg_param: reg_param, bias_scale: bias_scale)
44
49
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
45
50
  check_params_boolean(fit_bias: fit_bias)
46
- check_params_type_or_nil(Integer, random_seed: random_seed)
51
+ check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
47
52
  check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
48
53
  super
49
54
  end
@@ -64,7 +69,12 @@ module Rumale
64
69
  if n_outputs > 1
65
70
  @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
66
71
  @bias_term = Numo::DFloat.zeros(n_outputs)
67
- n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
72
+ if enable_parallel?
73
+ models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
74
+ n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
75
+ else
76
+ n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
77
+ end
68
78
  else
69
79
  @weight_vec, @bias_term = partial_fit(x, y)
70
80
  end
@@ -48,13 +48,17 @@ module Rumale
48
48
  # @param probability [Boolean] The flag indicating whether to perform probability estimation.
49
49
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
50
50
  # If nil is given, Nadam is used.
51
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
52
+ # If nil is given, the methods do not execute in parallel.
53
+ # If zero or less is given, it becomes equal to the number of processors.
54
+ # This parameter is ignored if the Parallel gem is not loaded.
51
55
  # @param random_seed [Integer] The seed value used to initialize the random generator.
52
56
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
53
- max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, random_seed: nil)
57
+ max_iter: 1000, batch_size: 20, probability: false, optimizer: nil, n_jobs: nil, random_seed: nil)
54
58
  check_params_float(reg_param: reg_param, bias_scale: bias_scale)
55
59
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
56
60
  check_params_boolean(fit_bias: fit_bias, probability: probability)
57
- check_params_type_or_nil(Integer, random_seed: random_seed)
61
+ check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
58
62
  check_params_positive(reg_param: reg_param, bias_scale: bias_scale, max_iter: max_iter, batch_size: batch_size)
59
63
  keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
60
64
  keywd_args.delete(:probability)
@@ -79,27 +83,30 @@ module Rumale
79
83
  n_features = x.shape[1]
80
84
 
81
85
  if n_classes > 2
86
+ # initialize model.
82
87
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
83
88
  @bias_term = Numo::DFloat.zeros(n_classes)
84
89
  @prob_param = Numo::DFloat.zeros(n_classes, 2)
85
- n_classes.times do |n|
86
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
87
- @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
88
- @prob_param[n, true] = if @params[:probability]
89
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec[n, true].transpose) + @bias_term[n], bin_y)
90
- else
91
- Numo::DFloat[1, 0]
92
- end
93
- end
90
+ # fit model.
91
+ models = if enable_parallel?
92
+ # :nocov:
93
+ parallel_map(n_classes) do |n|
94
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
95
+ partial_fit(x, bin_y)
96
+ end
97
+ # :nocov:
98
+ else
99
+ Array.new(n_classes) do |n|
100
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
101
+ partial_fit(x, bin_y)
102
+ end
103
+ end
104
+ # store model.
105
+ models.each_with_index { |model, n| @weight_vec[n, true], @bias_term[n], @prob_param[n, true] = model }
94
106
  else
95
107
  negative_label = y.to_a.uniq.min
96
108
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
97
- @weight_vec, @bias_term = partial_fit(x, bin_y)
98
- @prob_param = if @params[:probability]
99
- Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(@weight_vec.transpose) + @bias_term, bin_y)
100
- else
101
- Numo::DFloat[1, 0]
102
- end
109
+ @weight_vec, @bias_term, @prob_param = partial_fit(x, bin_y)
103
110
  end
104
111
 
105
112
  self
@@ -125,7 +132,12 @@ module Rumale
125
132
 
126
133
  n_samples, = x.shape
127
134
  decision_values = decision_function(x)
128
- Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
135
+ predicted = if enable_parallel?
136
+ parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
137
+ else
138
+ Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
139
+ end
140
+ Numo::Int32.asarray(predicted)
129
141
  end
130
142
 
131
143
  # Predict probability for samples.
@@ -172,6 +184,16 @@ module Rumale
172
184
 
173
185
  private
174
186
 
187
+ def partial_fit(x, bin_y)
188
+ w, b = super
189
+ p = if @params[:probability]
190
+ Rumale::ProbabilisticOutput.fit_sigmoid(x.dot(w.transpose) + b, bin_y)
191
+ else
192
+ Numo::DFloat[1, 0]
193
+ end
194
+ [w, b, p]
195
+ end
196
+
175
197
  def calc_loss_gradient(x, y, weight)
176
198
  target_ids = (x.dot(weight) * y).lt(1.0).where
177
199
  grad = Numo::DFloat.zeros(@params[:batch_size])
@@ -41,13 +41,17 @@ module Rumale
41
41
  # @param batch_size [Integer] The size of the mini batches.
42
42
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
43
43
  # If nil is given, Nadam is used.
44
+ # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
45
+ # If nil is given, the method does not execute in parallel.
46
+ # If zero or less is given, it becomes equal to the number of processors.
47
+ # This parameter is ignored if the Parallel gem is not loaded.
44
48
  # @param random_seed [Integer] The seed value used to initialize the random generator.
45
49
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
46
- max_iter: 1000, batch_size: 20, optimizer: nil, random_seed: nil)
50
+ max_iter: 1000, batch_size: 20, optimizer: nil, n_jobs: nil, random_seed: nil)
47
51
  check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
48
52
  check_params_integer(max_iter: max_iter, batch_size: batch_size)
49
53
  check_params_boolean(fit_bias: fit_bias)
50
- check_params_type_or_nil(Integer, random_seed: random_seed)
54
+ check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
51
55
  check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
52
56
  max_iter: max_iter, batch_size: batch_size)
53
57
  keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h
@@ -72,7 +76,12 @@ module Rumale
72
76
  if n_outputs > 1
73
77
  @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
74
78
  @bias_term = Numo::DFloat.zeros(n_outputs)
75
- n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
79
+ if enable_parallel?
80
+ models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
81
+ n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = models[n] }
82
+ else
83
+ n_outputs.times { |n| @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
84
+ end
76
85
  else
77
86
  @weight_vec, @bias_term = partial_fit(x, y)
78
87
  end
@@ -21,9 +21,13 @@ module Rumale
21
21
  # @param batch_size [Integer] The size of the mini batches.
22
22
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
23
23
  # If nil is given, Nadam is used.
24
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
25
+ # If nil is given, the methods do not execute in parallel.
26
+ # If zero or less is given, it becomes equal to the number of processors.
27
+ # This parameter is ignored if the Parallel gem is not loaded.
24
28
  # @param random_seed [Integer] The seed value used to initialize the random generator.
25
29
  def initialize(n_factors: 2, loss: nil, reg_param_linear: 1.0, reg_param_factor: 1.0,
26
- max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
30
+ max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
27
31
  @params = {}
28
32
  @params[:n_factors] = n_factors
29
33
  @params[:loss] = loss unless loss.nil?
@@ -33,6 +37,7 @@ module Rumale
33
37
  @params[:batch_size] = batch_size
34
38
  @params[:optimizer] = optimizer
35
39
  @params[:optimizer] ||= Optimizer::Nadam.new
40
+ @params[:n_jobs] = n_jobs
36
41
  @params[:random_seed] = random_seed
37
42
  @params[:random_seed] ||= srand
38
43
  @factor_mat = nil
@@ -54,13 +54,17 @@ module Rumale
54
54
  # @param batch_size [Integer] The size of the mini batches.
55
55
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
56
56
  # If nil is given, Nadam is used.
57
+ # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
58
+ # If nil is given, the methods do not execute in parallel.
59
+ # If zero or less is given, it becomes equal to the number of processors.
60
+ # This parameter is ignored if the Parallel gem is not loaded.
57
61
  # @param random_seed [Integer] The seed value used to initialize the random generator.
58
62
  def initialize(n_factors: 2, loss: 'hinge', reg_param_linear: 1.0, reg_param_factor: 1.0,
59
- max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
63
+ max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
60
64
  check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
61
65
  check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
62
66
  check_params_string(loss: loss)
63
- check_params_type_or_nil(Integer, random_seed: random_seed)
67
+ check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
64
68
  check_params_positive(n_factors: n_factors,
65
69
  reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
66
70
  max_iter: max_iter, batch_size: batch_size)
@@ -86,9 +90,19 @@ module Rumale
86
90
  @factor_mat = Numo::DFloat.zeros(n_classes, @params[:n_factors], n_features)
87
91
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
88
92
  @bias_term = Numo::DFloat.zeros(n_classes)
89
- n_classes.times do |n|
90
- bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
91
- @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
93
+ if enable_parallel?
94
+ # :nocov:
95
+ models = parallel_map(n_classes) do |n|
96
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
97
+ partial_fit(x, bin_y)
98
+ end
99
+ # :nocov:
100
+ n_classes.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
101
+ else
102
+ n_classes.times do |n|
103
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
104
+ @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, bin_y)
105
+ end
92
106
  end
93
107
  else
94
108
  negative_label = y.to_a.uniq.min
@@ -122,9 +136,14 @@ module Rumale
122
136
  check_sample_array(x)
123
137
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
124
138
 
125
- n_samples, = x.shape
139
+ n_samples = x.shape[0]
126
140
  decision_values = decision_function(x)
127
- Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
141
+ predicted = if enable_parallel?
142
+ parallel_map(n_samples) { |n| @classes[decision_values[n, true].max_index] }
143
+ else
144
+ Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }
145
+ end
146
+ Numo::Int32.asarray(predicted)
128
147
  end
129
148
 
130
149
  # Predict probability for samples.
@@ -47,12 +47,16 @@ module Rumale
47
47
  # @param batch_size [Integer] The size of the mini batches.
48
48
  # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
49
49
  # If nil is given, Nadam is used.
50
+ # @param n_jobs [Integer] The number of jobs for running the fit method in parallel.
51
+ # If nil is given, the method does not execute in parallel.
52
+ # If zero or less is given, it becomes equal to the number of processors.
53
+ # This parameter is ignored if the Parallel gem is not loaded.
50
54
  # @param random_seed [Integer] The seed value used to initialize the random generator.
51
55
  def initialize(n_factors: 2, reg_param_linear: 1.0, reg_param_factor: 1.0,
52
- max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
56
+ max_iter: 1000, batch_size: 10, optimizer: nil, n_jobs: nil, random_seed: nil)
53
57
  check_params_float(reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor)
54
58
  check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
55
- check_params_type_or_nil(Integer, random_seed: random_seed)
59
+ check_params_type_or_nil(Integer, n_jobs: n_jobs, random_seed: random_seed)
56
60
  check_params_positive(n_factors: n_factors, reg_param_linear: reg_param_linear, reg_param_factor: reg_param_factor,
57
61
  max_iter: max_iter, batch_size: batch_size)
58
62
  keywd_args = method(:initialize).parameters.map { |_t, arg| [arg, binding.local_variable_get(arg)] }.to_h.merge(loss: nil)
@@ -76,7 +80,12 @@ module Rumale
76
80
  @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
77
81
  @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
78
82
  @bias_term = Numo::DFloat.zeros(n_outputs)
79
- n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
83
+ if enable_parallel?
84
+ models = parallel_map(n_outputs) { |n| partial_fit(x, y[true, n]) }
85
+ n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = models[n] }
86
+ else
87
+ n_outputs.times { |n| @factor_mat[n, true, true], @weight_vec[n, true], @bias_term[n] = partial_fit(x, y[true, n]) }
88
+ end
80
89
  else
81
90
  @factor_mat, @weight_vec, @bias_term = partial_fit(x, y)
82
91
  end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.10.0'
6
+ VERSION = '0.11.0'
7
7
  end
@@ -39,5 +39,6 @@ MSG
39
39
  spec.add_development_dependency 'coveralls', '~> 0.8'
40
40
  spec.add_development_dependency 'rake', '~> 12.0'
41
41
  spec.add_development_dependency 'rake-compiler'
42
+ spec.add_development_dependency 'parallel'
42
43
  spec.add_development_dependency 'rspec', '~> 3.0'
43
44
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-05-17 00:00:00.000000000 Z
11
+ date: 2019-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: parallel
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rspec
85
99
  requirement: !ruby/object:Gem::Requirement