rumale 0.18.6 → 0.18.7

This diff shows the content of publicly released package versions as they appear in their respective public registries and is provided for informational purposes only.
Files changed (46)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +14 -2
  3. data/CHANGELOG.md +4 -0
  4. data/lib/rumale/base/base_estimator.rb +2 -0
  5. data/lib/rumale/clustering/dbscan.rb +4 -0
  6. data/lib/rumale/clustering/gaussian_mixture.rb +2 -0
  7. data/lib/rumale/clustering/hdbscan.rb +2 -0
  8. data/lib/rumale/clustering/k_means.rb +1 -0
  9. data/lib/rumale/clustering/k_medoids.rb +4 -0
  10. data/lib/rumale/clustering/power_iteration.rb +2 -0
  11. data/lib/rumale/clustering/single_linkage.rb +2 -0
  12. data/lib/rumale/dataset.rb +2 -0
  13. data/lib/rumale/decomposition/factor_analysis.rb +2 -0
  14. data/lib/rumale/decomposition/pca.rb +1 -0
  15. data/lib/rumale/ensemble/ada_boost_classifier.rb +3 -0
  16. data/lib/rumale/ensemble/ada_boost_regressor.rb +3 -0
  17. data/lib/rumale/evaluation_measure/function.rb +2 -1
  18. data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -0
  19. data/lib/rumale/evaluation_measure/precision_recall.rb +5 -0
  20. data/lib/rumale/evaluation_measure/roc_auc.rb +3 -0
  21. data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -0
  22. data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -0
  23. data/lib/rumale/kernel_machine/kernel_ridge.rb +2 -0
  24. data/lib/rumale/linear_model/base_linear_model.rb +1 -0
  25. data/lib/rumale/linear_model/linear_regression.rb +1 -0
  26. data/lib/rumale/linear_model/ridge.rb +1 -0
  27. data/lib/rumale/manifold/mds.rb +2 -0
  28. data/lib/rumale/manifold/tsne.rb +4 -0
  29. data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +1 -0
  30. data/lib/rumale/model_selection/grid_search_cv.rb +1 -0
  31. data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +1 -0
  32. data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +2 -0
  33. data/lib/rumale/neural_network/base_mlp.rb +1 -0
  34. data/lib/rumale/pipeline/pipeline.rb +3 -0
  35. data/lib/rumale/polynomial_model/base_factorization_machine.rb +1 -0
  36. data/lib/rumale/preprocessing/one_hot_encoder.rb +3 -0
  37. data/lib/rumale/preprocessing/ordinal_encoder.rb +2 -0
  38. data/lib/rumale/preprocessing/polynomial_features.rb +1 -0
  39. data/lib/rumale/probabilistic_output.rb +2 -0
  40. data/lib/rumale/tree/base_decision_tree.rb +2 -0
  41. data/lib/rumale/tree/decision_tree_classifier.rb +1 -0
  42. data/lib/rumale/tree/gradient_tree_regressor.rb +1 -0
  43. data/lib/rumale/utils.rb +1 -0
  44. data/lib/rumale/validation.rb +7 -0
  45. data/lib/rumale/version.rb +1 -1
  46. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 40d5504cf4463721f53a4202ed99ec3f015c571fbadf3a4a4c7e0ac6eb00c7a7
- data.tar.gz: fae3bebad1e88aa166d9279e5f5a2de4ebbad5f79fd416fef68d33d4f66ba2c6
+ metadata.gz: 5e3069531e5acbdaab178769d20684a0fa260e29f6c39a645632b903fff8cce0
+ data.tar.gz: d39c7e61a20b1bce23ccbb9d809bb06f1babce101f509c78c6da2a64e95f180f
  SHA512:
- metadata.gz: b9d32bc9bd5c5f37d27b06fcaa554c28f9a209debaaac4024c1c2a1f6fb367484ce760168f62a2d9e1ee24d9372ad9cccd1d36e7280f202734e5330105a995fa
- data.tar.gz: c18470cb533df4f6315324942afc98b5c52f4b7f6246078f459987a3407b79ae60a42599f40bc6236d5adba3dc85799a091e0d7ae5e9a1a3fd9fc626206cbef2
+ metadata.gz: eb9077b26d63f153eefd4c68ea57083e12b6a465d06864da8b24a3f9d2aff907b8de350ade5c96e8e9ec28997424839b91ac884d787a7bff7c2a44d212addd81
+ data.tar.gz: 7d94c6d80e16ed405f87a7c777b4922863e42922a5b33046df4bc42d9daa5f5243ebb6c5492cb2d20120215cdce9a5c4b0ac3156012bf38cf91e7717b2c51c22
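The checksums above are digests of the two archives packed inside the published .gem file. A minimal verification sketch, assuming a rumale-0.18.7.gem already downloaded into the current directory; the expected value is the new data.tar.gz SHA256 from the hunk above, everything else is illustrative:

    require 'digest'
    require 'rubygems/package'

    # SHA256 of data.tar.gz for 0.18.7, as recorded in checksums.yaml above.
    expected = 'd39c7e61a20b1bce23ccbb9d809bb06f1babce101f509c78c6da2a64e95f180f'

    # A .gem file is a plain tar archive holding metadata.gz, data.tar.gz and
    # checksums.yaml.gz; hash the data.tar.gz entry and compare.
    File.open('rumale-0.18.7.gem', 'rb') do |gem_io|
      Gem::Package::TarReader.new(gem_io) do |tar|
        tar.each do |entry|
          next unless entry.full_name == 'data.tar.gz'

          actual = Digest::SHA256.hexdigest(entry.read)
          puts(actual == expected ? 'data.tar.gz matches the recorded checksum' : 'checksum mismatch')
        end
      end
    end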
@@ -3,7 +3,7 @@ require:
  - rubocop-rspec

  AllCops:
- TargetRubyVersion: 2.3
+ TargetRubyVersion: 2.5
  DisplayCopNames: true
  DisplayStyleGuide: true
  Exclude:
@@ -70,14 +70,26 @@ Naming/MethodParameterName:
  Naming/ConstantName:
  Enabled: false

+ Style/ExponentialNotation:
+ Enabled: true
+
  Style/FormatStringToken:
  Enabled: false

  Style/NumericLiterals:
  Enabled: false

+ Style/SlicingWithRange:
+ Enabled: true
+
  Layout/EmptyLineAfterGuardClause:
- Enabled: false
+ Enabled: true
+
+ Layout/EmptyLinesAroundAttributeAccessor:
+ Enabled: true
+
+ Layout/SpaceAroundMethodCallOperator:
+ Enabled: true

  RSpec/MultipleExpectations:
  Enabled: false
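Most of the source hunks below follow from flipping Layout/EmptyLineAfterGuardClause to Enabled: true: RuboCop now wants a blank line after each guard clause, which is why so many hunks add a single "+" line. As an illustration only, the n_processes method from base_estimator.rb (its hunk appears further down) before and after the cop is satisfied:

    # Flagged by Layout/EmptyLineAfterGuardClause before 0.18.7:
    def n_processes
      return 1 unless enable_parallel?
      @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
    end

    # After 0.18.7 a blank line follows the guard clause -- the same
    # one-line change repeated across most files in this release:
    def n_processes
      return 1 unless enable_parallel?

      @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
    end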
@@ -1,3 +1,7 @@
+ # 0.18.7
+ - Fix to convert target_name to string array in [classification_report method](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html#classification_report-class_method).
+ - Refactor some codes with Rubocop.
+
  # 0.18.6
  - Fix some configuration files.
  - Update API documentation.
@@ -25,6 +25,7 @@ module Rumale

  def enable_parallel?
  return false if @params[:n_jobs].nil?
+
  if defined?(Parallel).nil?
  warn('If you want to use parallel option, you should install and load Parallel in advance.')
  return false
@@ -34,6 +35,7 @@ module Rumale

  def n_processes
  return 1 unless enable_parallel?
+
  @params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
  end
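The enable_parallel?/n_processes pair shown above is what honors the n_jobs parameter: it returns false (and prints the warning above) unless the parallel gem is loaded, and maps a non-positive n_jobs to Parallel.processor_count. A usage sketch, assuming the parallel gem is installed; the estimator choice and the x_train/y_train variables are illustrative:

    require 'parallel' # load before fitting, otherwise training falls back to a single process
    require 'rumale'

    # n_jobs: -1 (non-positive) means "use Parallel.processor_count processes".
    estimator = Rumale::Ensemble::RandomForestClassifier.new(n_estimators: 100, n_jobs: -1)
    estimator.fit(x_train, y_train)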
@@ -54,6 +54,7 @@ module Rumale
  def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  partial_fit(x)
  self
  end
@@ -66,6 +67,7 @@
  def fit_predict(x)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  partial_fit(x)
  labels
  end
@@ -80,6 +82,7 @@
  @labels = Numo::Int32.zeros(n_samples) - 2
  n_samples.times do |query_id|
  next if @labels[query_id] >= -1
+
  cluster_id += 1 if expand_cluster(metric_mat, query_id, cluster_id)
  end
  @core_sample_ids = Numo::Int32[*@core_sample_ids.flatten]
@@ -102,6 +105,7 @@
  while (m = target_ids.shift)
  neighbor_ids = region_query(metric_mat[m, true])
  next if neighbor_ids.size < @params[:min_samples]
+
  neighbor_ids.each do |n|
  target_ids.push(n) if @labels[n] < -1
  @labels[n] = cluster_id if @labels[n] <= -1
@@ -86,6 +86,7 @@ module Rumale
  new_memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
  error = (memberships - new_memberships).abs.max
  break if error <= @params[:tol]
+
  memberships = new_memberships.dup
  end
  self
@@ -209,6 +210,7 @@

  def check_enable_linalg(method_name)
  return unless @params[:covariance_type] == 'full' && !enable_linalg?
+
  raise "GaussianMixture##{method_name} requires Numo::Linalg when covariance_type is 'full' but that is not loaded."
  end
  end
@@ -55,6 +55,7 @@ module Rumale
  def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  fit_predict(x)
  self
  end
@@ -67,6 +68,7 @@
  def fit_predict(x)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
  @labels = partial_fit(distance_mat)
  end
@@ -106,6 +106,7 @@ module Rumale
  rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
  @cluster_centers = x[rand_id, true].dup
  return unless @params[:init] == 'k-means++'
+
  # k-means++ initialize
  (1...@params[:n_clusters]).each do |n|
  distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers[0...n, true])
@@ -64,6 +64,7 @@ module Rumale
  def fit(x, _not_used = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  # initialize some varibales.
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
  init_cluster_centers(distance_mat)
@@ -76,6 +77,7 @@
  end
  new_error = distance_mat[true, @medoid_ids].mean
  break if (error - new_error).abs <= @params[:tol]
+
  error = new_error
  end
  @cluster_centers = x[@medoid_ids, true].dup if @params[:metric] == 'euclidean'
@@ -93,6 +95,7 @@
  if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
  raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
  end
+
  assign_cluster(distance_mat)
  end

@@ -123,6 +126,7 @@
  sub_rng = @rng.dup
  @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
  return unless @params[:init] == 'k-means++'
+
  # k-means++ initialize
  (1...@params[:n_clusters]).each do |n|
  distances = distance_mat[true, @medoid_ids[0...n]]
@@ -71,6 +71,7 @@ module Rumale
  def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
+
  fit_predict(x)
  self
  end
@@ -107,6 +108,7 @@
  new_embedded_line /= new_embedded_line.abs.sum
  new_error = (new_embedded_line - embedded_line).abs
  break if (new_error - error).abs.max <= tol
+
  embedded_line = new_embedded_line
  error = new_error
  end
@@ -54,6 +54,7 @@ module Rumale
  def fit(x, _y = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  fit_predict(x)
  self
  end
@@ -66,6 +67,7 @@
  def fit_predict(x)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
  @labels = partial_fit(distance_mat)
  end
@@ -65,6 +65,7 @@ module Rumale
  Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
  raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
+
  # initialize some variables.
  rs = random_seed
  rs ||= srand
@@ -101,6 +102,7 @@
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
  Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
  raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
+
  # initialize some variables.
  rs = random_seed
  rs ||= srand
@@ -90,9 +90,11 @@ module Rumale
  @components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
  @noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
  next if @params[:tol].nil?
+
  new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
  @loglike.push(new_loglike)
  break if (old_loglike - new_loglike).abs <= @params[:tol]
+
  old_loglike = new_loglike
  end

@@ -87,6 +87,7 @@ module Rumale
  @params[:max_iter].times do
  updated = orthogonalize(covariance_mat.dot(comp_vec))
  break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
+
  comp_vec = updated
  end
  @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
@@ -105,6 +105,7 @@ module Rumale
  # Fit classfier.
  ids = Rumale::Utils.choice_ids(n_samples, observation_weights, sub_rng)
  break if y[ids].to_a.uniq.size != n_classes
+
  tree = Tree::DecisionTreeClassifier.new(
  criterion: @params[:criterion], max_depth: @params[:max_depth],
  max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
@@ -120,12 +121,14 @@
  @estimators.push(tree)
  @feature_importances += tree.feature_importances
  break if error.zero?
+
  # Update observation weights.
  log_proba = Numo::NMath.log(proba)
  observation_weights *= Numo::NMath.exp(-1.0 * (n_classes - 1).fdiv(n_classes) * (y_codes * log_proba).sum(1))
  observation_weights = observation_weights.clip(1.0e-15, nil)
  sum_observation_weights = observation_weights.sum
  break if sum_observation_weights.zero?
+
  observation_weights /= sum_observation_weights
  end
  @feature_importances /= @feature_importances.sum
@@ -93,6 +93,7 @@ module Rumale
  check_sample_tvalue_size(x, y)
  # Check target values
  raise ArgumentError, 'Expect target value vector to be 1-D arrray' unless y.shape.size == 1
+
  # Initialize some variables.
  n_samples, n_features = x.shape
  @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
@@ -117,6 +118,7 @@
  abs_err = ((p - y) / y).abs
  err = observation_weights[abs_err.gt(@params[:threshold])].sum
  break if err <= 0.0
+
  # Calculate weight.
  beta = err**@params[:exponent]
  weight = Math.log(1.fdiv(beta))
@@ -131,6 +133,7 @@
  observation_weights = observation_weights.clip(1.0e-15, nil)
  sum_observation_weights = observation_weights.sum
  break if sum_observation_weights.zero?
+
  observation_weights /= sum_observation_weights
  end
  @estimator_weights = Numo::DFloat.asarray(@estimator_weights)
@@ -86,7 +86,8 @@ module Rumale
  weighted_recall = (Numo::DFloat.cast(recalls) * weights).sum
  weighted_fscore = (Numo::DFloat.cast(fscores) * weights).sum
  # output reults.
- target_name ||= classes.map(&:to_s)
+ target_name ||= classes
+ target_name.map!(&:to_s)
  if output_hash
  res = {}
  target_name.each_with_index do |label, n|
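This hunk is the fix called out in the changelog: target_name now defaults to the class labels and is then mapped to strings, so non-string names (symbols, integers) no longer break report construction. A minimal sketch assuming rumale 0.18.7; the labels and symbol names are illustrative:

    require 'rumale'
    require 'numo/narray'

    y_true = Numo::Int32[0, 1, 2, 2, 1, 0, 1, 2]
    y_pred = Numo::Int32[0, 2, 2, 2, 1, 0, 1, 1]

    # Symbols passed as target_name are converted with to_s as of 0.18.7.
    puts Rumale::EvaluationMeasure.classification_report(
      y_true, y_pred, target_name: %i[setosa versicolor virginica]
    )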
@@ -28,8 +28,10 @@ module Rumale
  # calculate entropies.
  class_entropy = entropy(y_true)
  return 0.0 if class_entropy.zero?
+
  cluster_entropy = entropy(y_pred)
  return 0.0 if cluster_entropy.zero?
+
  # calculate mutual information.
  mi = MutualInformation.new
  mi.score(y_true, y_pred) / Math.sqrt(class_entropy * cluster_entropy)
@@ -14,6 +14,7 @@ module Rumale
  y_true.sort.to_a.uniq.map do |label|
  target_positions = y_pred.eq(label)
  next 0.0 if y_pred[target_positions].empty?
+
  n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
  n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
  n_true_positives / (n_true_positives + n_false_positives)
@@ -25,6 +26,7 @@
  y_true.sort.to_a.uniq.map do |label|
  target_positions = y_true.eq(label)
  next 0.0 if y_pred[target_positions].empty?
+
  n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
  n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
  n_true_positives / (n_true_positives + n_false_negatives)
@@ -35,6 +37,7 @@
  def f_score_each_class(y_true, y_pred)
  precision_each_class(y_true, y_pred).zip(recall_each_class(y_true, y_pred)).map do |p, r|
  next 0.0 if p.zero? && r.zero?
+
  (2.0 * p * r) / (p + r)
  end
  end
@@ -44,6 +47,7 @@
  evaluated_values = y_true.sort.to_a.uniq.map do |label|
  target_positions = y_pred.eq(label)
  next [0.0, 0.0] if y_pred[target_positions].empty?
+
  n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
  n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
  [n_true_positives, n_true_positives + n_false_positives]
@@ -57,6 +61,7 @@
  evaluated_values = y_true.sort.to_a.uniq.map do |label|
  target_positions = y_true.eq(label)
  next 0.0 if y_pred[target_positions].empty?
+
  n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
  n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
  [n_true_positives, n_true_positives + n_false_negatives]
@@ -64,6 +64,7 @@ module Rumale
  y_score = Numo::DFloat.cast(y_score) unless y_score.is_a?(Numo::DFloat)
  raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
  raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?
+
  labels = y_true.to_a.uniq
  if pos_label.nil?
  raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
@@ -96,8 +97,10 @@
  y = Numo::NArray.asarray(y) unless y.is_a?(Numo::NArray)
  raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
  raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?
+
  n_samples = [x.shape[0], y.shape[0]].min
  raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
+
  (0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
  end

@@ -47,6 +47,7 @@ module Rumale
  cls_pos = y.eq(labels[n])
  sz_cluster = cls_pos.count
  next unless sz_cluster > 1
+
  cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
  cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
  intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
@@ -57,6 +58,7 @@
  cls_pos = y.eq(labels[m])
  n_clusters.times do |n|
  next if m == n
+
  not_cls_pos = y.eq(labels[n])
  inter_dists[cls_pos] = Numo::DFloat.minimum(
  inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
@@ -71,6 +71,7 @@ module Rumale
  f.each do |k, v|
  k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
  next if @vocabulary.key?(k)
+
  @feature_names.push(k)
  @vocabulary[k] = @vocabulary.size
  end
@@ -30,6 +30,7 @@ module Rumale
  def initialize(reg_param: 1.0)
  raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
  raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1
+
  @params = {}
  @params[:reg_param] = reg_param
  @weight_vec = nil
@@ -55,6 +56,7 @@
  @weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
  else
  raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]
+
  n_outputs = y.shape[1]
  @weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
  n_outputs.times do |n|
@@ -68,6 +68,7 @@ module Rumale
  # Update weight.
  loss_gradient = calc_loss_gradient(sub_samples, sub_targets, weight)
  next if loss_gradient.ne(0.0).count.zero?
+
  weight = calc_new_weight(optimizer, sub_samples, weight, loss_gradient)
  end
  split_weight(weight)
@@ -162,6 +162,7 @@ module Rumale
  def load_linalg?
  return false if defined?(Numo::Linalg).nil?
  return false if Numo::Linalg::VERSION < '0.1.4'
+
  true
  end
  end
@@ -164,6 +164,7 @@ module Rumale
  def load_linalg?
  return false if defined?(Numo::Linalg).nil?
  return false if Numo::Linalg::VERSION < '0.1.4'
+
  true
  end
  end
@@ -83,6 +83,7 @@ module Rumale
  def fit(x, _not_used = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  # initialize some varibales.
  n_samples = x.shape[0]
  hi_distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
@@ -142,6 +143,7 @@
  def terminate?(old_stress, new_stress)
  return false if @params[:tol].nil?
  return false if old_stress.nil?
+
  (old_stress - new_stress).abs <= @params[:tol]
  end

@@ -89,6 +89,7 @@ module Rumale
  def fit(x, _not_used = nil)
  x = check_convert_sample_array(x)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  # initialize some varibales.
  @n_iter = 0
  distance_mat = @params[:metric] == 'precomputed' ? x**2 : Rumale::PairwiseMetric.squared_error(x)
@@ -99,6 +100,7 @@
  one_vec = Numo::DFloat.ones(x.shape[0]).expand_dims(1)
  @params[:max_iter].times do |t|
  break if terminate?(hi_prob_mat, lo_prob_mat)
+
  a = hi_prob_mat * lo_prob_mat
  b = lo_prob_mat * lo_prob_mat
  y = (b.dot(one_vec) * y + (a - b).dot(y)) / a.dot(one_vec)
@@ -170,6 +172,7 @@
  entropy, probs = gaussian_distributed_probability_vector(sample_id, distance_vec, beta)
  diff_entropy = entropy - init_entropy
  break if diff_entropy.abs <= 1e-5
+
  if diff_entropy.positive?
  betamin = beta
  if betamax == Float::MAX
@@ -211,6 +214,7 @@

  def terminate?(p, q)
  return false if @params[:tol].nil?
+
  cost(p, q) <= @params[:tol]
  end
  end
@@ -126,6 +126,7 @@ module Rumale
  res = prm
  puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
  break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
+
  fold = res[:fnc]
  dold = res[:jcb]
  end
@@ -156,6 +156,7 @@ module Rumale

  def valid_param_grid(grid)
  raise TypeError, 'Expect class of param_grid to be Hash or Array' unless grid.is_a?(Hash) || grid.is_a?(Array)
+
  grid = [grid] if grid.is_a?(Hash)
  grid.each do |h|
  raise TypeError, 'Expect class of elements in param_grid to be Hash' unless h.is_a?(Hash)
@@ -67,6 +67,7 @@ module Rumale
  y = check_convert_label_array(y)
  check_sample_label_size(x, y)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  @prototypes = if @params[:metric] == 'euclidean'
  if @params[:algorithm] == 'vptree'
  VPTree.new(x)
@@ -61,6 +61,7 @@ module Rumale
  y = check_convert_tvalue_array(y)
  check_sample_tvalue_size(x, y)
  raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
+
  @prototypes = if @params[:metric] == 'euclidean'
  if @params[:algorithm] == 'vptree'
  VPTree.new(x)
@@ -82,6 +83,7 @@
  if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
  raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
  end
+
  # Initialize some variables.
  n_samples = x.shape[0]
  n_prototypes, n_outputs = @values.shape
@@ -233,6 +233,7 @@ module Rumale
  # calc loss function
  loss, dout = loss_func.call(out, sub_y)
  break if loss < @params[:tol]
+
  # backward
  backward.call(dout)
  end
@@ -119,6 +119,7 @@ module Rumale
  @steps.keys.reverse_each do |name|
  transformer = @steps[name]
  next if transformer.nil?
+
  itrans_z = transformer.inverse_transform(itrans_z)
  end
  itrans_z
@@ -140,6 +141,7 @@
  steps.keys[0...-1].each do |name|
  transformer = steps[name]
  next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
+
  raise TypeError,
  'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
  "#{name} => #{transformer.class}"
@@ -158,6 +160,7 @@
  @steps.keys[0...-1].each do |name|
  transformer = @steps[name]
  next if transformer.nil?
+
  transformer.fit(trans_x, y) if fit
  trans_x = transformer.transform(trans_x)
  end
@@ -75,6 +75,7 @@ module Rumale
  # Calculate gradients for loss function.
  loss_grad = loss_gradient(sub_x, ex_sub_x, sub_y, factor_mat, weight_vec)
  next if loss_grad.ne(0.0).count.zero?
+
  # Update each parameter.
  weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_sub_x, weight_vec))
  @params[:n_factors].times do |n|
@@ -51,6 +51,7 @@ module Rumale
  def fit(x, _y = nil)
  x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
  raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
  @n_values = x.max(0) + 1
  @feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
  @active_features = encode(x, @feature_indices).sum(0).ne(0).where
@@ -67,6 +68,7 @@
  x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
  raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
  raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
  fit(x).transform(x)
  end

@@ -77,6 +79,7 @@
  def transform(x)
  x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
  raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
+
  codes = encode(x, @feature_indices)
  codes[true, @active_features].dup
  end
@@ -51,6 +51,7 @@ module Rumale
  def fit(x, _y = nil)
  raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+
  n_features = x.shape[1]
  @categories = Array.new(n_features) { |n| x[true, n].to_a.uniq.sort }
  self
@@ -65,6 +66,7 @@
  def fit_transform(x, _y = nil)
  raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+
  fit(x).transform(x)
  end

@@ -41,6 +41,7 @@ module Rumale
  def initialize(degree: 2)
  check_params_numeric(degree: degree)
  raise ArgumentError, 'Expect the value of degree parameter greater than or eqaul to 1.' if degree < 1
+
  @params = {}
  @params[:degree] = degree
  @n_output_features = nil
@@ -47,6 +47,7 @@ module Rumale
  hess_mat = hessian_matrix(probs, df, sigma)
  break if grad_vec.abs.lt(1e-5).count == 2
  break if (old_grad_vec - grad_vec).abs.sum < 1e-5
+
  old_grad_vec = grad_vec
  # Calculate Newton directions.
  dirs_vec = directions(grad_vec, hess_mat)
@@ -58,6 +59,7 @@
  new_beta = beta + stepsize * dirs_vec[1]
  new_err = error_function(target_probs, df, new_alpha, new_beta)
  next unless new_err < err + 0.0001 * stepsize * grad_dir
+
  alpha = new_alpha
  beta = new_beta
  err = new_err
@@ -53,6 +53,7 @@ module Rumale
  return node.leaf_id if node.leaf
  return apply_at_node(node.left, sample) if node.right.nil?
  return apply_at_node(node.right, sample) if node.left.nil?
+
  if sample[node.feature_id] <= node.threshold
  apply_at_node(node.left, sample)
  else
@@ -138,6 +139,7 @@
  def eval_importance_at_node(node)
  return nil if node.leaf
  return nil if node.left.nil? || node.right.nil?
+
  gain = node.n_samples * node.impurity -
  node.left.n_samples * node.left.impurity -
  node.right.n_samples * node.right.impurity
@@ -110,6 +110,7 @@ module Rumale
  return node.probs if node.leaf
  return predict_proba_at_node(node.left, sample) if node.right.nil?
  return predict_proba_at_node(node.right, sample) if node.left.nil?
+
  if sample[node.feature_id] <= node.threshold
  predict_proba_at_node(node.left, sample)
  else
@@ -123,6 +123,7 @@ module Rumale
  return node.leaf_id if node.leaf
  return apply_at_node(node.left, sample) if node.right.nil?
  return apply_at_node(node.right, sample) if node.left.nil?
+
  if sample[node.feature_id] <= node.threshold
  apply_at_node(node.left, sample)
  else
@@ -13,6 +13,7 @@ module Rumale
  chosen = 0
  probs.each_with_index do |p, idx|
  break (chosen = idx) if target <= p
+
  target -= p
  end
  chosen
@@ -9,6 +9,7 @@ module Rumale
  def check_convert_sample_array(x)
  x = Numo::DFloat.cast(x) unless x.is_a?(Numo::DFloat)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.ndim == 2
+
  x
  end

@@ -16,6 +17,7 @@
  def check_convert_label_array(y)
  y = Numo::Int32.cast(y) unless y.is_a?(Numo::Int32)
  raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.ndim == 1
+
  y
  end

@@ -29,6 +31,7 @@
  def check_sample_array(x)
  raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
  raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.ndim == 2
+
  nil
  end

@@ -36,24 +39,28 @@ module Rumale
  def check_label_array(y)
  raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
  raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.ndim == 1
+
  nil
  end

  # @!visibility private
  def check_tvalue_array(y)
  raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
+
  nil
  end

  # @!visibility private
  def check_sample_label_size(x, y)
  raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
+
  nil
  end

  # @!visibility private
  def check_sample_tvalue_size(x, y)
  raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
+
  nil
  end
@@ -3,5 +3,5 @@
  # Rumale is a machine learning library in Ruby.
  module Rumale
  # The version of Rumale you are using.
- VERSION = '0.18.6'
+ VERSION = '0.18.7'
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: rumale
  version: !ruby/object:Gem::Version
- version: 0.18.6
+ version: 0.18.7
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2020-05-02 00:00:00.000000000 Z
+ date: 2020-05-16 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray