rumale 0.18.6 → 0.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +14 -2
- data/CHANGELOG.md +4 -0
- data/lib/rumale/base/base_estimator.rb +2 -0
- data/lib/rumale/clustering/dbscan.rb +4 -0
- data/lib/rumale/clustering/gaussian_mixture.rb +2 -0
- data/lib/rumale/clustering/hdbscan.rb +2 -0
- data/lib/rumale/clustering/k_means.rb +1 -0
- data/lib/rumale/clustering/k_medoids.rb +4 -0
- data/lib/rumale/clustering/power_iteration.rb +2 -0
- data/lib/rumale/clustering/single_linkage.rb +2 -0
- data/lib/rumale/dataset.rb +2 -0
- data/lib/rumale/decomposition/factor_analysis.rb +2 -0
- data/lib/rumale/decomposition/pca.rb +1 -0
- data/lib/rumale/ensemble/ada_boost_classifier.rb +3 -0
- data/lib/rumale/ensemble/ada_boost_regressor.rb +3 -0
- data/lib/rumale/evaluation_measure/function.rb +2 -1
- data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +2 -0
- data/lib/rumale/evaluation_measure/precision_recall.rb +5 -0
- data/lib/rumale/evaluation_measure/roc_auc.rb +3 -0
- data/lib/rumale/evaluation_measure/silhouette_score.rb +2 -0
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +1 -0
- data/lib/rumale/kernel_machine/kernel_ridge.rb +2 -0
- data/lib/rumale/linear_model/base_linear_model.rb +1 -0
- data/lib/rumale/linear_model/linear_regression.rb +1 -0
- data/lib/rumale/linear_model/ridge.rb +1 -0
- data/lib/rumale/manifold/mds.rb +2 -0
- data/lib/rumale/manifold/tsne.rb +4 -0
- data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +1 -0
- data/lib/rumale/model_selection/grid_search_cv.rb +1 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +1 -0
- data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +2 -0
- data/lib/rumale/neural_network/base_mlp.rb +1 -0
- data/lib/rumale/pipeline/pipeline.rb +3 -0
- data/lib/rumale/polynomial_model/base_factorization_machine.rb +1 -0
- data/lib/rumale/preprocessing/one_hot_encoder.rb +3 -0
- data/lib/rumale/preprocessing/ordinal_encoder.rb +2 -0
- data/lib/rumale/preprocessing/polynomial_features.rb +1 -0
- data/lib/rumale/probabilistic_output.rb +2 -0
- data/lib/rumale/tree/base_decision_tree.rb +2 -0
- data/lib/rumale/tree/decision_tree_classifier.rb +1 -0
- data/lib/rumale/tree/gradient_tree_regressor.rb +1 -0
- data/lib/rumale/utils.rb +1 -0
- data/lib/rumale/validation.rb +7 -0
- data/lib/rumale/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5e3069531e5acbdaab178769d20684a0fa260e29f6c39a645632b903fff8cce0
|
|
4
|
+
data.tar.gz: d39c7e61a20b1bce23ccbb9d809bb06f1babce101f509c78c6da2a64e95f180f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: eb9077b26d63f153eefd4c68ea57083e12b6a465d06864da8b24a3f9d2aff907b8de350ade5c96e8e9ec28997424839b91ac884d787a7bff7c2a44d212addd81
|
|
7
|
+
data.tar.gz: 7d94c6d80e16ed405f87a7c777b4922863e42922a5b33046df4bc42d9daa5f5243ebb6c5492cb2d20120215cdce9a5c4b0ac3156012bf38cf91e7717b2c51c22
|
data/.rubocop.yml
CHANGED
|
@@ -3,7 +3,7 @@ require:
|
|
|
3
3
|
- rubocop-rspec
|
|
4
4
|
|
|
5
5
|
AllCops:
|
|
6
|
-
TargetRubyVersion: 2.
|
|
6
|
+
TargetRubyVersion: 2.5
|
|
7
7
|
DisplayCopNames: true
|
|
8
8
|
DisplayStyleGuide: true
|
|
9
9
|
Exclude:
|
|
@@ -70,14 +70,26 @@ Naming/MethodParameterName:
|
|
|
70
70
|
Naming/ConstantName:
|
|
71
71
|
Enabled: false
|
|
72
72
|
|
|
73
|
+
Style/ExponentialNotation:
|
|
74
|
+
Enabled: true
|
|
75
|
+
|
|
73
76
|
Style/FormatStringToken:
|
|
74
77
|
Enabled: false
|
|
75
78
|
|
|
76
79
|
Style/NumericLiterals:
|
|
77
80
|
Enabled: false
|
|
78
81
|
|
|
82
|
+
Style/SlicingWithRange:
|
|
83
|
+
Enabled: true
|
|
84
|
+
|
|
79
85
|
Layout/EmptyLineAfterGuardClause:
|
|
80
|
-
Enabled: false
|
|
86
|
+
Enabled: true
|
|
87
|
+
|
|
88
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
|
89
|
+
Enabled: true
|
|
90
|
+
|
|
91
|
+
Layout/SpaceAroundMethodCallOperator:
|
|
92
|
+
Enabled: true
|
|
81
93
|
|
|
82
94
|
RSpec/MultipleExpectations:
|
|
83
95
|
Enabled: false
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
# 0.18.7
|
|
2
|
+
- Fix to convert target_name to string array in [classification_report method](https://yoshoku.github.io/rumale/doc/Rumale/EvaluationMeasure.html#classification_report-class_method).
|
|
3
|
+
- Refactor some codes with Rubocop.
|
|
4
|
+
|
|
1
5
|
# 0.18.6
|
|
2
6
|
- Fix some configuration files.
|
|
3
7
|
- Update API documentation.
|
|
@@ -25,6 +25,7 @@ module Rumale
|
|
|
25
25
|
|
|
26
26
|
def enable_parallel?
|
|
27
27
|
return false if @params[:n_jobs].nil?
|
|
28
|
+
|
|
28
29
|
if defined?(Parallel).nil?
|
|
29
30
|
warn('If you want to use parallel option, you should install and load Parallel in advance.')
|
|
30
31
|
return false
|
|
@@ -34,6 +35,7 @@ module Rumale
|
|
|
34
35
|
|
|
35
36
|
def n_processes
|
|
36
37
|
return 1 unless enable_parallel?
|
|
38
|
+
|
|
37
39
|
@params[:n_jobs] <= 0 ? Parallel.processor_count : @params[:n_jobs]
|
|
38
40
|
end
|
|
39
41
|
|
|
@@ -54,6 +54,7 @@ module Rumale
|
|
|
54
54
|
def fit(x, _y = nil)
|
|
55
55
|
x = check_convert_sample_array(x)
|
|
56
56
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
57
|
+
|
|
57
58
|
partial_fit(x)
|
|
58
59
|
self
|
|
59
60
|
end
|
|
@@ -66,6 +67,7 @@ module Rumale
|
|
|
66
67
|
def fit_predict(x)
|
|
67
68
|
x = check_convert_sample_array(x)
|
|
68
69
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
70
|
+
|
|
69
71
|
partial_fit(x)
|
|
70
72
|
labels
|
|
71
73
|
end
|
|
@@ -80,6 +82,7 @@ module Rumale
|
|
|
80
82
|
@labels = Numo::Int32.zeros(n_samples) - 2
|
|
81
83
|
n_samples.times do |query_id|
|
|
82
84
|
next if @labels[query_id] >= -1
|
|
85
|
+
|
|
83
86
|
cluster_id += 1 if expand_cluster(metric_mat, query_id, cluster_id)
|
|
84
87
|
end
|
|
85
88
|
@core_sample_ids = Numo::Int32[*@core_sample_ids.flatten]
|
|
@@ -102,6 +105,7 @@ module Rumale
|
|
|
102
105
|
while (m = target_ids.shift)
|
|
103
106
|
neighbor_ids = region_query(metric_mat[m, true])
|
|
104
107
|
next if neighbor_ids.size < @params[:min_samples]
|
|
108
|
+
|
|
105
109
|
neighbor_ids.each do |n|
|
|
106
110
|
target_ids.push(n) if @labels[n] < -1
|
|
107
111
|
@labels[n] = cluster_id if @labels[n] <= -1
|
|
@@ -86,6 +86,7 @@ module Rumale
|
|
|
86
86
|
new_memberships = calc_memberships(x, @weights, @means, @covariances, @params[:covariance_type])
|
|
87
87
|
error = (memberships - new_memberships).abs.max
|
|
88
88
|
break if error <= @params[:tol]
|
|
89
|
+
|
|
89
90
|
memberships = new_memberships.dup
|
|
90
91
|
end
|
|
91
92
|
self
|
|
@@ -209,6 +210,7 @@ module Rumale
|
|
|
209
210
|
|
|
210
211
|
def check_enable_linalg(method_name)
|
|
211
212
|
return unless @params[:covariance_type] == 'full' && !enable_linalg?
|
|
213
|
+
|
|
212
214
|
raise "GaussianMixture##{method_name} requires Numo::Linalg when covariance_type is 'full' but that is not loaded."
|
|
213
215
|
end
|
|
214
216
|
end
|
|
@@ -55,6 +55,7 @@ module Rumale
|
|
|
55
55
|
def fit(x, _y = nil)
|
|
56
56
|
x = check_convert_sample_array(x)
|
|
57
57
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
58
|
+
|
|
58
59
|
fit_predict(x)
|
|
59
60
|
self
|
|
60
61
|
end
|
|
@@ -67,6 +68,7 @@ module Rumale
|
|
|
67
68
|
def fit_predict(x)
|
|
68
69
|
x = check_convert_sample_array(x)
|
|
69
70
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
71
|
+
|
|
70
72
|
distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
|
|
71
73
|
@labels = partial_fit(distance_mat)
|
|
72
74
|
end
|
|
@@ -106,6 +106,7 @@ module Rumale
|
|
|
106
106
|
rand_id = [*0...n_samples].sample(@params[:n_clusters], random: sub_rng)
|
|
107
107
|
@cluster_centers = x[rand_id, true].dup
|
|
108
108
|
return unless @params[:init] == 'k-means++'
|
|
109
|
+
|
|
109
110
|
# k-means++ initialize
|
|
110
111
|
(1...@params[:n_clusters]).each do |n|
|
|
111
112
|
distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers[0...n, true])
|
|
@@ -64,6 +64,7 @@ module Rumale
|
|
|
64
64
|
def fit(x, _not_used = nil)
|
|
65
65
|
x = check_convert_sample_array(x)
|
|
66
66
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
67
|
+
|
|
67
68
|
# initialize some varibales.
|
|
68
69
|
distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
|
|
69
70
|
init_cluster_centers(distance_mat)
|
|
@@ -76,6 +77,7 @@ module Rumale
|
|
|
76
77
|
end
|
|
77
78
|
new_error = distance_mat[true, @medoid_ids].mean
|
|
78
79
|
break if (error - new_error).abs <= @params[:tol]
|
|
80
|
+
|
|
79
81
|
error = new_error
|
|
80
82
|
end
|
|
81
83
|
@cluster_centers = x[@medoid_ids, true].dup if @params[:metric] == 'euclidean'
|
|
@@ -93,6 +95,7 @@ module Rumale
|
|
|
93
95
|
if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
|
|
94
96
|
raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
|
|
95
97
|
end
|
|
98
|
+
|
|
96
99
|
assign_cluster(distance_mat)
|
|
97
100
|
end
|
|
98
101
|
|
|
@@ -123,6 +126,7 @@ module Rumale
|
|
|
123
126
|
sub_rng = @rng.dup
|
|
124
127
|
@medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
|
|
125
128
|
return unless @params[:init] == 'k-means++'
|
|
129
|
+
|
|
126
130
|
# k-means++ initialize
|
|
127
131
|
(1...@params[:n_clusters]).each do |n|
|
|
128
132
|
distances = distance_mat[true, @medoid_ids[0...n]]
|
|
@@ -71,6 +71,7 @@ module Rumale
|
|
|
71
71
|
def fit(x, _y = nil)
|
|
72
72
|
x = check_convert_sample_array(x)
|
|
73
73
|
raise ArgumentError, 'Expect the input affinity matrix to be square.' if @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
74
|
+
|
|
74
75
|
fit_predict(x)
|
|
75
76
|
self
|
|
76
77
|
end
|
|
@@ -107,6 +108,7 @@ module Rumale
|
|
|
107
108
|
new_embedded_line /= new_embedded_line.abs.sum
|
|
108
109
|
new_error = (new_embedded_line - embedded_line).abs
|
|
109
110
|
break if (new_error - error).abs.max <= tol
|
|
111
|
+
|
|
110
112
|
embedded_line = new_embedded_line
|
|
111
113
|
error = new_error
|
|
112
114
|
end
|
|
@@ -54,6 +54,7 @@ module Rumale
|
|
|
54
54
|
def fit(x, _y = nil)
|
|
55
55
|
x = check_convert_sample_array(x)
|
|
56
56
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
57
|
+
|
|
57
58
|
fit_predict(x)
|
|
58
59
|
self
|
|
59
60
|
end
|
|
@@ -66,6 +67,7 @@ module Rumale
|
|
|
66
67
|
def fit_predict(x)
|
|
67
68
|
x = check_convert_sample_array(x)
|
|
68
69
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
70
|
+
|
|
69
71
|
distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
|
|
70
72
|
@labels = partial_fit(distance_mat)
|
|
71
73
|
end
|
data/lib/rumale/dataset.rb
CHANGED
|
@@ -65,6 +65,7 @@ module Rumale
|
|
|
65
65
|
Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
|
|
66
66
|
raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
|
|
67
67
|
raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1
|
|
68
|
+
|
|
68
69
|
# initialize some variables.
|
|
69
70
|
rs = random_seed
|
|
70
71
|
rs ||= srand
|
|
@@ -101,6 +102,7 @@ module Rumale
|
|
|
101
102
|
Rumale::Validation.check_params_boolean(shuffle: shuffle)
|
|
102
103
|
Rumale::Validation.check_params_numeric_or_nil(noise: noise, random_seed: random_seed)
|
|
103
104
|
raise ArgumentError, 'The number of samples must be more than 2.' if n_samples <= 1
|
|
105
|
+
|
|
104
106
|
# initialize some variables.
|
|
105
107
|
rs = random_seed
|
|
106
108
|
rs ||= srand
|
|
@@ -90,9 +90,11 @@ module Rumale
|
|
|
90
90
|
@components = (sqrt_noise_variance.diag.dot(u) * scaler).transpose.dup
|
|
91
91
|
@noise_variance = Numo::DFloat.maximum(sample_vars - @components.transpose.dot(@components).diagonal, 1e-12)
|
|
92
92
|
next if @params[:tol].nil?
|
|
93
|
+
|
|
93
94
|
new_loglike = log_likelihood(cov_mat, @components, @noise_variance)
|
|
94
95
|
@loglike.push(new_loglike)
|
|
95
96
|
break if (old_loglike - new_loglike).abs <= @params[:tol]
|
|
97
|
+
|
|
96
98
|
old_loglike = new_loglike
|
|
97
99
|
end
|
|
98
100
|
|
|
@@ -87,6 +87,7 @@ module Rumale
|
|
|
87
87
|
@params[:max_iter].times do
|
|
88
88
|
updated = orthogonalize(covariance_mat.dot(comp_vec))
|
|
89
89
|
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
|
|
90
|
+
|
|
90
91
|
comp_vec = updated
|
|
91
92
|
end
|
|
92
93
|
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
|
|
@@ -105,6 +105,7 @@ module Rumale
|
|
|
105
105
|
# Fit classfier.
|
|
106
106
|
ids = Rumale::Utils.choice_ids(n_samples, observation_weights, sub_rng)
|
|
107
107
|
break if y[ids].to_a.uniq.size != n_classes
|
|
108
|
+
|
|
108
109
|
tree = Tree::DecisionTreeClassifier.new(
|
|
109
110
|
criterion: @params[:criterion], max_depth: @params[:max_depth],
|
|
110
111
|
max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
|
|
@@ -120,12 +121,14 @@ module Rumale
|
|
|
120
121
|
@estimators.push(tree)
|
|
121
122
|
@feature_importances += tree.feature_importances
|
|
122
123
|
break if error.zero?
|
|
124
|
+
|
|
123
125
|
# Update observation weights.
|
|
124
126
|
log_proba = Numo::NMath.log(proba)
|
|
125
127
|
observation_weights *= Numo::NMath.exp(-1.0 * (n_classes - 1).fdiv(n_classes) * (y_codes * log_proba).sum(1))
|
|
126
128
|
observation_weights = observation_weights.clip(1.0e-15, nil)
|
|
127
129
|
sum_observation_weights = observation_weights.sum
|
|
128
130
|
break if sum_observation_weights.zero?
|
|
131
|
+
|
|
129
132
|
observation_weights /= sum_observation_weights
|
|
130
133
|
end
|
|
131
134
|
@feature_importances /= @feature_importances.sum
|
|
@@ -93,6 +93,7 @@ module Rumale
|
|
|
93
93
|
check_sample_tvalue_size(x, y)
|
|
94
94
|
# Check target values
|
|
95
95
|
raise ArgumentError, 'Expect target value vector to be 1-D arrray' unless y.shape.size == 1
|
|
96
|
+
|
|
96
97
|
# Initialize some variables.
|
|
97
98
|
n_samples, n_features = x.shape
|
|
98
99
|
@params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
|
|
@@ -117,6 +118,7 @@ module Rumale
|
|
|
117
118
|
abs_err = ((p - y) / y).abs
|
|
118
119
|
err = observation_weights[abs_err.gt(@params[:threshold])].sum
|
|
119
120
|
break if err <= 0.0
|
|
121
|
+
|
|
120
122
|
# Calculate weight.
|
|
121
123
|
beta = err**@params[:exponent]
|
|
122
124
|
weight = Math.log(1.fdiv(beta))
|
|
@@ -131,6 +133,7 @@ module Rumale
|
|
|
131
133
|
observation_weights = observation_weights.clip(1.0e-15, nil)
|
|
132
134
|
sum_observation_weights = observation_weights.sum
|
|
133
135
|
break if sum_observation_weights.zero?
|
|
136
|
+
|
|
134
137
|
observation_weights /= sum_observation_weights
|
|
135
138
|
end
|
|
136
139
|
@estimator_weights = Numo::DFloat.asarray(@estimator_weights)
|
|
@@ -86,7 +86,8 @@ module Rumale
|
|
|
86
86
|
weighted_recall = (Numo::DFloat.cast(recalls) * weights).sum
|
|
87
87
|
weighted_fscore = (Numo::DFloat.cast(fscores) * weights).sum
|
|
88
88
|
# output reults.
|
|
89
|
-
target_name ||= classes
|
|
89
|
+
target_name ||= classes
|
|
90
|
+
target_name.map!(&:to_s)
|
|
90
91
|
if output_hash
|
|
91
92
|
res = {}
|
|
92
93
|
target_name.each_with_index do |label, n|
|
|
@@ -28,8 +28,10 @@ module Rumale
|
|
|
28
28
|
# calculate entropies.
|
|
29
29
|
class_entropy = entropy(y_true)
|
|
30
30
|
return 0.0 if class_entropy.zero?
|
|
31
|
+
|
|
31
32
|
cluster_entropy = entropy(y_pred)
|
|
32
33
|
return 0.0 if cluster_entropy.zero?
|
|
34
|
+
|
|
33
35
|
# calculate mutual information.
|
|
34
36
|
mi = MutualInformation.new
|
|
35
37
|
mi.score(y_true, y_pred) / Math.sqrt(class_entropy * cluster_entropy)
|
|
@@ -14,6 +14,7 @@ module Rumale
|
|
|
14
14
|
y_true.sort.to_a.uniq.map do |label|
|
|
15
15
|
target_positions = y_pred.eq(label)
|
|
16
16
|
next 0.0 if y_pred[target_positions].empty?
|
|
17
|
+
|
|
17
18
|
n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
|
|
18
19
|
n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
|
|
19
20
|
n_true_positives / (n_true_positives + n_false_positives)
|
|
@@ -25,6 +26,7 @@ module Rumale
|
|
|
25
26
|
y_true.sort.to_a.uniq.map do |label|
|
|
26
27
|
target_positions = y_true.eq(label)
|
|
27
28
|
next 0.0 if y_pred[target_positions].empty?
|
|
29
|
+
|
|
28
30
|
n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
|
|
29
31
|
n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
|
|
30
32
|
n_true_positives / (n_true_positives + n_false_negatives)
|
|
@@ -35,6 +37,7 @@ module Rumale
|
|
|
35
37
|
def f_score_each_class(y_true, y_pred)
|
|
36
38
|
precision_each_class(y_true, y_pred).zip(recall_each_class(y_true, y_pred)).map do |p, r|
|
|
37
39
|
next 0.0 if p.zero? && r.zero?
|
|
40
|
+
|
|
38
41
|
(2.0 * p * r) / (p + r)
|
|
39
42
|
end
|
|
40
43
|
end
|
|
@@ -44,6 +47,7 @@ module Rumale
|
|
|
44
47
|
evaluated_values = y_true.sort.to_a.uniq.map do |label|
|
|
45
48
|
target_positions = y_pred.eq(label)
|
|
46
49
|
next [0.0, 0.0] if y_pred[target_positions].empty?
|
|
50
|
+
|
|
47
51
|
n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
|
|
48
52
|
n_false_positives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
|
|
49
53
|
[n_true_positives, n_true_positives + n_false_positives]
|
|
@@ -57,6 +61,7 @@ module Rumale
|
|
|
57
61
|
evaluated_values = y_true.sort.to_a.uniq.map do |label|
|
|
58
62
|
target_positions = y_true.eq(label)
|
|
59
63
|
next 0.0 if y_pred[target_positions].empty?
|
|
64
|
+
|
|
60
65
|
n_true_positives = Numo::Int32.cast(y_true[target_positions].eq(y_pred[target_positions])).sum.to_f
|
|
61
66
|
n_false_negatives = Numo::Int32.cast(y_true[target_positions].ne(y_pred[target_positions])).sum.to_f
|
|
62
67
|
[n_true_positives, n_true_positives + n_false_negatives]
|
|
@@ -64,6 +64,7 @@ module Rumale
|
|
|
64
64
|
y_score = Numo::DFloat.cast(y_score) unless y_score.is_a?(Numo::DFloat)
|
|
65
65
|
raise ArgumentError, 'Expect y_true to be 1-D arrray.' unless y_true.shape[1].nil?
|
|
66
66
|
raise ArgumentError, 'Expect y_score to be 1-D arrray.' unless y_score.shape[1].nil?
|
|
67
|
+
|
|
67
68
|
labels = y_true.to_a.uniq
|
|
68
69
|
if pos_label.nil?
|
|
69
70
|
raise ArgumentError, 'y_true must be binary labels or pos_label must be specified if y_true is multi-label' unless labels.size == 2
|
|
@@ -96,8 +97,10 @@ module Rumale
|
|
|
96
97
|
y = Numo::NArray.asarray(y) unless y.is_a?(Numo::NArray)
|
|
97
98
|
raise ArgumentError, 'Expect x to be 1-D arrray.' unless x.shape[1].nil?
|
|
98
99
|
raise ArgumentError, 'Expect y to be 1-D arrray.' unless y.shape[1].nil?
|
|
100
|
+
|
|
99
101
|
n_samples = [x.shape[0], y.shape[0]].min
|
|
100
102
|
raise ArgumentError, 'At least two points are required to calculate area under curve.' if n_samples < 2
|
|
103
|
+
|
|
101
104
|
(0...n_samples).to_a.each_cons(2).map { |i, j| 0.5 * (x[i] - x[j]).abs * (y[i] + y[j]) }.reduce(&:+)
|
|
102
105
|
end
|
|
103
106
|
|
|
@@ -47,6 +47,7 @@ module Rumale
|
|
|
47
47
|
cls_pos = y.eq(labels[n])
|
|
48
48
|
sz_cluster = cls_pos.count
|
|
49
49
|
next unless sz_cluster > 1
|
|
50
|
+
|
|
50
51
|
cls_dist_mat = dist_mat[cls_pos, cls_pos].dup
|
|
51
52
|
cls_dist_mat[cls_dist_mat.diag_indices] = 0.0
|
|
52
53
|
intra_dists[cls_pos] = cls_dist_mat.sum(0) / (sz_cluster - 1)
|
|
@@ -57,6 +58,7 @@ module Rumale
|
|
|
57
58
|
cls_pos = y.eq(labels[m])
|
|
58
59
|
n_clusters.times do |n|
|
|
59
60
|
next if m == n
|
|
61
|
+
|
|
60
62
|
not_cls_pos = y.eq(labels[n])
|
|
61
63
|
inter_dists[cls_pos] = Numo::DFloat.minimum(
|
|
62
64
|
inter_dists[cls_pos], dist_mat[cls_pos, not_cls_pos].mean(1)
|
|
@@ -30,6 +30,7 @@ module Rumale
|
|
|
30
30
|
def initialize(reg_param: 1.0)
|
|
31
31
|
raise TypeError, 'Expect class of reg_param to be Float or Numo::DFloat' unless reg_param.is_a?(Float) || reg_param.is_a?(Numo::DFloat)
|
|
32
32
|
raise ArgumentError, 'Expect reg_param array to be 1-D arrray' if reg_param.is_a?(Numo::DFloat) && reg_param.shape.size != 1
|
|
33
|
+
|
|
33
34
|
@params = {}
|
|
34
35
|
@params[:reg_param] = reg_param
|
|
35
36
|
@weight_vec = nil
|
|
@@ -55,6 +56,7 @@ module Rumale
|
|
|
55
56
|
@weight_vec = Numo::Linalg.solve(reg_kernel_mat, y, driver: 'sym')
|
|
56
57
|
else
|
|
57
58
|
raise ArgumentError, 'Expect y and reg_param to have the same number of elements.' unless y.shape[1] == @params[:reg_param].shape[0]
|
|
59
|
+
|
|
58
60
|
n_outputs = y.shape[1]
|
|
59
61
|
@weight_vec = Numo::DFloat.zeros(n_samples, n_outputs)
|
|
60
62
|
n_outputs.times do |n|
|
data/lib/rumale/manifold/mds.rb
CHANGED
|
@@ -83,6 +83,7 @@ module Rumale
|
|
|
83
83
|
def fit(x, _not_used = nil)
|
|
84
84
|
x = check_convert_sample_array(x)
|
|
85
85
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
86
|
+
|
|
86
87
|
# initialize some varibales.
|
|
87
88
|
n_samples = x.shape[0]
|
|
88
89
|
hi_distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
|
|
@@ -142,6 +143,7 @@ module Rumale
|
|
|
142
143
|
def terminate?(old_stress, new_stress)
|
|
143
144
|
return false if @params[:tol].nil?
|
|
144
145
|
return false if old_stress.nil?
|
|
146
|
+
|
|
145
147
|
(old_stress - new_stress).abs <= @params[:tol]
|
|
146
148
|
end
|
|
147
149
|
|
data/lib/rumale/manifold/tsne.rb
CHANGED
|
@@ -89,6 +89,7 @@ module Rumale
|
|
|
89
89
|
def fit(x, _not_used = nil)
|
|
90
90
|
x = check_convert_sample_array(x)
|
|
91
91
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
92
|
+
|
|
92
93
|
# initialize some varibales.
|
|
93
94
|
@n_iter = 0
|
|
94
95
|
distance_mat = @params[:metric] == 'precomputed' ? x**2 : Rumale::PairwiseMetric.squared_error(x)
|
|
@@ -99,6 +100,7 @@ module Rumale
|
|
|
99
100
|
one_vec = Numo::DFloat.ones(x.shape[0]).expand_dims(1)
|
|
100
101
|
@params[:max_iter].times do |t|
|
|
101
102
|
break if terminate?(hi_prob_mat, lo_prob_mat)
|
|
103
|
+
|
|
102
104
|
a = hi_prob_mat * lo_prob_mat
|
|
103
105
|
b = lo_prob_mat * lo_prob_mat
|
|
104
106
|
y = (b.dot(one_vec) * y + (a - b).dot(y)) / a.dot(one_vec)
|
|
@@ -170,6 +172,7 @@ module Rumale
|
|
|
170
172
|
entropy, probs = gaussian_distributed_probability_vector(sample_id, distance_vec, beta)
|
|
171
173
|
diff_entropy = entropy - init_entropy
|
|
172
174
|
break if diff_entropy.abs <= 1e-5
|
|
175
|
+
|
|
173
176
|
if diff_entropy.positive?
|
|
174
177
|
betamin = beta
|
|
175
178
|
if betamax == Float::MAX
|
|
@@ -211,6 +214,7 @@ module Rumale
|
|
|
211
214
|
|
|
212
215
|
def terminate?(p, q)
|
|
213
216
|
return false if @params[:tol].nil?
|
|
217
|
+
|
|
214
218
|
cost(p, q) <= @params[:tol]
|
|
215
219
|
end
|
|
216
220
|
end
|
|
@@ -126,6 +126,7 @@ module Rumale
|
|
|
126
126
|
res = prm
|
|
127
127
|
puts "[NeighbourhoodComponentAnalysis] The value of objective function after #{res[:n_iter]} epochs: #{x.shape[0] - res[:fnc]}" if @params[:verbose]
|
|
128
128
|
break if (fold - res[:fnc]).abs <= @params[:tol] && (dold - res[:jcb]).abs <= @params[:tol]
|
|
129
|
+
|
|
129
130
|
fold = res[:fnc]
|
|
130
131
|
dold = res[:jcb]
|
|
131
132
|
end
|
|
@@ -156,6 +156,7 @@ module Rumale
|
|
|
156
156
|
|
|
157
157
|
def valid_param_grid(grid)
|
|
158
158
|
raise TypeError, 'Expect class of param_grid to be Hash or Array' unless grid.is_a?(Hash) || grid.is_a?(Array)
|
|
159
|
+
|
|
159
160
|
grid = [grid] if grid.is_a?(Hash)
|
|
160
161
|
grid.each do |h|
|
|
161
162
|
raise TypeError, 'Expect class of elements in param_grid to be Hash' unless h.is_a?(Hash)
|
|
@@ -67,6 +67,7 @@ module Rumale
|
|
|
67
67
|
y = check_convert_label_array(y)
|
|
68
68
|
check_sample_label_size(x, y)
|
|
69
69
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
70
|
+
|
|
70
71
|
@prototypes = if @params[:metric] == 'euclidean'
|
|
71
72
|
if @params[:algorithm] == 'vptree'
|
|
72
73
|
VPTree.new(x)
|
|
@@ -61,6 +61,7 @@ module Rumale
|
|
|
61
61
|
y = check_convert_tvalue_array(y)
|
|
62
62
|
check_sample_tvalue_size(x, y)
|
|
63
63
|
raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
|
|
64
|
+
|
|
64
65
|
@prototypes = if @params[:metric] == 'euclidean'
|
|
65
66
|
if @params[:algorithm] == 'vptree'
|
|
66
67
|
VPTree.new(x)
|
|
@@ -82,6 +83,7 @@ module Rumale
|
|
|
82
83
|
if @params[:metric] == 'precomputed' && x.shape[1] != @values.shape[0]
|
|
83
84
|
raise ArgumentError, 'Expect the size input matrix to be n_testing_samples-by-n_training_samples.'
|
|
84
85
|
end
|
|
86
|
+
|
|
85
87
|
# Initialize some variables.
|
|
86
88
|
n_samples = x.shape[0]
|
|
87
89
|
n_prototypes, n_outputs = @values.shape
|
|
@@ -119,6 +119,7 @@ module Rumale
|
|
|
119
119
|
@steps.keys.reverse_each do |name|
|
|
120
120
|
transformer = @steps[name]
|
|
121
121
|
next if transformer.nil?
|
|
122
|
+
|
|
122
123
|
itrans_z = transformer.inverse_transform(itrans_z)
|
|
123
124
|
end
|
|
124
125
|
itrans_z
|
|
@@ -140,6 +141,7 @@ module Rumale
|
|
|
140
141
|
steps.keys[0...-1].each do |name|
|
|
141
142
|
transformer = steps[name]
|
|
142
143
|
next if transformer.nil? || %i[fit transform].all? { |m| transformer.class.method_defined?(m) }
|
|
144
|
+
|
|
143
145
|
raise TypeError,
|
|
144
146
|
'Class of intermediate step in pipeline should be implemented fit and transform methods: ' \
|
|
145
147
|
"#{name} => #{transformer.class}"
|
|
@@ -158,6 +160,7 @@ module Rumale
|
|
|
158
160
|
@steps.keys[0...-1].each do |name|
|
|
159
161
|
transformer = @steps[name]
|
|
160
162
|
next if transformer.nil?
|
|
163
|
+
|
|
161
164
|
transformer.fit(trans_x, y) if fit
|
|
162
165
|
trans_x = transformer.transform(trans_x)
|
|
163
166
|
end
|
|
@@ -75,6 +75,7 @@ module Rumale
|
|
|
75
75
|
# Calculate gradients for loss function.
|
|
76
76
|
loss_grad = loss_gradient(sub_x, ex_sub_x, sub_y, factor_mat, weight_vec)
|
|
77
77
|
next if loss_grad.ne(0.0).count.zero?
|
|
78
|
+
|
|
78
79
|
# Update each parameter.
|
|
79
80
|
weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_sub_x, weight_vec))
|
|
80
81
|
@params[:n_factors].times do |n|
|
|
@@ -51,6 +51,7 @@ module Rumale
|
|
|
51
51
|
def fit(x, _y = nil)
|
|
52
52
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
|
53
53
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
|
54
|
+
|
|
54
55
|
@n_values = x.max(0) + 1
|
|
55
56
|
@feature_indices = Numo::Int32.hstack([[0], @n_values]).cumsum
|
|
56
57
|
@active_features = encode(x, @feature_indices).sum(0).ne(0).where
|
|
@@ -67,6 +68,7 @@ module Rumale
|
|
|
67
68
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
|
68
69
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
|
69
70
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
|
71
|
+
|
|
70
72
|
fit(x).transform(x)
|
|
71
73
|
end
|
|
72
74
|
|
|
@@ -77,6 +79,7 @@ module Rumale
|
|
|
77
79
|
def transform(x)
|
|
78
80
|
x = Numo::Int32.cast(x) unless x.is_a?(Numo::Int32)
|
|
79
81
|
raise ArgumentError, 'Expected the input samples only consists of non-negative integer values.' if x.lt(0).any?
|
|
82
|
+
|
|
80
83
|
codes = encode(x, @feature_indices)
|
|
81
84
|
codes[true, @active_features].dup
|
|
82
85
|
end
|
|
@@ -51,6 +51,7 @@ module Rumale
|
|
|
51
51
|
def fit(x, _y = nil)
|
|
52
52
|
raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
|
|
53
53
|
raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
|
|
54
|
+
|
|
54
55
|
n_features = x.shape[1]
|
|
55
56
|
@categories = Array.new(n_features) { |n| x[true, n].to_a.uniq.sort }
|
|
56
57
|
self
|
|
@@ -65,6 +66,7 @@ module Rumale
|
|
|
65
66
|
def fit_transform(x, _y = nil)
|
|
66
67
|
raise TypeError, 'Expect class of sample matrix to be Numo::NArray' unless x.is_a?(Numo::NArray)
|
|
67
68
|
raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
|
|
69
|
+
|
|
68
70
|
fit(x).transform(x)
|
|
69
71
|
end
|
|
70
72
|
|
|
@@ -41,6 +41,7 @@ module Rumale
|
|
|
41
41
|
def initialize(degree: 2)
|
|
42
42
|
check_params_numeric(degree: degree)
|
|
43
43
|
raise ArgumentError, 'Expect the value of degree parameter greater than or eqaul to 1.' if degree < 1
|
|
44
|
+
|
|
44
45
|
@params = {}
|
|
45
46
|
@params[:degree] = degree
|
|
46
47
|
@n_output_features = nil
|
|
@@ -47,6 +47,7 @@ module Rumale
|
|
|
47
47
|
hess_mat = hessian_matrix(probs, df, sigma)
|
|
48
48
|
break if grad_vec.abs.lt(1e-5).count == 2
|
|
49
49
|
break if (old_grad_vec - grad_vec).abs.sum < 1e-5
|
|
50
|
+
|
|
50
51
|
old_grad_vec = grad_vec
|
|
51
52
|
# Calculate Newton directions.
|
|
52
53
|
dirs_vec = directions(grad_vec, hess_mat)
|
|
@@ -58,6 +59,7 @@ module Rumale
|
|
|
58
59
|
new_beta = beta + stepsize * dirs_vec[1]
|
|
59
60
|
new_err = error_function(target_probs, df, new_alpha, new_beta)
|
|
60
61
|
next unless new_err < err + 0.0001 * stepsize * grad_dir
|
|
62
|
+
|
|
61
63
|
alpha = new_alpha
|
|
62
64
|
beta = new_beta
|
|
63
65
|
err = new_err
|
|
@@ -53,6 +53,7 @@ module Rumale
|
|
|
53
53
|
return node.leaf_id if node.leaf
|
|
54
54
|
return apply_at_node(node.left, sample) if node.right.nil?
|
|
55
55
|
return apply_at_node(node.right, sample) if node.left.nil?
|
|
56
|
+
|
|
56
57
|
if sample[node.feature_id] <= node.threshold
|
|
57
58
|
apply_at_node(node.left, sample)
|
|
58
59
|
else
|
|
@@ -138,6 +139,7 @@ module Rumale
|
|
|
138
139
|
def eval_importance_at_node(node)
|
|
139
140
|
return nil if node.leaf
|
|
140
141
|
return nil if node.left.nil? || node.right.nil?
|
|
142
|
+
|
|
141
143
|
gain = node.n_samples * node.impurity -
|
|
142
144
|
node.left.n_samples * node.left.impurity -
|
|
143
145
|
node.right.n_samples * node.right.impurity
|
|
@@ -110,6 +110,7 @@ module Rumale
|
|
|
110
110
|
return node.probs if node.leaf
|
|
111
111
|
return predict_proba_at_node(node.left, sample) if node.right.nil?
|
|
112
112
|
return predict_proba_at_node(node.right, sample) if node.left.nil?
|
|
113
|
+
|
|
113
114
|
if sample[node.feature_id] <= node.threshold
|
|
114
115
|
predict_proba_at_node(node.left, sample)
|
|
115
116
|
else
|
|
@@ -123,6 +123,7 @@ module Rumale
|
|
|
123
123
|
return node.leaf_id if node.leaf
|
|
124
124
|
return apply_at_node(node.left, sample) if node.right.nil?
|
|
125
125
|
return apply_at_node(node.right, sample) if node.left.nil?
|
|
126
|
+
|
|
126
127
|
if sample[node.feature_id] <= node.threshold
|
|
127
128
|
apply_at_node(node.left, sample)
|
|
128
129
|
else
|
data/lib/rumale/utils.rb
CHANGED
data/lib/rumale/validation.rb
CHANGED
|
@@ -9,6 +9,7 @@ module Rumale
|
|
|
9
9
|
def check_convert_sample_array(x)
|
|
10
10
|
x = Numo::DFloat.cast(x) unless x.is_a?(Numo::DFloat)
|
|
11
11
|
raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.ndim == 2
|
|
12
|
+
|
|
12
13
|
x
|
|
13
14
|
end
|
|
14
15
|
|
|
@@ -16,6 +17,7 @@ module Rumale
|
|
|
16
17
|
def check_convert_label_array(y)
|
|
17
18
|
y = Numo::Int32.cast(y) unless y.is_a?(Numo::Int32)
|
|
18
19
|
raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.ndim == 1
|
|
20
|
+
|
|
19
21
|
y
|
|
20
22
|
end
|
|
21
23
|
|
|
@@ -29,6 +31,7 @@ module Rumale
|
|
|
29
31
|
def check_sample_array(x)
|
|
30
32
|
raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
|
|
31
33
|
raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.ndim == 2
|
|
34
|
+
|
|
32
35
|
nil
|
|
33
36
|
end
|
|
34
37
|
|
|
@@ -36,24 +39,28 @@ module Rumale
|
|
|
36
39
|
def check_label_array(y)
|
|
37
40
|
raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
|
|
38
41
|
raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.ndim == 1
|
|
42
|
+
|
|
39
43
|
nil
|
|
40
44
|
end
|
|
41
45
|
|
|
42
46
|
# @!visibility private
|
|
43
47
|
def check_tvalue_array(y)
|
|
44
48
|
raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
|
|
49
|
+
|
|
45
50
|
nil
|
|
46
51
|
end
|
|
47
52
|
|
|
48
53
|
# @!visibility private
|
|
49
54
|
def check_sample_label_size(x, y)
|
|
50
55
|
raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
|
|
56
|
+
|
|
51
57
|
nil
|
|
52
58
|
end
|
|
53
59
|
|
|
54
60
|
# @!visibility private
|
|
55
61
|
def check_sample_tvalue_size(x, y)
|
|
56
62
|
raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
|
|
63
|
+
|
|
57
64
|
nil
|
|
58
65
|
end
|
|
59
66
|
|
data/lib/rumale/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rumale
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.18.6
|
|
4
|
+
version: 0.18.7
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- yoshoku
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2020-05-
|
|
11
|
+
date: 2020-05-16 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: numo-narray
|