svmkit 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/lib/svmkit/base/base_estimator.rb +1 -1
- data/lib/svmkit/base/classifier.rb +2 -0
- data/lib/svmkit/ensemble/random_forest_classifier.rb +11 -1
- data/lib/svmkit/evaluation_measure/accuracy.rb +3 -0
- data/lib/svmkit/evaluation_measure/f_score.rb +4 -0
- data/lib/svmkit/evaluation_measure/precision.rb +4 -0
- data/lib/svmkit/evaluation_measure/recall.rb +4 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +11 -1
- data/lib/svmkit/kernel_machine/kernel_svc.rb +12 -11
- data/lib/svmkit/linear_model/logistic_regression.rb +20 -6
- data/lib/svmkit/linear_model/svc.rb +12 -0
- data/lib/svmkit/model_selection/cross_validation.rb +6 -0
- data/lib/svmkit/model_selection/k_fold.rb +6 -4
- data/lib/svmkit/model_selection/stratified_k_fold.rb +6 -0
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +5 -0
- data/lib/svmkit/naive_bayes/naive_bayes.rb +14 -0
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +5 -0
- data/lib/svmkit/pairwise_metric.rb +15 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +12 -0
- data/lib/svmkit/preprocessing/l2_normalizer.rb +2 -0
- data/lib/svmkit/preprocessing/min_max_scaler.rb +4 -0
- data/lib/svmkit/preprocessing/standard_scaler.rb +3 -0
- data/lib/svmkit/tree/decision_tree_classifier.rb +16 -3
- data/lib/svmkit/validation.rb +55 -0
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bbc648db53b4285bd15ae00e2464c4376d027893
|
4
|
+
data.tar.gz: 28d9db47ae3053031f1643329cf02cd4e7d9c135
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c504f010a70fc7a31afa4471096092adac9ff44de979d42d7277c63d737d517981e2ac6d7481ad42dc1c864f2a7756d39cd1d7697d2d4b1bca150d0a4eca3b8e
|
7
|
+
data.tar.gz: 8d89dc525ed37626a2d97e6fe3bebdacd4ec2945df285d8275cd0bd5df62c5ebc897dca67b91157df0bfc460fa987ed4098654ce6d164cb05ec46de4c6fe27af
|
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.2.8
|
2
|
+
- Fixed a bug in the gradient calculation of Logistic Regression.
|
3
|
+
- Changed the accessor of the params of estimators to read-only.
|
4
|
+
- Added parameter validation.
|
5
|
+
|
1
6
|
# 0.2.7
|
2
7
|
- Added support for multiclass classification to LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier
|
3
8
|
|
@@ -20,6 +20,8 @@ module SVMKit
|
|
20
20
|
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
21
21
|
# @return [Float] Mean accuracy
|
22
22
|
def score(x, y)
|
23
|
+
SVMKit::Validation.check_sample_array(x)
|
24
|
+
SVMKit::Validation.check_label_array(y)
|
23
25
|
evaluator = SVMKit::EvaluationMeasure::Accuracy.new
|
24
26
|
evaluator.score(y, predict(x))
|
25
27
|
end
|
@@ -50,6 +50,11 @@ module SVMKit
|
|
50
50
|
# It is used to randomly determine the order of features when deciding the splitting point.
|
51
51
|
def initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
|
52
52
|
max_features: nil, random_seed: nil)
|
53
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
54
|
+
max_features: max_features, random_seed: random_seed)
|
55
|
+
SVMKit::Validation.check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
|
56
|
+
SVMKit::Validation.check_params_string(criterion: criterion)
|
57
|
+
|
53
58
|
@params = {}
|
54
59
|
@params[:n_estimators] = n_estimators
|
55
60
|
@params[:criterion] = criterion
|
@@ -59,10 +64,10 @@ module SVMKit
|
|
59
64
|
@params[:max_features] = max_features
|
60
65
|
@params[:random_seed] = random_seed
|
61
66
|
@params[:random_seed] ||= srand
|
62
|
-
@rng = Random.new(@params[:random_seed])
|
63
67
|
@estimators = nil
|
64
68
|
@classes = nil
|
65
69
|
@feature_importances = nil
|
70
|
+
@rng = Random.new(@params[:random_seed])
|
66
71
|
end
|
67
72
|
|
68
73
|
# Fit the model with given training data.
|
@@ -71,6 +76,8 @@ module SVMKit
|
|
71
76
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
72
77
|
# @return [RandomForestClassifier] The learned classifier itself.
|
73
78
|
def fit(x, y)
|
79
|
+
SVMKit::Validation.check_sample_array(x)
|
80
|
+
SVMKit::Validation.check_label_array(y)
|
74
81
|
# Initialize some variables.
|
75
82
|
n_samples, n_features = x.shape
|
76
83
|
@params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
|
@@ -98,6 +105,7 @@ module SVMKit
|
|
98
105
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
99
106
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
100
107
|
def predict(x)
|
108
|
+
SVMKit::Validation.check_sample_array(x)
|
101
109
|
n_samples, = x.shape
|
102
110
|
n_classes = @classes.size
|
103
111
|
classes_arr = @classes.to_a
|
@@ -117,6 +125,7 @@ module SVMKit
|
|
117
125
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
118
126
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
119
127
|
def predict_proba(x)
|
128
|
+
SVMKit::Validation.check_sample_array(x)
|
120
129
|
n_samples, = x.shape
|
121
130
|
n_classes = @classes.size
|
122
131
|
classes_arr = @classes.to_a
|
@@ -136,6 +145,7 @@ module SVMKit
|
|
136
145
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
137
146
|
# @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
|
138
147
|
def apply(x)
|
148
|
+
SVMKit::Validation.check_sample_array(x)
|
139
149
|
Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
|
140
150
|
end
|
141
151
|
|
@@ -19,6 +19,9 @@ module SVMKit
|
|
19
19
|
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
20
20
|
# @return [Float] Mean accuracy
|
21
21
|
def score(y_true, y_pred)
|
22
|
+
SVMKit::Validation.check_label_array(y_true)
|
23
|
+
SVMKit::Validation.check_label_array(y_pred)
|
24
|
+
|
22
25
|
(y_true.to_a.map.with_index { |label, n| label == y_pred[n] ? 1 : 0 }).inject(:+) / y_true.size.to_f
|
23
26
|
end
|
24
27
|
end
|
@@ -23,6 +23,7 @@ module SVMKit
|
|
23
23
|
#
|
24
24
|
# @param average [String] The average type ('binary', 'micro', 'macro')
|
25
25
|
def initialize(average: 'binary')
|
26
|
+
SVMKit::Validation.check_params_string(average: average)
|
26
27
|
@average = average
|
27
28
|
end
|
28
29
|
|
@@ -32,6 +33,9 @@ module SVMKit
|
|
32
33
|
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
33
34
|
# @return [Float] Average F1-score
|
34
35
|
def score(y_true, y_pred)
|
36
|
+
SVMKit::Validation.check_label_array(y_true)
|
37
|
+
SVMKit::Validation.check_label_array(y_pred)
|
38
|
+
|
35
39
|
case @average
|
36
40
|
when 'binary'
|
37
41
|
f_score_each_class(y_true, y_pred).last
|
@@ -23,6 +23,7 @@ module SVMKit
|
|
23
23
|
#
|
24
24
|
# @param average [String] The average type ('binary', 'micro', 'macro')
|
25
25
|
def initialize(average: 'binary')
|
26
|
+
SVMKit::Validation.check_params_string(average: average)
|
26
27
|
@average = average
|
27
28
|
end
|
28
29
|
|
@@ -32,6 +33,9 @@ module SVMKit
|
|
32
33
|
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
33
34
|
# @return [Float] Average precision
|
34
35
|
def score(y_true, y_pred)
|
36
|
+
SVMKit::Validation.check_label_array(y_true)
|
37
|
+
SVMKit::Validation.check_label_array(y_pred)
|
38
|
+
|
35
39
|
case @average
|
36
40
|
when 'binary'
|
37
41
|
precision_each_class(y_true, y_pred).last
|
@@ -23,6 +23,7 @@ module SVMKit
|
|
23
23
|
#
|
24
24
|
# @param average [String] The average type ('binary', 'micro', 'macro')
|
25
25
|
def initialize(average: 'binary')
|
26
|
+
SVMKit::Validation.check_params_string(average: average)
|
26
27
|
@average = average
|
27
28
|
end
|
28
29
|
|
@@ -32,6 +33,9 @@ module SVMKit
|
|
32
33
|
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
|
33
34
|
# @return [Float] Average recall
|
34
35
|
def score(y_true, y_pred)
|
36
|
+
SVMKit::Validation.check_label_array(y_true)
|
37
|
+
SVMKit::Validation.check_label_array(y_pred)
|
38
|
+
|
35
39
|
case @average
|
36
40
|
when 'binary'
|
37
41
|
recall_each_class(y_true, y_pred).last
|
@@ -37,14 +37,18 @@ module SVMKit
|
|
37
37
|
# @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
|
38
38
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
39
39
|
def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
|
40
|
+
SVMKit::Validation.check_params_float(gamma: gamma)
|
41
|
+
SVMKit::Validation.check_params_integer(n_components: n_components)
|
42
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
43
|
+
|
40
44
|
@params = {}
|
41
45
|
@params[:gamma] = gamma
|
42
46
|
@params[:n_components] = n_components
|
43
47
|
@params[:random_seed] = random_seed
|
44
48
|
@params[:random_seed] ||= srand
|
45
|
-
@rng = Random.new(@params[:random_seed])
|
46
49
|
@random_mat = nil
|
47
50
|
@random_vec = nil
|
51
|
+
@rng = Random.new(@params[:random_seed])
|
48
52
|
end
|
49
53
|
|
50
54
|
# Fit the model with given training data.
|
@@ -55,6 +59,8 @@ module SVMKit
|
|
55
59
|
# This method uses only the number of features of the data.
|
56
60
|
# @return [RBF] The learned transformer itself.
|
57
61
|
def fit(x, _y = nil)
|
62
|
+
SVMKit::Validation.check_sample_array(x)
|
63
|
+
|
58
64
|
n_features = x.shape[1]
|
59
65
|
@params[:n_components] = 2 * n_features if @params[:n_components] <= 0
|
60
66
|
@random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
|
@@ -72,6 +78,8 @@ module SVMKit
|
|
72
78
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
73
79
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
74
80
|
def fit_transform(x, _y = nil)
|
81
|
+
SVMKit::Validation.check_sample_array(x)
|
82
|
+
|
75
83
|
fit(x).transform(x)
|
76
84
|
end
|
77
85
|
|
@@ -82,6 +90,8 @@ module SVMKit
|
|
82
90
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
83
91
|
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
84
92
|
def transform(x)
|
93
|
+
SVMKit::Validation.check_sample_array(x)
|
94
|
+
|
85
95
|
n_samples, = x.shape
|
86
96
|
projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
|
87
97
|
Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
|
@@ -42,13 +42,17 @@ module SVMKit
|
|
42
42
|
# @param max_iter [Integer] The maximum number of iterations.
|
43
43
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
44
44
|
def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
|
45
|
+
SVMKit::Validation.check_params_float(reg_param: reg_param)
|
46
|
+
SVMKit::Validation.check_params_integer(max_iter: max_iter)
|
47
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
48
|
+
|
45
49
|
@params = {}
|
46
50
|
@params[:reg_param] = reg_param
|
47
51
|
@params[:max_iter] = max_iter
|
48
52
|
@params[:random_seed] = random_seed
|
49
53
|
@params[:random_seed] ||= srand
|
50
54
|
@weight_vec = nil
|
51
|
-
@classes
|
55
|
+
@classes = nil
|
52
56
|
@rng = Random.new(@params[:random_seed])
|
53
57
|
end
|
54
58
|
|
@@ -59,6 +63,9 @@ module SVMKit
|
|
59
63
|
# @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
|
60
64
|
# @return [KernelSVC] The learned classifier itself.
|
61
65
|
def fit(x, y)
|
66
|
+
SVMKit::Validation.check_sample_array(x)
|
67
|
+
SVMKit::Validation.check_label_array(y)
|
68
|
+
|
62
69
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
63
70
|
n_classes = @classes.size
|
64
71
|
_n_samples, n_features = x.shape
|
@@ -84,6 +91,8 @@ module SVMKit
|
|
84
91
|
# The kernel matrix between testing samples and training samples to compute the scores.
|
85
92
|
# @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
|
86
93
|
def decision_function(x)
|
94
|
+
SVMKit::Validation.check_sample_array(x)
|
95
|
+
|
87
96
|
x.dot(@weight_vec.transpose)
|
88
97
|
end
|
89
98
|
|
@@ -93,6 +102,8 @@ module SVMKit
|
|
93
102
|
# The kernel matrix between testing samples and training samples to predict the labels.
|
94
103
|
# @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
|
95
104
|
def predict(x)
|
105
|
+
SVMKit::Validation.check_sample_array(x)
|
106
|
+
|
96
107
|
return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
|
97
108
|
|
98
109
|
n_samples, = x.shape
|
@@ -100,16 +111,6 @@ module SVMKit
|
|
100
111
|
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
101
112
|
end
|
102
113
|
|
103
|
-
# Claculate the mean accuracy of the given testing data.
|
104
|
-
#
|
105
|
-
# @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
|
106
|
-
# The kernel matrix between testing samples and training samples.
|
107
|
-
# @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
|
108
|
-
# @return [Float] Mean accuracy
|
109
|
-
def score(x, y)
|
110
|
-
super
|
111
|
-
end
|
112
|
-
|
113
114
|
# Dump marshal data.
|
114
115
|
# @return [Hash] The marshal data about KernelSVC.
|
115
116
|
def marshal_dump
|
@@ -50,6 +50,11 @@ module SVMKit
|
|
50
50
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
51
51
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
52
52
|
max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
|
53
|
+
SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
|
54
|
+
SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
55
|
+
SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
|
56
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
57
|
+
|
53
58
|
@params = {}
|
54
59
|
@params[:reg_param] = reg_param
|
55
60
|
@params[:fit_bias] = fit_bias
|
@@ -71,6 +76,9 @@ module SVMKit
|
|
71
76
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
72
77
|
# @return [LogisticRegression] The learned classifier itself.
|
73
78
|
def fit(x, y)
|
79
|
+
SVMKit::Validation.check_sample_array(x)
|
80
|
+
SVMKit::Validation.check_label_array(y)
|
81
|
+
|
74
82
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
75
83
|
n_classes = @classes.size
|
76
84
|
_n_samples, n_features = x.shape
|
@@ -79,14 +87,14 @@ module SVMKit
|
|
79
87
|
@weight_vec = Numo::DFloat.zeros(n_classes, n_features)
|
80
88
|
@bias_term = Numo::DFloat.zeros(n_classes)
|
81
89
|
n_classes.times do |n|
|
82
|
-
bin_y = Numo::Int32.cast(y.eq(@classes[n]))
|
90
|
+
bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
|
83
91
|
weight, bias = binary_fit(x, bin_y)
|
84
92
|
@weight_vec[n, true] = weight
|
85
93
|
@bias_term[n] = bias
|
86
94
|
end
|
87
95
|
else
|
88
96
|
negative_label = y.to_a.uniq.sort.first
|
89
|
-
bin_y = Numo::Int32.cast(y.ne(negative_label))
|
97
|
+
bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
|
90
98
|
@weight_vec, @bias_term = binary_fit(x, bin_y)
|
91
99
|
end
|
92
100
|
|
@@ -98,6 +106,8 @@ module SVMKit
|
|
98
106
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
99
107
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
100
108
|
def decision_function(x)
|
109
|
+
SVMKit::Validation.check_sample_array(x)
|
110
|
+
|
101
111
|
x.dot(@weight_vec.transpose) + @bias_term
|
102
112
|
end
|
103
113
|
|
@@ -106,10 +116,12 @@ module SVMKit
|
|
106
116
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
107
117
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
108
118
|
def predict(x)
|
109
|
-
|
119
|
+
SVMKit::Validation.check_sample_array(x)
|
120
|
+
|
121
|
+
return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
|
110
122
|
|
111
123
|
n_samples, = x.shape
|
112
|
-
decision_values =
|
124
|
+
decision_values = predict_proba(x)
|
113
125
|
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
114
126
|
end
|
115
127
|
|
@@ -118,6 +130,8 @@ module SVMKit
|
|
118
130
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
119
131
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
120
132
|
def predict_proba(x)
|
133
|
+
SVMKit::Validation.check_sample_array(x)
|
134
|
+
|
121
135
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
122
136
|
return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
|
123
137
|
|
@@ -165,9 +179,9 @@ module SVMKit
|
|
165
179
|
rand_ids.concat(subset_ids)
|
166
180
|
# update the weight vector.
|
167
181
|
df = samples[subset_ids, true].dot(weight_vec.transpose)
|
168
|
-
coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
|
182
|
+
coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
|
169
183
|
mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
|
170
|
-
weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec
|
184
|
+
weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
|
171
185
|
# scale the weight vector.
|
172
186
|
normalize_weight_vec(weight_vec) if @params[:normalize]
|
173
187
|
end
|
@@ -49,6 +49,11 @@ module SVMKit
|
|
49
49
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
50
50
|
def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
|
51
51
|
max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
|
52
|
+
SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
|
53
|
+
SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
|
54
|
+
SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
|
55
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
56
|
+
|
52
57
|
@params = {}
|
53
58
|
@params[:reg_param] = reg_param
|
54
59
|
@params[:fit_bias] = fit_bias
|
@@ -70,6 +75,9 @@ module SVMKit
|
|
70
75
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
71
76
|
# @return [SVC] The learned classifier itself.
|
72
77
|
def fit(x, y)
|
78
|
+
SVMKit::Validation.check_sample_array(x)
|
79
|
+
SVMKit::Validation.check_label_array(y)
|
80
|
+
|
73
81
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
74
82
|
n_classes = @classes.size
|
75
83
|
_n_samples, n_features = x.shape
|
@@ -97,6 +105,8 @@ module SVMKit
|
|
97
105
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
98
106
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
|
99
107
|
def decision_function(x)
|
108
|
+
SVMKit::Validation.check_sample_array(x)
|
109
|
+
|
100
110
|
x.dot(@weight_vec.transpose) + @bias_term
|
101
111
|
end
|
102
112
|
|
@@ -105,6 +115,8 @@ module SVMKit
|
|
105
115
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
106
116
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
107
117
|
def predict(x)
|
118
|
+
SVMKit::Validation.check_sample_array(x)
|
119
|
+
|
108
120
|
return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
|
109
121
|
|
110
122
|
n_samples, = x.shape
|
@@ -38,6 +38,10 @@ module SVMKit
|
|
38
38
|
# @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
|
39
39
|
# @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
|
40
40
|
def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
|
41
|
+
SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
|
42
|
+
SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
|
43
|
+
SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
|
44
|
+
SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
|
41
45
|
@estimator = estimator
|
42
46
|
@splitter = splitter
|
43
47
|
@evaluator = evaluator
|
@@ -56,6 +60,8 @@ module SVMKit
|
|
56
60
|
# * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
|
57
61
|
# the return_train_score is false.
|
58
62
|
def perform(x, y)
|
63
|
+
SVMKit::Validation.check_sample_array(x)
|
64
|
+
SVMKit::Validation.check_label_array(y)
|
59
65
|
# Initialize the report of cross validation.
|
60
66
|
report = { test_score: [], train_score: nil, fit_time: [] }
|
61
67
|
report[:train_score] = [] if @return_train_score
|
@@ -32,6 +32,10 @@ module SVMKit
|
|
32
32
|
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
33
33
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
34
34
|
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
35
|
+
SVMKit::Validation.check_params_integer(n_splits: n_splits)
|
36
|
+
SVMKit::Validation.check_params_boolean(shuffle: shuffle)
|
37
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
38
|
+
|
35
39
|
@n_splits = n_splits
|
36
40
|
@shuffle = shuffle
|
37
41
|
@random_seed = random_seed
|
@@ -43,11 +47,9 @@ module SVMKit
|
|
43
47
|
#
|
44
48
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features])
|
45
49
|
# The dataset to be used to generate data indices for K-fold cross validation.
|
46
|
-
# @param y [Numo::Int32] (shape: [n_samples])
|
47
|
-
# The labels to be used to generate data indices for stratified K-fold cross validation.
|
48
|
-
# This argument exists to unify the interface between the K-fold methods, it is not used in the method.
|
49
50
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
50
|
-
def split(x,
|
51
|
+
def split(x, _y = nil)
|
52
|
+
SVMKit::Validation.check_sample_array(x)
|
51
53
|
# Initialize and check some variables.
|
52
54
|
n_samples, = x.shape
|
53
55
|
unless @n_splits.between?(2, n_samples)
|
@@ -32,6 +32,10 @@ module SVMKit
|
|
32
32
|
# @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
|
33
33
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
34
34
|
def initialize(n_splits: 3, shuffle: false, random_seed: nil)
|
35
|
+
SVMKit::Validation.check_params_integer(n_splits: n_splits)
|
36
|
+
SVMKit::Validation.check_params_boolean(shuffle: shuffle)
|
37
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
38
|
+
|
35
39
|
@n_splits = n_splits
|
36
40
|
@shuffle = shuffle
|
37
41
|
@random_seed = random_seed
|
@@ -48,6 +52,8 @@ module SVMKit
|
|
48
52
|
# The labels to be used to generate data indices for stratified K-fold cross validation.
|
49
53
|
# @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
|
50
54
|
def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
|
55
|
+
SVMKit::Validation.check_sample_array(x)
|
56
|
+
SVMKit::Validation.check_label_array(y)
|
51
57
|
# Check the number of samples in each class.
|
52
58
|
unless valid_n_splits?(y)
|
53
59
|
raise ArgumentError,
|
@@ -33,6 +33,7 @@ module SVMKit
|
|
33
33
|
#
|
34
34
|
# @param estimator [Classifier] The (binary) classifier for constructing a multi-class classifier.
|
35
35
|
def initialize(estimator: nil)
|
36
|
+
SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
|
36
37
|
@params = {}
|
37
38
|
@params[:estimator] = estimator
|
38
39
|
@estimators = nil
|
@@ -45,6 +46,8 @@ module SVMKit
|
|
45
46
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
46
47
|
# @return [OneVsRestClassifier] The learned classifier itself.
|
47
48
|
def fit(x, y)
|
49
|
+
SVMKit::Validation.check_sample_array(x)
|
50
|
+
SVMKit::Validation.check_label_array(y)
|
48
51
|
y_arr = y.to_a
|
49
52
|
@classes = Numo::Int32.asarray(y_arr.uniq.sort)
|
50
53
|
@estimators = @classes.to_a.map do |label|
|
@@ -59,6 +62,7 @@ module SVMKit
|
|
59
62
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
60
63
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
61
64
|
def decision_function(x)
|
65
|
+
SVMKit::Validation.check_sample_array(x)
|
62
66
|
n_classes = @classes.size
|
63
67
|
Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
|
64
68
|
end
|
@@ -68,6 +72,7 @@ module SVMKit
|
|
68
72
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
69
73
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
70
74
|
def predict(x)
|
75
|
+
SVMKit::Validation.check_sample_array(x)
|
71
76
|
n_samples, = x.shape
|
72
77
|
decision_values = decision_function(x)
|
73
78
|
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
@@ -16,6 +16,7 @@ module SVMKit
|
|
16
16
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
17
17
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
18
18
|
def predict(x)
|
19
|
+
SVMKit::Validation.check_sample_array(x)
|
19
20
|
n_samples = x.shape.first
|
20
21
|
decision_values = decision_function(x)
|
21
22
|
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
@@ -26,6 +27,7 @@ module SVMKit
|
|
26
27
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
|
27
28
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
|
28
29
|
def predict_log_proba(x)
|
30
|
+
SVMKit::Validation.check_sample_array(x)
|
29
31
|
n_samples, = x.shape
|
30
32
|
log_likelihoods = decision_function(x)
|
31
33
|
log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
|
@@ -36,6 +38,7 @@ module SVMKit
|
|
36
38
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
37
39
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
38
40
|
def predict_proba(x)
|
41
|
+
SVMKit::Validation.check_sample_array(x)
|
39
42
|
Numo::NMath.exp(predict_log_proba(x)).abs
|
40
43
|
end
|
41
44
|
end
|
@@ -75,6 +78,8 @@ module SVMKit
|
|
75
78
|
# to be used for fitting the model.
|
76
79
|
# @return [GaussianNB] The learned classifier itself.
|
77
80
|
def fit(x, y)
|
81
|
+
SVMKit::Validation.check_sample_array(x)
|
82
|
+
SVMKit::Validation.check_label_array(y)
|
78
83
|
n_samples, = x.shape
|
79
84
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
80
85
|
@class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
|
@@ -88,6 +93,7 @@ module SVMKit
|
|
88
93
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
89
94
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
90
95
|
def decision_function(x)
|
96
|
+
SVMKit::Validation.check_sample_array(x)
|
91
97
|
n_classes = @classes.size
|
92
98
|
log_likelihoods = Array.new(n_classes) do |l|
|
93
99
|
Math.log(@class_priors[l]) - 0.5 * (
|
@@ -147,6 +153,7 @@ module SVMKit
|
|
147
153
|
#
|
148
154
|
# @param smoothing_param [Float] The Laplace smoothing parameter.
|
149
155
|
def initialize(smoothing_param: 1.0)
|
156
|
+
SVMKit::Validation.check_params_float(smoothing_param: smoothing_param)
|
150
157
|
@params = {}
|
151
158
|
@params[:smoothing_param] = smoothing_param
|
152
159
|
end
|
@@ -158,6 +165,8 @@ module SVMKit
|
|
158
165
|
# to be used for fitting the model.
|
159
166
|
# @return [MultinomialNB] The learned classifier itself.
|
160
167
|
def fit(x, y)
|
168
|
+
SVMKit::Validation.check_sample_array(x)
|
169
|
+
SVMKit::Validation.check_label_array(y)
|
161
170
|
n_samples, = x.shape
|
162
171
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
163
172
|
@class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
|
@@ -173,6 +182,7 @@ module SVMKit
|
|
173
182
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
174
183
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
175
184
|
def decision_function(x)
|
185
|
+
SVMKit::Validation.check_sample_array(x)
|
176
186
|
n_classes = @classes.size
|
177
187
|
bin_x = x.gt(0)
|
178
188
|
log_likelihoods = Array.new(n_classes) do |l|
|
@@ -230,6 +240,7 @@ module SVMKit
|
|
230
240
|
# @param smoothing_param [Float] The Laplace smoothing parameter.
|
231
241
|
# @param bin_threshold [Float] The threshold for binarizing of features.
|
232
242
|
def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
|
243
|
+
SVMKit::Validation.check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
|
233
244
|
@params = {}
|
234
245
|
@params[:smoothing_param] = smoothing_param
|
235
246
|
@params[:bin_threshold] = bin_threshold
|
@@ -242,6 +253,8 @@ module SVMKit
|
|
242
253
|
# to be used for fitting the model.
|
243
254
|
# @return [BernoulliNB] The learned classifier itself.
|
244
255
|
def fit(x, y)
|
256
|
+
SVMKit::Validation.check_sample_array(x)
|
257
|
+
SVMKit::Validation.check_label_array(y)
|
245
258
|
n_samples, = x.shape
|
246
259
|
bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
|
247
260
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
@@ -260,6 +273,7 @@ module SVMKit
|
|
260
273
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
261
274
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
262
275
|
def decision_function(x)
|
276
|
+
SVMKit::Validation.check_sample_array(x)
|
263
277
|
n_classes = @classes.size
|
264
278
|
bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
|
265
279
|
not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
|
@@ -35,6 +35,7 @@ module SVMKit
|
|
35
35
|
#
|
36
36
|
# @param n_neighbors [Integer] The number of neighbors.
|
37
37
|
def initialize(n_neighbors: 5)
|
38
|
+
SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
|
38
39
|
@params = {}
|
39
40
|
@params[:n_neighbors] = n_neighbors
|
40
41
|
@prototypes = nil
|
@@ -48,6 +49,8 @@ module SVMKit
|
|
48
49
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
49
50
|
# @return [KNeighborsClassifier] The learned classifier itself.
|
50
51
|
def fit(x, y)
|
52
|
+
SVMKit::Validation.check_sample_array(x)
|
53
|
+
SVMKit::Validation.check_label_array(y)
|
51
54
|
@prototypes = Numo::DFloat.asarray(x.to_a)
|
52
55
|
@labels = Numo::Int32.asarray(y.to_a)
|
53
56
|
@classes = Numo::Int32.asarray(y.to_a.uniq.sort)
|
@@ -59,6 +62,7 @@ module SVMKit
|
|
59
62
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
60
63
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
|
61
64
|
def decision_function(x)
|
65
|
+
SVMKit::Validation.check_sample_array(x)
|
62
66
|
distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
|
63
67
|
n_samples, n_prototypes = distance_matrix.shape
|
64
68
|
n_classes = @classes.size
|
@@ -76,6 +80,7 @@ module SVMKit
|
|
76
80
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
77
81
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
78
82
|
def predict(x)
|
83
|
+
SVMKit::Validation.check_sample_array(x)
|
79
84
|
n_samples = x.shape.first
|
80
85
|
decision_values = decision_function(x)
|
81
86
|
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
|
@@ -11,6 +11,8 @@ module SVMKit
|
|
11
11
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
12
12
|
def euclidean_distance(x, y = nil)
|
13
13
|
y = x if y.nil?
|
14
|
+
SVMKit::Validation.check_sample_array(x)
|
15
|
+
SVMKit::Validation.check_sample_array(y)
|
14
16
|
sum_x_vec = (x**2).sum(1)
|
15
17
|
sum_y_vec = (y**2).sum(1)
|
16
18
|
dot_xy_mat = x.dot(y.transpose)
|
@@ -29,6 +31,9 @@ module SVMKit
|
|
29
31
|
def rbf_kernel(x, y = nil, gamma = nil)
|
30
32
|
y = x if y.nil?
|
31
33
|
gamma ||= 1.0 / x.shape[1]
|
34
|
+
SVMKit::Validation.check_sample_array(x)
|
35
|
+
SVMKit::Validation.check_sample_array(y)
|
36
|
+
SVMKit::Validation.check_params_float(gamma: gamma)
|
32
37
|
distance_matrix = euclidean_distance(x, y)
|
33
38
|
Numo::NMath.exp((distance_matrix**2) * -gamma)
|
34
39
|
end
|
@@ -40,6 +45,8 @@ module SVMKit
|
|
40
45
|
# @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
|
41
46
|
def linear_kernel(x, y = nil)
|
42
47
|
y = x if y.nil?
|
48
|
+
SVMKit::Validation.check_sample_array(x)
|
49
|
+
SVMKit::Validation.check_sample_array(y)
|
43
50
|
x.dot(y.transpose)
|
44
51
|
end
|
45
52
|
|
@@ -54,6 +61,10 @@ module SVMKit
|
|
54
61
|
def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
|
55
62
|
y = x if y.nil?
|
56
63
|
gamma ||= 1.0 / x.shape[1]
|
64
|
+
SVMKit::Validation.check_sample_array(x)
|
65
|
+
SVMKit::Validation.check_sample_array(y)
|
66
|
+
SVMKit::Validation.check_params_float(gamma: gamma)
|
67
|
+
SVMKit::Validation.check_params_integer(degree: degree, coef: coef)
|
57
68
|
(x.dot(y.transpose) * gamma + coef)**degree
|
58
69
|
end
|
59
70
|
|
@@ -67,6 +78,10 @@ module SVMKit
|
|
67
78
|
def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
|
68
79
|
y = x if y.nil?
|
69
80
|
gamma ||= 1.0 / x.shape[1]
|
81
|
+
SVMKit::Validation.check_sample_array(x)
|
82
|
+
SVMKit::Validation.check_sample_array(y)
|
83
|
+
SVMKit::Validation.check_params_float(gamma: gamma)
|
84
|
+
SVMKit::Validation.check_params_integer(coef: coef)
|
70
85
|
Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
|
71
86
|
end
|
72
87
|
end
|
@@ -58,6 +58,12 @@ module SVMKit
|
|
58
58
|
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
59
59
|
def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
|
60
60
|
init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
|
61
|
+
SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
|
62
|
+
reg_param_factor: reg_param_factor, init_std: init_std)
|
63
|
+
SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
|
64
|
+
SVMKit::Validation.check_params_string(loss: loss)
|
65
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
|
66
|
+
|
61
67
|
@params = {}
|
62
68
|
@params[:n_factors] = n_factors
|
63
69
|
@params[:loss] = loss
|
@@ -82,6 +88,9 @@ module SVMKit
|
|
82
88
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
83
89
|
# @return [FactorizationMachineClassifier] The learned classifier itself.
|
84
90
|
def fit(x, y)
|
91
|
+
SVMKit::Validation.check_sample_array(x)
|
92
|
+
SVMKit::Validation.check_label_array(y)
|
93
|
+
|
85
94
|
@classes = Numo::Int32[*y.to_a.uniq.sort]
|
86
95
|
n_classes = @classes.size
|
87
96
|
_n_samples, n_features = x.shape
|
@@ -111,6 +120,7 @@ module SVMKit
|
|
111
120
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
|
112
121
|
# @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
|
113
122
|
def decision_function(x)
|
123
|
+
SVMKit::Validation.check_sample_array(x)
|
114
124
|
linear_term = @bias_term + x.dot(@weight_vec.transpose)
|
115
125
|
factor_term = if @classes.size <= 2
|
116
126
|
0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum
|
@@ -125,6 +135,7 @@ module SVMKit
|
|
125
135
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
126
136
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
127
137
|
def predict(x)
|
138
|
+
SVMKit::Validation.check_sample_array(x)
|
128
139
|
return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
|
129
140
|
|
130
141
|
n_samples, = x.shape
|
@@ -137,6 +148,7 @@ module SVMKit
|
|
137
148
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
138
149
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
139
150
|
def predict_proba(x)
|
151
|
+
SVMKit::Validation.check_sample_array(x)
|
140
152
|
proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
|
141
153
|
return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
|
142
154
|
|
@@ -32,6 +32,7 @@ module SVMKit
|
|
32
32
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
33
33
|
# @return [L2Normalizer]
|
34
34
|
def fit(x, _y = nil)
|
35
|
+
SVMKit::Validation.check_sample_array(x)
|
35
36
|
@norm_vec = Numo::NMath.sqrt((x**2).sum(1))
|
36
37
|
self
|
37
38
|
end
|
@@ -43,6 +44,7 @@ module SVMKit
|
|
43
44
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
|
44
45
|
# @return [Numo::DFloat] The normalized samples.
|
45
46
|
def fit_transform(x, _y = nil)
|
47
|
+
SVMKit::Validation.check_sample_array(x)
|
46
48
|
fit(x)
|
47
49
|
x / @norm_vec.tile(x.shape[1], 1).transpose
|
48
50
|
end
|
@@ -28,6 +28,7 @@ module SVMKit
|
|
28
28
|
#
|
29
29
|
# @param feature_range [Array<Float>] The desired range of samples.
|
30
30
|
def initialize(feature_range: [0.0, 1.0])
|
31
|
+
SVMKit::Validation.check_params_type(Array, feature_range: feature_range)
|
31
32
|
@params = {}
|
32
33
|
@params[:feature_range] = feature_range
|
33
34
|
@min_vec = nil
|
@@ -41,6 +42,7 @@ module SVMKit
|
|
41
42
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
42
43
|
# @return [MinMaxScaler]
|
43
44
|
def fit(x, _y = nil)
|
45
|
+
SVMKit::Validation.check_sample_array(x)
|
44
46
|
@min_vec = x.min(0)
|
45
47
|
@max_vec = x.max(0)
|
46
48
|
self
|
@@ -53,6 +55,7 @@ module SVMKit
|
|
53
55
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
|
54
56
|
# @return [Numo::DFloat] The scaled samples.
|
55
57
|
def fit_transform(x, _y = nil)
|
58
|
+
SVMKit::Validation.check_sample_array(x)
|
56
59
|
fit(x).transform(x)
|
57
60
|
end
|
58
61
|
|
@@ -61,6 +64,7 @@ module SVMKit
|
|
61
64
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
62
65
|
# @return [Numo::DFloat] The scaled samples.
|
63
66
|
def transform(x)
|
67
|
+
SVMKit::Validation.check_sample_array(x)
|
64
68
|
n_samples, = x.shape
|
65
69
|
dif_vec = @max_vec - @min_vec
|
66
70
|
nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
|
@@ -39,6 +39,7 @@ module SVMKit
|
|
39
39
|
# The samples to calculate the mean values and standard deviations.
|
40
40
|
# @return [StandardScaler]
|
41
41
|
def fit(x, _y = nil)
|
42
|
+
SVMKit::Validation.check_sample_array(x)
|
42
43
|
@mean_vec = x.mean(0)
|
43
44
|
@std_vec = x.stddev(0)
|
44
45
|
self
|
@@ -52,6 +53,7 @@ module SVMKit
|
|
52
53
|
# The samples to calculate the mean values and standard deviations.
|
53
54
|
# @return [Numo::DFloat] The scaled samples.
|
54
55
|
def fit_transform(x, _y = nil)
|
56
|
+
SVMKit::Validation.check_sample_array(x)
|
55
57
|
fit(x).transform(x)
|
56
58
|
end
|
57
59
|
|
@@ -60,6 +62,7 @@ module SVMKit
|
|
60
62
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
|
61
63
|
# @return [Numo::DFloat] The scaled samples.
|
62
64
|
def transform(x)
|
65
|
+
SVMKit::Validation.check_sample_array(x)
|
63
66
|
n_samples, = x.shape
|
64
67
|
(x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
|
65
68
|
end
|
@@ -54,6 +54,11 @@ module SVMKit
|
|
54
54
|
# It is used to randomly determine the order of features when deciding the splitting point.
|
55
55
|
def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
|
56
56
|
random_seed: nil)
|
57
|
+
SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
|
58
|
+
max_features: max_features, random_seed: random_seed)
|
59
|
+
SVMKit::Validation.check_params_integer(min_samples_leaf: min_samples_leaf)
|
60
|
+
SVMKit::Validation.check_params_string(criterion: criterion)
|
61
|
+
|
57
62
|
@params = {}
|
58
63
|
@params[:criterion] = criterion
|
59
64
|
@params[:max_depth] = max_depth
|
@@ -62,12 +67,12 @@ module SVMKit
|
|
62
67
|
@params[:max_features] = max_features
|
63
68
|
@params[:random_seed] = random_seed
|
64
69
|
@params[:random_seed] ||= srand
|
65
|
-
@rng = Random.new(@params[:random_seed])
|
66
70
|
@tree = nil
|
67
71
|
@classes = nil
|
68
72
|
@feature_importances = nil
|
69
73
|
@n_leaves = nil
|
70
74
|
@leaf_labels = nil
|
75
|
+
@rng = Random.new(@params[:random_seed])
|
71
76
|
end
|
72
77
|
|
73
78
|
# Fit the model with given training data.
|
@@ -76,6 +81,8 @@ module SVMKit
|
|
76
81
|
# @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
|
77
82
|
# @return [DecisionTreeClassifier] The learned classifier itself.
|
78
83
|
def fit(x, y)
|
84
|
+
SVMKit::Validation.check_sample_array(x)
|
85
|
+
SVMKit::Validation.check_label_array(y)
|
79
86
|
n_samples, n_features = x.shape
|
80
87
|
@params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
|
81
88
|
@params[:max_features] = [[1, @params[:max_features]].max, n_features].min
|
@@ -90,6 +97,7 @@ module SVMKit
|
|
90
97
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
91
98
|
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
|
92
99
|
def predict(x)
|
100
|
+
SVMKit::Validation.check_sample_array(x)
|
93
101
|
@leaf_labels[apply(x)]
|
94
102
|
end
|
95
103
|
|
@@ -98,6 +106,7 @@ module SVMKit
|
|
98
106
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
|
99
107
|
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
|
100
108
|
def predict_proba(x)
|
109
|
+
SVMKit::Validation.check_sample_array(x)
|
101
110
|
probs = Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_at_node(@tree, x[n, true]) })]
|
102
111
|
probs[true, @classes]
|
103
112
|
end
|
@@ -107,14 +116,18 @@ module SVMKit
|
|
107
116
|
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
|
108
117
|
# @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
|
109
118
|
def apply(x)
|
119
|
+
SVMKit::Validation.check_sample_array(x)
|
110
120
|
Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
|
111
121
|
end
|
112
122
|
|
113
123
|
# Dump marshal data.
|
114
124
|
# @return [Hash] The marshal data about DecisionTreeClassifier
|
115
125
|
def marshal_dump
|
116
|
-
{ params: @params,
|
117
|
-
|
126
|
+
{ params: @params,
|
127
|
+
classes: @classes,
|
128
|
+
tree: @tree,
|
129
|
+
feature_importances: @feature_importances,
|
130
|
+
leaf_labels: @leaf_labels,
|
118
131
|
rng: @rng }
|
119
132
|
end
|
120
133
|
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SVMKit
|
4
|
+
# @!visibility private
|
5
|
+
module Validation
|
6
|
+
module_function
|
7
|
+
|
8
|
+
# @!visibility private
|
9
|
+
def check_sample_array(x)
|
10
|
+
raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
|
11
|
+
raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
|
15
|
+
# @!visibility private
|
16
|
+
def check_label_array(y)
|
17
|
+
raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
|
18
|
+
raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.shape.size == 1
|
19
|
+
nil
|
20
|
+
end
|
21
|
+
|
22
|
+
# @!visibility private
|
23
|
+
def check_params_type(type, params = {})
|
24
|
+
params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
|
25
|
+
nil
|
26
|
+
end
|
27
|
+
|
28
|
+
# @!visibility private
|
29
|
+
def check_params_type_or_nil(type, params = {})
|
30
|
+
params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
|
34
|
+
# @!visibility private
|
35
|
+
def check_params_float(params = {})
|
36
|
+
check_params_type(Float, params)
|
37
|
+
end
|
38
|
+
|
39
|
+
# @!visibility private
|
40
|
+
def check_params_integer(params = {})
|
41
|
+
check_params_type(Integer, params)
|
42
|
+
end
|
43
|
+
|
44
|
+
# @!visibility private
|
45
|
+
def check_params_string(params = {})
|
46
|
+
check_params_type(String, params)
|
47
|
+
end
|
48
|
+
|
49
|
+
# @!visibility private
|
50
|
+
def check_params_boolean(params = {})
|
51
|
+
params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
|
52
|
+
nil
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.7
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -135,6 +135,7 @@ files:
|
|
135
135
|
- lib/svmkit/preprocessing/min_max_scaler.rb
|
136
136
|
- lib/svmkit/preprocessing/standard_scaler.rb
|
137
137
|
- lib/svmkit/tree/decision_tree_classifier.rb
|
138
|
+
- lib/svmkit/validation.rb
|
138
139
|
- lib/svmkit/version.rb
|
139
140
|
- svmkit.gemspec
|
140
141
|
homepage: https://github.com/yoshoku/svmkit
|