svmkit 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +5 -0
- data/lib/svmkit/base/base_estimator.rb +1 -1
- data/lib/svmkit/base/classifier.rb +2 -0
- data/lib/svmkit/ensemble/random_forest_classifier.rb +11 -1
- data/lib/svmkit/evaluation_measure/accuracy.rb +3 -0
- data/lib/svmkit/evaluation_measure/f_score.rb +4 -0
- data/lib/svmkit/evaluation_measure/precision.rb +4 -0
- data/lib/svmkit/evaluation_measure/recall.rb +4 -0
- data/lib/svmkit/kernel_approximation/rbf.rb +11 -1
- data/lib/svmkit/kernel_machine/kernel_svc.rb +12 -11
- data/lib/svmkit/linear_model/logistic_regression.rb +20 -6
- data/lib/svmkit/linear_model/svc.rb +12 -0
- data/lib/svmkit/model_selection/cross_validation.rb +6 -0
- data/lib/svmkit/model_selection/k_fold.rb +6 -4
- data/lib/svmkit/model_selection/stratified_k_fold.rb +6 -0
- data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +5 -0
- data/lib/svmkit/naive_bayes/naive_bayes.rb +14 -0
- data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +5 -0
- data/lib/svmkit/pairwise_metric.rb +15 -0
- data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +12 -0
- data/lib/svmkit/preprocessing/l2_normalizer.rb +2 -0
- data/lib/svmkit/preprocessing/min_max_scaler.rb +4 -0
- data/lib/svmkit/preprocessing/standard_scaler.rb +3 -0
- data/lib/svmkit/tree/decision_tree_classifier.rb +16 -3
- data/lib/svmkit/validation.rb +55 -0
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
```diff
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: bbc648db53b4285bd15ae00e2464c4376d027893
+  data.tar.gz: 28d9db47ae3053031f1643329cf02cd4e7d9c135
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c504f010a70fc7a31afa4471096092adac9ff44de979d42d7277c63d737d517981e2ac6d7481ad42dc1c864f2a7756d39cd1d7697d2d4b1bca150d0a4eca3b8e
+  data.tar.gz: 8d89dc525ed37626a2d97e6fe3bebdacd4ec2945df285d8275cd0bd5df62c5ebc897dca67b91157df0bfc460fa987ed4098654ce6d164cb05ec46de4c6fe27af
```
data/HISTORY.md
CHANGED
```diff
@@ -1,3 +1,8 @@
+# 0.2.8
+- Fixed bug on gradient calculation of Logistic Regression.
+- Fixed to change accessor of params of estimators to read only.
+- Added parameter validation.
+
 # 0.2.7
 - Fixed to support multiclass classifiction into LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier
```
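Of these three entries, the accessor change is the one not visible as a hunk below: `data/lib/svmkit/base/base_estimator.rb` (+1 -1 in the file list, diff body not shown) most likely swaps a writable `attr_accessor :params` for `attr_reader :params`. A minimal sketch of the observable difference, assuming the 0.2.8 API (the estimator and hyperparameter are just examples):

```ruby
require 'svmkit'

svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0)
svc.params[:reg_param]          # => 1.0 -- reading hyperparameters still works
svc.params = { reg_param: 2.0 } # raises NoMethodError in 0.2.8: the params writer is gone
```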
data/lib/svmkit/base/classifier.rb
CHANGED
```diff
@@ -20,6 +20,8 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
     # @return [Float] Mean accuracy
     def score(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       evaluator = SVMKit::EvaluationMeasure::Accuracy.new
       evaluator.score(y, predict(x))
     end
```
data/lib/svmkit/ensemble/random_forest_classifier.rb
CHANGED
```diff
@@ -50,6 +50,11 @@ module SVMKit
     #   It is used to randomly determine the order of features when deciding spliting point.
     def initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
                    max_features: nil, random_seed: nil)
+      SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                                  max_features: max_features, random_seed: random_seed)
+      SVMKit::Validation.check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+      SVMKit::Validation.check_params_string(criterion: criterion)
+
       @params = {}
       @params[:n_estimators] = n_estimators
       @params[:criterion] = criterion
@@ -59,10 +64,10 @@ module SVMKit
       @params[:max_features] = max_features
       @params[:random_seed] = random_seed
       @params[:random_seed] ||= srand
-      @rng = Random.new(@params[:random_seed])
       @estimators = nil
       @classes = nil
       @feature_importances = nil
+      @rng = Random.new(@params[:random_seed])
     end
 
     # Fit the model with given training data.
@@ -71,6 +76,8 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [RandomForestClassifier] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       # Initialize some variables.
       n_samples, n_features = x.shape
       @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
@@ -98,6 +105,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       n_classes = @classes.size
       classes_arr = @classes.to_a
@@ -117,6 +125,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       n_classes = @classes.size
       classes_arr = @classes.to_a
@@ -136,6 +145,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
     def apply(x)
+      SVMKit::Validation.check_sample_array(x)
       Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
     end
```
data/lib/svmkit/evaluation_measure/accuracy.rb
CHANGED
```diff
@@ -19,6 +19,9 @@ module SVMKit
     # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
     # @return [Float] Mean accuracy
     def score(y_true, y_pred)
+      SVMKit::Validation.check_label_array(y_true)
+      SVMKit::Validation.check_label_array(y_pred)
+
       (y_true.to_a.map.with_index { |label, n| label == y_pred[n] ? 1 : 0 }).inject(:+) / y_true.size.to_f
     end
   end
```
data/lib/svmkit/evaluation_measure/f_score.rb
CHANGED
```diff
@@ -23,6 +23,7 @@ module SVMKit
     #
     # @param average [String] The average type ('binary', 'micro', 'macro')
     def initialize(average: 'binary')
+      SVMKit::Validation.check_params_string(average: average)
       @average = average
     end
 
@@ -32,6 +33,9 @@ module SVMKit
     # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
     # @return [Float] Average F1-score
     def score(y_true, y_pred)
+      SVMKit::Validation.check_label_array(y_true)
+      SVMKit::Validation.check_label_array(y_pred)
+
       case @average
       when 'binary'
         f_score_each_class(y_true, y_pred).last
```
data/lib/svmkit/evaluation_measure/precision.rb
CHANGED
```diff
@@ -23,6 +23,7 @@ module SVMKit
     #
     # @param average [String] The average type ('binary', 'micro', 'macro')
     def initialize(average: 'binary')
+      SVMKit::Validation.check_params_string(average: average)
       @average = average
     end
 
@@ -32,6 +33,9 @@ module SVMKit
     # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
     # @return [Float] Average precision
     def score(y_true, y_pred)
+      SVMKit::Validation.check_label_array(y_true)
+      SVMKit::Validation.check_label_array(y_pred)
+
       case @average
       when 'binary'
         precision_each_class(y_true, y_pred).last
```
data/lib/svmkit/evaluation_measure/recall.rb
CHANGED
```diff
@@ -23,6 +23,7 @@ module SVMKit
     #
     # @param average [String] The average type ('binary', 'micro', 'macro')
     def initialize(average: 'binary')
+      SVMKit::Validation.check_params_string(average: average)
       @average = average
     end
 
@@ -32,6 +33,9 @@ module SVMKit
     # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
     # @return [Float] Average recall
     def score(y_true, y_pred)
+      SVMKit::Validation.check_label_array(y_true)
+      SVMKit::Validation.check_label_array(y_pred)
+
       case @average
       when 'binary'
         recall_each_class(y_true, y_pred).last
```
data/lib/svmkit/kernel_approximation/rbf.rb
CHANGED
```diff
@@ -37,14 +37,18 @@ module SVMKit
     # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
+      SVMKit::Validation.check_params_float(gamma: gamma)
+      SVMKit::Validation.check_params_integer(n_components: n_components)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @params = {}
       @params[:gamma] = gamma
       @params[:n_components] = n_components
       @params[:random_seed] = random_seed
       @params[:random_seed] ||= srand
-      @rng = Random.new(@params[:random_seed])
       @random_mat = nil
       @random_vec = nil
+      @rng = Random.new(@params[:random_seed])
     end
 
     # Fit the model with given training data.
@@ -55,6 +59,8 @@ module SVMKit
     #   This method uses only the number of features of the data.
     # @return [RBF] The learned transformer itself.
     def fit(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
+
       n_features = x.shape[1]
       @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
       @random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
@@ -72,6 +78,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
     # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
     def fit_transform(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
+
       fit(x).transform(x)
     end
 
@@ -82,6 +90,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
     # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
     def transform(x)
+      SVMKit::Validation.check_sample_array(x)
+
       n_samples, = x.shape
       projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
       Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
```
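These hunks only add input checks and reorder the `@rng` initialization; the transformer's behavior is unchanged. For context, the usual pipeline pairs this approximation with a linear model. A sketch with made-up random data (the `gamma` and `n_components` values are arbitrary):

```ruby
require 'svmkit'

x = Numo::DFloat.new(30, 4).rand
y = Numo::Int32.cast(Numo::DFloat.new(30).rand.ge(0.5)) * 2 - 1

# Map samples into an approximate RBF kernel feature space, then train a linear SVC on it.
transformer = SVMKit::KernelApproximation::RBF.new(gamma: 2.0, n_components: 64, random_seed: 1)
mapped = transformer.fit_transform(x)
svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, random_seed: 1)
svc.fit(mapped, y).score(mapped, y)
```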
data/lib/svmkit/kernel_machine/kernel_svc.rb
CHANGED
```diff
@@ -42,13 +42,17 @@ module SVMKit
     # @param max_iter [Integer] The maximum number of iterations.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
+      SVMKit::Validation.check_params_float(reg_param: reg_param)
+      SVMKit::Validation.check_params_integer(max_iter: max_iter)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @params = {}
       @params[:reg_param] = reg_param
       @params[:max_iter] = max_iter
       @params[:random_seed] = random_seed
       @params[:random_seed] ||= srand
       @weight_vec = nil
-      @classes
+      @classes = nil
       @rng = Random.new(@params[:random_seed])
     end
 
@@ -59,6 +63,9 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
     # @return [KernelSVC] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
+
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       n_classes = @classes.size
       _n_samples, n_features = x.shape
@@ -84,6 +91,8 @@ module SVMKit
     #   The kernel matrix between testing samples and training samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
+
       x.dot(@weight_vec.transpose)
     end
 
@@ -93,6 +102,8 @@ module SVMKit
     #   The kernel matrix between testing samples and training samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
+
       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
       n_samples, = x.shape
@@ -100,16 +111,6 @@ module SVMKit
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
     end
 
-    # Claculate the mean accuracy of the given testing data.
-    #
-    # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
-    #   The kernel matrix between testing samples and training samples.
-    # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
-    # @return [Float] Mean accuracy
-    def score(x, y)
-      super
-    end
-
     # Dump marshal data.
     # @return [Hash] The marshal data about KernelSVC.
     def marshal_dump
```
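Besides the new validation calls, this file drops its `score` override, which did nothing but call `super`; the `Classifier#score` shown in the `base/classifier.rb` hunk above now serves directly. Note that the estimator operates on a precomputed kernel matrix. A sketch with made-up data (values are arbitrary):

```ruby
require 'svmkit'

x = Numo::DFloat.new(20, 4).rand
y = Numo::Int32.cast(Numo::DFloat.new(20).rand.ge(0.5)) * 2 - 1

kernel_mat = SVMKit::PairwiseMetric.rbf_kernel(x, nil, 1.0)
svc = SVMKit::KernelMachine::KernelSVC.new(reg_param: 1.0, max_iter: 100, random_seed: 1)
svc.fit(kernel_mat, y)
svc.score(kernel_mat, y) # mean accuracy via the mixed-in Classifier#score
```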
data/lib/svmkit/linear_model/logistic_regression.rb
CHANGED
```diff
@@ -50,6 +50,11 @@ module SVMKit
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+      SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+      SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+      SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @params = {}
       @params[:reg_param] = reg_param
       @params[:fit_bias] = fit_bias
@@ -71,6 +76,9 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [LogisticRegression] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
+
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       n_classes = @classes.size
       _n_samples, n_features = x.shape
@@ -79,14 +87,14 @@ module SVMKit
         @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
         @bias_term = Numo::DFloat.zeros(n_classes)
         n_classes.times do |n|
-          bin_y = Numo::Int32.cast(y.eq(@classes[n]))
+          bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
           weight, bias = binary_fit(x, bin_y)
           @weight_vec[n, true] = weight
           @bias_term[n] = bias
         end
       else
         negative_label = y.to_a.uniq.sort.first
-        bin_y = Numo::Int32.cast(y.ne(negative_label))
+        bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
         @weight_vec, @bias_term = binary_fit(x, bin_y)
       end
 
@@ -98,6 +106,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
+
       x.dot(@weight_vec.transpose) + @bias_term
     end
 
@@ -106,10 +116,12 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
-
+      SVMKit::Validation.check_sample_array(x)
+
       return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
 
       n_samples, = x.shape
-      decision_values =
+      decision_values = predict_proba(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
     end
 
@@ -118,6 +130,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
+      SVMKit::Validation.check_sample_array(x)
+
       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
 
@@ -165,9 +179,9 @@ module SVMKit
         rand_ids.concat(subset_ids)
         # update the weight vector.
         df = samples[subset_ids, true].dot(weight_vec.transpose)
-        coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
+        coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
         mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
-        weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec
+        weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
         # scale the weight vector.
         normalize_weight_vec(weight_vec) if @params[:normalize]
       end
```
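The two `binary_fit` hunks are the changelog's gradient fix. With labels mapped to $y \in \{-1, +1\}$ (the `* 2 - 1` change in `fit`), the derivative of the logistic loss $\ell(f, y) = \log(1 + e^{-yf})$ with respect to the margin $f = \mathbf{w}^\top \mathbf{x}$ is

$$\frac{\partial \ell}{\partial f} = \frac{-y}{1 + e^{yf}} = \frac{y}{1 + e^{-yf}} - y.$$

The 0.2.7 code computed only the first term of the right-hand side, dropping the $-y$; the corrected `coef` is exactly $\partial \ell / \partial f$, and the corrected update moves the weights against the L2 penalty gradient plus the mini-batch mean of the `coef`-weighted samples.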
data/lib/svmkit/linear_model/svc.rb
CHANGED
```diff
@@ -49,6 +49,11 @@ module SVMKit
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
                    max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+      SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+      SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+      SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @params = {}
       @params[:reg_param] = reg_param
       @params[:fit_bias] = fit_bias
@@ -70,6 +75,9 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [SVC] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
+
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       n_classes = @classes.size
       _n_samples, n_features = x.shape
@@ -97,6 +105,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
+
       x.dot(@weight_vec.transpose) + @bias_term
     end
 
@@ -105,6 +115,8 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
+
       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
       n_samples, = x.shape
```
data/lib/svmkit/model_selection/cross_validation.rb
CHANGED
```diff
@@ -38,6 +38,10 @@ module SVMKit
     # @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
     # @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
     def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
+      SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
+      SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
+      SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
+      SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
       @estimator = estimator
       @splitter = splitter
       @evaluator = evaluator
@@ -56,6 +60,8 @@ module SVMKit
     #   * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
     #     the return_train_score is false.
     def perform(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       # Initialize the report of cross validation.
       report = { test_score: [], train_score: nil, fit_time: [] }
       report[:train_score] = [] if @return_train_score
```
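With these checks, a mismatched component (say, a splitter passed as `estimator:`) raises a TypeError at construction instead of failing mid-run. A usage sketch with made-up data:

```ruby
require 'svmkit'

x = Numo::DFloat.new(30, 4).rand
y = Numo::Int32.cast(Numo::DFloat.new(30).rand.ge(0.5)) * 2 - 1

svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, random_seed: 1)
kf  = SVMKit::ModelSelection::StratifiedKFold.new(n_splits: 3, shuffle: true, random_seed: 1)
cv  = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kf)
report = cv.perform(x, y)
report[:test_score] # => three per-fold mean accuracies
```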
data/lib/svmkit/model_selection/k_fold.rb
CHANGED
```diff
@@ -32,6 +32,10 @@ module SVMKit
     # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+      SVMKit::Validation.check_params_integer(n_splits: n_splits)
+      SVMKit::Validation.check_params_boolean(shuffle: shuffle)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @n_splits = n_splits
       @shuffle = shuffle
       @random_seed = random_seed
@@ -43,11 +47,9 @@ module SVMKit
     #
     # @param x [Numo::DFloat] (shape: [n_samples, n_features])
     #   The dataset to be used to generate data indices for K-fold cross validation.
-    # @param y [Numo::Int32] (shape: [n_samples])
-    #   The labels to be used to generate data indices for stratified K-fold cross validation.
-    #   This argument exists to unify the interface between the K-fold methods, it is not used in the method.
     # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
-    def split(x,
+    def split(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       # Initialize and check some variables.
       n_samples, = x.shape
       unless @n_splits.between?(2, n_samples)
```
data/lib/svmkit/model_selection/stratified_k_fold.rb
CHANGED
```diff
@@ -32,6 +32,10 @@ module SVMKit
     # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+      SVMKit::Validation.check_params_integer(n_splits: n_splits)
+      SVMKit::Validation.check_params_boolean(shuffle: shuffle)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @n_splits = n_splits
       @shuffle = shuffle
       @random_seed = random_seed
@@ -48,6 +52,8 @@ module SVMKit
     #   The labels to be used to generate data indices for stratified K-fold cross validation.
     # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
     def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       # Check the number of samples in each class.
       unless valid_n_splits?(y)
         raise ArgumentError,
```
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb
CHANGED
```diff
@@ -33,6 +33,7 @@ module SVMKit
     #
     # @param estimator [Classifier] The (binary) classifier for construction a multi-class classifier.
     def initialize(estimator: nil)
+      SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
       @params = {}
       @params[:estimator] = estimator
       @estimators = nil
@@ -45,6 +46,8 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [OneVsRestClassifier] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       y_arr = y.to_a
       @classes = Numo::Int32.asarray(y_arr.uniq.sort)
       @estimators = @classes.to_a.map do |label|
@@ -59,6 +62,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       n_classes = @classes.size
       Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
     end
@@ -68,6 +72,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
```
data/lib/svmkit/naive_bayes/naive_bayes.rb
CHANGED
```diff
@@ -16,6 +16,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples = x.shape.first
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -26,6 +27,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
     def predict_log_proba(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       log_likelihoods = decision_function(x)
       log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -36,6 +38,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
+      SVMKit::Validation.check_sample_array(x)
       Numo::NMath.exp(predict_log_proba(x)).abs
     end
   end
@@ -75,6 +78,8 @@ module SVMKit
     #   to be used for fitting the model.
     # @return [GaussianNB] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       n_samples, = x.shape
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -88,6 +93,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       n_classes = @classes.size
       log_likelihoods = Array.new(n_classes) do |l|
         Math.log(@class_priors[l]) - 0.5 * (
@@ -147,6 +153,7 @@ module SVMKit
     #
     # @param smoothing_param [Float] The Laplace smoothing parameter.
     def initialize(smoothing_param: 1.0)
+      SVMKit::Validation.check_params_float(smoothing_param: smoothing_param)
       @params = {}
       @params[:smoothing_param] = smoothing_param
     end
@@ -158,6 +165,8 @@ module SVMKit
     #   to be used for fitting the model.
     # @return [MultinomialNB] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       n_samples, = x.shape
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -173,6 +182,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       n_classes = @classes.size
       bin_x = x.gt(0)
       log_likelihoods = Array.new(n_classes) do |l|
@@ -230,6 +240,7 @@ module SVMKit
     # @param smoothing_param [Float] The Laplace smoothing parameter.
     # @param bin_threshold [Float] The threshold for binarizing of features.
     def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+      SVMKit::Validation.check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
       @params = {}
       @params[:smoothing_param] = smoothing_param
       @params[:bin_threshold] = bin_threshold
@@ -242,6 +253,8 @@ module SVMKit
     #   to be used for fitting the model.
     # @return [BernoulliNB] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       n_samples, = x.shape
       bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
       @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -260,6 +273,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       n_classes = @classes.size
       bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
       not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
```
data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
CHANGED
```diff
@@ -35,6 +35,7 @@ module SVMKit
     #
     # @param n_neighbors [Integer] The number of neighbors.
     def initialize(n_neighbors: 5)
+      SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
       @params = {}
       @params[:n_neighbors] = n_neighbors
       @prototypes = nil
@@ -48,6 +49,8 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [KNeighborsClassifier] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       @prototypes = Numo::DFloat.asarray(x.to_a)
       @labels = Numo::Int32.asarray(y.to_a)
       @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
@@ -59,6 +62,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
       n_samples, n_prototypes = distance_matrix.shape
       n_classes = @classes.size
@@ -76,6 +80,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples = x.shape.first
       decision_values = decision_function(x)
       Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
```
data/lib/svmkit/pairwise_metric.rb
CHANGED
```diff
@@ -11,6 +11,8 @@ module SVMKit
     # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
     def euclidean_distance(x, y = nil)
       y = x if y.nil?
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_sample_array(y)
       sum_x_vec = (x**2).sum(1)
       sum_y_vec = (y**2).sum(1)
       dot_xy_mat = x.dot(y.transpose)
@@ -29,6 +31,9 @@ module SVMKit
     def rbf_kernel(x, y = nil, gamma = nil)
       y = x if y.nil?
       gamma ||= 1.0 / x.shape[1]
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_sample_array(y)
+      SVMKit::Validation.check_params_float(gamma: gamma)
       distance_matrix = euclidean_distance(x, y)
       Numo::NMath.exp((distance_matrix**2) * -gamma)
     end
@@ -40,6 +45,8 @@ module SVMKit
     # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
     def linear_kernel(x, y = nil)
       y = x if y.nil?
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_sample_array(y)
       x.dot(y.transpose)
     end
 
@@ -54,6 +61,10 @@ module SVMKit
     def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
       y = x if y.nil?
       gamma ||= 1.0 / x.shape[1]
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_sample_array(y)
+      SVMKit::Validation.check_params_float(gamma: gamma)
+      SVMKit::Validation.check_params_integer(degree: degree, coef: coef)
       (x.dot(y.transpose) * gamma + coef)**degree
     end
 
@@ -67,6 +78,10 @@ module SVMKit
     def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
       y = x if y.nil?
       gamma ||= 1.0 / x.shape[1]
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_sample_array(y)
+      SVMKit::Validation.check_params_float(gamma: gamma)
+      SVMKit::Validation.check_params_integer(coef: coef)
       Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
     end
   end
```
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb
CHANGED
```diff
@@ -58,6 +58,12 @@ module SVMKit
     # @param random_seed [Integer] The seed value using to initialize the random generator.
     def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
                    init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
+      SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
+                                            reg_param_factor: reg_param_factor, init_std: init_std)
+      SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+      SVMKit::Validation.check_params_string(loss: loss)
+      SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
       @params = {}
       @params[:n_factors] = n_factors
       @params[:loss] = loss
@@ -82,6 +88,9 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [FactorizationMachineClassifier] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
+
       @classes = Numo::Int32[*y.to_a.uniq.sort]
       n_classes = @classes.size
       _n_samples, n_features = x.shape
@@ -111,6 +120,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
     # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
     def decision_function(x)
+      SVMKit::Validation.check_sample_array(x)
       linear_term = @bias_term + x.dot(@weight_vec.transpose)
       factor_term = if @classes.size <= 2
                       0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum
@@ -125,6 +135,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
 
       n_samples, = x.shape
@@ -137,6 +148,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
+      SVMKit::Validation.check_sample_array(x)
       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
       return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
 
```
data/lib/svmkit/preprocessing/l2_normalizer.rb
CHANGED
```diff
@@ -32,6 +32,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
     # @return [L2Normalizer]
     def fit(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
       self
     end
@@ -43,6 +44,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
     # @return [Numo::DFloat] The normalized samples.
     def fit_transform(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       fit(x)
       x / @norm_vec.tile(x.shape[1], 1).transpose
     end
```
data/lib/svmkit/preprocessing/min_max_scaler.rb
CHANGED
```diff
@@ -28,6 +28,7 @@ module SVMKit
     #
     # @param feature_range [Array<Float>] The desired range of samples.
     def initialize(feature_range: [0.0, 1.0])
+      SVMKit::Validation.check_params_type(Array, feature_range: feature_range)
       @params = {}
       @params[:feature_range] = feature_range
       @min_vec = nil
@@ -41,6 +42,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
     # @return [MinMaxScaler]
     def fit(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       @min_vec = x.min(0)
       @max_vec = x.max(0)
       self
@@ -53,6 +55,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
     # @return [Numo::DFloat] The scaled samples.
     def fit_transform(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       fit(x).transform(x)
     end
 
@@ -61,6 +64,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
     # @return [Numo::DFloat] The scaled samples.
     def transform(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       dif_vec = @max_vec - @min_vec
       nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
```
data/lib/svmkit/preprocessing/standard_scaler.rb
CHANGED
```diff
@@ -39,6 +39,7 @@ module SVMKit
     #   The samples to calculate the mean values and standard deviations.
     # @return [StandardScaler]
     def fit(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       @mean_vec = x.mean(0)
       @std_vec = x.stddev(0)
       self
@@ -52,6 +53,7 @@ module SVMKit
     #   The samples to calculate the mean values and standard deviations.
     # @return [Numo::DFloat] The scaled samples.
     def fit_transform(x, _y = nil)
+      SVMKit::Validation.check_sample_array(x)
       fit(x).transform(x)
     end
 
@@ -60,6 +62,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
     # @return [Numo::DFloat] The scaled samples.
     def transform(x)
+      SVMKit::Validation.check_sample_array(x)
       n_samples, = x.shape
       (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
     end
```
data/lib/svmkit/tree/decision_tree_classifier.rb
CHANGED
```diff
@@ -54,6 +54,11 @@ module SVMKit
     #   It is used to randomly determine the order of features when deciding spliting point.
     def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
                    random_seed: nil)
+      SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                                  max_features: max_features, random_seed: random_seed)
+      SVMKit::Validation.check_params_integer(min_samples_leaf: min_samples_leaf)
+      SVMKit::Validation.check_params_string(criterion: criterion)
+
       @params = {}
       @params[:criterion] = criterion
       @params[:max_depth] = max_depth
@@ -62,12 +67,12 @@ module SVMKit
       @params[:max_features] = max_features
       @params[:random_seed] = random_seed
       @params[:random_seed] ||= srand
-      @rng = Random.new(@params[:random_seed])
       @tree = nil
       @classes = nil
       @feature_importances = nil
       @n_leaves = nil
       @leaf_labels = nil
+      @rng = Random.new(@params[:random_seed])
     end
 
     # Fit the model with given training data.
@@ -76,6 +81,8 @@ module SVMKit
     # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
     # @return [DecisionTreeClassifier] The learned classifier itself.
     def fit(x, y)
+      SVMKit::Validation.check_sample_array(x)
+      SVMKit::Validation.check_label_array(y)
       n_samples, n_features = x.shape
       @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
       @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
@@ -90,6 +97,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
     def predict(x)
+      SVMKit::Validation.check_sample_array(x)
       @leaf_labels[apply(x)]
     end
 
@@ -98,6 +106,7 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
     # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
     def predict_proba(x)
+      SVMKit::Validation.check_sample_array(x)
       probs = Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_at_node(@tree, x[n, true]) })]
       probs[true, @classes]
     end
@@ -107,14 +116,18 @@ module SVMKit
     # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
     # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
     def apply(x)
+      SVMKit::Validation.check_sample_array(x)
       Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
     end
 
     # Dump marshal data.
     # @return [Hash] The marshal data about DecisionTreeClassifier
     def marshal_dump
-      { params: @params,
-
+      { params: @params,
+        classes: @classes,
+        tree: @tree,
+        feature_importances: @feature_importances,
+        leaf_labels: @leaf_labels,
         rng: @rng }
     end
```
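The `marshal_dump` change restores the learned state (`classes`, `tree`, `feature_importances`, `leaf_labels`) to the dumped hash, so a fitted tree should survive a `Marshal` round trip. A sketch with made-up data (labels kept non-negative, since the tree indexes classes by value):

```ruby
require 'svmkit'

x = Numo::DFloat.new(20, 3).rand
y = Numo::Int32.cast(Numo::DFloat.new(20).rand.ge(0.5)) # labels in {0, 1}

tree = SVMKit::Tree::DecisionTreeClassifier.new(random_seed: 1)
tree.fit(x, y)
restored = Marshal.load(Marshal.dump(tree))
restored.predict(x) # expected to match tree.predict(x)
```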
data/lib/svmkit/validation.rb
ADDED
```diff
@@ -0,0 +1,55 @@
+# frozen_string_literal: true
+
+module SVMKit
+  # @!visibility private
+  module Validation
+    module_function
+
+    # @!visibility private
+    def check_sample_array(x)
+      raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
+      raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+      nil
+    end
+
+    # @!visibility private
+    def check_label_array(y)
+      raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
+      raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.shape.size == 1
+      nil
+    end
+
+    # @!visibility private
+    def check_params_type(type, params = {})
+      params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
+      nil
+    end
+
+    # @!visibility private
+    def check_params_type_or_nil(type, params = {})
+      params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
+      nil
+    end
+
+    # @!visibility private
+    def check_params_float(params = {})
+      check_params_type(Float, params)
+    end
+
+    # @!visibility private
+    def check_params_integer(params = {})
+      check_params_type(Integer, params)
+    end
+
+    # @!visibility private
+    def check_params_string(params = {})
+      check_params_type(String, params)
+    end
+
+    # @!visibility private
+    def check_params_boolean(params = {})
+      params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
+      nil
+    end
+  end
+end
```
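These are the helpers every hunk above calls. They are marked `@!visibility private` (internal API), but invoking them directly illustrates the two failure modes they guard against; a sketch assuming the 0.2.8 behavior:

```ruby
require 'svmkit'

SVMKit::Validation.check_params_integer(max_iter: 100)
# => nil (valid)
SVMKit::Validation.check_params_integer(max_iter: 100.0)
# raises TypeError: Expect class of max_iter to be Integer

vec = Numo::DFloat.new(10).rand # 1-D, so not a valid sample matrix
SVMKit::Validation.check_sample_array(vec)
# raises ArgumentError: Expect sample matrix to be 2-D array
```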
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
metadata
CHANGED
```diff
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.2.7
+  version: 0.2.8
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-04-
+date: 2018-04-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -135,6 +135,7 @@ files:
 - lib/svmkit/preprocessing/min_max_scaler.rb
 - lib/svmkit/preprocessing/standard_scaler.rb
 - lib/svmkit/tree/decision_tree_classifier.rb
+- lib/svmkit/validation.rb
 - lib/svmkit/version.rb
 - svmkit.gemspec
 homepage: https://github.com/yoshoku/svmkit
```