svmkit 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 46878b59860b61bae7b522fb02af984208609f56
- data.tar.gz: 6e889c6ad8382c654455a242d2f7f27de41de2d5
+ metadata.gz: bbc648db53b4285bd15ae00e2464c4376d027893
+ data.tar.gz: 28d9db47ae3053031f1643329cf02cd4e7d9c135
  SHA512:
- metadata.gz: cddb239bf0768e6d983ce942ed6a7bdda8b827fa2e73e51c1b4591e8af3c641339377417f844358159c3a2bdff51d2f5678ef07fe21fe86e51136289e69ea38c
- data.tar.gz: 64c2029c729de580765ad9ee89fd57821a40773721eac291201cb9b9f4c72697f5945c8f5259ed8d6a879f0b35dac841bcdd6d5322014c9cc78b9a42046dc310
+ metadata.gz: c504f010a70fc7a31afa4471096092adac9ff44de979d42d7277c63d737d517981e2ac6d7481ad42dc1c864f2a7756d39cd1d7697d2d4b1bca150d0a4eca3b8e
+ data.tar.gz: 8d89dc525ed37626a2d97e6fe3bebdacd4ec2945df285d8275cd0bd5df62c5ebc897dca67b91157df0bfc460fa987ed4098654ce6d164cb05ec46de4c6fe27af
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
+ # 0.2.8
+ - Fixed a bug in the gradient calculation of Logistic Regression.
+ - Changed the params accessor of estimators to read-only.
+ - Added parameter validation.
+
  # 0.2.7
  - Fixed to support multiclass classification in LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier

@@ -7,7 +7,7 @@ module SVMKit
  module BaseEstimator
  # Return parameters about an estimator.
  # @return [Hash]
- attr_accessor :params
+ attr_reader :params
  end
  end
  end
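A minimal sketch of what the attr_reader change above means for callers, assuming an estimator from this gem such as SVMKit::LinearModel::SVC (its reg_param keyword appears later in this diff): the params Hash can still be read, but it can no longer be reassigned from outside the object.

```ruby
# Illustrative only: params keeps its getter in 0.2.8 but loses its setter.
require 'svmkit'

svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0)
svc.params[:reg_param]          # => 1.0 (reading still works)
svc.params = { reg_param: 2.0 } # raises NoMethodError: the setter no longer exists
```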
@@ -20,6 +20,8 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
  # @return [Float] Mean accuracy
  def score(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  evaluator = SVMKit::EvaluationMeasure::Accuracy.new
  evaluator.score(y, predict(x))
  end
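A hedged sketch of how the new guard clauses behave at this call site; the error messages come from the validation.rb file added at the end of this diff, and `svc` stands for any fitted classifier that mixes in this module.

```ruby
# Illustrative only: both checks run before the Accuracy evaluator is built.
svc.score([[1.0, 2.0]], Numo::Int32[1])   # raises TypeError (sample matrix must be Numo::DFloat)
svc.score(Numo::DFloat[[1.0, 2.0]], [1])  # raises TypeError (label vector must be Numo::Int32)
```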
@@ -50,6 +50,11 @@ module SVMKit
  # It is used to randomly determine the order of features when deciding spliting point.
  def initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
  max_features: nil, random_seed: nil)
+ SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+ max_features: max_features, random_seed: random_seed)
+ SVMKit::Validation.check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+ SVMKit::Validation.check_params_string(criterion: criterion)
+
  @params = {}
  @params[:n_estimators] = n_estimators
  @params[:criterion] = criterion
@@ -59,10 +64,10 @@ module SVMKit
  @params[:max_features] = max_features
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
- @rng = Random.new(@params[:random_seed])
  @estimators = nil
  @classes = nil
  @feature_importances = nil
+ @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
@@ -71,6 +76,8 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [RandomForestClassifier] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  # Initialize some variables.
  n_samples, n_features = x.shape
  @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
@@ -98,6 +105,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  n_classes = @classes.size
  classes_arr = @classes.to_a
@@ -117,6 +125,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  n_classes = @classes.size
  classes_arr = @classes.to_a
@@ -136,6 +145,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
  def apply(x)
+ SVMKit::Validation.check_sample_array(x)
  Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
  end

@@ -19,6 +19,9 @@ module SVMKit
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Mean accuracy
  def score(y_true, y_pred)
+ SVMKit::Validation.check_label_array(y_true)
+ SVMKit::Validation.check_label_array(y_pred)
+
  (y_true.to_a.map.with_index { |label, n| label == y_pred[n] ? 1 : 0 }).inject(:+) / y_true.size.to_f
  end
  end
@@ -23,6 +23,7 @@ module SVMKit
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
+ SVMKit::Validation.check_params_string(average: average)
  @average = average
  end

@@ -32,6 +33,9 @@ module SVMKit
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average F1-score
  def score(y_true, y_pred)
+ SVMKit::Validation.check_label_array(y_true)
+ SVMKit::Validation.check_label_array(y_pred)
+
  case @average
  when 'binary'
  f_score_each_class(y_true, y_pred).last
@@ -23,6 +23,7 @@ module SVMKit
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
+ SVMKit::Validation.check_params_string(average: average)
  @average = average
  end

@@ -32,6 +33,9 @@ module SVMKit
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average precision
  def score(y_true, y_pred)
+ SVMKit::Validation.check_label_array(y_true)
+ SVMKit::Validation.check_label_array(y_pred)
+
  case @average
  when 'binary'
  precision_each_class(y_true, y_pred).last
@@ -23,6 +23,7 @@ module SVMKit
  #
  # @param average [String] The average type ('binary', 'micro', 'macro')
  def initialize(average: 'binary')
+ SVMKit::Validation.check_params_string(average: average)
  @average = average
  end

@@ -32,6 +33,9 @@ module SVMKit
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
  # @return [Float] Average recall
  def score(y_true, y_pred)
+ SVMKit::Validation.check_label_array(y_true)
+ SVMKit::Validation.check_label_array(y_pred)
+
  case @average
  when 'binary'
  recall_each_class(y_true, y_pred).last
@@ -37,14 +37,18 @@ module SVMKit
  # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
+ SVMKit::Validation.check_params_float(gamma: gamma)
+ SVMKit::Validation.check_params_integer(n_components: n_components)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @params = {}
  @params[:gamma] = gamma
  @params[:n_components] = n_components
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
- @rng = Random.new(@params[:random_seed])
  @random_mat = nil
  @random_vec = nil
+ @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
@@ -55,6 +59,8 @@ module SVMKit
  # This method uses only the number of features of the data.
  # @return [RBF] The learned transformer itself.
  def fit(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
+
  n_features = x.shape[1]
  @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
  @random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
@@ -72,6 +78,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
  def fit_transform(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
+
  fit(x).transform(x)
  end

@@ -82,6 +90,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
  def transform(x)
+ SVMKit::Validation.check_sample_array(x)
+
  n_samples, = x.shape
  projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
  Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
@@ -42,13 +42,17 @@ module SVMKit
  # @param max_iter [Integer] The maximum number of iterations.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param: reg_param)
+ SVMKit::Validation.check_params_integer(max_iter: max_iter)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @params = {}
  @params[:reg_param] = reg_param
  @params[:max_iter] = max_iter
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
  @weight_vec = nil
- @classes
+ @classes = nil
  @rng = Random.new(@params[:random_seed])
  end

@@ -59,6 +63,9 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
  # @return [KernelSVC] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
+
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
  _n_samples, n_features = x.shape
@@ -84,6 +91,8 @@ module SVMKit
  # The kernel matrix between testing samples and training samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
+
  x.dot(@weight_vec.transpose)
  end

@@ -93,6 +102,8 @@ module SVMKit
  # The kernel matrix between testing samples and training samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2

  n_samples, = x.shape
@@ -100,16 +111,6 @@ module SVMKit
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

- # Claculate the mean accuracy of the given testing data.
- #
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
- # The kernel matrix between testing samples and training samples.
- # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
- # @return [Float] Mean accuracy
- def score(x, y)
- super
- end
-
  # Dump marshal data.
  # @return [Hash] The marshal data about KernelSVC.
  def marshal_dump
@@ -50,6 +50,11 @@ module SVMKit
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
  max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @params = {}
  @params[:reg_param] = reg_param
  @params[:fit_bias] = fit_bias
@@ -71,6 +76,9 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [LogisticRegression] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
+
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
  _n_samples, n_features = x.shape
@@ -79,14 +87,14 @@ module SVMKit
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
  @bias_term = Numo::DFloat.zeros(n_classes)
  n_classes.times do |n|
- bin_y = Numo::Int32.cast(y.eq(@classes[n]))
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
  weight, bias = binary_fit(x, bin_y)
  @weight_vec[n, true] = weight
  @bias_term[n] = bias
  end
  else
  negative_label = y.to_a.uniq.sort.first
- bin_y = Numo::Int32.cast(y.ne(negative_label))
+ bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec, @bias_term = binary_fit(x, bin_y)
  end

@@ -98,6 +106,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
+
  x.dot(@weight_vec.transpose) + @bias_term
  end

@@ -106,10 +116,12 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
- return Numo::Int32.cast(decision_function(x).ge(0.5)) * 2 - 1 if @classes.size <= 2
+ SVMKit::Validation.check_sample_array(x)
+
+ return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2

  n_samples, = x.shape
- decision_values = decision_function(x)
+ decision_values = predict_proba(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
  end

@@ -118,6 +130,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ SVMKit::Validation.check_sample_array(x)
+
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2

@@ -165,9 +179,9 @@ module SVMKit
  rand_ids.concat(subset_ids)
  # update the weight vector.
  df = samples[subset_ids, true].dot(weight_vec.transpose)
- coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
+ coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
  mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
  # scale the weight vector.
  normalize_weight_vec(weight_vec) if @params[:normalize]
  end
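The changed coef and weight-update lines above are the gradient fix announced in HISTORY.md. A short worked sketch, assuming the {-1, +1} label encoding that fit now produces via `* 2 - 1`: with the margin df = w^T x, the per-sample derivative of the logistic loss log(1 + exp(-y df)) is exactly the new coef times x, so the regularized mini-batch update has to add mean_vec rather than subtract it.

```latex
% Sketch only: y_i \in \{-1, +1\}, \eta_t = learning_rate(t), \lambda = reg_param, B = mini-batch.
\frac{\partial}{\partial \mathbf{w}} \log\!\bigl(1 + e^{-y_i \mathbf{w}^{\top}\mathbf{x}_i}\bigr)
  = \Bigl(\frac{y_i}{1 + e^{-y_i \mathbf{w}^{\top}\mathbf{x}_i}} - y_i\Bigr)\mathbf{x}_i
  = \mathrm{coef}_i\,\mathbf{x}_i,
\qquad
\mathbf{w} \leftarrow \mathbf{w} - \eta_t\Bigl(\lambda\,\mathbf{w} + \tfrac{1}{|B|}\textstyle\sum_{i \in B}\mathrm{coef}_i\,\mathbf{x}_i\Bigr)
```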
@@ -49,6 +49,11 @@ module SVMKit
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
  max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @params = {}
  @params[:reg_param] = reg_param
  @params[:fit_bias] = fit_bias
@@ -70,6 +75,9 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [SVC] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
+
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
  _n_samples, n_features = x.shape
@@ -97,6 +105,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
+
  x.dot(@weight_vec.transpose) + @bias_term
  end

@@ -105,6 +115,8 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2

  n_samples, = x.shape
@@ -38,6 +38,10 @@ module SVMKit
  # @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
  # @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
  def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
+ SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
+ SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
+ SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
+ SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
  @estimator = estimator
  @splitter = splitter
  @evaluator = evaluator
@@ -56,6 +60,8 @@ module SVMKit
  # * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
  # the return_train_score is false.
  def perform(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  # Initialize the report of cross validation.
  report = { test_score: [], train_score: nil, fit_time: [] }
  report[:train_score] = [] if @return_train_score
@@ -32,6 +32,10 @@ module SVMKit
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+ SVMKit::Validation.check_params_integer(n_splits: n_splits)
+ SVMKit::Validation.check_params_boolean(shuffle: shuffle)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @n_splits = n_splits
  @shuffle = shuffle
  @random_seed = random_seed
@@ -43,11 +47,9 @@ module SVMKit
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
  # The dataset to be used to generate data indices for K-fold cross validation.
- # @param y [Numo::Int32] (shape: [n_samples])
- # The labels to be used to generate data indices for stratified K-fold cross validation.
- # This argument exists to unify the interface between the K-fold methods, it is not used in the method.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
- def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
+ def split(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  # Initialize and check some variables.
  n_samples, = x.shape
  unless @n_splits.between?(2, n_samples)
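A hedged usage sketch tying the relaxed split signature above to the validated CrossValidation constructor from the previous hunk. The constant names SVMKit::ModelSelection::KFold, SVMKit::ModelSelection::CrossValidation, and SVMKit::LinearModel::SVC, and the array-of-index-pairs return value of split, are assumptions inferred from the doc comments in this diff; the toy dataset is made up.

```ruby
# Illustrative only: split no longer requires a label vector, and the
# cross-validation runner now type-checks its collaborators on construction.
require 'svmkit'

samples = Numo::DFloat.new(20, 2).rand
labels  = Numo::Int32[*Array.new(20) { |i| i < 10 ? 1 : -1 }]

kfold = SVMKit::ModelSelection::KFold.new(n_splits: 2, shuffle: true, random_seed: 1)
kfold.split(samples)  # the second argument is now optional (_y = nil)

svc = SVMKit::LinearModel::SVC.new(reg_param: 1.0, max_iter: 100, random_seed: 1)
cv  = SVMKit::ModelSelection::CrossValidation.new(estimator: svc, splitter: kfold)
report = cv.perform(samples, labels)
report[:test_score]   # per-fold scores; :fit_time and (optionally) :train_score are also reported
```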
@@ -32,6 +32,10 @@ module SVMKit
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
+ SVMKit::Validation.check_params_integer(n_splits: n_splits)
+ SVMKit::Validation.check_params_boolean(shuffle: shuffle)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @n_splits = n_splits
  @shuffle = shuffle
  @random_seed = random_seed
@@ -48,6 +52,8 @@ module SVMKit
  # The labels to be used to generate data indices for stratified K-fold cross validation.
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
  def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  # Check the number of samples in each class.
  unless valid_n_splits?(y)
  raise ArgumentError,
@@ -33,6 +33,7 @@ module SVMKit
  #
  # @param estimator [Classifier] The (binary) classifier for construction a multi-class classifier.
  def initialize(estimator: nil)
+ SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
  @params = {}
  @params[:estimator] = estimator
  @estimators = nil
@@ -45,6 +46,8 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [OneVsRestClassifier] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  y_arr = y.to_a
  @classes = Numo::Int32.asarray(y_arr.uniq.sort)
  @estimators = @classes.to_a.map do |label|
@@ -59,6 +62,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  n_classes = @classes.size
  Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
  end
@@ -68,6 +72,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -16,6 +16,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples = x.shape.first
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -26,6 +27,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
  def predict_log_proba(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  log_likelihoods = decision_function(x)
  log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -36,6 +38,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ SVMKit::Validation.check_sample_array(x)
  Numo::NMath.exp(predict_log_proba(x)).abs
  end
  end
@@ -75,6 +78,8 @@ module SVMKit
  # to be used for fitting the model.
  # @return [GaussianNB] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  n_samples, = x.shape
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -88,6 +93,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  n_classes = @classes.size
  log_likelihoods = Array.new(n_classes) do |l|
  Math.log(@class_priors[l]) - 0.5 * (
@@ -147,6 +153,7 @@ module SVMKit
  #
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  def initialize(smoothing_param: 1.0)
+ SVMKit::Validation.check_params_float(smoothing_param: smoothing_param)
  @params = {}
  @params[:smoothing_param] = smoothing_param
  end
@@ -158,6 +165,8 @@ module SVMKit
  # to be used for fitting the model.
  # @return [MultinomialNB] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  n_samples, = x.shape
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -173,6 +182,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  n_classes = @classes.size
  bin_x = x.gt(0)
  log_likelihoods = Array.new(n_classes) do |l|
@@ -230,6 +240,7 @@ module SVMKit
  # @param smoothing_param [Float] The Laplace smoothing parameter.
  # @param bin_threshold [Float] The threshold for binarizing of features.
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
+ SVMKit::Validation.check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
  @params = {}
  @params[:smoothing_param] = smoothing_param
  @params[:bin_threshold] = bin_threshold
@@ -242,6 +253,8 @@ module SVMKit
  # to be used for fitting the model.
  # @return [BernoulliNB] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  n_samples, = x.shape
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -260,6 +273,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  n_classes = @classes.size
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
  not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
@@ -35,6 +35,7 @@ module SVMKit
  #
  # @param n_neighbors [Integer] The number of neighbors.
  def initialize(n_neighbors: 5)
+ SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
  @params = {}
  @params[:n_neighbors] = n_neighbors
  @prototypes = nil
@@ -48,6 +49,8 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [KNeighborsClassifier] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  @prototypes = Numo::DFloat.asarray(x.to_a)
  @labels = Numo::Int32.asarray(y.to_a)
  @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
@@ -59,6 +62,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
  n_samples, n_prototypes = distance_matrix.shape
  n_classes = @classes.size
@@ -76,6 +80,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples = x.shape.first
  decision_values = decision_function(x)
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -11,6 +11,8 @@ module SVMKit
  # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
  def euclidean_distance(x, y = nil)
  y = x if y.nil?
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_sample_array(y)
  sum_x_vec = (x**2).sum(1)
  sum_y_vec = (y**2).sum(1)
  dot_xy_mat = x.dot(y.transpose)
@@ -29,6 +31,9 @@ module SVMKit
  def rbf_kernel(x, y = nil, gamma = nil)
  y = x if y.nil?
  gamma ||= 1.0 / x.shape[1]
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_sample_array(y)
+ SVMKit::Validation.check_params_float(gamma: gamma)
  distance_matrix = euclidean_distance(x, y)
  Numo::NMath.exp((distance_matrix**2) * -gamma)
  end
@@ -40,6 +45,8 @@ module SVMKit
  # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
  def linear_kernel(x, y = nil)
  y = x if y.nil?
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_sample_array(y)
  x.dot(y.transpose)
  end

@@ -54,6 +61,10 @@ module SVMKit
  def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
  y = x if y.nil?
  gamma ||= 1.0 / x.shape[1]
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_sample_array(y)
+ SVMKit::Validation.check_params_float(gamma: gamma)
+ SVMKit::Validation.check_params_integer(degree: degree, coef: coef)
  (x.dot(y.transpose) * gamma + coef)**degree
  end

@@ -67,6 +78,10 @@ module SVMKit
  def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
  y = x if y.nil?
  gamma ||= 1.0 / x.shape[1]
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_sample_array(y)
+ SVMKit::Validation.check_params_float(gamma: gamma)
+ SVMKit::Validation.check_params_integer(coef: coef)
  Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
  end
  end
@@ -58,6 +58,12 @@ module SVMKit
  # @param random_seed [Integer] The seed value using to initialize the random generator.
  def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
  init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
+ reg_param_factor: reg_param_factor, init_std: init_std)
+ SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+ SVMKit::Validation.check_params_string(loss: loss)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+
  @params = {}
  @params[:n_factors] = n_factors
  @params[:loss] = loss
@@ -82,6 +88,9 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [FactorizationMachineClassifier] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
+
  @classes = Numo::Int32[*y.to_a.uniq.sort]
  n_classes = @classes.size
  _n_samples, n_features = x.shape
@@ -111,6 +120,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
  def decision_function(x)
+ SVMKit::Validation.check_sample_array(x)
  linear_term = @bias_term + x.dot(@weight_vec.transpose)
  factor_term = if @classes.size <= 2
  0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum
@@ -125,6 +135,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2

  n_samples, = x.shape
@@ -137,6 +148,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ SVMKit::Validation.check_sample_array(x)
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2

@@ -32,6 +32,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
  # @return [L2Normalizer]
  def fit(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
  self
  end
@@ -43,6 +44,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
  # @return [Numo::DFloat] The normalized samples.
  def fit_transform(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  fit(x)
  x / @norm_vec.tile(x.shape[1], 1).transpose
  end
@@ -28,6 +28,7 @@ module SVMKit
  #
  # @param feature_range [Array<Float>] The desired range of samples.
  def initialize(feature_range: [0.0, 1.0])
+ SVMKit::Validation.check_params_type(Array, feature_range: feature_range)
  @params = {}
  @params[:feature_range] = feature_range
  @min_vec = nil
@@ -41,6 +42,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
  # @return [MinMaxScaler]
  def fit(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  @min_vec = x.min(0)
  @max_vec = x.max(0)
  self
@@ -53,6 +55,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
  # @return [Numo::DFloat] The scaled samples.
  def fit_transform(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  fit(x).transform(x)
  end

@@ -61,6 +64,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
  # @return [Numo::DFloat] The scaled samples.
  def transform(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  dif_vec = @max_vec - @min_vec
  nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
@@ -39,6 +39,7 @@ module SVMKit
  # The samples to calculate the mean values and standard deviations.
  # @return [StandardScaler]
  def fit(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  @mean_vec = x.mean(0)
  @std_vec = x.stddev(0)
  self
@@ -52,6 +53,7 @@ module SVMKit
  # The samples to calculate the mean values and standard deviations.
  # @return [Numo::DFloat] The scaled samples.
  def fit_transform(x, _y = nil)
+ SVMKit::Validation.check_sample_array(x)
  fit(x).transform(x)
  end

@@ -60,6 +62,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
  # @return [Numo::DFloat] The scaled samples.
  def transform(x)
+ SVMKit::Validation.check_sample_array(x)
  n_samples, = x.shape
  (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
  end
@@ -54,6 +54,11 @@ module SVMKit
  # It is used to randomly determine the order of features when deciding spliting point.
  def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
  random_seed: nil)
+ SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+ max_features: max_features, random_seed: random_seed)
+ SVMKit::Validation.check_params_integer(min_samples_leaf: min_samples_leaf)
+ SVMKit::Validation.check_params_string(criterion: criterion)
+
  @params = {}
  @params[:criterion] = criterion
  @params[:max_depth] = max_depth
@@ -62,12 +67,12 @@ module SVMKit
  @params[:max_features] = max_features
  @params[:random_seed] = random_seed
  @params[:random_seed] ||= srand
- @rng = Random.new(@params[:random_seed])
  @tree = nil
  @classes = nil
  @feature_importances = nil
  @n_leaves = nil
  @leaf_labels = nil
+ @rng = Random.new(@params[:random_seed])
  end

  # Fit the model with given training data.
@@ -76,6 +81,8 @@ module SVMKit
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
  # @return [DecisionTreeClassifier] The learned classifier itself.
  def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_label_array(y)
  n_samples, n_features = x.shape
  @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
@@ -90,6 +97,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
  def predict(x)
+ SVMKit::Validation.check_sample_array(x)
  @leaf_labels[apply(x)]
  end

@@ -98,6 +106,7 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities.
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
  def predict_proba(x)
+ SVMKit::Validation.check_sample_array(x)
  probs = Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_at_node(@tree, x[n, true]) })]
  probs[true, @classes]
  end
@@ -107,14 +116,18 @@ module SVMKit
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
  def apply(x)
+ SVMKit::Validation.check_sample_array(x)
  Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
  end

  # Dump marshal data.
  # @return [Hash] The marshal data about DecisionTreeClassifier
  def marshal_dump
- { params: @params, classes: @classes, tree: @tree,
- feature_importances: @feature_importances, leaf_labels: @leaf_labels,
+ { params: @params,
+ classes: @classes,
+ tree: @tree,
+ feature_importances: @feature_importances,
+ leaf_labels: @leaf_labels,
  rng: @rng }
  end

@@ -0,0 +1,55 @@
+ # frozen_string_literal: true
+
+ module SVMKit
+ # @!visibility private
+ module Validation
+ module_function
+
+ # @!visibility private
+ def check_sample_array(x)
+ raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
+ raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
+ nil
+ end
+
+ # @!visibility private
+ def check_label_array(y)
+ raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
+ raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.shape.size == 1
+ nil
+ end
+
+ # @!visibility private
+ def check_params_type(type, params = {})
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
+ nil
+ end
+
+ # @!visibility private
+ def check_params_type_or_nil(type, params = {})
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
+ nil
+ end
+
+ # @!visibility private
+ def check_params_float(params = {})
+ check_params_type(Float, params)
+ end
+
+ # @!visibility private
+ def check_params_integer(params = {})
+ check_params_type(Integer, params)
+ end
+
+ # @!visibility private
+ def check_params_string(params = {})
+ check_params_type(String, params)
+ end
+
+ # @!visibility private
+ def check_params_boolean(params = {})
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
+ nil
+ end
+ end
+ end
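A minimal sketch of how the new Validation module behaves when called directly, using only the methods defined in the file above; the sample arrays are made up.

```ruby
# Illustrative only: each check returns nil on success and raises on bad input.
require 'svmkit'

SVMKit::Validation.check_params_integer(n_estimators: 10)          # => nil
SVMKit::Validation.check_params_integer(n_estimators: 10.0)        # raises TypeError
SVMKit::Validation.check_sample_array(Numo::DFloat.new(5, 3).rand) # => nil (2-D DFloat)
SVMKit::Validation.check_sample_array(Numo::DFloat.new(5).rand)    # raises ArgumentError (1-D)
SVMKit::Validation.check_label_array(Numo::Int32[1, -1, 1])        # => nil
```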
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
  # @!visibility private
- VERSION = '0.2.7'
+ VERSION = '0.2.8'
  end
data/lib/svmkit.rb CHANGED
@@ -3,6 +3,7 @@
  require 'numo/narray'

  require 'svmkit/version'
+ require 'svmkit/validation'
  require 'svmkit/pairwise_metric'
  require 'svmkit/dataset'
  require 'svmkit/base/base_estimator'
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.2.7
+ version: 0.2.8
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-04-01 00:00:00.000000000 Z
+ date: 2018-04-05 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -135,6 +135,7 @@ files:
  - lib/svmkit/preprocessing/min_max_scaler.rb
  - lib/svmkit/preprocessing/standard_scaler.rb
  - lib/svmkit/tree/decision_tree_classifier.rb
+ - lib/svmkit/validation.rb
  - lib/svmkit/version.rb
  - svmkit.gemspec
  homepage: https://github.com/yoshoku/svmkit