svmkit 0.2.7 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 46878b59860b61bae7b522fb02af984208609f56
4
- data.tar.gz: 6e889c6ad8382c654455a242d2f7f27de41de2d5
3
+ metadata.gz: bbc648db53b4285bd15ae00e2464c4376d027893
4
+ data.tar.gz: 28d9db47ae3053031f1643329cf02cd4e7d9c135
5
5
  SHA512:
6
- metadata.gz: cddb239bf0768e6d983ce942ed6a7bdda8b827fa2e73e51c1b4591e8af3c641339377417f844358159c3a2bdff51d2f5678ef07fe21fe86e51136289e69ea38c
7
- data.tar.gz: 64c2029c729de580765ad9ee89fd57821a40773721eac291201cb9b9f4c72697f5945c8f5259ed8d6a879f0b35dac841bcdd6d5322014c9cc78b9a42046dc310
6
+ metadata.gz: c504f010a70fc7a31afa4471096092adac9ff44de979d42d7277c63d737d517981e2ac6d7481ad42dc1c864f2a7756d39cd1d7697d2d4b1bca150d0a4eca3b8e
7
+ data.tar.gz: 8d89dc525ed37626a2d97e6fe3bebdacd4ec2945df285d8275cd0bd5df62c5ebc897dca67b91157df0bfc460fa987ed4098654ce6d164cb05ec46de4c6fe27af
data/HISTORY.md CHANGED
@@ -1,3 +1,8 @@
1
+ # 0.2.8
2
+ - Fixed bug on gradient calculation of Logistic Regression.
3
+ - Fixed to change accessor of params of estimators to read only.
4
+ - Added parameter validation.
5
+
1
6
  # 0.2.7
2
7
  - Fixed to support multiclass classification into LinearSVC, LogisticRegression, KernelSVC, and FactorizationMachineClassifier
3
8
 
@@ -7,7 +7,7 @@ module SVMKit
7
7
  module BaseEstimator
8
8
  # Return parameters about an estimator.
9
9
  # @return [Hash]
10
- attr_accessor :params
10
+ attr_reader :params
11
11
  end
12
12
  end
13
13
  end
@@ -20,6 +20,8 @@ module SVMKit
20
20
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
21
21
  # @return [Float] Mean accuracy
22
22
  def score(x, y)
23
+ SVMKit::Validation.check_sample_array(x)
24
+ SVMKit::Validation.check_label_array(y)
23
25
  evaluator = SVMKit::EvaluationMeasure::Accuracy.new
24
26
  evaluator.score(y, predict(x))
25
27
  end
@@ -50,6 +50,11 @@ module SVMKit
50
50
  # It is used to randomly determine the order of features when deciding splitting point.
51
51
  def initialize(n_estimators: 10, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
52
52
  max_features: nil, random_seed: nil)
53
+ SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
54
+ max_features: max_features, random_seed: random_seed)
55
+ SVMKit::Validation.check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
56
+ SVMKit::Validation.check_params_string(criterion: criterion)
57
+
53
58
  @params = {}
54
59
  @params[:n_estimators] = n_estimators
55
60
  @params[:criterion] = criterion
@@ -59,10 +64,10 @@ module SVMKit
59
64
  @params[:max_features] = max_features
60
65
  @params[:random_seed] = random_seed
61
66
  @params[:random_seed] ||= srand
62
- @rng = Random.new(@params[:random_seed])
63
67
  @estimators = nil
64
68
  @classes = nil
65
69
  @feature_importances = nil
70
+ @rng = Random.new(@params[:random_seed])
66
71
  end
67
72
 
68
73
  # Fit the model with given training data.
@@ -71,6 +76,8 @@ module SVMKit
71
76
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
72
77
  # @return [RandomForestClassifier] The learned classifier itself.
73
78
  def fit(x, y)
79
+ SVMKit::Validation.check_sample_array(x)
80
+ SVMKit::Validation.check_label_array(y)
74
81
  # Initialize some variables.
75
82
  n_samples, n_features = x.shape
76
83
  @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
@@ -98,6 +105,7 @@ module SVMKit
98
105
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
99
106
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
100
107
  def predict(x)
108
+ SVMKit::Validation.check_sample_array(x)
101
109
  n_samples, = x.shape
102
110
  n_classes = @classes.size
103
111
  classes_arr = @classes.to_a
@@ -117,6 +125,7 @@ module SVMKit
117
125
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
118
126
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
119
127
  def predict_proba(x)
128
+ SVMKit::Validation.check_sample_array(x)
120
129
  n_samples, = x.shape
121
130
  n_classes = @classes.size
122
131
  classes_arr = @classes.to_a
@@ -136,6 +145,7 @@ module SVMKit
136
145
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
137
146
  # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
138
147
  def apply(x)
148
+ SVMKit::Validation.check_sample_array(x)
139
149
  Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
140
150
  end
141
151
 
@@ -19,6 +19,9 @@ module SVMKit
19
19
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
20
20
  # @return [Float] Mean accuracy
21
21
  def score(y_true, y_pred)
22
+ SVMKit::Validation.check_label_array(y_true)
23
+ SVMKit::Validation.check_label_array(y_pred)
24
+
22
25
  (y_true.to_a.map.with_index { |label, n| label == y_pred[n] ? 1 : 0 }).inject(:+) / y_true.size.to_f
23
26
  end
24
27
  end
@@ -23,6 +23,7 @@ module SVMKit
23
23
  #
24
24
  # @param average [String] The average type ('binary', 'micro', 'macro')
25
25
  def initialize(average: 'binary')
26
+ SVMKit::Validation.check_params_string(average: average)
26
27
  @average = average
27
28
  end
28
29
 
@@ -32,6 +33,9 @@ module SVMKit
32
33
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
33
34
  # @return [Float] Average F1-score
34
35
  def score(y_true, y_pred)
36
+ SVMKit::Validation.check_label_array(y_true)
37
+ SVMKit::Validation.check_label_array(y_pred)
38
+
35
39
  case @average
36
40
  when 'binary'
37
41
  f_score_each_class(y_true, y_pred).last
@@ -23,6 +23,7 @@ module SVMKit
23
23
  #
24
24
  # @param average [String] The average type ('binary', 'micro', 'macro')
25
25
  def initialize(average: 'binary')
26
+ SVMKit::Validation.check_params_string(average: average)
26
27
  @average = average
27
28
  end
28
29
 
@@ -32,6 +33,9 @@ module SVMKit
32
33
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
33
34
  # @return [Float] Average precision
34
35
  def score(y_true, y_pred)
36
+ SVMKit::Validation.check_label_array(y_true)
37
+ SVMKit::Validation.check_label_array(y_pred)
38
+
35
39
  case @average
36
40
  when 'binary'
37
41
  precision_each_class(y_true, y_pred).last
@@ -23,6 +23,7 @@ module SVMKit
23
23
  #
24
24
  # @param average [String] The average type ('binary', 'micro', 'macro')
25
25
  def initialize(average: 'binary')
26
+ SVMKit::Validation.check_params_string(average: average)
26
27
  @average = average
27
28
  end
28
29
 
@@ -32,6 +33,9 @@ module SVMKit
32
33
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
33
34
  # @return [Float] Average recall
34
35
  def score(y_true, y_pred)
36
+ SVMKit::Validation.check_label_array(y_true)
37
+ SVMKit::Validation.check_label_array(y_pred)
38
+
35
39
  case @average
36
40
  when 'binary'
37
41
  recall_each_class(y_true, y_pred).last
@@ -37,14 +37,18 @@ module SVMKit
37
37
  # @param n_components [Integer] The number of dimensions of the RBF kernel feature space.
38
38
  # @param random_seed [Integer] The seed value used to initialize the random generator.
39
39
  def initialize(gamma: 1.0, n_components: 128, random_seed: nil)
40
+ SVMKit::Validation.check_params_float(gamma: gamma)
41
+ SVMKit::Validation.check_params_integer(n_components: n_components)
42
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
43
+
40
44
  @params = {}
41
45
  @params[:gamma] = gamma
42
46
  @params[:n_components] = n_components
43
47
  @params[:random_seed] = random_seed
44
48
  @params[:random_seed] ||= srand
45
- @rng = Random.new(@params[:random_seed])
46
49
  @random_mat = nil
47
50
  @random_vec = nil
51
+ @rng = Random.new(@params[:random_seed])
48
52
  end
49
53
 
50
54
  # Fit the model with given training data.
@@ -55,6 +59,8 @@ module SVMKit
55
59
  # This method uses only the number of features of the data.
56
60
  # @return [RBF] The learned transformer itself.
57
61
  def fit(x, _y = nil)
62
+ SVMKit::Validation.check_sample_array(x)
63
+
58
64
  n_features = x.shape[1]
59
65
  @params[:n_components] = 2 * n_features if @params[:n_components] <= 0
60
66
  @random_mat = rand_normal([n_features, @params[:n_components]]) * (2.0 * @params[:gamma])**0.5
@@ -72,6 +78,8 @@ module SVMKit
72
78
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
73
79
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
74
80
  def fit_transform(x, _y = nil)
81
+ SVMKit::Validation.check_sample_array(x)
82
+
75
83
  fit(x).transform(x)
76
84
  end
77
85
 
@@ -82,6 +90,8 @@ module SVMKit
82
90
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
83
91
  # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
84
92
  def transform(x)
93
+ SVMKit::Validation.check_sample_array(x)
94
+
85
95
  n_samples, = x.shape
86
96
  projection = x.dot(@random_mat) + @random_vec.tile(n_samples, 1)
87
97
  Numo::NMath.sin(projection) * ((2.0 / @params[:n_components])**0.5)
@@ -42,13 +42,17 @@ module SVMKit
42
42
  # @param max_iter [Integer] The maximum number of iterations.
43
43
  # @param random_seed [Integer] The seed value used to initialize the random generator.
44
44
  def initialize(reg_param: 1.0, max_iter: 1000, random_seed: nil)
45
+ SVMKit::Validation.check_params_float(reg_param: reg_param)
46
+ SVMKit::Validation.check_params_integer(max_iter: max_iter)
47
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
48
+
45
49
  @params = {}
46
50
  @params[:reg_param] = reg_param
47
51
  @params[:max_iter] = max_iter
48
52
  @params[:random_seed] = random_seed
49
53
  @params[:random_seed] ||= srand
50
54
  @weight_vec = nil
51
- @classes
55
+ @classes = nil
52
56
  @rng = Random.new(@params[:random_seed])
53
57
  end
54
58
 
@@ -59,6 +63,9 @@ module SVMKit
59
63
  # @param y [Numo::Int32] (shape: [n_training_samples]) The labels to be used for fitting the model.
60
64
  # @return [KernelSVC] The learned classifier itself.
61
65
  def fit(x, y)
66
+ SVMKit::Validation.check_sample_array(x)
67
+ SVMKit::Validation.check_label_array(y)
68
+
62
69
  @classes = Numo::Int32[*y.to_a.uniq.sort]
63
70
  n_classes = @classes.size
64
71
  _n_samples, n_features = x.shape
@@ -84,6 +91,8 @@ module SVMKit
84
91
  # The kernel matrix between testing samples and training samples to compute the scores.
85
92
  # @return [Numo::DFloat] (shape: [n_testing_samples, n_classes]) Confidence score per sample.
86
93
  def decision_function(x)
94
+ SVMKit::Validation.check_sample_array(x)
95
+
87
96
  x.dot(@weight_vec.transpose)
88
97
  end
89
98
 
@@ -93,6 +102,8 @@ module SVMKit
93
102
  # The kernel matrix between testing samples and training samples to predict the labels.
94
103
  # @return [Numo::Int32] (shape: [n_testing_samples]) Predicted class label per sample.
95
104
  def predict(x)
105
+ SVMKit::Validation.check_sample_array(x)
106
+
96
107
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
97
108
 
98
109
  n_samples, = x.shape
@@ -100,16 +111,6 @@ module SVMKit
100
111
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
101
112
  end
102
113
 
103
- # Claculate the mean accuracy of the given testing data.
104
- #
105
- # @param x [Numo::DFloat] (shape: [n_testing_samples, n_training_samples])
106
- # The kernel matrix between testing samples and training samples.
107
- # @param y [Numo::Int32] (shape: [n_testing_samples]) True labels for testing data.
108
- # @return [Float] Mean accuracy
109
- def score(x, y)
110
- super
111
- end
112
-
113
114
  # Dump marshal data.
114
115
  # @return [Hash] The marshal data about KernelSVC.
115
116
  def marshal_dump
@@ -50,6 +50,11 @@ module SVMKit
50
50
  # @param random_seed [Integer] The seed value used to initialize the random generator.
51
51
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
52
52
  max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
53
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
54
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
55
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
56
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
57
+
53
58
  @params = {}
54
59
  @params[:reg_param] = reg_param
55
60
  @params[:fit_bias] = fit_bias
@@ -71,6 +76,9 @@ module SVMKit
71
76
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
72
77
  # @return [LogisticRegression] The learned classifier itself.
73
78
  def fit(x, y)
79
+ SVMKit::Validation.check_sample_array(x)
80
+ SVMKit::Validation.check_label_array(y)
81
+
74
82
  @classes = Numo::Int32[*y.to_a.uniq.sort]
75
83
  n_classes = @classes.size
76
84
  _n_samples, n_features = x.shape
@@ -79,14 +87,14 @@ module SVMKit
79
87
  @weight_vec = Numo::DFloat.zeros(n_classes, n_features)
80
88
  @bias_term = Numo::DFloat.zeros(n_classes)
81
89
  n_classes.times do |n|
82
- bin_y = Numo::Int32.cast(y.eq(@classes[n]))
90
+ bin_y = Numo::Int32.cast(y.eq(@classes[n])) * 2 - 1
83
91
  weight, bias = binary_fit(x, bin_y)
84
92
  @weight_vec[n, true] = weight
85
93
  @bias_term[n] = bias
86
94
  end
87
95
  else
88
96
  negative_label = y.to_a.uniq.sort.first
89
- bin_y = Numo::Int32.cast(y.ne(negative_label))
97
+ bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
90
98
  @weight_vec, @bias_term = binary_fit(x, bin_y)
91
99
  end
92
100
 
@@ -98,6 +106,8 @@ module SVMKit
98
106
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
99
107
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
100
108
  def decision_function(x)
109
+ SVMKit::Validation.check_sample_array(x)
110
+
101
111
  x.dot(@weight_vec.transpose) + @bias_term
102
112
  end
103
113
 
@@ -106,10 +116,12 @@ module SVMKit
106
116
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
107
117
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
108
118
  def predict(x)
109
- return Numo::Int32.cast(decision_function(x).ge(0.5)) * 2 - 1 if @classes.size <= 2
119
+ SVMKit::Validation.check_sample_array(x)
120
+
121
+ return Numo::Int32.cast(predict_proba(x)[true, 1].ge(0.5)) * 2 - 1 if @classes.size <= 2
110
122
 
111
123
  n_samples, = x.shape
112
- decision_values = decision_function(x)
124
+ decision_values = predict_proba(x)
113
125
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
114
126
  end
115
127
 
@@ -118,6 +130,8 @@ module SVMKit
118
130
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
119
131
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
120
132
  def predict_proba(x)
133
+ SVMKit::Validation.check_sample_array(x)
134
+
121
135
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
122
136
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
123
137
 
@@ -165,9 +179,9 @@ module SVMKit
165
179
  rand_ids.concat(subset_ids)
166
180
  # update the weight vector.
167
181
  df = samples[subset_ids, true].dot(weight_vec.transpose)
168
- coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0)
182
+ coef = bin_y[subset_ids] / (Numo::NMath.exp(-bin_y[subset_ids] * df) + 1.0) - bin_y[subset_ids]
169
183
  mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
170
- weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec - mean_vec)
184
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
171
185
  # scale the weight vector.
172
186
  normalize_weight_vec(weight_vec) if @params[:normalize]
173
187
  end
@@ -49,6 +49,11 @@ module SVMKit
49
49
  # @param random_seed [Integer] The seed value used to initialize the random generator.
50
50
  def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0,
51
51
  max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
52
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale)
53
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
54
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
55
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
56
+
52
57
  @params = {}
53
58
  @params[:reg_param] = reg_param
54
59
  @params[:fit_bias] = fit_bias
@@ -70,6 +75,9 @@ module SVMKit
70
75
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
71
76
  # @return [SVC] The learned classifier itself.
72
77
  def fit(x, y)
78
+ SVMKit::Validation.check_sample_array(x)
79
+ SVMKit::Validation.check_label_array(y)
80
+
73
81
  @classes = Numo::Int32[*y.to_a.uniq.sort]
74
82
  n_classes = @classes.size
75
83
  _n_samples, n_features = x.shape
@@ -97,6 +105,8 @@ module SVMKit
97
105
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
98
106
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
99
107
  def decision_function(x)
108
+ SVMKit::Validation.check_sample_array(x)
109
+
100
110
  x.dot(@weight_vec.transpose) + @bias_term
101
111
  end
102
112
 
@@ -105,6 +115,8 @@ module SVMKit
105
115
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
106
116
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
107
117
  def predict(x)
118
+ SVMKit::Validation.check_sample_array(x)
119
+
108
120
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
109
121
 
110
122
  n_samples, = x.shape
@@ -38,6 +38,10 @@ module SVMKit
38
38
  # @param evaluator [Evaluator] The evaluator that calculates score of estimator results.
39
39
  # @param return_train_score [Boolean] The flag indicating whether to calculate the score of training dataset.
40
40
  def initialize(estimator: nil, splitter: nil, evaluator: nil, return_train_score: false)
41
+ SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
42
+ SVMKit::Validation.check_params_type(SVMKit::Base::Splitter, splitter: splitter)
43
+ SVMKit::Validation.check_params_type_or_nil(SVMKit::Base::Evaluator, evaluator: evaluator)
44
+ SVMKit::Validation.check_params_boolean(return_train_score: return_train_score)
41
45
  @estimator = estimator
42
46
  @splitter = splitter
43
47
  @evaluator = evaluator
@@ -56,6 +60,8 @@ module SVMKit
56
60
  # * :train_score (Array<Float>) The scores of training dataset for each split. This option is nil if
57
61
  # the return_train_score is false.
58
62
  def perform(x, y)
63
+ SVMKit::Validation.check_sample_array(x)
64
+ SVMKit::Validation.check_label_array(y)
59
65
  # Initialize the report of cross validation.
60
66
  report = { test_score: [], train_score: nil, fit_time: [] }
61
67
  report[:train_score] = [] if @return_train_score
@@ -32,6 +32,10 @@ module SVMKit
32
32
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
33
33
  # @param random_seed [Integer] The seed value used to initialize the random generator.
34
34
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
35
+ SVMKit::Validation.check_params_integer(n_splits: n_splits)
36
+ SVMKit::Validation.check_params_boolean(shuffle: shuffle)
37
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
38
+
35
39
  @n_splits = n_splits
36
40
  @shuffle = shuffle
37
41
  @random_seed = random_seed
@@ -43,11 +47,9 @@ module SVMKit
43
47
  #
44
48
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
45
49
  # The dataset to be used to generate data indices for K-fold cross validation.
46
- # @param y [Numo::Int32] (shape: [n_samples])
47
- # The labels to be used to generate data indices for stratified K-fold cross validation.
48
- # This argument exists to unify the interface between the K-fold methods, it is not used in the method.
49
50
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
50
- def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
51
+ def split(x, _y = nil)
52
+ SVMKit::Validation.check_sample_array(x)
51
53
  # Initialize and check some variables.
52
54
  n_samples, = x.shape
53
55
  unless @n_splits.between?(2, n_samples)
@@ -32,6 +32,10 @@ module SVMKit
32
32
  # @param shuffle [Boolean] The flag indicating whether to shuffle the dataset.
33
33
  # @param random_seed [Integer] The seed value used to initialize the random generator.
34
34
  def initialize(n_splits: 3, shuffle: false, random_seed: nil)
35
+ SVMKit::Validation.check_params_integer(n_splits: n_splits)
36
+ SVMKit::Validation.check_params_boolean(shuffle: shuffle)
37
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
38
+
35
39
  @n_splits = n_splits
36
40
  @shuffle = shuffle
37
41
  @random_seed = random_seed
@@ -48,6 +52,8 @@ module SVMKit
48
52
  # The labels to be used to generate data indices for stratified K-fold cross validation.
49
53
  # @return [Array] The set of data indices for constructing the training and testing dataset in each fold.
50
54
  def split(x, y) # rubocop:disable Lint/UnusedMethodArgument
55
+ SVMKit::Validation.check_sample_array(x)
56
+ SVMKit::Validation.check_label_array(y)
51
57
  # Check the number of samples in each class.
52
58
  unless valid_n_splits?(y)
53
59
  raise ArgumentError,
@@ -33,6 +33,7 @@ module SVMKit
33
33
  #
34
34
  # @param estimator [Classifier] The (binary) classifier for constructing a multi-class classifier.
35
35
  def initialize(estimator: nil)
36
+ SVMKit::Validation.check_params_type(SVMKit::Base::BaseEstimator, estimator: estimator)
36
37
  @params = {}
37
38
  @params[:estimator] = estimator
38
39
  @estimators = nil
@@ -45,6 +46,8 @@ module SVMKit
45
46
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
46
47
  # @return [OneVsRestClassifier] The learned classifier itself.
47
48
  def fit(x, y)
49
+ SVMKit::Validation.check_sample_array(x)
50
+ SVMKit::Validation.check_label_array(y)
48
51
  y_arr = y.to_a
49
52
  @classes = Numo::Int32.asarray(y_arr.uniq.sort)
50
53
  @estimators = @classes.to_a.map do |label|
@@ -59,6 +62,7 @@ module SVMKit
59
62
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
60
63
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
61
64
  def decision_function(x)
65
+ SVMKit::Validation.check_sample_array(x)
62
66
  n_classes = @classes.size
63
67
  Numo::DFloat.asarray(Array.new(n_classes) { |m| @estimators[m].decision_function(x).to_a }).transpose
64
68
  end
@@ -68,6 +72,7 @@ module SVMKit
68
72
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
69
73
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
70
74
  def predict(x)
75
+ SVMKit::Validation.check_sample_array(x)
71
76
  n_samples, = x.shape
72
77
  decision_values = decision_function(x)
73
78
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -16,6 +16,7 @@ module SVMKit
16
16
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
17
17
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
18
18
  def predict(x)
19
+ SVMKit::Validation.check_sample_array(x)
19
20
  n_samples = x.shape.first
20
21
  decision_values = decision_function(x)
21
22
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -26,6 +27,7 @@ module SVMKit
26
27
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
27
28
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
28
29
  def predict_log_proba(x)
30
+ SVMKit::Validation.check_sample_array(x)
29
31
  n_samples, = x.shape
30
32
  log_likelihoods = decision_function(x)
31
33
  log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
@@ -36,6 +38,7 @@ module SVMKit
36
38
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
37
39
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
38
40
  def predict_proba(x)
41
+ SVMKit::Validation.check_sample_array(x)
39
42
  Numo::NMath.exp(predict_log_proba(x)).abs
40
43
  end
41
44
  end
@@ -75,6 +78,8 @@ module SVMKit
75
78
  # to be used for fitting the model.
76
79
  # @return [GaussianNB] The learned classifier itself.
77
80
  def fit(x, y)
81
+ SVMKit::Validation.check_sample_array(x)
82
+ SVMKit::Validation.check_label_array(y)
78
83
  n_samples, = x.shape
79
84
  @classes = Numo::Int32[*y.to_a.uniq.sort]
80
85
  @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -88,6 +93,7 @@ module SVMKit
88
93
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
89
94
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
90
95
  def decision_function(x)
96
+ SVMKit::Validation.check_sample_array(x)
91
97
  n_classes = @classes.size
92
98
  log_likelihoods = Array.new(n_classes) do |l|
93
99
  Math.log(@class_priors[l]) - 0.5 * (
@@ -147,6 +153,7 @@ module SVMKit
147
153
  #
148
154
  # @param smoothing_param [Float] The Laplace smoothing parameter.
149
155
  def initialize(smoothing_param: 1.0)
156
+ SVMKit::Validation.check_params_float(smoothing_param: smoothing_param)
150
157
  @params = {}
151
158
  @params[:smoothing_param] = smoothing_param
152
159
  end
@@ -158,6 +165,8 @@ module SVMKit
158
165
  # to be used for fitting the model.
159
166
  # @return [MultinomialNB] The learned classifier itself.
160
167
  def fit(x, y)
168
+ SVMKit::Validation.check_sample_array(x)
169
+ SVMKit::Validation.check_label_array(y)
161
170
  n_samples, = x.shape
162
171
  @classes = Numo::Int32[*y.to_a.uniq.sort]
163
172
  @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@@ -173,6 +182,7 @@ module SVMKit
173
182
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
174
183
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
175
184
  def decision_function(x)
185
+ SVMKit::Validation.check_sample_array(x)
176
186
  n_classes = @classes.size
177
187
  bin_x = x.gt(0)
178
188
  log_likelihoods = Array.new(n_classes) do |l|
@@ -230,6 +240,7 @@ module SVMKit
230
240
  # @param smoothing_param [Float] The Laplace smoothing parameter.
231
241
  # @param bin_threshold [Float] The threshold for binarizing of features.
232
242
  def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
243
+ SVMKit::Validation.check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
233
244
  @params = {}
234
245
  @params[:smoothing_param] = smoothing_param
235
246
  @params[:bin_threshold] = bin_threshold
@@ -242,6 +253,8 @@ module SVMKit
242
253
  # to be used for fitting the model.
243
254
  # @return [BernoulliNB] The learned classifier itself.
244
255
  def fit(x, y)
256
+ SVMKit::Validation.check_sample_array(x)
257
+ SVMKit::Validation.check_label_array(y)
245
258
  n_samples, = x.shape
246
259
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
247
260
  @classes = Numo::Int32[*y.to_a.uniq.sort]
@@ -260,6 +273,7 @@ module SVMKit
260
273
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
261
274
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
262
275
  def decision_function(x)
276
+ SVMKit::Validation.check_sample_array(x)
263
277
  n_classes = @classes.size
264
278
  bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
265
279
  not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
@@ -35,6 +35,7 @@ module SVMKit
35
35
  #
36
36
  # @param n_neighbors [Integer] The number of neighbors.
37
37
  def initialize(n_neighbors: 5)
38
+ SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
38
39
  @params = {}
39
40
  @params[:n_neighbors] = n_neighbors
40
41
  @prototypes = nil
@@ -48,6 +49,8 @@ module SVMKit
48
49
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
49
50
  # @return [KNeighborsClassifier] The learned classifier itself.
50
51
  def fit(x, y)
52
+ SVMKit::Validation.check_sample_array(x)
53
+ SVMKit::Validation.check_label_array(y)
51
54
  @prototypes = Numo::DFloat.asarray(x.to_a)
52
55
  @labels = Numo::Int32.asarray(y.to_a)
53
56
  @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
@@ -59,6 +62,7 @@ module SVMKit
59
62
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
60
63
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
61
64
  def decision_function(x)
65
+ SVMKit::Validation.check_sample_array(x)
62
66
  distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
63
67
  n_samples, n_prototypes = distance_matrix.shape
64
68
  n_classes = @classes.size
@@ -76,6 +80,7 @@ module SVMKit
76
80
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
77
81
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
78
82
  def predict(x)
83
+ SVMKit::Validation.check_sample_array(x)
79
84
  n_samples = x.shape.first
80
85
  decision_values = decision_function(x)
81
86
  Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
@@ -11,6 +11,8 @@ module SVMKit
11
11
  # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
12
12
  def euclidean_distance(x, y = nil)
13
13
  y = x if y.nil?
14
+ SVMKit::Validation.check_sample_array(x)
15
+ SVMKit::Validation.check_sample_array(y)
14
16
  sum_x_vec = (x**2).sum(1)
15
17
  sum_y_vec = (y**2).sum(1)
16
18
  dot_xy_mat = x.dot(y.transpose)
@@ -29,6 +31,9 @@ module SVMKit
29
31
  def rbf_kernel(x, y = nil, gamma = nil)
30
32
  y = x if y.nil?
31
33
  gamma ||= 1.0 / x.shape[1]
34
+ SVMKit::Validation.check_sample_array(x)
35
+ SVMKit::Validation.check_sample_array(y)
36
+ SVMKit::Validation.check_params_float(gamma: gamma)
32
37
  distance_matrix = euclidean_distance(x, y)
33
38
  Numo::NMath.exp((distance_matrix**2) * -gamma)
34
39
  end
@@ -40,6 +45,8 @@ module SVMKit
40
45
  # @return [Numo::DFloat] (shape: [n_samples_x, n_samples_x] or [n_samples_x, n_samples_y] if y is given)
41
46
  def linear_kernel(x, y = nil)
42
47
  y = x if y.nil?
48
+ SVMKit::Validation.check_sample_array(x)
49
+ SVMKit::Validation.check_sample_array(y)
43
50
  x.dot(y.transpose)
44
51
  end
45
52
 
@@ -54,6 +61,10 @@ module SVMKit
54
61
  def polynomial_kernel(x, y = nil, degree = 3, gamma = nil, coef = 1)
55
62
  y = x if y.nil?
56
63
  gamma ||= 1.0 / x.shape[1]
64
+ SVMKit::Validation.check_sample_array(x)
65
+ SVMKit::Validation.check_sample_array(y)
66
+ SVMKit::Validation.check_params_float(gamma: gamma)
67
+ SVMKit::Validation.check_params_integer(degree: degree, coef: coef)
57
68
  (x.dot(y.transpose) * gamma + coef)**degree
58
69
  end
59
70
 
@@ -67,6 +78,10 @@ module SVMKit
67
78
  def sigmoid_kernel(x, y = nil, gamma = nil, coef = 1)
68
79
  y = x if y.nil?
69
80
  gamma ||= 1.0 / x.shape[1]
81
+ SVMKit::Validation.check_sample_array(x)
82
+ SVMKit::Validation.check_sample_array(y)
83
+ SVMKit::Validation.check_params_float(gamma: gamma)
84
+ SVMKit::Validation.check_params_integer(coef: coef)
70
85
  Numo::NMath.tanh(x.dot(y.transpose) * gamma + coef)
71
86
  end
72
87
  end
@@ -58,6 +58,12 @@ module SVMKit
58
58
  # @param random_seed [Integer] The seed value used to initialize the random generator.
59
59
  def initialize(n_factors: 2, loss: 'hinge', reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0,
60
60
  init_std: 0.1, max_iter: 1000, batch_size: 10, random_seed: nil)
61
+ SVMKit::Validation.check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
62
+ reg_param_factor: reg_param_factor, init_std: init_std)
63
+ SVMKit::Validation.check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
64
+ SVMKit::Validation.check_params_string(loss: loss)
65
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
66
+
61
67
  @params = {}
62
68
  @params[:n_factors] = n_factors
63
69
  @params[:loss] = loss
@@ -82,6 +88,9 @@ module SVMKit
82
88
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
83
89
  # @return [FactorizationMachineClassifier] The learned classifier itself.
84
90
  def fit(x, y)
91
+ SVMKit::Validation.check_sample_array(x)
92
+ SVMKit::Validation.check_label_array(y)
93
+
85
94
  @classes = Numo::Int32[*y.to_a.uniq.sort]
86
95
  n_classes = @classes.size
87
96
  _n_samples, n_features = x.shape
@@ -111,6 +120,7 @@ module SVMKit
111
120
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
112
121
  # @return [Numo::DFloat] (shape: [n_samples]) Confidence score per sample.
113
122
  def decision_function(x)
123
+ SVMKit::Validation.check_sample_array(x)
114
124
  linear_term = @bias_term + x.dot(@weight_vec.transpose)
115
125
  factor_term = if @classes.size <= 2
116
126
  0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum
@@ -125,6 +135,7 @@ module SVMKit
125
135
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
126
136
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
127
137
  def predict(x)
138
+ SVMKit::Validation.check_sample_array(x)
128
139
  return Numo::Int32.cast(decision_function(x).ge(0.0)) * 2 - 1 if @classes.size <= 2
129
140
 
130
141
  n_samples, = x.shape
@@ -137,6 +148,7 @@ module SVMKit
137
148
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
138
149
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
139
150
  def predict_proba(x)
151
+ SVMKit::Validation.check_sample_array(x)
140
152
  proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
141
153
  return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
142
154
 
@@ -32,6 +32,7 @@ module SVMKit
32
32
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
33
33
  # @return [L2Normalizer]
34
34
  def fit(x, _y = nil)
35
+ SVMKit::Validation.check_sample_array(x)
35
36
  @norm_vec = Numo::NMath.sqrt((x**2).sum(1))
36
37
  self
37
38
  end
@@ -43,6 +44,7 @@ module SVMKit
43
44
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate L2-norms.
44
45
  # @return [Numo::DFloat] The normalized samples.
45
46
  def fit_transform(x, _y = nil)
47
+ SVMKit::Validation.check_sample_array(x)
46
48
  fit(x)
47
49
  x / @norm_vec.tile(x.shape[1], 1).transpose
48
50
  end
@@ -28,6 +28,7 @@ module SVMKit
28
28
  #
29
29
  # @param feature_range [Array<Float>] The desired range of samples.
30
30
  def initialize(feature_range: [0.0, 1.0])
31
+ SVMKit::Validation.check_params_type(Array, feature_range: feature_range)
31
32
  @params = {}
32
33
  @params[:feature_range] = feature_range
33
34
  @min_vec = nil
@@ -41,6 +42,7 @@ module SVMKit
41
42
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
42
43
  # @return [MinMaxScaler]
43
44
  def fit(x, _y = nil)
45
+ SVMKit::Validation.check_sample_array(x)
44
46
  @min_vec = x.min(0)
45
47
  @max_vec = x.max(0)
46
48
  self
@@ -53,6 +55,7 @@ module SVMKit
53
55
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the minimum and maximum values.
54
56
  # @return [Numo::DFloat] The scaled samples.
55
57
  def fit_transform(x, _y = nil)
58
+ SVMKit::Validation.check_sample_array(x)
56
59
  fit(x).transform(x)
57
60
  end
58
61
 
@@ -61,6 +64,7 @@ module SVMKit
61
64
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
62
65
  # @return [Numo::DFloat] The scaled samples.
63
66
  def transform(x)
67
+ SVMKit::Validation.check_sample_array(x)
64
68
  n_samples, = x.shape
65
69
  dif_vec = @max_vec - @min_vec
66
70
  nx = (x - @min_vec.tile(n_samples, 1)) / dif_vec.tile(n_samples, 1)
@@ -39,6 +39,7 @@ module SVMKit
39
39
  # The samples to calculate the mean values and standard deviations.
40
40
  # @return [StandardScaler]
41
41
  def fit(x, _y = nil)
42
+ SVMKit::Validation.check_sample_array(x)
42
43
  @mean_vec = x.mean(0)
43
44
  @std_vec = x.stddev(0)
44
45
  self
@@ -52,6 +53,7 @@ module SVMKit
52
53
  # The samples to calculate the mean values and standard deviations.
53
54
  # @return [Numo::DFloat] The scaled samples.
54
55
  def fit_transform(x, _y = nil)
56
+ SVMKit::Validation.check_sample_array(x)
55
57
  fit(x).transform(x)
56
58
  end
57
59
 
@@ -60,6 +62,7 @@ module SVMKit
60
62
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be scaled.
61
63
  # @return [Numo::DFloat] The scaled samples.
62
64
  def transform(x)
65
+ SVMKit::Validation.check_sample_array(x)
63
66
  n_samples, = x.shape
64
67
  (x - @mean_vec.tile(n_samples, 1)) / @std_vec.tile(n_samples, 1)
65
68
  end
@@ -54,6 +54,11 @@ module SVMKit
54
54
  # It is used to randomly determine the order of features when deciding splitting point.
55
55
  def initialize(criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
56
56
  random_seed: nil)
57
+ SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
58
+ max_features: max_features, random_seed: random_seed)
59
+ SVMKit::Validation.check_params_integer(min_samples_leaf: min_samples_leaf)
60
+ SVMKit::Validation.check_params_string(criterion: criterion)
61
+
57
62
  @params = {}
58
63
  @params[:criterion] = criterion
59
64
  @params[:max_depth] = max_depth
@@ -62,12 +67,12 @@ module SVMKit
62
67
  @params[:max_features] = max_features
63
68
  @params[:random_seed] = random_seed
64
69
  @params[:random_seed] ||= srand
65
- @rng = Random.new(@params[:random_seed])
66
70
  @tree = nil
67
71
  @classes = nil
68
72
  @feature_importances = nil
69
73
  @n_leaves = nil
70
74
  @leaf_labels = nil
75
+ @rng = Random.new(@params[:random_seed])
71
76
  end
72
77
 
73
78
  # Fit the model with given training data.
@@ -76,6 +81,8 @@ module SVMKit
76
81
  # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
77
82
  # @return [DecisionTreeClassifier] The learned classifier itself.
78
83
  def fit(x, y)
84
+ SVMKit::Validation.check_sample_array(x)
85
+ SVMKit::Validation.check_label_array(y)
79
86
  n_samples, n_features = x.shape
80
87
  @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
81
88
  @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
@@ -90,6 +97,7 @@ module SVMKit
90
97
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
91
98
  # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
92
99
  def predict(x)
100
+ SVMKit::Validation.check_sample_array(x)
93
101
  @leaf_labels[apply(x)]
94
102
  end
95
103
 
@@ -98,6 +106,7 @@ module SVMKit
98
106
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
99
107
  # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
100
108
  def predict_proba(x)
109
+ SVMKit::Validation.check_sample_array(x)
101
110
  probs = Numo::DFloat[*(Array.new(x.shape[0]) { |n| predict_at_node(@tree, x[n, true]) })]
102
111
  probs[true, @classes]
103
112
  end
@@ -107,14 +116,18 @@ module SVMKit
107
116
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
108
117
  # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
109
118
  def apply(x)
119
+ SVMKit::Validation.check_sample_array(x)
110
120
  Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
111
121
  end
112
122
 
113
123
  # Dump marshal data.
114
124
  # @return [Hash] The marshal data about DecisionTreeClassifier
115
125
  def marshal_dump
116
- { params: @params, classes: @classes, tree: @tree,
117
- feature_importances: @feature_importances, leaf_labels: @leaf_labels,
126
+ { params: @params,
127
+ classes: @classes,
128
+ tree: @tree,
129
+ feature_importances: @feature_importances,
130
+ leaf_labels: @leaf_labels,
118
131
  rng: @rng }
119
132
  end
120
133
 
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SVMKit
4
+ # @!visibility private
5
+ module Validation
6
+ module_function
7
+
8
+ # @!visibility private
9
+ def check_sample_array(x)
10
+ raise TypeError, 'Expect class of sample matrix to be Numo::DFloat' unless x.is_a?(Numo::DFloat)
11
+ raise ArgumentError, 'Expect sample matrix to be 2-D array' unless x.shape.size == 2
12
+ nil
13
+ end
14
+
15
+ # @!visibility private
16
+ def check_label_array(y)
17
+ raise TypeError, 'Expect class of label vector to be Numo::Int32' unless y.is_a?(Numo::Int32)
18
+ raise ArgumentError, 'Expect label vector to be 1-D arrray' unless y.shape.size == 1
19
+ nil
20
+ end
21
+
22
+ # @!visibility private
23
+ def check_params_type(type, params = {})
24
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
25
+ nil
26
+ end
27
+
28
+ # @!visibility private
29
+ def check_params_type_or_nil(type, params = {})
30
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type} or nil" unless v.is_a?(type) || v.is_a?(NilClass) }
31
+ nil
32
+ end
33
+
34
+ # @!visibility private
35
+ def check_params_float(params = {})
36
+ check_params_type(Float, params)
37
+ end
38
+
39
+ # @!visibility private
40
+ def check_params_integer(params = {})
41
+ check_params_type(Integer, params)
42
+ end
43
+
44
+ # @!visibility private
45
+ def check_params_string(params = {})
46
+ check_params_type(String, params)
47
+ end
48
+
49
+ # @!visibility private
50
+ def check_params_boolean(params = {})
51
+ params.each { |k, v| raise TypeError, "Expect class of #{k} to be Boolean" unless v.is_a?(FalseClass) || v.is_a?(TrueClass) }
52
+ nil
53
+ end
54
+ end
55
+ end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.2.7'
6
+ VERSION = '0.2.8'
7
7
  end
data/lib/svmkit.rb CHANGED
@@ -3,6 +3,7 @@
3
3
  require 'numo/narray'
4
4
 
5
5
  require 'svmkit/version'
6
+ require 'svmkit/validation'
6
7
  require 'svmkit/pairwise_metric'
7
8
  require 'svmkit/dataset'
8
9
  require 'svmkit/base/base_estimator'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.7
4
+ version: 0.2.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-01 00:00:00.000000000 Z
11
+ date: 2018-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -135,6 +135,7 @@ files:
135
135
  - lib/svmkit/preprocessing/min_max_scaler.rb
136
136
  - lib/svmkit/preprocessing/standard_scaler.rb
137
137
  - lib/svmkit/tree/decision_tree_classifier.rb
138
+ - lib/svmkit/validation.rb
138
139
  - lib/svmkit/version.rb
139
140
  - svmkit.gemspec
140
141
  homepage: https://github.com/yoshoku/svmkit