svmkit 0.2.9 → 0.3.0

Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +2 -2
  3. data/HISTORY.md +11 -0
  4. data/lib/svmkit/base/classifier.rb +4 -1
  5. data/lib/svmkit/base/regressor.rb +34 -0
  6. data/lib/svmkit/ensemble/random_forest_classifier.rb +1 -0
  7. data/lib/svmkit/evaluation_measure/accuracy.rb +2 -1
  8. data/lib/svmkit/evaluation_measure/f_score.rb +2 -1
  9. data/lib/svmkit/evaluation_measure/log_loss.rb +4 -2
  10. data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +30 -0
  11. data/lib/svmkit/evaluation_measure/mean_squared_error.rb +30 -0
  12. data/lib/svmkit/evaluation_measure/precision.rb +2 -1
  13. data/lib/svmkit/evaluation_measure/r2_score.rb +44 -0
  14. data/lib/svmkit/evaluation_measure/recall.rb +2 -1
  15. data/lib/svmkit/kernel_approximation/rbf.rb +1 -0
  16. data/lib/svmkit/kernel_machine/kernel_svc.rb +3 -1
  17. data/lib/svmkit/linear_model/logistic_regression.rb +2 -1
  18. data/lib/svmkit/linear_model/svc.rb +3 -1
  19. data/lib/svmkit/linear_model/svr.rb +172 -0
  20. data/lib/svmkit/model_selection/cross_validation.rb +19 -7
  21. data/lib/svmkit/model_selection/k_fold.rb +1 -0
  22. data/lib/svmkit/model_selection/stratified_k_fold.rb +1 -0
  23. data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +1 -0
  24. data/lib/svmkit/naive_bayes/naive_bayes.rb +1 -0
  25. data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +1 -0
  26. data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +94 -0
  27. data/lib/svmkit/pairwise_metric.rb +2 -0
  28. data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +2 -1
  29. data/lib/svmkit/preprocessing/l2_normalizer.rb +1 -0
  30. data/lib/svmkit/preprocessing/label_encoder.rb +1 -0
  31. data/lib/svmkit/preprocessing/min_max_scaler.rb +1 -0
  32. data/lib/svmkit/preprocessing/one_hot_encoder.rb +1 -0
  33. data/lib/svmkit/preprocessing/standard_scaler.rb +1 -0
  34. data/lib/svmkit/probabilistic_output.rb +1 -1
  35. data/lib/svmkit/tree/decision_tree_classifier.rb +1 -1
  36. data/lib/svmkit/validation.rb +12 -0
  37. data/lib/svmkit/version.rb +1 -1
  38. data/lib/svmkit.rb +6 -0
  39. data/svmkit.gemspec +1 -10
  40. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 533508a3afd82d2bae3ddea3a5669f6d389688155d44649fd3eafaaff8207e0f
- data.tar.gz: 43ff09b3bab72b68bc7a6b3740902be64508496337a4cde61057d33b91d0f349
+ metadata.gz: 855d3ac2dcfbfde9eb82a4661f17cebb75b4b7c57ba37ee26a8aa03d0f1ccab8
+ data.tar.gz: 13ec3e84fd6f4fcd973d164b3d6f395c024e42095eae63819bea6ef4179697d9
  SHA512:
- metadata.gz: e1c1bed8269d3c768d75bd8a5e731b5d2da689ef7a235a70c5ea87090aac79889c9fe0a004eca73c3015aae42d068f44b2b1e3a61a03b641607b2909441513b6
- data.tar.gz: 80a18ca4ec7eb2740148829024f0625c835f24b771bb321168d0cc3233d8e152257b5515355d99a968dc25a670f9a69f3e30b42bf190757206a64bbcd2babcd6
+ metadata.gz: be8b4e4528e70ab99c8b9f1ad0d93d717b0359d30ca9b142b4d5cb44b6b3875cc9d97f199021ab48ab3b9cc1e635f37c14dbb15efe81b55de673d83b65cc10ab
+ data.tar.gz: 9c8ae85dc3ca8dd7fe6bd15bd4b3eb46b775b32923691a12f9621032bcbffea7a3405404a9b8924f68a344d771cac56f439e6e2c024f68deacd00d7358c3c4f4
data/.rubocop.yml CHANGED
@@ -1,7 +1,7 @@
  inherit_from: .rubocop_todo.yml

  AllCops:
- TargetRubyVersion: 2.2
+ TargetRubyVersion: 2.1
  DisplayCopNames: true
  DisplayStyleGuide: true

@@ -9,7 +9,7 @@ Documentation:
  Enabled: false

  Metrics/LineLength:
- Max: 140
+ Max: 145
  IgnoredPatterns: ['(\A|\s)#']

  Metrics/ModuleLength:
data/HISTORY.md CHANGED
@@ -1,3 +1,14 @@
+ # 0.3.0
+ - Add class for Support Vector Regression.
+ - Add class for K-Nearest Neighbor Regression.
+ - Add class for evaluating coefficient of determination.
+ - Add class for evaluating mean squared error.
+ - Add class for evaluating mean absolute error.
+ - Fix to use min method instead of sort and first methods.
+ - Fix cross validation class to be able to use for regression problem.
+ - Fix some typos on document.
+ - Rename spec filename for Factorization Machine classifier.
+
  # 0.2.9
  - Add predict_proba method to SVC and KernelSVC.
  - Add class for evaluating logarithmic loss.
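The 0.3.0 entries above introduce a regression workflow. A minimal sketch of how the new pieces fit together, assuming toy data built here with Numo::DFloat (the data, parameter values, and variable names are illustrative only, not taken from the gem):

```ruby
require 'svmkit'

# Hypothetical toy regression data: 200 samples, 4 features, noisy linear target.
x = Numo::DFloat.new(200, 4).rand
y = x.dot(Numo::DFloat[0.5, -1.0, 2.0, 0.0]) + 0.1 * Numo::DFloat.new(200).rand_norm

# New in 0.3.0: linear Support Vector Regression trained with mini-batch SGD.
svr = SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
svr.fit(x, y)

# New in 0.3.0: regression metrics.
mse = SVMKit::EvaluationMeasure::MeanSquaredError.new
puts mse.score(y, svr.predict(x))
puts svr.score(x, y) # coefficient of determination via SVMKit::Base::Regressor#score
```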
data/lib/svmkit/base/classifier.rb CHANGED
@@ -1,5 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+ require 'svmkit/evaluation_measure/accuracy'
+
  module SVMKit
  module Base
  # Module for all classifiers in SVMKit.
@@ -14,7 +17,7 @@ module SVMKit
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
  end

- # Claculate the mean accuracy of the given testing data.
+ # Calculate the mean accuracy of the given testing data.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
data/lib/svmkit/base/regressor.rb ADDED
@@ -0,0 +1,34 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/evaluation_measure/r2_score'
+
+ module SVMKit
+ module Base
+ # Module for all regressors in SVMKit.
+ module Regressor
+ # An abstract method for fitting a model.
+ def fit
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+ end
+
+ # An abstract method for predicting labels.
+ def predict
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
+ end
+
+ # Calculate the coefficient of determination for the given testing data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
+ # @return [Float] Coefficient of determination
+ def score(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ evaluator = SVMKit::EvaluationMeasure::R2Score.new
+ evaluator.score(y, predict(x))
+ end
+ end
+ end
+ end
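Any estimator that mixes in this new module gets the R²-based `score` method for free; it only has to supply `fit` and `predict`. A minimal sketch with a hypothetical mean-predicting baseline (the class name and the data variables are placeholders, not part of the library):

```ruby
require 'svmkit'

# Hypothetical baseline regressor that always predicts the training mean.
class MeanRegressor
  include SVMKit::Base::Regressor

  def fit(_x, y)
    @mean = y.mean
    self
  end

  def predict(x)
    Numo::DFloat.ones(x.shape[0]) * @mean
  end
end

# x_train, y_train, x_test, y_test are assumed to be Numo::DFloat arrays.
baseline = MeanRegressor.new.fit(x_train, y_train)
puts baseline.score(x_test, y_test) # coefficient of determination, around 0.0 for this baseline
```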
data/lib/svmkit/ensemble/random_forest_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/evaluation_measure/accuracy.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'

  module SVMKit
@@ -13,7 +14,7 @@ module SVMKit
  class Accuracy
  include Base::Evaluator

- # Claculate mean accuracy.
+ # Calculate mean accuracy.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/f_score.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average F1-score
+ # Calculate average F1-score
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/log_loss.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
+ require 'svmkit/preprocessing/one_hot_encoder'

  module SVMKit
  module EvaluationMeasure
@@ -12,7 +14,7 @@ module SVMKit
  class LogLoss
  include Base::Evaluator

- # Claculate mean logarithmic loss.
+ # Calculate mean logarithmic loss.
  # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
  # mean logarithmic loss for binary classification.
  #
@@ -28,7 +30,7 @@ module SVMKit
  clipped_p = y_pred.clip(eps, 1 - eps)

  log_loss = if n_classes.nil?
- negative_label = y_true.to_a.uniq.sort.first
+ negative_label = y_true.to_a.uniq.min
  bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
  -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
  else
data/lib/svmkit/evaluation_measure/mean_absolute_error.rb ADDED
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+
+ module SVMKit
+ module EvaluationMeasure
+ # MeanAbsoluteError is a class that calculates the mean absolute error.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
+ # puts evaluator.score(ground_truth, predicted)
+ class MeanAbsoluteError
+ include Base::Evaluator
+
+ # Calculate mean absolute error.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+ # @return [Float] Mean absolute error
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ (y_true - y_pred).abs.mean
+ end
+ end
+ end
+ end
data/lib/svmkit/evaluation_measure/mean_squared_error.rb ADDED
@@ -0,0 +1,30 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+
+ module SVMKit
+ module EvaluationMeasure
+ # MeanSquaredError is a class that calculates the mean squared error.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::MeanSquaredError.new
+ # puts evaluator.score(ground_truth, predicted)
+ class MeanSquaredError
+ include Base::Evaluator
+
+ # Calculate mean squared error.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
+ # @return [Float] Mean squared error
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ ((y_true - y_pred)**2).mean
+ end
+ end
+ end
+ end
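A quick sanity check of the two new metrics above, assuming a small hand-made pair of target vectors (the numbers are illustrative only):

```ruby
require 'svmkit'

y_true = Numo::DFloat[3.0, -0.5, 2.0, 7.0]
y_pred = Numo::DFloat[2.5,  0.0, 2.0, 8.0]

mae = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
mse = SVMKit::EvaluationMeasure::MeanSquaredError.new
puts mae.score(y_true, y_pred) # => 0.5   (mean of 0.5, 0.5, 0.0, 1.0)
puts mse.score(y_true, y_pred) # => 0.375 (mean of 0.25, 0.25, 0.0, 1.0)
```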
data/lib/svmkit/evaluation_measure/precision.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average precision.
+ # Calculate average precision.
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/evaluation_measure/r2_score.rb ADDED
@@ -0,0 +1,44 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/evaluator'
+ require 'svmkit/evaluation_measure/precision_recall'
+
+ module SVMKit
+ module EvaluationMeasure
+ # R2Score is a class that calculates the coefficient of determination for the predicted values.
+ #
+ # @example
+ # evaluator = SVMKit::EvaluationMeasure::R2Score.new
+ # puts evaluator.score(ground_truth, predicted)
+ class R2Score
+ include Base::Evaluator
+
+ # Create a new evaluation measure calculater for coefficient of determination.
+ def initialize; end
+
+ # Calculate the coefficient of determination.
+ #
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated taget values.
+ # @return [Float] Coefficient of determination
+ def score(y_true, y_pred)
+ SVMKit::Validation.check_tvalue_array(y_true)
+ SVMKit::Validation.check_tvalue_array(y_pred)
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
+
+ n_samples, n_outputs = y_true.shape
+ numerator = ((y_true - y_pred)**2).sum(0)
+ yt_mean = y_true.sum(0) / n_samples
+ denominator = ((y_true - yt_mean)**2).sum(0)
+ if n_outputs.nil?
+ denominator.zero? ? 0.0 : 1.0 - numerator / denominator
+ else
+ scores = 1 - numerator / denominator
+ scores[denominator.eq(0)] = 0.0
+ scores.sum / scores.size
+ end
+ end
+ end
+ end
+ end
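For a single output the score computed above is R^2 = 1 - sum((y_i - yhat_i)^2) / sum((y_i - ymean)^2). A quick check with the same hand-made vectors as before (values are illustrative only):

```ruby
require 'svmkit'

y_true = Numo::DFloat[3.0, -0.5, 2.0, 7.0]
y_pred = Numo::DFloat[2.5,  0.0, 2.0, 8.0]

evaluator = SVMKit::EvaluationMeasure::R2Score.new
puts evaluator.score(y_true, y_pred) # => 0.9486... (1 - 1.5 / 29.1875)
```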
data/lib/svmkit/evaluation_measure/recall.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/evaluator'
  require 'svmkit/evaluation_measure/precision_recall'

@@ -27,7 +28,7 @@ module SVMKit
  @average = average
  end

- # Claculate average recall
+ # Calculate average recall
  #
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
data/lib/svmkit/kernel_approximation/rbf.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/kernel_machine/kernel_svc.rb CHANGED
@@ -1,7 +1,9 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/probabilistic_output'

  module SVMKit
  # This module consists of the classes that implement kernel method-based estimator.
@@ -88,7 +90,7 @@ module SVMKit
  end
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec = binary_fit(x, bin_y)
  @prob_param = if @params[:probability]
data/lib/svmkit/linear_model/logistic_regression.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

@@ -95,7 +96,7 @@ module SVMKit
  @bias_term[n] = bias
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
data/lib/svmkit/linear_model/svc.rb CHANGED
@@ -1,7 +1,9 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/probabilistic_output'

  module SVMKit
  # This module consists of the classes that implement generalized linear models.
@@ -103,7 +105,7 @@ module SVMKit
  end
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @weight_vec, @bias_term = binary_fit(x, bin_y)
  @prob_param = if @params[:probability]
data/lib/svmkit/linear_model/svr.rb ADDED
@@ -0,0 +1,172 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/regressor'
+
+ module SVMKit
+ module LinearModel
+ # SVR is a class that implements Support Vector Regressor
+ # with stochastic gradient descent (SGD) optimization.
+ #
+ # @example
+ # estimator =
+ # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
+ # estimator.fit(training_samples, traininig_target_values)
+ # results = estimator.predict(testing_samples)
+ #
+ # *Reference*
+ # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
+ class SVR
+ include Base::BaseEstimator
+ include Base::Regressor
+
+ # Return the weight vector for SVC.
+ # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+ attr_reader :weight_vec
+
+ # Return the bias term (a.k.a. intercept) for SVC.
+ # @return [Numo::DFloat] (shape: [n_outputs])
+ attr_reader :bias_term
+
+ # Return the random generator for performing random sampling.
+ # @return [Random]
+ attr_reader :rng
+
+ # Create a new regressor with Support Vector Machine by the SGD optimization.
+ #
+ # @param reg_param [Float] The regularization parameter.
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
+ # @param bias_scale [Float] The scale of the bias term.
+ # @param epsilon [Float] The margin of tolerance.
+ # @param max_iter [Integer] The maximum number of iterations.
+ # @param batch_size [Integer] The size of the mini batches.
+ # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
+ max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
+ SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
+ max_iter: max_iter, batch_size: batch_size)
+ @params = {}
+ @params[:reg_param] = reg_param
+ @params[:fit_bias] = fit_bias
+ @params[:bias_scale] = bias_scale
+ @params[:epsilon] = epsilon
+ @params[:max_iter] = max_iter
+ @params[:batch_size] = batch_size
+ @params[:normalize] = normalize
+ @params[:random_seed] = random_seed
+ @params[:random_seed] ||= srand
+ @weight_vec = nil
+ @bias_term = nil
+ @rng = Random.new(@params[:random_seed])
+ end
+
+ # Fit the model with given training data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+ # @return [SVR] The learned regressor itself.
+ def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+
+ n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+ _n_samples, n_features = x.shape
+
+ if n_outputs > 1
+ @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+ @bias_term = Numo::DFloat.zeros(n_outputs)
+ n_outputs.times do |n|
+ weight, bias = single_fit(x, y[true, n])
+ @weight_vec[n, true] = weight
+ @bias_term[n] = bias
+ end
+ else
+ @weight_vec, @bias_term = single_fit(x, y)
+ end
+
+ self
+ end
+
+ # Predict values for samples.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+ def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+ x.dot(@weight_vec.transpose) + @bias_term
+ end
+
+ # Dump marshal data.
+ # @return [Hash] The marshal data about SVC.
+ def marshal_dump
+ { params: @params,
+ weight_vec: @weight_vec,
+ bias_term: @bias_term,
+ rng: @rng }
+ end
+
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
+ @params = obj[:params]
+ @weight_vec = obj[:weight_vec]
+ @bias_term = obj[:bias_term]
+ @rng = obj[:rng]
+ nil
+ end
+
+ private
+
+ def single_fit(x, y)
+ # Expand feature vectors for bias term.
+ samples = @params[:fit_bias] ? expand_feature(x) : x
+ # Initialize some variables.
+ n_samples, n_features = samples.shape
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
+ weight_vec = Numo::DFloat.zeros(n_features)
+ # Start optimization.
+ @params[:max_iter].times do |t|
+ # random sampling
+ subset_ids = rand_ids.shift(@params[:batch_size])
+ rand_ids.concat(subset_ids)
+ # update the weight vector.
+ z = samples[subset_ids, true].dot(weight_vec.transpose)
+ coef = Numo::DFloat.zeros(@params[:batch_size])
+ coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
+ coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
+ mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
+ # scale the weight vector.
+ normalize_weight_vec(weight_vec) if @params[:normalize]
+ end
+ split_weight_vec_bias(weight_vec)
+ end
+
+ def expand_feature(x)
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
+ end
+
+ def learning_rate(iter)
+ 1.0 / (@params[:reg_param] * (iter + 1))
+ end
+
+ def normalize_weight_vec(weight_vec)
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
+ weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
+ end
+
+ def split_weight_vec_bias(weight_vec)
+ weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
+ bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
+ [weights, bias]
+ end
+ end
+ end
+ end
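The `fit` method above loops over outputs when y is two-dimensional, so a single SVR instance can handle multi-target regression. A small sketch of that shape handling, assuming synthetic data built here (targets, parameter values, and variable names are illustrative only):

```ruby
require 'svmkit'

# Hypothetical data with two target columns per sample.
x = Numo::DFloat.new(150, 3).rand
y = Numo::NArray.hstack([x.sum(1).reshape(150, 1), (x[true, 0] - x[true, 2]).reshape(150, 1)])

svr = SVMKit::LinearModel::SVR.new(reg_param: 0.01, fit_bias: true, epsilon: 0.05,
                                   max_iter: 500, batch_size: 20, random_seed: 1)
svr.fit(x, y)
p svr.weight_vec.shape # => [2, 3]  (one weight vector per output)
p svr.predict(x).shape # => [150, 2]
puts svr.score(x, y)   # R^2 averaged over the two outputs
```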
data/lib/svmkit/model_selection/cross_validation.rb CHANGED
@@ -1,6 +1,12 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/classifier'
+ require 'svmkit/base/regressor'
  require 'svmkit/base/splitter'
+ require 'svmkit/base/evaluator'
+ require 'svmkit/evaluation_measure/log_loss'

  module SVMKit
  # This module consists of the classes for model validation techniques.
@@ -51,9 +57,9 @@ module SVMKit
  # Perform the evalution of given classifier with cross-validation method.
  #
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
- # The dataset to be used to evaluate the classifier.
- # @param y [Numo::Int32] (shape: [n_samples])
- # The labels to be used to evaluate the classifier.
+ # The dataset to be used to evaluate the estimator.
+ # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
+ # The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
  # @return [Hash] The report summarizing the results of cross-validation.
  # * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
  # * :test_score (Array<Float>) The scores of testing dataset for each split.
@@ -61,8 +67,14 @@
  # the return_train_score is false.
  def perform(x, y)
  SVMKit::Validation.check_sample_array(x)
- SVMKit::Validation.check_label_array(y)
- SVMKit::Validation.check_sample_label_size(x, y)
+ if @estimator.is_a?(SVMKit::Base::Classifier)
+ SVMKit::Validation.check_label_array(y)
+ SVMKit::Validation.check_sample_label_size(x, y)
+ end
+ if @estimator.is_a?(SVMKit::Base::Regressor)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ end
  # Initialize the report of cross validation.
  report = { test_score: [], train_score: nil, fit_time: [] }
  report[:train_score] = [] if @return_train_score
@@ -71,9 +83,9 @@
  # Split dataset into training and testing dataset.
  feature_ids = !kernel_machine? || train_ids
  train_x = x[train_ids, feature_ids]
- train_y = y[train_ids]
+ train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
  test_x = x[test_ids, feature_ids]
- test_y = y[test_ids]
+ test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
  # Fit the estimator.
  start_time = Time.now.to_i
  @estimator.fit(train_x, train_y)
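With the branches above, the same cross-validation driver now accepts regressors. A sketch of running 5-fold cross-validation on an SVR, assuming the existing constructor keywords `estimator:` and `splitter:` and a KFold splitter built with `n_splits:`, `shuffle:`, and `random_seed:` (x and y are Numo::DFloat placeholders, not defined here):

```ruby
require 'svmkit'

svr = SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
kf  = SVMKit::ModelSelection::KFold.new(n_splits: 5, shuffle: true, random_seed: 1)
cv  = SVMKit::ModelSelection::CrossValidation.new(estimator: svr, splitter: kf)

report  = cv.perform(x, y)
mean_r2 = report[:test_score].inject(:+) / report[:test_score].size
puts(format('Mean R^2 over folds: %.3f', mean_r2))
```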
data/lib/svmkit/model_selection/k_fold.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/splitter'

  module SVMKit
data/lib/svmkit/model_selection/stratified_k_fold.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/splitter'

  module SVMKit
data/lib/svmkit/multiclass/one_vs_rest_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator.rb'
  require 'svmkit/base/classifier.rb'

data/lib/svmkit/naive_bayes/naive_bayes.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb ADDED
@@ -0,0 +1,94 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/regressor'
+
+ module SVMKit
+ module NearestNeighbors
+ # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
+ # The current implementation uses the Euclidean distance for finding the neighbors.
+ #
+ # @example
+ # estimator =
+ # SVMKit::NearestNeighbor::KNeighborsRegressor.new(n_neighbors = 5)
+ # estimator.fit(training_samples, traininig_target_values)
+ # results = estimator.predict(testing_samples)
+ #
+ class KNeighborsRegressor
+ include Base::BaseEstimator
+ include Base::Regressor
+
+ # Return the prototypes for the nearest neighbor regressor.
+ # @return [Numo::DFloat] (shape: [n_samples, n_features])
+ attr_reader :prototypes
+
+ # Return the values of the prototypes
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs])
+ attr_reader :values
+
+ # Create a new regressor with the nearest neighbor rule.
+ #
+ # @param n_neighbors [Integer] The number of neighbors.
+ def initialize(n_neighbors: 5)
+ SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
+ SVMKit::Validation.check_params_positive(n_neighbors: n_neighbors)
+ @params = {}
+ @params[:n_neighbors] = n_neighbors
+ @prototypes = nil
+ @values = nil
+ end
+
+ # Fit the model with given training data.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+ # @return [KNeighborsRegressor] The learned regressor itself.
+ def fit(x, y)
+ SVMKit::Validation.check_sample_array(x)
+ SVMKit::Validation.check_tvalue_array(y)
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
+ @prototypes = x.dup
+ @values = y.dup
+ self
+ end
+
+ # Predict values for samples.
+ #
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+ def predict(x)
+ SVMKit::Validation.check_sample_array(x)
+ # Initialize some variables.
+ n_samples, = x.shape
+ n_prototypes, n_outputs = @values.shape
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min
+ # Calculate distance matrix.
+ distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
+ # Predict values for the given samples.
+ predicted_values = Array.new(n_samples) do |n|
+ neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
+ n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
+ end
+ Numo::DFloat[*predicted_values]
+ end
+
+ # Dump marshal data.
+ # @return [Hash] The marshal data about KNeighborsRegressor.
+ def marshal_dump
+ { params: @params,
+ prototypes: @prototypes,
+ values: @values }
+ end
+
+ # Load marshal data.
+ # @return [nil]
+ def marshal_load(obj)
+ @params = obj[:params]
+ @prototypes = obj[:prototypes]
+ @values = obj[:values]
+ nil
+ end
+ end
+ end
+ end
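Note that the constructor defined above takes `n_neighbors` as a keyword argument, so calls look like `new(n_neighbors: 5)`. A tiny worked sketch with one-dimensional hand-made data (values chosen only to make the averaging visible):

```ruby
require 'svmkit'

x_train = Numo::DFloat[[0.0], [1.0], [2.0], [3.0]]
y_train = Numo::DFloat[0.0, 1.0, 2.0, 3.0]

knn = SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 2)
knn.fit(x_train, y_train)

# The two nearest prototypes to 1.4 are 1.0 and 2.0, so the prediction is their mean.
puts knn.predict(Numo::DFloat[[1.4]]) # => 1.5
```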
data/lib/svmkit/pairwise_metric.rb CHANGED
@@ -1,5 +1,7 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
+
  module SVMKit
  # Module for calculating pairwise distances, similarities, and kernels.
  module PairwiseMetric
data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'

@@ -110,7 +111,7 @@ module SVMKit
  @bias_term[n] = bias
  end
  else
- negative_label = y.to_a.uniq.sort.first
+ negative_label = y.to_a.uniq.min
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
  @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
  end
data/lib/svmkit/preprocessing/l2_normalizer.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/label_encoder.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/min_max_scaler.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/one_hot_encoder.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/preprocessing/standard_scaler.rb CHANGED
@@ -1,5 +1,6 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/transformer'

data/lib/svmkit/probabilistic_output.rb CHANGED
@@ -27,7 +27,7 @@ module SVMKit
  def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
  # Initialize some variables.
  n_samples = bin_y.size
- negative_label = bin_y.to_a.uniq.sort.first
+ negative_label = bin_y.to_a.uniq.min
  pos = bin_y.ne(negative_label)
  neg = bin_y.eq(negative_label)
  n_pos_samples = pos.count
data/lib/svmkit/tree/decision_tree_classifier.rb CHANGED
@@ -1,8 +1,8 @@
  # frozen_string_literal: true

+ require 'svmkit/validation'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
- require 'ostruct'

  module SVMKit
  # This module consists of the classes that implement tree models.
data/lib/svmkit/validation.rb CHANGED
@@ -19,12 +19,24 @@ module SVMKit
  nil
  end

+ # @!visibility private
+ def check_tvalue_array(y)
+ raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
+ nil
+ end
+
  # @!visibility private
  def check_sample_label_size(x, y)
  raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
  nil
  end

+ # @!visibility private
+ def check_sample_tvalue_size(x, y)
+ raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
+ nil
+ end
+
  # @!visibility private
  def check_params_type(type, params = {})
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
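The two helpers added above mirror the existing label checks but demand Numo::DFloat targets. A short sketch of that behaviour (array contents are placeholders):

```ruby
require 'svmkit'

x = Numo::DFloat.new(10, 2).rand
y = Numo::Int32.new(10).seq

begin
  SVMKit::Validation.check_tvalue_array(y) # Int32 labels are not valid regression targets
rescue TypeError => e
  puts e.message # => "Expect class of target value vector to be Numo::DFloat"
end

SVMKit::Validation.check_tvalue_array(Numo::DFloat.cast(y))          # passes, returns nil
SVMKit::Validation.check_sample_tvalue_size(x, Numo::DFloat.cast(y)) # same number of rows, returns nil
```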
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
  # @!visibility private
- VERSION = '0.2.9'.freeze
+ VERSION = '0.3.0'.freeze
  end
data/lib/svmkit.rb CHANGED
@@ -9,16 +9,19 @@ require 'svmkit/dataset'
  require 'svmkit/probabilistic_output'
  require 'svmkit/base/base_estimator'
  require 'svmkit/base/classifier'
+ require 'svmkit/base/regressor'
  require 'svmkit/base/transformer'
  require 'svmkit/base/splitter'
  require 'svmkit/base/evaluator'
  require 'svmkit/kernel_approximation/rbf'
  require 'svmkit/linear_model/svc'
+ require 'svmkit/linear_model/svr'
  require 'svmkit/linear_model/logistic_regression'
  require 'svmkit/kernel_machine/kernel_svc'
  require 'svmkit/polynomial_model/factorization_machine_classifier'
  require 'svmkit/multiclass/one_vs_rest_classifier'
  require 'svmkit/nearest_neighbors/k_neighbors_classifier'
+ require 'svmkit/nearest_neighbors/k_neighbors_regressor'
  require 'svmkit/naive_bayes/naive_bayes'
  require 'svmkit/tree/decision_tree_classifier'
  require 'svmkit/ensemble/random_forest_classifier'
@@ -35,3 +38,6 @@ require 'svmkit/evaluation_measure/precision'
  require 'svmkit/evaluation_measure/recall'
  require 'svmkit/evaluation_measure/f_score'
  require 'svmkit/evaluation_measure/log_loss'
+ require 'svmkit/evaluation_measure/r2_score'
+ require 'svmkit/evaluation_measure/mean_squared_error'
+ require 'svmkit/evaluation_measure/mean_absolute_error'
data/svmkit.gemspec CHANGED
@@ -18,7 +18,7 @@ SVMKit is a machine learninig library in Ruby.
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
- K-nearest neighbor classifier, and cross-validation.
+ K-nearest neighbor algorithm, and cross-validation.
  MSG
  spec.homepage = 'https://github.com/yoshoku/svmkit'
  spec.license = 'BSD-2-Clause'
@@ -38,13 +38,4 @@ MSG
  spec.add_development_dependency 'coveralls', '~> 0.8'
  spec.add_development_dependency 'rake', '~> 12.0'
  spec.add_development_dependency 'rspec', '~> 3.0'
-
- spec.post_install_message = <<MSG
- *************************************************************************
- Thank you for installing SVMKit!!
-
- Note that the SVMKit has been changed to use Numo::NArray for
- linear algebra library from version 0.2.0.
- *************************************************************************
- MSG
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.2.9
+ version: 0.3.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-05-02 00:00:00.000000000 Z
+ date: 2018-05-13 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -85,7 +85,7 @@ description: |
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
- K-nearest neighbor classifier, and cross-validation.
+ K-nearest neighbor algorithm, and cross-validation.
  email:
  - yoshoku@outlook.com
  executables: []
@@ -110,6 +110,7 @@ files:
  - lib/svmkit/base/base_estimator.rb
  - lib/svmkit/base/classifier.rb
  - lib/svmkit/base/evaluator.rb
+ - lib/svmkit/base/regressor.rb
  - lib/svmkit/base/splitter.rb
  - lib/svmkit/base/transformer.rb
  - lib/svmkit/dataset.rb
@@ -117,19 +118,24 @@ files:
  - lib/svmkit/evaluation_measure/accuracy.rb
  - lib/svmkit/evaluation_measure/f_score.rb
  - lib/svmkit/evaluation_measure/log_loss.rb
+ - lib/svmkit/evaluation_measure/mean_absolute_error.rb
+ - lib/svmkit/evaluation_measure/mean_squared_error.rb
  - lib/svmkit/evaluation_measure/precision.rb
  - lib/svmkit/evaluation_measure/precision_recall.rb
+ - lib/svmkit/evaluation_measure/r2_score.rb
  - lib/svmkit/evaluation_measure/recall.rb
  - lib/svmkit/kernel_approximation/rbf.rb
  - lib/svmkit/kernel_machine/kernel_svc.rb
  - lib/svmkit/linear_model/logistic_regression.rb
  - lib/svmkit/linear_model/svc.rb
+ - lib/svmkit/linear_model/svr.rb
  - lib/svmkit/model_selection/cross_validation.rb
  - lib/svmkit/model_selection/k_fold.rb
  - lib/svmkit/model_selection/stratified_k_fold.rb
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
  - lib/svmkit/naive_bayes/naive_bayes.rb
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
+ - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
  - lib/svmkit/pairwise_metric.rb
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
  - lib/svmkit/preprocessing/l2_normalizer.rb
@@ -146,13 +152,7 @@ homepage: https://github.com/yoshoku/svmkit
  licenses:
  - BSD-2-Clause
  metadata: {}
- post_install_message: |
- *************************************************************************
- Thank you for installing SVMKit!!
-
- Note that the SVMKit has been changed to use Numo::NArray for
- linear algebra library from version 0.2.0.
- *************************************************************************
+ post_install_message:
  rdoc_options: []
  require_paths:
  - lib