svmkit 0.2.9 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +2 -2
  3. data/HISTORY.md +11 -0
  4. data/lib/svmkit/base/classifier.rb +4 -1
  5. data/lib/svmkit/base/regressor.rb +34 -0
  6. data/lib/svmkit/ensemble/random_forest_classifier.rb +1 -0
  7. data/lib/svmkit/evaluation_measure/accuracy.rb +2 -1
  8. data/lib/svmkit/evaluation_measure/f_score.rb +2 -1
  9. data/lib/svmkit/evaluation_measure/log_loss.rb +4 -2
  10. data/lib/svmkit/evaluation_measure/mean_absolute_error.rb +30 -0
  11. data/lib/svmkit/evaluation_measure/mean_squared_error.rb +30 -0
  12. data/lib/svmkit/evaluation_measure/precision.rb +2 -1
  13. data/lib/svmkit/evaluation_measure/r2_score.rb +44 -0
  14. data/lib/svmkit/evaluation_measure/recall.rb +2 -1
  15. data/lib/svmkit/kernel_approximation/rbf.rb +1 -0
  16. data/lib/svmkit/kernel_machine/kernel_svc.rb +3 -1
  17. data/lib/svmkit/linear_model/logistic_regression.rb +2 -1
  18. data/lib/svmkit/linear_model/svc.rb +3 -1
  19. data/lib/svmkit/linear_model/svr.rb +172 -0
  20. data/lib/svmkit/model_selection/cross_validation.rb +19 -7
  21. data/lib/svmkit/model_selection/k_fold.rb +1 -0
  22. data/lib/svmkit/model_selection/stratified_k_fold.rb +1 -0
  23. data/lib/svmkit/multiclass/one_vs_rest_classifier.rb +1 -0
  24. data/lib/svmkit/naive_bayes/naive_bayes.rb +1 -0
  25. data/lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb +1 -0
  26. data/lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb +94 -0
  27. data/lib/svmkit/pairwise_metric.rb +2 -0
  28. data/lib/svmkit/polynomial_model/factorization_machine_classifier.rb +2 -1
  29. data/lib/svmkit/preprocessing/l2_normalizer.rb +1 -0
  30. data/lib/svmkit/preprocessing/label_encoder.rb +1 -0
  31. data/lib/svmkit/preprocessing/min_max_scaler.rb +1 -0
  32. data/lib/svmkit/preprocessing/one_hot_encoder.rb +1 -0
  33. data/lib/svmkit/preprocessing/standard_scaler.rb +1 -0
  34. data/lib/svmkit/probabilistic_output.rb +1 -1
  35. data/lib/svmkit/tree/decision_tree_classifier.rb +1 -1
  36. data/lib/svmkit/validation.rb +12 -0
  37. data/lib/svmkit/version.rb +1 -1
  38. data/lib/svmkit.rb +6 -0
  39. data/svmkit.gemspec +1 -10
  40. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 533508a3afd82d2bae3ddea3a5669f6d389688155d44649fd3eafaaff8207e0f
4
- data.tar.gz: 43ff09b3bab72b68bc7a6b3740902be64508496337a4cde61057d33b91d0f349
3
+ metadata.gz: 855d3ac2dcfbfde9eb82a4661f17cebb75b4b7c57ba37ee26a8aa03d0f1ccab8
4
+ data.tar.gz: 13ec3e84fd6f4fcd973d164b3d6f395c024e42095eae63819bea6ef4179697d9
5
5
  SHA512:
6
- metadata.gz: e1c1bed8269d3c768d75bd8a5e731b5d2da689ef7a235a70c5ea87090aac79889c9fe0a004eca73c3015aae42d068f44b2b1e3a61a03b641607b2909441513b6
7
- data.tar.gz: 80a18ca4ec7eb2740148829024f0625c835f24b771bb321168d0cc3233d8e152257b5515355d99a968dc25a670f9a69f3e30b42bf190757206a64bbcd2babcd6
6
+ metadata.gz: be8b4e4528e70ab99c8b9f1ad0d93d717b0359d30ca9b142b4d5cb44b6b3875cc9d97f199021ab48ab3b9cc1e635f37c14dbb15efe81b55de673d83b65cc10ab
7
+ data.tar.gz: 9c8ae85dc3ca8dd7fe6bd15bd4b3eb46b775b32923691a12f9621032bcbffea7a3405404a9b8924f68a344d771cac56f439e6e2c024f68deacd00d7358c3c4f4
data/.rubocop.yml CHANGED
@@ -1,7 +1,7 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
 
3
3
  AllCops:
4
- TargetRubyVersion: 2.2
4
+ TargetRubyVersion: 2.1
5
5
  DisplayCopNames: true
6
6
  DisplayStyleGuide: true
7
7
 
@@ -9,7 +9,7 @@ Documentation:
9
9
  Enabled: false
10
10
 
11
11
  Metrics/LineLength:
12
- Max: 140
12
+ Max: 145
13
13
  IgnoredPatterns: ['(\A|\s)#']
14
14
 
15
15
  Metrics/ModuleLength:
data/HISTORY.md CHANGED
@@ -1,3 +1,14 @@
1
+ # 0.3.0
2
+ - Add class for Support Vector Regression.
3
+ - Add class for K-Nearest Neighbor Regression.
4
+ - Add class for evaluating coefficient of determination.
5
+ - Add class for evaluating mean squared error.
6
+ - Add class for evaluating mean absolute error.
7
+ - Fix to use min method instead of sort and first methods.
8
+ - Fix cross validation class to be able to use for regression problem.
9
+ - Fix some typos on document.
10
+ - Rename spec filename for Factorization Machine classifier.
11
+
1
12
  # 0.2.9
2
13
  - Add predict_proba method to SVC and KernelSVC.
3
14
  - Add class for evaluating logarithmic loss.
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
4
+ require 'svmkit/evaluation_measure/accuracy'
5
+
3
6
  module SVMKit
4
7
  module Base
5
8
  # Module for all classifiers in SVMKit.
@@ -14,7 +17,7 @@ module SVMKit
14
17
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
15
18
  end
16
19
 
17
- # Claculate the mean accuracy of the given testing data.
20
+ # Calculate the mean accuracy of the given testing data.
18
21
  #
19
22
  # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
20
23
  # @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/evaluation_measure/r2_score'
5
+
6
+ module SVMKit
7
+ module Base
8
+ # Module for all regressors in SVMKit.
9
+ module Regressor
10
+ # An abstract method for fitting a model.
11
+ def fit
12
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
13
+ end
14
+
15
+ # An abstract method for predicting target values.
16
+ def predict
17
+ raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
18
+ end
19
+
20
+ # Calculate the coefficient of determination for the given testing data.
21
+ #
22
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
23
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) Target values for testing data.
24
+ # @return [Float] Coefficient of determination
25
+ def score(x, y)
26
+ SVMKit::Validation.check_sample_array(x)
27
+ SVMKit::Validation.check_tvalue_array(y)
28
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
29
+ evaluator = SVMKit::EvaluationMeasure::R2Score.new
30
+ evaluator.score(y, predict(x))
31
+ end
32
+ end
33
+ end
34
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/evaluator'
4
5
 
5
6
  module SVMKit
@@ -13,7 +14,7 @@ module SVMKit
13
14
  class Accuracy
14
15
  include Base::Evaluator
15
16
 
16
- # Claculate mean accuracy.
17
+ # Calculate mean accuracy.
17
18
  #
18
19
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
19
20
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/evaluator'
4
5
  require 'svmkit/evaluation_measure/precision_recall'
5
6
 
@@ -27,7 +28,7 @@ module SVMKit
27
28
  @average = average
28
29
  end
29
30
 
30
- # Claculate average F1-score
31
+ # Calculate average F1-score
31
32
  #
32
33
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
33
34
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/evaluator'
5
+ require 'svmkit/preprocessing/one_hot_encoder'
4
6
 
5
7
  module SVMKit
6
8
  module EvaluationMeasure
@@ -12,7 +14,7 @@ module SVMKit
12
14
  class LogLoss
13
15
  include Base::Evaluator
14
16
 
15
- # Claculate mean logarithmic loss.
17
+ # Calculate mean logarithmic loss.
16
18
  # If both y_true and y_pred are array (both shapes are [n_samples]), this method calculates
17
19
  # mean logarithmic loss for binary classification.
18
20
  #
@@ -28,7 +30,7 @@ module SVMKit
28
30
  clipped_p = y_pred.clip(eps, 1 - eps)
29
31
 
30
32
  log_loss = if n_classes.nil?
31
- negative_label = y_true.to_a.uniq.sort.first
33
+ negative_label = y_true.to_a.uniq.min
32
34
  bin_y_true = Numo::DFloat.cast(y_true.ne(negative_label))
33
35
  -(bin_y_true * Numo::NMath.log(clipped_p) + (1 - bin_y_true) * Numo::NMath.log(1 - clipped_p))
34
36
  else
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/evaluator'
5
+
6
+ module SVMKit
7
+ module EvaluationMeasure
8
+ # MeanAbsoluteError is a class that calculates the mean absolute error.
9
+ #
10
+ # @example
11
+ # evaluator = SVMKit::EvaluationMeasure::MeanAbsoluteError.new
12
+ # puts evaluator.score(ground_truth, predicted)
13
+ class MeanAbsoluteError
14
+ include Base::Evaluator
15
+
16
+ # Calculate mean absolute error.
17
+ #
18
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
19
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
20
+ # @return [Float] Mean absolute error
21
+ def score(y_true, y_pred)
22
+ SVMKit::Validation.check_tvalue_array(y_true)
23
+ SVMKit::Validation.check_tvalue_array(y_pred)
24
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
25
+
26
+ (y_true - y_pred).abs.mean
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/evaluator'
5
+
6
+ module SVMKit
7
+ module EvaluationMeasure
8
+ # MeanSquaredError is a class that calculates the mean squared error.
9
+ #
10
+ # @example
11
+ # evaluator = SVMKit::EvaluationMeasure::MeanSquaredError.new
12
+ # puts evaluator.score(ground_truth, predicted)
13
+ class MeanSquaredError
14
+ include Base::Evaluator
15
+
16
+ # Calculate mean squared error.
17
+ #
18
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
19
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
20
+ # @return [Float] Mean squared error
21
+ def score(y_true, y_pred)
22
+ SVMKit::Validation.check_tvalue_array(y_true)
23
+ SVMKit::Validation.check_tvalue_array(y_pred)
24
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
25
+
26
+ ((y_true - y_pred)**2).mean
27
+ end
28
+ end
29
+ end
30
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/evaluator'
4
5
  require 'svmkit/evaluation_measure/precision_recall'
5
6
 
@@ -27,7 +28,7 @@ module SVMKit
27
28
  @average = average
28
29
  end
29
30
 
30
- # Claculate average precision.
31
+ # Calculate average precision.
31
32
  #
32
33
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
33
34
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/evaluator'
5
+ require 'svmkit/evaluation_measure/precision_recall'
6
+
7
+ module SVMKit
8
+ module EvaluationMeasure
9
+ # R2Score is a class that calculates the coefficient of determination for the predicted values.
10
+ #
11
+ # @example
12
+ # evaluator = SVMKit::EvaluationMeasure::R2Score.new
13
+ # puts evaluator.score(ground_truth, predicted)
14
+ class R2Score
15
+ include Base::Evaluator
16
+
17
+ # Create a new evaluation measure calculator for coefficient of determination.
18
+ def initialize; end
19
+
20
+ # Calculate the coefficient of determination.
21
+ #
22
+ # @param y_true [Numo::DFloat] (shape: [n_samples, n_outputs]) Ground truth target values.
23
+ # @param y_pred [Numo::DFloat] (shape: [n_samples, n_outputs]) Estimated target values.
24
+ # @return [Float] Coefficient of determination
25
+ def score(y_true, y_pred)
26
+ SVMKit::Validation.check_tvalue_array(y_true)
27
+ SVMKit::Validation.check_tvalue_array(y_pred)
28
+ raise ArgumentError, 'Expect to have the same size both y_true and y_pred.' unless y_true.shape == y_pred.shape
29
+
30
+ n_samples, n_outputs = y_true.shape
31
+ numerator = ((y_true - y_pred)**2).sum(0)
32
+ yt_mean = y_true.sum(0) / n_samples
33
+ denominator = ((y_true - yt_mean)**2).sum(0)
34
+ if n_outputs.nil?
35
+ denominator.zero? ? 0.0 : 1.0 - numerator / denominator
36
+ else
37
+ scores = 1 - numerator / denominator
38
+ scores[denominator.eq(0)] = 0.0
39
+ scores.sum / scores.size
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/evaluator'
4
5
  require 'svmkit/evaluation_measure/precision_recall'
5
6
 
@@ -27,7 +28,7 @@ module SVMKit
27
28
  @average = average
28
29
  end
29
30
 
30
- # Claculate average recall
31
+ # Calculate average recall
31
32
  #
32
33
  # @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
33
34
  # @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted labels.
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/probabilistic_output'
5
7
 
6
8
  module SVMKit
7
9
  # This module consists of the classes that implement kernel method-based estimator.
@@ -88,7 +90,7 @@ module SVMKit
88
90
  end
89
91
  end
90
92
  else
91
- negative_label = y.to_a.uniq.sort.first
93
+ negative_label = y.to_a.uniq.min
92
94
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
93
95
  @weight_vec = binary_fit(x, bin_y)
94
96
  @prob_param = if @params[:probability]
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
6
 
@@ -95,7 +96,7 @@ module SVMKit
95
96
  @bias_term[n] = bias
96
97
  end
97
98
  else
98
- negative_label = y.to_a.uniq.sort.first
99
+ negative_label = y.to_a.uniq.min
99
100
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
100
101
  @weight_vec, @bias_term = binary_fit(x, bin_y)
101
102
  end
@@ -1,7 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
6
+ require 'svmkit/probabilistic_output'
5
7
 
6
8
  module SVMKit
7
9
  # This module consists of the classes that implement generalized linear models.
@@ -103,7 +105,7 @@ module SVMKit
103
105
  end
104
106
  end
105
107
  else
106
- negative_label = y.to_a.uniq.sort.first
108
+ negative_label = y.to_a.uniq.min
107
109
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
108
110
  @weight_vec, @bias_term = binary_fit(x, bin_y)
109
111
  @prob_param = if @params[:probability]
@@ -0,0 +1,172 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+ require 'svmkit/base/regressor'
6
+
7
+ module SVMKit
8
+ module LinearModel
9
+ # SVR is a class that implements Support Vector Regressor
10
+ # with stochastic gradient descent (SGD) optimization.
11
+ #
12
+ # @example
13
+ # estimator =
14
+ # SVMKit::LinearModel::SVR.new(reg_param: 1.0, epsilon: 0.1, max_iter: 100, batch_size: 20, random_seed: 1)
15
+ # estimator.fit(training_samples, training_target_values)
16
+ # results = estimator.predict(testing_samples)
17
+ #
18
+ # *Reference*
19
+ # 1. S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
20
+ class SVR
21
+ include Base::BaseEstimator
22
+ include Base::Regressor
23
+
24
+ # Return the weight vector for SVR.
25
+ # @return [Numo::DFloat] (shape: [n_outputs, n_features])
26
+ attr_reader :weight_vec
27
+
28
+ # Return the bias term (a.k.a. intercept) for SVR.
29
+ # @return [Numo::DFloat] (shape: [n_outputs])
30
+ attr_reader :bias_term
31
+
32
+ # Return the random generator for performing random sampling.
33
+ # @return [Random]
34
+ attr_reader :rng
35
+
36
+ # Create a new regressor with Support Vector Machine by the SGD optimization.
37
+ #
38
+ # @param reg_param [Float] The regularization parameter.
39
+ # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
40
+ # @param bias_scale [Float] The scale of the bias term.
41
+ # @param epsilon [Float] The margin of tolerance.
42
+ # @param max_iter [Integer] The maximum number of iterations.
43
+ # @param batch_size [Integer] The size of the mini batches.
44
+ # @param normalize [Boolean] The flag indicating whether to normalize the weight vector.
45
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
46
+ def initialize(reg_param: 1.0, fit_bias: false, bias_scale: 1.0, epsilon: 0.1,
47
+ max_iter: 100, batch_size: 50, normalize: true, random_seed: nil)
48
+ SVMKit::Validation.check_params_float(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon)
49
+ SVMKit::Validation.check_params_integer(max_iter: max_iter, batch_size: batch_size)
50
+ SVMKit::Validation.check_params_boolean(fit_bias: fit_bias, normalize: normalize)
51
+ SVMKit::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
52
+ SVMKit::Validation.check_params_positive(reg_param: reg_param, bias_scale: bias_scale, epsilon: epsilon,
53
+ max_iter: max_iter, batch_size: batch_size)
54
+ @params = {}
55
+ @params[:reg_param] = reg_param
56
+ @params[:fit_bias] = fit_bias
57
+ @params[:bias_scale] = bias_scale
58
+ @params[:epsilon] = epsilon
59
+ @params[:max_iter] = max_iter
60
+ @params[:batch_size] = batch_size
61
+ @params[:normalize] = normalize
62
+ @params[:random_seed] = random_seed
63
+ @params[:random_seed] ||= srand
64
+ @weight_vec = nil
65
+ @bias_term = nil
66
+ @rng = Random.new(@params[:random_seed])
67
+ end
68
+
69
+ # Fit the model with given training data.
70
+ #
71
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
72
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
73
+ # @return [SVR] The learned regressor itself.
74
+ def fit(x, y)
75
+ SVMKit::Validation.check_sample_array(x)
76
+ SVMKit::Validation.check_tvalue_array(y)
77
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
78
+
79
+ n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
80
+ _n_samples, n_features = x.shape
81
+
82
+ if n_outputs > 1
83
+ @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
84
+ @bias_term = Numo::DFloat.zeros(n_outputs)
85
+ n_outputs.times do |n|
86
+ weight, bias = single_fit(x, y[true, n])
87
+ @weight_vec[n, true] = weight
88
+ @bias_term[n] = bias
89
+ end
90
+ else
91
+ @weight_vec, @bias_term = single_fit(x, y)
92
+ end
93
+
94
+ self
95
+ end
96
+
97
+ # Predict values for samples.
98
+ #
99
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
100
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
101
+ def predict(x)
102
+ SVMKit::Validation.check_sample_array(x)
103
+ x.dot(@weight_vec.transpose) + @bias_term
104
+ end
105
+
106
+ # Dump marshal data.
107
+ # @return [Hash] The marshal data about SVR.
108
+ def marshal_dump
109
+ { params: @params,
110
+ weight_vec: @weight_vec,
111
+ bias_term: @bias_term,
112
+ rng: @rng }
113
+ end
114
+
115
+ # Load marshal data.
116
+ # @return [nil]
117
+ def marshal_load(obj)
118
+ @params = obj[:params]
119
+ @weight_vec = obj[:weight_vec]
120
+ @bias_term = obj[:bias_term]
121
+ @rng = obj[:rng]
122
+ nil
123
+ end
124
+
125
+ private
126
+
127
+ def single_fit(x, y)
128
+ # Expand feature vectors for bias term.
129
+ samples = @params[:fit_bias] ? expand_feature(x) : x
130
+ # Initialize some variables.
131
+ n_samples, n_features = samples.shape
132
+ rand_ids = [*0...n_samples].shuffle(random: @rng)
133
+ weight_vec = Numo::DFloat.zeros(n_features)
134
+ # Start optimization.
135
+ @params[:max_iter].times do |t|
136
+ # random sampling
137
+ subset_ids = rand_ids.shift(@params[:batch_size])
138
+ rand_ids.concat(subset_ids)
139
+ # update the weight vector.
140
+ z = samples[subset_ids, true].dot(weight_vec.transpose)
141
+ coef = Numo::DFloat.zeros(@params[:batch_size])
142
+ coef[(z - y[subset_ids]).gt(@params[:epsilon]).where] = 1
143
+ coef[(y[subset_ids] - z).gt(@params[:epsilon]).where] = -1
144
+ mean_vec = samples[subset_ids, true].transpose.dot(coef) / @params[:batch_size]
145
+ weight_vec -= learning_rate(t) * (@params[:reg_param] * weight_vec + mean_vec)
146
+ # scale the weight vector.
147
+ normalize_weight_vec(weight_vec) if @params[:normalize]
148
+ end
149
+ split_weight_vec_bias(weight_vec)
150
+ end
151
+
152
+ def expand_feature(x)
153
+ Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1]) * @params[:bias_scale]])
154
+ end
155
+
156
+ def learning_rate(iter)
157
+ 1.0 / (@params[:reg_param] * (iter + 1))
158
+ end
159
+
160
+ def normalize_weight_vec(weight_vec)
161
+ norm = Math.sqrt(weight_vec.dot(weight_vec))
162
+ weight_vec * [1.0, (1.0 / @params[:reg_param]**0.5) / (norm + 1.0e-12)].min
163
+ end
164
+
165
+ def split_weight_vec_bias(weight_vec)
166
+ weights = @params[:fit_bias] ? weight_vec[0...-1] : weight_vec
167
+ bias = @params[:fit_bias] ? weight_vec[-1] : 0.0
168
+ [weights, bias]
169
+ end
170
+ end
171
+ end
172
+ end
@@ -1,6 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+ require 'svmkit/base/classifier'
6
+ require 'svmkit/base/regressor'
3
7
  require 'svmkit/base/splitter'
8
+ require 'svmkit/base/evaluator'
9
+ require 'svmkit/evaluation_measure/log_loss'
4
10
 
5
11
  module SVMKit
6
12
  # This module consists of the classes for model validation techniques.
@@ -51,9 +57,9 @@ module SVMKit
51
57
  # Perform the evalution of given classifier with cross-validation method.
52
58
  #
53
59
  # @param x [Numo::DFloat] (shape: [n_samples, n_features])
54
- # The dataset to be used to evaluate the classifier.
55
- # @param y [Numo::Int32] (shape: [n_samples])
56
- # The labels to be used to evaluate the classifier.
60
+ # The dataset to be used to evaluate the estimator.
61
+ # @param y [Numo::Int32 / Numo::DFloat] (shape: [n_samples] / [n_samples, n_outputs])
62
+ # The labels to be used to evaluate the classifier / The target values to be used to evaluate the regressor.
57
63
  # @return [Hash] The report summarizing the results of cross-validation.
58
64
  # * :fit_time (Array<Float>) The calculation times of fitting the estimator for each split.
59
65
  # * :test_score (Array<Float>) The scores of testing dataset for each split.
@@ -61,8 +67,14 @@ module SVMKit
61
67
  # the return_train_score is false.
62
68
  def perform(x, y)
63
69
  SVMKit::Validation.check_sample_array(x)
64
- SVMKit::Validation.check_label_array(y)
65
- SVMKit::Validation.check_sample_label_size(x, y)
70
+ if @estimator.is_a?(SVMKit::Base::Classifier)
71
+ SVMKit::Validation.check_label_array(y)
72
+ SVMKit::Validation.check_sample_label_size(x, y)
73
+ end
74
+ if @estimator.is_a?(SVMKit::Base::Regressor)
75
+ SVMKit::Validation.check_tvalue_array(y)
76
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
77
+ end
66
78
  # Initialize the report of cross validation.
67
79
  report = { test_score: [], train_score: nil, fit_time: [] }
68
80
  report[:train_score] = [] if @return_train_score
@@ -71,9 +83,9 @@ module SVMKit
71
83
  # Split dataset into training and testing dataset.
72
84
  feature_ids = !kernel_machine? || train_ids
73
85
  train_x = x[train_ids, feature_ids]
74
- train_y = y[train_ids]
86
+ train_y = y.shape[1].nil? ? y[train_ids] : y[train_ids, true]
75
87
  test_x = x[test_ids, feature_ids]
76
- test_y = y[test_ids]
88
+ test_y = y.shape[1].nil? ? y[test_ids] : y[test_ids, true]
77
89
  # Fit the estimator.
78
90
  start_time = Time.now.to_i
79
91
  @estimator.fit(train_x, train_y)
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/splitter'
4
5
 
5
6
  module SVMKit
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/splitter'
4
5
 
5
6
  module SVMKit
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator.rb'
4
5
  require 'svmkit/base/classifier.rb'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
6
 
@@ -0,0 +1,94 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'svmkit/validation'
4
+ require 'svmkit/base/base_estimator'
5
+ require 'svmkit/base/regressor'
6
+
7
+ module SVMKit
8
+ module NearestNeighbors
9
+ # KNeighborsRegressor is a class that implements the regressor with the k-nearest neighbors rule.
10
+ # The current implementation uses the Euclidean distance for finding the neighbors.
11
+ #
12
+ # @example
13
+ # estimator =
14
+ # SVMKit::NearestNeighbors::KNeighborsRegressor.new(n_neighbors: 5)
15
+ # estimator.fit(training_samples, training_target_values)
16
+ # results = estimator.predict(testing_samples)
17
+ #
18
+ class KNeighborsRegressor
19
+ include Base::BaseEstimator
20
+ include Base::Regressor
21
+
22
+ # Return the prototypes for the nearest neighbor regressor.
23
+ # @return [Numo::DFloat] (shape: [n_samples, n_features])
24
+ attr_reader :prototypes
25
+
26
+ # Return the values of the prototypes
27
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs])
28
+ attr_reader :values
29
+
30
+ # Create a new regressor with the nearest neighbor rule.
31
+ #
32
+ # @param n_neighbors [Integer] The number of neighbors.
33
+ def initialize(n_neighbors: 5)
34
+ SVMKit::Validation.check_params_integer(n_neighbors: n_neighbors)
35
+ SVMKit::Validation.check_params_positive(n_neighbors: n_neighbors)
36
+ @params = {}
37
+ @params[:n_neighbors] = n_neighbors
38
+ @prototypes = nil
39
+ @values = nil
40
+ end
41
+
42
+ # Fit the model with given training data.
43
+ #
44
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
45
+ # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
46
+ # @return [KNeighborsRegressor] The learned regressor itself.
47
+ def fit(x, y)
48
+ SVMKit::Validation.check_sample_array(x)
49
+ SVMKit::Validation.check_tvalue_array(y)
50
+ SVMKit::Validation.check_sample_tvalue_size(x, y)
51
+ @prototypes = x.dup
52
+ @values = y.dup
53
+ self
54
+ end
55
+
56
+ # Predict values for samples.
57
+ #
58
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
59
+ # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
60
+ def predict(x)
61
+ SVMKit::Validation.check_sample_array(x)
62
+ # Initialize some variables.
63
+ n_samples, = x.shape
64
+ n_prototypes, n_outputs = @values.shape
65
+ n_neighbors = [@params[:n_neighbors], n_prototypes].min
66
+ # Calculate distance matrix.
67
+ distance_matrix = PairwiseMetric.euclidean_distance(x, @prototypes)
68
+ # Predict values for the given samples.
69
+ predicted_values = Array.new(n_samples) do |n|
70
+ neighbor_ids = distance_matrix[n, true].to_a.each_with_index.sort.map(&:last)[0...n_neighbors]
71
+ n_outputs.nil? ? @values[neighbor_ids].mean : @values[neighbor_ids, true].mean(0).to_a
72
+ end
73
+ Numo::DFloat[*predicted_values]
74
+ end
75
+
76
+ # Dump marshal data.
77
+ # @return [Hash] The marshal data about KNeighborsRegressor.
78
+ def marshal_dump
79
+ { params: @params,
80
+ prototypes: @prototypes,
81
+ values: @values }
82
+ end
83
+
84
+ # Load marshal data.
85
+ # @return [nil]
86
+ def marshal_load(obj)
87
+ @params = obj[:params]
88
+ @prototypes = obj[:prototypes]
89
+ @values = obj[:values]
90
+ nil
91
+ end
92
+ end
93
+ end
94
+ end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
4
+
3
5
  module SVMKit
4
6
  # Module for calculating pairwise distances, similarities, and kernels.
5
7
  module PairwiseMetric
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
6
 
@@ -110,7 +111,7 @@ module SVMKit
110
111
  @bias_term[n] = bias
111
112
  end
112
113
  else
113
- negative_label = y.to_a.uniq.sort.first
114
+ negative_label = y.to_a.uniq.min
114
115
  bin_y = Numo::Int32.cast(y.ne(negative_label)) * 2 - 1
115
116
  @factor_mat, @weight_vec, @bias_term = binary_fit(x, bin_y)
116
117
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/transformer'
5
6
 
@@ -27,7 +27,7 @@ module SVMKit
27
27
  def fit_sigmoid(df, bin_y, max_iter = 100, min_step = 1e-10, sigma = 1e-12)
28
28
  # Initialize some variables.
29
29
  n_samples = bin_y.size
30
- negative_label = bin_y.to_a.uniq.sort.first
30
+ negative_label = bin_y.to_a.uniq.min
31
31
  pos = bin_y.ne(negative_label)
32
32
  neg = bin_y.eq(negative_label)
33
33
  n_pos_samples = pos.count
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'svmkit/validation'
3
4
  require 'svmkit/base/base_estimator'
4
5
  require 'svmkit/base/classifier'
5
- require 'ostruct'
6
6
 
7
7
  module SVMKit
8
8
  # This module consists of the classes that implement tree models.
@@ -19,12 +19,24 @@ module SVMKit
19
19
  nil
20
20
  end
21
21
 
22
+ # @!visibility private
23
+ def check_tvalue_array(y)
24
+ raise TypeError, 'Expect class of target value vector to be Numo::DFloat' unless y.is_a?(Numo::DFloat)
25
+ nil
26
+ end
27
+
22
28
  # @!visibility private
23
29
  def check_sample_label_size(x, y)
24
30
  raise ArgumentError, 'Expect to have the same number of samples for sample matrix and label vector' unless x.shape[0] == y.shape[0]
25
31
  nil
26
32
  end
27
33
 
34
+ # @!visibility private
35
+ def check_sample_tvalue_size(x, y)
36
+ raise ArgumentError, 'Expect to have the same number of samples for sample matrix and target value vector' unless x.shape[0] == y.shape[0]
37
+ nil
38
+ end
39
+
28
40
  # @!visibility private
29
41
  def check_params_type(type, params = {})
30
42
  params.each { |k, v| raise TypeError, "Expect class of #{k} to be #{type}" unless v.is_a?(type) }
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.2.9'.freeze
6
+ VERSION = '0.3.0'.freeze
7
7
  end
data/lib/svmkit.rb CHANGED
@@ -9,16 +9,19 @@ require 'svmkit/dataset'
9
9
  require 'svmkit/probabilistic_output'
10
10
  require 'svmkit/base/base_estimator'
11
11
  require 'svmkit/base/classifier'
12
+ require 'svmkit/base/regressor'
12
13
  require 'svmkit/base/transformer'
13
14
  require 'svmkit/base/splitter'
14
15
  require 'svmkit/base/evaluator'
15
16
  require 'svmkit/kernel_approximation/rbf'
16
17
  require 'svmkit/linear_model/svc'
18
+ require 'svmkit/linear_model/svr'
17
19
  require 'svmkit/linear_model/logistic_regression'
18
20
  require 'svmkit/kernel_machine/kernel_svc'
19
21
  require 'svmkit/polynomial_model/factorization_machine_classifier'
20
22
  require 'svmkit/multiclass/one_vs_rest_classifier'
21
23
  require 'svmkit/nearest_neighbors/k_neighbors_classifier'
24
+ require 'svmkit/nearest_neighbors/k_neighbors_regressor'
22
25
  require 'svmkit/naive_bayes/naive_bayes'
23
26
  require 'svmkit/tree/decision_tree_classifier'
24
27
  require 'svmkit/ensemble/random_forest_classifier'
@@ -35,3 +38,6 @@ require 'svmkit/evaluation_measure/precision'
35
38
  require 'svmkit/evaluation_measure/recall'
36
39
  require 'svmkit/evaluation_measure/f_score'
37
40
  require 'svmkit/evaluation_measure/log_loss'
41
+ require 'svmkit/evaluation_measure/r2_score'
42
+ require 'svmkit/evaluation_measure/mean_squared_error'
43
+ require 'svmkit/evaluation_measure/mean_absolute_error'
data/svmkit.gemspec CHANGED
@@ -18,7 +18,7 @@ SVMKit is a machine learninig library in Ruby.
18
18
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
19
19
  SVMKit currently supports Linear / Kernel Support Vector Machine,
20
20
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
21
- K-nearest neighbor classifier, and cross-validation.
21
+ K-nearest neighbor algorithm, and cross-validation.
22
22
  MSG
23
23
  spec.homepage = 'https://github.com/yoshoku/svmkit'
24
24
  spec.license = 'BSD-2-Clause'
@@ -38,13 +38,4 @@ MSG
38
38
  spec.add_development_dependency 'coveralls', '~> 0.8'
39
39
  spec.add_development_dependency 'rake', '~> 12.0'
40
40
  spec.add_development_dependency 'rspec', '~> 3.0'
41
-
42
- spec.post_install_message = <<MSG
43
- *************************************************************************
44
- Thank you for installing SVMKit!!
45
-
46
- Note that the SVMKit has been changed to use Numo::NArray for
47
- linear algebra library from version 0.2.0.
48
- *************************************************************************
49
- MSG
50
41
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-02 00:00:00.000000000 Z
11
+ date: 2018-05-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -85,7 +85,7 @@ description: |
85
85
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
86
86
  SVMKit currently supports Linear / Kernel Support Vector Machine,
87
87
  Logistic Regression, Factorization Machine, Naive Bayes, Decision Tree, Random Forest,
88
- K-nearest neighbor classifier, and cross-validation.
88
+ K-nearest neighbor algorithm, and cross-validation.
89
89
  email:
90
90
  - yoshoku@outlook.com
91
91
  executables: []
@@ -110,6 +110,7 @@ files:
110
110
  - lib/svmkit/base/base_estimator.rb
111
111
  - lib/svmkit/base/classifier.rb
112
112
  - lib/svmkit/base/evaluator.rb
113
+ - lib/svmkit/base/regressor.rb
113
114
  - lib/svmkit/base/splitter.rb
114
115
  - lib/svmkit/base/transformer.rb
115
116
  - lib/svmkit/dataset.rb
@@ -117,19 +118,24 @@ files:
117
118
  - lib/svmkit/evaluation_measure/accuracy.rb
118
119
  - lib/svmkit/evaluation_measure/f_score.rb
119
120
  - lib/svmkit/evaluation_measure/log_loss.rb
121
+ - lib/svmkit/evaluation_measure/mean_absolute_error.rb
122
+ - lib/svmkit/evaluation_measure/mean_squared_error.rb
120
123
  - lib/svmkit/evaluation_measure/precision.rb
121
124
  - lib/svmkit/evaluation_measure/precision_recall.rb
125
+ - lib/svmkit/evaluation_measure/r2_score.rb
122
126
  - lib/svmkit/evaluation_measure/recall.rb
123
127
  - lib/svmkit/kernel_approximation/rbf.rb
124
128
  - lib/svmkit/kernel_machine/kernel_svc.rb
125
129
  - lib/svmkit/linear_model/logistic_regression.rb
126
130
  - lib/svmkit/linear_model/svc.rb
131
+ - lib/svmkit/linear_model/svr.rb
127
132
  - lib/svmkit/model_selection/cross_validation.rb
128
133
  - lib/svmkit/model_selection/k_fold.rb
129
134
  - lib/svmkit/model_selection/stratified_k_fold.rb
130
135
  - lib/svmkit/multiclass/one_vs_rest_classifier.rb
131
136
  - lib/svmkit/naive_bayes/naive_bayes.rb
132
137
  - lib/svmkit/nearest_neighbors/k_neighbors_classifier.rb
138
+ - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
133
139
  - lib/svmkit/pairwise_metric.rb
134
140
  - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
135
141
  - lib/svmkit/preprocessing/l2_normalizer.rb
@@ -146,13 +152,7 @@ homepage: https://github.com/yoshoku/svmkit
146
152
  licenses:
147
153
  - BSD-2-Clause
148
154
  metadata: {}
149
- post_install_message: |
150
- *************************************************************************
151
- Thank you for installing SVMKit!!
152
-
153
- Note that the SVMKit has been changed to use Numo::NArray for
154
- linear algebra library from version 0.2.0.
155
- *************************************************************************
155
+ post_install_message:
156
156
  rdoc_options: []
157
157
  require_paths:
158
158
  - lib