svmkit 0.6.3 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8f6c3a9b6df704497579a44ed1649baeff28364d8992cc781babb03ee1fe836a
- data.tar.gz: 99502858d1cecbe65efe5ee342cc6719278f227b1305c75d5305d3362d00e295
+ metadata.gz: 1e3564c00ae91e7c4cc9db2d19b19df0158803571a18ba9b1446bea22c36b93e
+ data.tar.gz: ebd9a2d55e3935533144bc25498bc9db24d95cfa74fa9b2e8d6299a8b7d54409
  SHA512:
- metadata.gz: 86bf9a6ce8de82c7b51c9f7ff3451c9834ca73a84abb48394769e65bf80ebf8b00c565f27b69882719ad81c5b33a27af0156cc1b3cc2949f530eeacf11560718
- data.tar.gz: 14a4e5a9becd0b41f43ba1a4cdee545f12f8d18f339feae789593c527025a9c6c0bb50d7471c5ffefffbe1ba96dc46e409aaa25e781ef52830adac476531ce58
+ metadata.gz: 3fc3e77783fa89bb73b68bd39c21251eae553f8f70259dab31189569b1950f760b72c41b5f839e4d133d156f952995cd2201f2538076435828d19393ea8df007
+ data.tar.gz: 953a07537d01e28b4c00714aebcea67789e654896af7ed5bb18900fcbf3d3fdd0be6d60bf8bfe43932e9e6b0e17fc1ade13aa7c9a9aeb564cbcfd79626979bcc
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
+ # 0.7.0
+ - Add class for AdaBoost classifier.
+ - Add class for AdaBoost regressor.
+
  # 0.6.3
  - Fix bug on setting random seed and max_features parameter of Random Forest estimators.
 
data/README.md CHANGED
@@ -9,7 +9,7 @@ SVMKit is a machine learning library in Ruby.
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
- Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
+ Naive Bayes, Decision Tree, AdaBoost, Random Forest, K-nearest neighbor classifier,
  K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
  and cross-validation.
 
@@ -36,6 +36,8 @@ require 'svmkit/naive_bayes/naive_bayes'
  require 'svmkit/tree/node'
  require 'svmkit/tree/decision_tree_classifier'
  require 'svmkit/tree/decision_tree_regressor'
+ require 'svmkit/ensemble/ada_boost_classifier'
+ require 'svmkit/ensemble/ada_boost_regressor'
  require 'svmkit/ensemble/random_forest_classifier'
  require 'svmkit/ensemble/random_forest_regressor'
  require 'svmkit/clustering/k_means'
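The new requires expose `SVMKit::Ensemble::AdaBoostClassifier` and `SVMKit::Ensemble::AdaBoostRegressor`. A minimal usage sketch with synthetic data (the sample and label arrays below are made up for illustration; the constructor arguments mirror the @example blocks in the class docs that follow):

```ruby
require 'svmkit'

# Hypothetical toy data: 100 samples with 4 features, two class labels.
samples = Numo::DFloat.new(100, 4).rand
labels  = Numo::Int32.asarray(Array.new(100) { |n| n < 50 ? 0 : 1 })

estimator = SVMKit::Ensemble::AdaBoostClassifier.new(
  n_estimators: 10, criterion: 'gini', max_depth: 3, random_seed: 1)
estimator.fit(samples, labels)
predicted = estimator.predict(samples)
probs     = estimator.predict_proba(samples)
```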
data/lib/svmkit/ensemble/ada_boost_classifier.rb ADDED
@@ -0,0 +1,212 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/classifier'
+ require 'svmkit/tree/decision_tree_classifier'
+
+ module SVMKit
+   module Ensemble
+     # AdaBoostClassifier is a class that implements AdaBoost (SAMME.R) for classification.
+     # This class uses a decision tree as a weak learner.
+     #
+     # @example
+     #   estimator =
+     #     SVMKit::Ensemble::AdaBoostClassifier.new(
+     #       n_estimators: 10, criterion: 'gini', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
+     #   estimator.fit(training_samples, training_labels)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - J. Zhu, S. Rosset, H. Zou, and T. Hastie, "Multi-class AdaBoost," Technical Report No. 430, Department of Statistics, University of Michigan, 2005.
+     class AdaBoostClassifier
+       include Base::BaseEstimator
+       include Base::Classifier
+
+       # Return the set of estimators.
+       # @return [Array<DecisionTreeClassifier>]
+       attr_reader :estimators
+
+       # Return the class labels.
+       # @return [Numo::Int32] (size: n_classes)
+       attr_reader :classes
+
+       # Return the importance for each feature.
+       # @return [Numo::DFloat] (size: n_features)
+       attr_reader :feature_importances
+
+       # Return the random generator for random selection of feature index.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new classifier with AdaBoost.
+       #
+       # @param n_estimators [Integer] The number of decision trees for constructing the ensemble.
+       # @param criterion [String] The function to evaluate the splitting point. Supported criteria are 'gini' and 'entropy'.
+       # @param max_depth [Integer] The maximum depth of the tree.
+       #   If nil is given, the decision tree grows without concern for depth.
+       # @param max_leaf_nodes [Integer] The maximum number of leaves on the decision tree.
+       #   If nil is given, the number of leaves is not limited.
+       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+       # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
+       #   If nil is given, the split process considers all features.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       #   It is used to randomly determine the order of features when deciding the splitting point.
+       def initialize(n_estimators: 50, criterion: 'gini', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                      max_features: nil, random_seed: nil)
+         SVMKit::Validation.check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                                     max_features: max_features, random_seed: random_seed)
+         SVMKit::Validation.check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+         SVMKit::Validation.check_params_string(criterion: criterion)
+         SVMKit::Validation.check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
+                                                  max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
+                                                  max_features: max_features)
+         @params = {}
+         @params[:n_estimators] = n_estimators
+         @params[:criterion] = criterion
+         @params[:max_depth] = max_depth
+         @params[:max_leaf_nodes] = max_leaf_nodes
+         @params[:min_samples_leaf] = min_samples_leaf
+         @params[:max_features] = max_features
+         @params[:random_seed] = random_seed
+         @params[:random_seed] ||= srand
+         @estimators = nil
+         @classes = nil
+         @feature_importances = nil
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::Int32] (shape: [n_samples]) The labels to be used for fitting the model.
+       # @return [AdaBoostClassifier] The learned classifier itself.
+       def fit(x, y) # rubocop:disable Metrics/AbcSize
+         SVMKit::Validation.check_sample_array(x)
+         SVMKit::Validation.check_label_array(y)
+         SVMKit::Validation.check_sample_label_size(x, y)
+         ## Initialize some variables.
+         n_samples, n_features = x.shape
+         @estimators = []
+         @feature_importances = Numo::DFloat.zeros(n_features)
+         @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
+         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+         @classes = Numo::Int32.asarray(y.to_a.uniq.sort)
+         n_classes = @classes.shape[0]
+         ## Boosting.
+         classes_arr = @classes.to_a
+         y_codes = Numo::DFloat.zeros(n_samples, n_classes) - 1.fdiv(n_classes - 1)
+         n_samples.times { |n| y_codes[n, classes_arr.index(y[n])] = 1.0 }
+         observation_weights = Numo::DFloat.zeros(n_samples) + 1.fdiv(n_samples)
+         @params[:n_estimators].times do |_t|
+           # Fit classifier.
+           ids = weighted_sampling(observation_weights)
+           break if y[ids].to_a.uniq.size != n_classes
+           tree = Tree::DecisionTreeClassifier.new(
+             criterion: @params[:criterion], max_depth: @params[:max_depth],
+             max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+             max_features: @params[:max_features], random_seed: @rng.rand(int_max)
+           )
+           tree.fit(x[ids, true], y[ids])
+           # Calculate estimator error.
+           proba = tree.predict_proba(x).clip(1.0e-15, nil)
+           p = Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[proba[n, true].max_index] })
+           inds = p.ne(y)
+           error = (observation_weights * inds).sum / observation_weights.sum
+           # Store model.
+           @estimators.push(tree)
+           @feature_importances += tree.feature_importances
+           break if error.zero?
+           # Update observation weights.
+           log_proba = Numo::NMath.log(proba)
+           observation_weights *= Numo::NMath.exp(-1.0 * (n_classes - 1).fdiv(n_classes) * (y_codes * log_proba).sum(1))
+           observation_weights = observation_weights.clip(1.0e-15, nil)
+           sum_observation_weights = observation_weights.sum
+           break if sum_observation_weights.zero?
+           observation_weights /= sum_observation_weights
+         end
+         @feature_importances /= @feature_importances.sum
+         self
+       end
+
+       # Calculate confidence scores for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence score per sample.
+       def decision_function(x)
+         SVMKit::Validation.check_sample_array(x)
+         n_samples, = x.shape
+         n_classes = @classes.size
+         sum_probs = Numo::DFloat.zeros(n_samples, n_classes)
+         @estimators.each do |tree|
+           log_proba = Numo::NMath.log(tree.predict_proba(x).clip(1.0e-15, nil))
+           sum_probs += (n_classes - 1) * (log_proba - 1.fdiv(n_classes) * Numo::DFloat[log_proba.sum(1)].transpose)
+         end
+         sum_probs /= @estimators.size
+       end
+
+       # Predict class labels for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
+       # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
+       def predict(x)
+         SVMKit::Validation.check_sample_array(x)
+         n_samples, = x.shape
+         probs = decision_function(x)
+         Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[probs[n, true].max_index] })
+       end
+
+       # Predict probability for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
+       # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
+       def predict_proba(x)
+         SVMKit::Validation.check_sample_array(x)
+         n_classes = @classes.size
+         probs = Numo::NMath.exp(1.fdiv(n_classes - 1) * decision_function(x))
+         sum_probs = probs.sum(1)
+         probs /= Numo::DFloat[sum_probs].transpose
+         probs
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about AdaBoostClassifier.
+       def marshal_dump
+         { params: @params, estimators: @estimators, classes: @classes,
+           feature_importances: @feature_importances, rng: @rng }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @estimators = obj[:estimators]
+         @classes = obj[:classes]
+         @feature_importances = obj[:feature_importances]
+         @rng = obj[:rng]
+         nil
+       end
+
+       private
+
+       # Draw sample indices by sequential (roulette-wheel) sampling over the given weights.
+       def weighted_sampling(weights)
+         Array.new(weights.size) do
+           target = @rng.rand
+           chosen = 0
+           weights.each_with_index do |w, idx|
+             if target <= w
+               chosen = idx
+               break
+             end
+             target -= w
+           end
+           chosen
+         end
+       end
+
+       # Upper bound for seeds handed to the weak learners' random generators.
+       def int_max
+         @int_max ||= 2**([42].pack('i').size * 16 - 2) - 1
+       end
+     end
+   end
+ end
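For orientation, the weight update in `fit` and the per-class score in `decision_function` follow the SAMME.R scheme of Zhu et al. cited in the class doc. In that paper's notation, with K classes, class-probability estimates p_k(x) from the current tree, and codeword entries y_i equal to 1 for the true class and -1/(K-1) otherwise (exactly what `y_codes` holds):

```latex
% Per-class confidence accumulated by decision_function:
h_k(x) = (K - 1)\left( \log p_k(x) - \frac{1}{K} \sum_{k'=1}^{K} \log p_{k'}(x) \right)

% Observation-weight update applied in fit after each tree:
w_i \leftarrow w_i \cdot \exp\!\left( -\frac{K - 1}{K} \, y_i^{\top} \log p(x_i) \right)
```

`predict_proba` then inverts the averaged score with exp(h_k(x) / (K - 1)) and renormalizes the rows, matching the code above.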
data/lib/svmkit/ensemble/ada_boost_regressor.rb ADDED
@@ -0,0 +1,200 @@
+ # frozen_string_literal: true
+
+ require 'svmkit/validation'
+ require 'svmkit/base/base_estimator'
+ require 'svmkit/base/regressor'
+ require 'svmkit/tree/decision_tree_regressor'
+
+ module SVMKit
+   module Ensemble
+     # AdaBoostRegressor is a class that implements AdaBoost (AdaBoost.RT) for regression.
+     # This class uses a decision tree as a weak learner.
+     #
+     # @example
+     #   estimator =
+     #     SVMKit::Ensemble::AdaBoostRegressor.new(
+     #       n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
+     #   estimator.fit(training_samples, training_values)
+     #   results = estimator.predict(testing_samples)
+     #
+     # *Reference*
+     # - D. L. Shrestha and D. P. Solomatine, "Experiments with AdaBoost.RT, an Improved Boosting Scheme for Regression," Neural Computation 18 (7), pp. 1678--1710, 2006.
+     #
+     class AdaBoostRegressor
+       include Base::BaseEstimator
+       include Base::Regressor
+       include Validation
+
+       # Return the set of estimators.
+       # @return [Array<DecisionTreeRegressor>]
+       attr_reader :estimators
+
+       # Return the weight for each weak learner.
+       # @return [Numo::DFloat] (size: n_estimators)
+       attr_reader :estimator_weights
+
+       # Return the importance for each feature.
+       # @return [Numo::DFloat] (size: n_features)
+       attr_reader :feature_importances
+
+       # Return the random generator for random selection of feature index.
+       # @return [Random]
+       attr_reader :rng
+
+       # Create a new regressor with AdaBoost.
+       #
+       # @param n_estimators [Integer] The number of decision trees for constructing the ensemble.
+       # @param threshold [Float] The threshold for delimiting correct and incorrect predictions, constrained to the interval [0, 1].
+       # @param exponent [Float] The exponent for the weight of each weak learner.
+       # @param criterion [String] The function to evaluate the splitting point. Supported criteria are 'mae' and 'mse'.
+       # @param max_depth [Integer] The maximum depth of the tree.
+       #   If nil is given, the decision tree grows without concern for depth.
+       # @param max_leaf_nodes [Integer] The maximum number of leaves on the decision tree.
+       #   If nil is given, the number of leaves is not limited.
+       # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+       # @param max_features [Integer] The number of features to consider when searching for the optimal split point.
+       #   If nil is given, the split process considers all features.
+       # @param random_seed [Integer] The seed value used to initialize the random generator.
+       #   It is used to randomly determine the order of features when deciding the splitting point.
+       def initialize(n_estimators: 10, threshold: 0.2, exponent: 1.0,
+                      criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                      max_features: nil, random_seed: nil)
+         check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                  max_features: max_features, random_seed: random_seed)
+         check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+         check_params_float(threshold: threshold, exponent: exponent)
+         check_params_string(criterion: criterion)
+         check_params_positive(n_estimators: n_estimators, threshold: threshold, exponent: exponent,
+                               max_depth: max_depth,
+                               max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
+                               max_features: max_features)
+         @params = {}
+         @params[:n_estimators] = n_estimators
+         @params[:threshold] = threshold
+         @params[:exponent] = exponent
+         @params[:criterion] = criterion
+         @params[:max_depth] = max_depth
+         @params[:max_leaf_nodes] = max_leaf_nodes
+         @params[:min_samples_leaf] = min_samples_leaf
+         @params[:max_features] = max_features
+         @params[:random_seed] = random_seed
+         @params[:random_seed] ||= srand
+         @estimators = nil
+         @feature_importances = nil
+         @rng = Random.new(@params[:random_seed])
+       end
+
+       # Fit the model with given training data.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+       # @param y [Numo::DFloat] (shape: [n_samples]) The target values to be used for fitting the model.
+       # @return [AdaBoostRegressor] The learned regressor itself.
+       def fit(x, y) # rubocop:disable Metrics/AbcSize
+         check_sample_array(x)
+         check_tvalue_array(y)
+         check_sample_tvalue_size(x, y)
+         # Check target values.
+         raise ArgumentError, 'Expect target value vector to be 1-D array' unless y.shape.size == 1
+         # Initialize some variables.
+         n_samples, n_features = x.shape
+         @params[:max_features] = n_features unless @params[:max_features].is_a?(Integer)
+         @params[:max_features] = [[1, @params[:max_features]].max, n_features].min
+         observation_weights = Numo::DFloat.zeros(n_samples) + 1.fdiv(n_samples)
+         @estimators = []
+         @estimator_weights = []
+         @feature_importances = Numo::DFloat.zeros(n_features)
+         # Construct boosted ensemble.
+         @params[:n_estimators].times do |_t|
+           # Fit weak learner.
+           ids = weighted_sampling(observation_weights)
+           tree = Tree::DecisionTreeRegressor.new(
+             criterion: @params[:criterion], max_depth: @params[:max_depth],
+             max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+             max_features: @params[:max_features], random_seed: @rng.rand(int_max)
+           )
+           tree.fit(x[ids, true], y[ids])
+           p = tree.predict(x)
+           # Calculate relative errors.
+           abs_err = ((p - y) / y).abs
+           err = observation_weights[abs_err.gt(@params[:threshold])].sum
+           break if err <= 0.0
+           # Calculate weight.
+           beta = err**@params[:exponent]
+           weight = Math.log(1.fdiv(beta))
+           # Store model.
+           @estimators.push(tree)
+           @estimator_weights.push(weight)
+           @feature_importances += weight * tree.feature_importances
+           # Update observation weights.
+           update = Numo::DFloat.ones(n_samples)
+           update[abs_err.le(@params[:threshold])] = beta
+           observation_weights *= update
+           observation_weights = observation_weights.clip(1.0e-15, nil)
+           sum_observation_weights = observation_weights.sum
+           break if sum_observation_weights.zero?
+           observation_weights /= sum_observation_weights
+         end
+         @estimator_weights = Numo::DFloat.asarray(@estimator_weights)
+         @feature_importances /= @estimator_weights.sum
+         self
+       end
+
+       # Predict values for samples.
+       #
+       # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+       # @return [Numo::DFloat] (shape: [n_samples]) Predicted value per sample.
+       def predict(x)
+         check_sample_array(x)
+         n_samples, = x.shape
+         predictions = Numo::DFloat.zeros(n_samples)
+         @estimators.size.times do |t|
+           predictions += @estimator_weights[t] * @estimators[t].predict(x)
+         end
+         sum_weight = @estimator_weights.sum
+         predictions / sum_weight
+       end
+
+       # Dump marshal data.
+       # @return [Hash] The marshal data about AdaBoostRegressor.
+       def marshal_dump
+         { params: @params,
+           estimators: @estimators,
+           estimator_weights: @estimator_weights,
+           feature_importances: @feature_importances,
+           rng: @rng }
+       end
+
+       # Load marshal data.
+       # @return [nil]
+       def marshal_load(obj)
+         @params = obj[:params]
+         @estimators = obj[:estimators]
+         @estimator_weights = obj[:estimator_weights]
+         @feature_importances = obj[:feature_importances]
+         @rng = obj[:rng]
+         nil
+       end
+
+       private
+
+       # Draw sample indices by sequential (roulette-wheel) sampling over the given weights.
+       def weighted_sampling(weights)
+         Array.new(weights.size) do
+           target = @rng.rand
+           chosen = 0
+           weights.each_with_index do |w, idx|
+             if target <= w
+               chosen = idx
+               break
+             end
+             target -= w
+           end
+           chosen
+         end
+       end
+
+       # Upper bound for seeds handed to the weak learners' random generators.
+       def int_max
+         @int_max ||= 2**([42].pack('i').size * 16 - 2) - 1
+       end
+     end
+   end
+ end
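The regressor's bookkeeping follows AdaBoost.RT: predictions whose relative error exceeds `threshold` count toward the error rate, samples predicted within the threshold are down-weighted by `beta = err**exponent`, and each tree's vote is `ln(1/beta)`. A small worked sketch with made-up numbers (not taken from the package) that mirrors the arithmetic in `fit`:

```ruby
threshold = 0.2
exponent  = 1.0
abs_err   = [0.05, 0.30, 0.10, 0.50]   # relative errors |p - y| / |y| per sample
weights   = [0.25, 0.25, 0.25, 0.25]   # current observation weights

# Weighted error rate: total weight of samples whose error exceeds the threshold.
err  = weights.zip(abs_err).sum { |w, e| e > threshold ? w : 0.0 }  # => 0.5
beta = err**exponent                                                # => 0.5
learner_weight = Math.log(1.0 / beta)                               # => ~0.693

# Samples predicted within the threshold are down-weighted by beta,
# then the weights are renormalized to sum to one.
updated = weights.zip(abs_err).map { |w, e| e <= threshold ? w * beta : w }
total   = updated.sum
updated = updated.map { |w| w / total }  # => [1/6, 1/3, 1/6, 1/3]
```

At prediction time the trees are combined by a weighted mean with these `learner_weight` values, matching `predict` above.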
data/lib/svmkit/version.rb CHANGED
@@ -3,5 +3,5 @@
  # SVMKit is a machine learning library in Ruby.
  module SVMKit
    # @!visibility private
-   VERSION = '0.6.3'.freeze
+   VERSION = '0.7.0'.freeze
  end
data/svmkit.gemspec CHANGED
@@ -17,7 +17,7 @@ SVMKit is a machine learning library in Ruby.
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
- Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
+ Naive Bayes, Decision Tree, AdaBoost, Random Forest, K-nearest neighbor algorithm,
  K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
  and cross-validation.
  MSG
@@ -33,7 +33,7 @@ MSG
 
  spec.required_ruby_version = '>= 2.1'
 
- spec.add_runtime_dependency 'numo-narray', '>= 0.9.0'
+ spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
 
  spec.add_development_dependency 'bundler', '~> 1.16'
  spec.add_development_dependency 'coveralls', '~> 0.8'
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: svmkit
  version: !ruby/object:Gem::Version
- version: 0.6.3
+ version: 0.7.0
  platform: ruby
  authors:
  - yoshoku
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2018-11-25 00:00:00.000000000 Z
+ date: 2018-12-02 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: numo-narray
@@ -16,14 +16,14 @@ dependencies:
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.9.0
+ version: 0.9.1
  type: :runtime
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
  - !ruby/object:Gem::Version
- version: 0.9.0
+ version: 0.9.1
  - !ruby/object:Gem::Dependency
  name: bundler
  requirement: !ruby/object:Gem::Requirement
@@ -85,7 +85,7 @@ description: |
  SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
  SVMKit currently supports Linear / Kernel Support Vector Machine,
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
- Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
+ Naive Bayes, Decision Tree, AdaBoost, Random Forest, K-nearest neighbor algorithm,
  K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
  and cross-validation.
  email:
@@ -121,6 +121,8 @@ files:
  - lib/svmkit/dataset.rb
  - lib/svmkit/decomposition/nmf.rb
  - lib/svmkit/decomposition/pca.rb
+ - lib/svmkit/ensemble/ada_boost_classifier.rb
+ - lib/svmkit/ensemble/ada_boost_regressor.rb
  - lib/svmkit/ensemble/random_forest_classifier.rb
  - lib/svmkit/ensemble/random_forest_regressor.rb
  - lib/svmkit/evaluation_measure/accuracy.rb