svmkit 0.3.1 → 0.3.2
- checksums.yaml +4 -4
- data/HISTORY.md +8 -0
- data/lib/svmkit/dataset.rb +18 -4
- data/lib/svmkit/ensemble/random_forest_classifier.rb +2 -1
- data/lib/svmkit/ensemble/random_forest_regressor.rb +141 -0
- data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb +231 -0
- data/lib/svmkit/tree/decision_tree_classifier.rb +2 -65
- data/lib/svmkit/tree/decision_tree_regressor.rb +252 -0
- data/lib/svmkit/tree/node.rb +70 -0
- data/lib/svmkit/version.rb +1 -1
- data/lib/svmkit.rb +4 -0
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 93ce9c2e79ac158b4a3e988afc547b1891419eb6e6b1845156cf98eaa3cdd578
+  data.tar.gz: 4e677653deebd035cbdcd5c98529b7f4fee6804075ecab1113dbccc0bf9c65ed
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7518039557e3c991c4a0cc112764198ed6340c8be1fa9c3fb746be21ffbb5518dd35651149cda5aba8ef52a36dfa6b17f47e1335893ae0cd1dfc5776a0e6bf8e
+  data.tar.gz: c062d9c2a7c04be82787a4d76a970855c2dc8ce0d4bf5531b6196b96873f4f8e679ee434af9a56c2ebf56ae9a8adb33387925050eb9b8964da613318f2a0e430
data/HISTORY.md
CHANGED
@@ -1,3 +1,11 @@
+# 0.3.2
+- Add class for Factorization Machine regressor.
+- Add class for Decision Tree regressor.
+- Add class for Random Forest regressor.
+- Fix to support loading and dumping libsvm file with multi-target variables.
+- Fix to require DecisionTreeClassifier on RandomForestClassifier.
+- Fix some mistakes on document.
+
 # 0.3.1
 - Fix bug on decision function calculation of FactorizationMachineClassifier.
 - Fix bug on weight updating process of KernelSVC.
data/lib/svmkit/dataset.rb
CHANGED
@@ -33,11 +33,13 @@ module SVMKit
       # @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
       def dump_libsvm_file(data, labels, filename, zero_based: false)
         n_samples = [data.shape[0], labels.shape[0]].min
+        single_label = labels.shape[1].nil?
         label_type = detect_dtype(labels)
         value_type = detect_dtype(data)
         File.open(filename, 'w') do |file|
           n_samples.times do |n|
-            file.puts(dump_libsvm_line(labels[n], data[n, true],
+            label = single_label ? labels[n] : labels[n, true].to_a
+            file.puts(dump_libsvm_line(label, data[n, true],
                                        label_type, value_type, zero_based))
           end
         end
@@ -47,8 +49,7 @@ module SVMKit
 
       def parse_libsvm_line(line, zero_based)
         tokens = line.split
-        label = tokens.shift
-        label = label.to_i.to_s == label ? label.to_i : label.to_f
+        label = parse_label(tokens.shift)
         ftvec = tokens.map do |el|
           idx, val = el.split(':')
           idx = idx.to_i - (zero_based == false ? 1 : 0)
@@ -60,6 +61,11 @@ module SVMKit
         [label, ftvec, max_idx]
       end
 
+      def parse_label(label)
+        lbl_arr = label.split(',').map { |lbl| lbl.to_i.to_s == lbl ? lbl.to_i : lbl.to_f }
+        lbl_arr.size > 1 ? lbl_arr : lbl_arr[0]
+      end
+
       def convert_to_matrix(data, n_features)
         mat = []
         data.each do |ft|
@@ -80,13 +86,21 @@ module SVMKit
       end
 
       def dump_libsvm_line(label, ftvec, label_type, value_type, zero_based)
-        line = format(label_type.to_s, label)
+        line = dump_label(label, label_type.to_s)
        ftvec.to_a.each_with_index do |val, n|
          idx = n + (zero_based == false ? 1 : 0)
          line += format(" %d:#{value_type}", idx, val) if val != 0.0
        end
        line
       end
+
+      def dump_label(label, label_type_str)
+        if label.is_a?(Array)
+          label.map { |lbl| format(label_type_str, lbl) }.join(',')
+        else
+          format(label_type_str, label)
+        end
+      end
     end
   end
 end
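With this change, one libsvm line can carry several comma-separated target values. A minimal round-trip sketch (the file name and toy values are illustrative; load_libsvm_file is the loading counterpart referenced in the changelog):

require 'svmkit'

# Two samples with two target variables each; dump_label joins the targets
# with commas, so the dumped lines look like "1.5,2.0 1:0.1 2:0.2".
samples = Numo::DFloat[[0.1, 0.2], [0.3, 0.4]]
targets = Numo::DFloat[[1.5, 2.0], [2.5, 3.0]]
SVMKit::Dataset.dump_libsvm_file(samples, targets, 'multi_target.t')

# parse_label splits each label token on ',', so a sample's label comes back
# as an array whenever more than one target value is present.
loaded_samples, loaded_targets = SVMKit::Dataset.load_libsvm_file('multi_target.t')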
data/lib/svmkit/ensemble/random_forest_classifier.rb
CHANGED
@@ -3,6 +3,7 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/tree/decision_tree_classifier'
 
 module SVMKit
   # This module consists of the classes that implement ensemble-based methods.
@@ -32,7 +33,7 @@ module SVMKit
       # @return [Numo::DFloat] (size: n_features)
       attr_reader :feature_importances
 
-      # Return the random generator for
+      # Return the random generator for random selection of feature index.
       # @return [Random]
       attr_reader :rng
 
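This require fix matters because RandomForestClassifier instantiates Tree::DecisionTreeClassifier, so loading the ensemble file on its own previously depended on require order. A small sketch of the now-safe direct require (the constructor arguments shown are illustrative):

require 'svmkit/ensemble/random_forest_classifier'

# The tree dependency is now loaded explicitly by the file itself.
estimator = SVMKit::Ensemble::RandomForestClassifier.new(n_estimators: 10, random_seed: 1)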
data/lib/svmkit/ensemble/random_forest_regressor.rb
ADDED
@@ -0,0 +1,141 @@
+# frozen_string_literal: true
+
+require 'pp'
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/regressor'
+require 'svmkit/tree/decision_tree_regressor'
+
+module SVMKit
+  module Ensemble
+    # RandomForestRegressor is a class that implements random forest for regression
+    #
+    # @example
+    #   estimator =
+    #     SVMKit::Ensemble::RandomForestRegressor.new(
+    #       n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class RandomForestRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+      include Validation
+
+      # Return the set of estimators.
+      # @return [Array<DecisionTreeRegressor>]
+      attr_reader :estimators
+
+      # Return the importance for each feature.
+      # @return [Numo::DFloat] (size: n_features)
+      attr_reader :feature_importances
+
+      # Return the random generator for random selection of feature index.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new regressor with random forest.
+      #
+      # @param n_estimators [Integer] The numeber of decision trees for contructing random forest.
+      # @param criterion [String] The function to evalue spliting point. Supported criteria are 'gini' and 'entropy'.
+      # @param max_depth [Integer] The maximum depth of the tree.
+      #   If nil is given, decision tree grows without concern for depth.
+      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
+      #   If nil is given, number of leaves is not limited.
+      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+      # @param max_features [Integer] The number of features to consider when searching optimal split point.
+      #   If nil is given, split process considers all features.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      #   It is used to randomly determine the order of features when deciding spliting point.
+      def initialize(n_estimators: 10, criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1,
+                     max_features: nil, random_seed: nil)
+        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                 max_features: max_features, random_seed: random_seed)
+        check_params_integer(n_estimators: n_estimators, min_samples_leaf: min_samples_leaf)
+        check_params_string(criterion: criterion)
+        check_params_positive(n_estimators: n_estimators, max_depth: max_depth,
+                              max_leaf_nodes: max_leaf_nodes, min_samples_leaf: min_samples_leaf,
+                              max_features: max_features)
+        @params = {}
+        @params[:n_estimators] = n_estimators
+        @params[:criterion] = criterion
+        @params[:max_depth] = max_depth
+        @params[:max_leaf_nodes] = max_leaf_nodes
+        @params[:min_samples_leaf] = min_samples_leaf
+        @params[:max_features] = max_features
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @estimators = nil
+        @feature_importances = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [RandomForestRegressor] The learned regressor itself.
+      def fit(x, y)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        # Initialize some variables.
+        n_samples, n_features = x.shape
+        @params[:max_features] ||= n_features
+        @params[:max_features] = [[1, @params[:max_features]].max, Math.sqrt(n_features).to_i].min
+        single_target = y.shape[1].nil?
+        # Construct forest.
+        @estimators = Array.new(@params[:n_estimators]) do |_n|
+          tree = Tree::DecisionTreeRegressor.new(
+            criterion: @params[:criterion], max_depth: @params[:max_depth],
+            max_leaf_nodes: @params[:max_leaf_nodes], min_samples_leaf: @params[:min_samples_leaf],
+            max_features: @params[:max_features], random_seed: @params[:random_seed]
+          )
+          bootstrap_ids = Array.new(n_samples) { @rng.rand(0...n_samples) }
+          tree.fit(x[bootstrap_ids, true], single_target ? y[bootstrap_ids] : y[bootstrap_ids, true])
+        end
+        # Calculate feature importances.
+        @feature_importances = @estimators.map(&:feature_importances).reduce(&:+)
+        @feature_importances /= @feature_importances.sum
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted value per sample.
+      def predict(x)
+        check_sample_array(x)
+        @estimators.map { |est| est.predict(x) }.reduce(&:+) / @params[:n_estimators]
+      end
+
+      # Return the index of the leaf that each sample reached.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to assign each leaf.
+      # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
+      def apply(x)
+        SVMKit::Validation.check_sample_array(x)
+        Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about RandomForestRegressor
+      def marshal_dump
+        { params: @params,
+          estimators: @estimators,
+          feature_importances: @feature_importances,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @estimators = obj[:estimators]
+        @feature_importances = obj[:feature_importances]
+        @rng = obj[:rng]
+        nil
+      end
+    end
+  end
+end
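Following the @example above, a minimal end-to-end sketch (the toy data is illustrative, not part of the gem):

require 'svmkit'

# 100 samples with 4 features; the target is a simple mix of two features.
samples = Numo::DFloat.new(100, 4).rand
values = samples[true, 0] * 2.0 + samples[true, 1]

estimator = SVMKit::Ensemble::RandomForestRegressor.new(
  n_estimators: 10, criterion: 'mse', max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
estimator.fit(samples, values)
results = estimator.predict(samples)

# Importances are summed over the trees and normalized to sum to one.
puts estimator.feature_importances.to_a.inspect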
data/lib/svmkit/polynomial_model/factorization_machine_regressor.rb
ADDED
@@ -0,0 +1,231 @@
+# frozen_string_literal: true
+
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/regressor'
+
+module SVMKit
+  module PolynomialModel
+    # FactorizationMachineRegressor is a class that implements Factorization Machine
+    # with stochastic gradient descent (SGD) optimization.
+    #
+    # @example
+    #   estimator =
+    #     SVMKit::PolynomialModel::FactorizationMachineRegressor.new(
+    #       n_factors: 10, reg_param_bias: 0.1, reg_param_weight: 0.1, reg_param_factor: 0.1,
+    #       max_iter: 5000, batch_size: 50, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    # *Reference*
+    # - S. Rendle, "Factorization Machines with libFM," ACM Transactions on Intelligent Systems and Technology, vol. 3 (3), pp. 57:1--57:22, 2012.
+    # - S. Rendle, "Factorization Machines," Proc. the 10th IEEE International Conference on Data Mining (ICDM'10), pp. 995--1000, 2010.
+    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. the 30th International Conference on Machine Learning (ICML' 13), pp. 1139--1147, 2013.
+    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
+    class FactorizationMachineRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+      include Validation
+
+      # Return the factor matrix for Factorization Machine.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_factors, n_features])
+      attr_reader :factor_mat
+
+      # Return the weight vector for Factorization Machine.
+      # @return [Numo::DFloat] (shape: [n_outputs, n_features])
+      attr_reader :weight_vec
+
+      # Return the bias term for Factoriazation Machine.
+      # @return [Numo::DFloat] (shape: [n_outputs])
+      attr_reader :bias_term
+
+      # Return the random generator for random sampling.
+      # @return [Random]
+      attr_reader :rng
+
+      # Create a new regressor with Factorization Machine.
+      #
+      # @param n_factors [Integer] The maximum number of iterations.
+      # @param reg_param_bias [Float] The regularization parameter for bias term.
+      # @param reg_param_weight [Float] The regularization parameter for weight vector.
+      # @param reg_param_factor [Float] The regularization parameter for factor matrix.
+      # @param init_std [Float] The standard deviation of normal random number for initialization of factor matrix.
+      # @param learning_rate [Float] The learning rate for optimization.
+      # @param decay [Float] The discounting factor for RMS prop optimization.
+      # @param momentum [Float] The Nesterov momentum for optimization.
+      # @param max_iter [Integer] The maximum number of iterations.
+      # @param batch_size [Integer] The size of the mini batches.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      def initialize(n_factors: 2,
+                     reg_param_bias: 1.0, reg_param_weight: 1.0, reg_param_factor: 1.0, init_std: 0.01,
+                     learning_rate: 0.01, decay: 0.9, momentum: 0.9,
+                     max_iter: 1000, batch_size: 10, random_seed: nil)
+        check_params_float(reg_param_bias: reg_param_bias, reg_param_weight: reg_param_weight,
+                           reg_param_factor: reg_param_factor, init_std: init_std,
+                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+        check_params_integer(n_factors: n_factors, max_iter: max_iter, batch_size: batch_size)
+        check_params_type_or_nil(Integer, random_seed: random_seed)
+        check_params_positive(n_factors: n_factors, reg_param_bias: reg_param_bias,
+                              reg_param_weight: reg_param_weight, reg_param_factor: reg_param_factor,
+                              learning_rate: learning_rate, decay: decay, momentum: momentum,
+                              max_iter: max_iter, batch_size: batch_size)
+        @params = {}
+        @params[:n_factors] = n_factors
+        @params[:reg_param_bias] = reg_param_bias
+        @params[:reg_param_weight] = reg_param_weight
+        @params[:reg_param_factor] = reg_param_factor
+        @params[:init_std] = init_std
+        @params[:learning_rate] = learning_rate
+        @params[:decay] = decay
+        @params[:momentum] = momentum
+        @params[:max_iter] = max_iter
+        @params[:batch_size] = batch_size
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @factor_mat = nil
+        @weight_vec = nil
+        @bias_term = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::Int32] (shape: [n_samples, n_outputs]) The target values to be used for fitting the model.
+      # @return [FactorizationMachineRegressor] The learned regressor itself.
+      def fit(x, y)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+
+        n_outputs = y.shape[1].nil? ? 1 : y.shape[1]
+        _n_samples, n_features = x.shape
+
+        if n_outputs > 1
+          @factor_mat = Numo::DFloat.zeros(n_outputs, @params[:n_factors], n_features)
+          @weight_vec = Numo::DFloat.zeros(n_outputs, n_features)
+          @bias_term = Numo::DFloat.zeros(n_outputs)
+          n_outputs.times do |n|
+            factor, weight, bias = single_fit(x, y[true, n])
+            @factor_mat[n, true, true] = factor
+            @weight_vec[n, true] = weight
+            @bias_term[n] = bias
+          end
+        else
+          @factor_mat, @weight_vec, @bias_term = single_fit(x, y)
+        end
+
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        check_sample_array(x)
+        linear_term = @bias_term + x.dot(@weight_vec.transpose)
+        factor_term = if @weight_vec.shape[1].nil?
+                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(0)
+                      else
+                        0.5 * (@factor_mat.dot(x.transpose)**2 - (@factor_mat**2).dot(x.transpose**2)).sum(1).transpose
+                      end
+        linear_term + factor_term
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about FactorizationMachineRegressor
+      def marshal_dump
+        { params: @params,
+          factor_mat: @factor_mat,
+          weight_vec: @weight_vec,
+          bias_term: @bias_term,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @factor_mat = obj[:factor_mat]
+        @weight_vec = obj[:weight_vec]
+        @bias_term = obj[:bias_term]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def single_fit(x, y)
+        # Initialize some variables.
+        n_samples, n_features = x.shape
+        rand_ids = [*0...n_samples].shuffle(random: @rng)
+        factor_mat = rand_normal([@params[:n_factors], n_features], 0, @params[:init_std])
+        factor_sqrsum = Numo::DFloat.zeros(factor_mat.shape)
+        factor_update = Numo::DFloat.zeros(factor_mat.shape)
+        weight_vec = Numo::DFloat.zeros(n_features)
+        weight_sqrsum = Numo::DFloat.zeros(n_features)
+        weight_update = Numo::DFloat.zeros(n_features)
+        bias_term = 0.0
+        bias_sqrsum = 0.0
+        bias_update = 0.0
+        # Start optimization.
+        @params[:max_iter].times do |_t|
+          # Random sampling.
+          subset_ids = rand_ids.shift(@params[:batch_size])
+          rand_ids.concat(subset_ids)
+          data = x[subset_ids, true]
+          values = y[subset_ids]
+          # Calculate gradients for loss function.
+          loss_grad = loss_gradient(data, values, factor_mat, weight_vec, bias_term)
+          next if loss_grad.ne(0.0).count.zero?
+          # Update each parameter.
+          bias_term, bias_sqrsum, bias_update =
+            update_param(bias_term, bias_sqrsum, bias_update,
+                         bias_gradient(loss_grad, bias_term - @params[:momentum] * bias_update))
+          weight_vec, weight_sqrsum, weight_update =
+            update_param(weight_vec, weight_sqrsum, weight_update,
+                         weight_gradient(loss_grad, data, weight_vec - @params[:momentum] * weight_update))
+          @params[:n_factors].times do |n|
+            factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true] =
+              update_param(factor_update[n, true], factor_sqrsum[n, true], factor_update[n, true],
+                           factor_gradient(loss_grad, data, factor_mat[n, true] - @params[:momentum] * factor_update[n, true]))
+          end
+        end
+        [factor_mat, weight_vec, bias_term]
+      end
+
+      def loss_gradient(x, y, factor, weight, bias)
+        z = bias + x.dot(weight) + 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)).sum(0)
+        2.0 * (z - y)
+      end
+
+      def bias_gradient(loss_grad, bias)
+        loss_grad.mean + @params[:reg_param_bias] * bias
+      end
+
+      def weight_gradient(loss_grad, data, weight)
+        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_weight] * weight
+      end
+
+      def factor_gradient(loss_grad, data, factor)
+        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) + @params[:reg_param_factor] * factor
+      end
+
+      def update_param(param, sqrsum, update, gr)
+        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
+        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
+        new_param = param - (new_update + @params[:momentum] * update)
+        [new_param, new_sqrsum, new_update]
+      end
+
+      def rand_uniform(shape)
+        Numo::DFloat[*Array.new(shape.inject(&:*)) { @rng.rand }].reshape(*shape)
+      end
+
+      def rand_normal(shape, mu, sigma)
+        mu + sigma * (Numo::NMath.sqrt(-2.0 * Numo::NMath.log(rand_uniform(shape))) * Numo::NMath.sin(2.0 * Math::PI * rand_uniform(shape)))
+      end
+    end
+  end
+end
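For reference, the predict and loss_gradient methods above implement the standard second-order Factorization Machine model from the Rendle papers cited in the class comment, trained with a squared-error loss. Writing the factors as v_{f,i}:

\hat{y}(\mathbf{x}) = w_0 + \sum_{i=1}^{n} w_i x_i + \frac{1}{2} \sum_{f=1}^{k} \Biggl[ \biggl( \sum_{i=1}^{n} v_{f,i} x_i \biggr)^{2} - \sum_{i=1}^{n} v_{f,i}^{2} x_i^{2} \Biggr], \qquad \frac{\partial (\hat{y} - y)^2}{\partial \hat{y}} = 2\,(\hat{y} - y)

The bracketed identity reduces the pairwise interaction term to O(kn) per sample; it is exactly the 0.5 * (factor.dot(x.transpose)**2 - (factor**2).dot(x.transpose**2)) expression in the code.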
data/lib/svmkit/tree/decision_tree_classifier.rb
CHANGED
@@ -3,74 +3,11 @@
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/classifier'
+require 'svmkit/tree/node'
 
 module SVMKit
   # This module consists of the classes that implement tree models.
   module Tree
-    # Node is a class that implements node used for construction of decision tree.
-    # This class is used for internal data structures.
-    class Node
-      # @!visibility private
-      attr_accessor :depth, :impurity, :n_samples, :probs, :leaf, :leaf_id, :left, :right, :feature_id, :threshold
-
-      # Create a new node for decision tree.
-      #
-      # @param depth [Integer] The depth of the node in tree.
-      # @param impurity [Float] The impurity of the node.
-      # @param n_samples [Integer] The number of the samples in the node.
-      # @param probs [Float] The probability of the node.
-      # @param leaf [Boolean] The flag indicating whether the node is a leaf.
-      # @param leaf_id [Integer] The leaf index of the node.
-      # @param left [Node] The left node.
-      # @param right [Node] The right node.
-      # @param feature_id [Integer] The feature index used for evaluation.
-      # @param threshold [Float] The threshold value of the feature for splitting the node.
-      def initialize(depth: 0, impurity: 0.0, n_samples: 0, probs: 0.0,
-                     leaf: true, leaf_id: 0,
-                     left: nil, right: nil, feature_id: 0, threshold: 0.0)
-        @depth = depth
-        @impurity = impurity
-        @n_samples = n_samples
-        @probs = probs
-        @leaf = leaf
-        @leaf_id = leaf_id
-        @left = left
-        @right = right
-        @feature_id = feature_id
-        @threshold = threshold
-      end
-
-      # Dump marshal data.
-      # @return [Hash] The marshal data about Node
-      def marshal_dump
-        { depth: @depth,
-          impurity: @impurity,
-          n_samples: @n_samples,
-          probs: @probs,
-          leaf: @leaf,
-          leaf_id: @leaf_id,
-          left: @left,
-          right: @right,
-          feature_id: @feature_id,
-          threshold: @threshold }
-      end
-
-      # Load marshal data.
-      # @return [nil]
-      def marshal_load(obj)
-        @depth = obj[:depth]
-        @impurity = obj[:impurity]
-        @n_samples = obj[:n_samples]
-        @probs = obj[:probs]
-        @leaf = obj[:leaf]
-        @leaf_id = obj[:leaf_id]
-        @left = obj[:left]
-        @right = obj[:right]
-        @feature_id = obj[:feature_id]
-        @threshold = obj[:threshold]
-      end
-    end
-
     # DecisionTreeClassifier is a class that implements decision tree for classification.
     #
     # @example
@@ -96,7 +33,7 @@ module SVMKit
       # @return [Node]
      attr_reader :tree
 
-      # Return the random generator for
+      # Return the random generator for random selection of feature index.
      # @return [Random]
      attr_reader :rng
 
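The extracted Node keeps the SVMKit::Tree::Node constant path (the one behavioral difference is that marshal_load now returns nil, per node.rb below), so trees marshaled against the old location should deserialize unchanged. A quick sketch:

require 'svmkit'

# Same constant path as when Node lived inside decision_tree_classifier.rb.
node = SVMKit::Tree::Node.new(depth: 1, impurity: 0.5, n_samples: 10)
puts node.depth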
data/lib/svmkit/tree/decision_tree_regressor.rb
ADDED
@@ -0,0 +1,252 @@
+# frozen_string_literal: true
+
+require 'svmkit/validation'
+require 'svmkit/base/base_estimator'
+require 'svmkit/base/regressor'
+require 'svmkit/tree/node'
+
+module SVMKit
+  module Tree
+    # DecisionTreeRegressor is a class that implements decision tree for regression.
+    #
+    # @example
+    #   estimator =
+    #     SVMKit::Tree::DecisionTreeRegressor.new(
+    #       max_depth: 3, max_leaf_nodes: 10, min_samples_leaf: 5, random_seed: 1)
+    #   estimator.fit(training_samples, traininig_values)
+    #   results = estimator.predict(testing_samples)
+    #
+    class DecisionTreeRegressor
+      include Base::BaseEstimator
+      include Base::Regressor
+      include Validation
+
+      # Return the importance for each feature.
+      # @return [Numo::DFloat] (size: n_features)
+      attr_reader :feature_importances
+
+      # Return the learned tree.
+      # @return [Node]
+      attr_reader :tree
+
+      # Return the random generator for random selection of feature index.
+      # @return [Random]
+      attr_reader :rng
+
+      # Return the values assigned each leaf.
+      # @return [Numo::DFloat] (shape: [n_leafs, n_outputs])
+      attr_reader :leaf_values
+
+      # Create a new regressor with decision tree algorithm.
+      #
+      # @param criterion [String] The function to evalue spliting point. Supported criteria are 'mae' and 'mse'.
+      # @param max_depth [Integer] The maximum depth of the tree.
+      #   If nil is given, decision tree grows without concern for depth.
+      # @param max_leaf_nodes [Integer] The maximum number of leaves on decision tree.
+      #   If nil is given, number of leaves is not limited.
+      # @param min_samples_leaf [Integer] The minimum number of samples at a leaf node.
+      # @param max_features [Integer] The number of features to consider when searching optimal split point.
+      #   If nil is given, split process considers all features.
+      # @param random_seed [Integer] The seed value using to initialize the random generator.
+      #   It is used to randomly determine the order of features when deciding spliting point.
+      def initialize(criterion: 'mse', max_depth: nil, max_leaf_nodes: nil, min_samples_leaf: 1, max_features: nil,
+                     random_seed: nil)
+        check_params_type_or_nil(Integer, max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                                 max_features: max_features, random_seed: random_seed)
+        check_params_integer(min_samples_leaf: min_samples_leaf)
+        check_params_string(criterion: criterion)
+        check_params_positive(max_depth: max_depth, max_leaf_nodes: max_leaf_nodes,
+                              min_samples_leaf: min_samples_leaf, max_features: max_features)
+        @params = {}
+        @params[:criterion] = criterion
+        @params[:max_depth] = max_depth
+        @params[:max_leaf_nodes] = max_leaf_nodes
+        @params[:min_samples_leaf] = min_samples_leaf
+        @params[:max_features] = max_features
+        @params[:random_seed] = random_seed
+        @params[:random_seed] ||= srand
+        @criterion = :mse
+        @criterion = :mae if @params[:criterion] == 'mae'
+        @tree = nil
+        @feature_importances = nil
+        @n_leaves = nil
+        @leaf_values = nil
+        @rng = Random.new(@params[:random_seed])
+      end
+
+      # Fit the model with given training data.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
+      # @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values to be used for fitting the model.
+      # @return [DecisionTreeRegressor] The learned regressor itself.
+      def fit(x, y)
+        check_sample_array(x)
+        check_tvalue_array(y)
+        check_sample_tvalue_size(x, y)
+        single_target = y.shape[1].nil?
+        y = y.expand_dims(1) if single_target
+        n_samples, n_features = x.shape
+        @params[:max_features] = n_features if @params[:max_features].nil?
+        @params[:max_features] = [@params[:max_features], n_features].min
+        build_tree(x, y)
+        @leaf_values = @leaf_values[true] if single_target
+        eval_importance(n_samples, n_features)
+        self
+      end
+
+      # Predict values for samples.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::DFloat] (shape: [n_samples, n_outputs]) Predicted values per sample.
+      def predict(x)
+        check_sample_array(x)
+        @leaf_values.shape[1].nil? ? @leaf_values[apply(x)] : @leaf_values[apply(x), true]
+      end
+
+      # Return the index of the leaf that each sample reached.
+      #
+      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the values.
+      # @return [Numo::Int32] (shape: [n_samples]) Leaf index for sample.
+      def apply(x)
+        check_sample_array(x)
+        Numo::Int32[*(Array.new(x.shape[0]) { |n| apply_at_node(@tree, x[n, true]) })]
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about DecisionTreeRegressor
+      def marshal_dump
+        { params: @params,
+          criterion: @criterion,
+          tree: @tree,
+          feature_importances: @feature_importances,
+          leaf_values: @leaf_values,
+          rng: @rng }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @params = obj[:params]
+        @criterion = obj[:criterion]
+        @tree = obj[:tree]
+        @feature_importances = obj[:feature_importances]
+        @leaf_values = obj[:leaf_values]
+        @rng = obj[:rng]
+        nil
+      end
+
+      private
+
+      def apply_at_node(node, sample)
+        return node.leaf_id if node.leaf
+        return apply_at_node(node.left, sample) if node.right.nil?
+        return apply_at_node(node.right, sample) if node.left.nil?
+        if sample[node.feature_id] <= node.threshold
+          apply_at_node(node.left, sample)
+        else
+          apply_at_node(node.right, sample)
+        end
+      end
+
+      def build_tree(x, y)
+        @n_leaves = 0
+        @leaf_values = []
+        @tree = grow_node(0, x, y)
+        @leaf_values = Numo::DFloat.cast(@leaf_values)
+        nil
+      end
+
+      def grow_node(depth, x, y)
+        unless @params[:max_leaf_nodes].nil?
+          return nil if @n_leaves >= @params[:max_leaf_nodes]
+        end
+
+        n_samples, n_features = x.shape
+        return nil if n_samples <= @params[:min_samples_leaf]
+
+        node = Node.new(depth: depth, impurity: impurity(y), n_samples: n_samples)
+
+        return put_leaf(node, y) if (y - y.mean(0)).sum.abs.zero?
+
+        unless @params[:max_depth].nil?
+          return put_leaf(node, y) if depth == @params[:max_depth]
+        end
+
+        feature_id, threshold, left_ids, right_ids, max_gain =
+          rand_ids(n_features).map { |f_id| [f_id, *best_split(x[true, f_id], y)] }.max_by(&:last)
+        return put_leaf(node, y) if max_gain.nil? || max_gain.zero?
+
+        node.left = grow_node(depth + 1, x[left_ids, true], y[left_ids, true])
+        node.right = grow_node(depth + 1, x[right_ids, true], y[right_ids, true])
+        return put_leaf(node, y) if node.left.nil? && node.right.nil?
+
+        node.feature_id = feature_id
+        node.threshold = threshold
+        node.leaf = false
+        node
+      end
+
+      def put_leaf(node, values)
+        node.probs = nil
+        node.leaf = true
+        node.leaf_id = @n_leaves
+        @n_leaves += 1
+        @leaf_values.push(values.mean(0))
+        node
+      end
+
+      def rand_ids(n)
+        [*0...n].sample(@params[:max_features], random: @rng)
+      end
+
+      def best_split(features, values)
+        features.to_a.uniq.sort.each_cons(2).map do |l, r|
+          threshold = 0.5 * (l + r)
+          left_ids, right_ids = splited_ids(features, threshold)
+          [threshold, left_ids, right_ids, gain(values, values[left_ids], values[right_ids])]
+        end.max_by(&:last)
+      end
+
+      def splited_ids(features, threshold)
+        [features.le(threshold).where.to_a, features.gt(threshold).where.to_a]
+      end
+
+      def gain(values, values_left, values_right)
+        prob_left = values_left.shape[0].fdiv(values.shape[0])
+        prob_right = values_right.shape[0].fdiv(values.shape[0])
+        impurity(values) - prob_left * impurity(values_left) - prob_right * impurity(values_right)
+      end
+
+      def impurity(values)
+        send(@criterion, values)
+      end
+
+      def mse(values)
+        ((values - values.mean(0))**2).mean
+      end
+
+      def mae(values)
+        (values - values.mean(0)).abs.mean
+      end
+
+      def eval_importance(n_samples, n_features)
+        @feature_importances = Numo::DFloat.zeros(n_features)
+        eval_importance_at_node(@tree)
+        @feature_importances /= n_samples
+        normalizer = @feature_importances.sum
+        @feature_importances /= normalizer if normalizer > 0.0
+        nil
+      end
+
+      def eval_importance_at_node(node)
+        return nil if node.leaf
+        return nil if node.left.nil? || node.right.nil?
+        gain = node.n_samples * node.impurity -
+               node.left.n_samples * node.left.impurity - node.right.n_samples * node.right.impurity
+        @feature_importances[node.feature_id] += gain
+        eval_importance_at_node(node.left)
+        eval_importance_at_node(node.right)
+      end
+    end
+  end
+end
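The split search in grow_node and best_split maximizes the impurity gain evaluated by gain; with the default 'mse' criterion the quantities are

\mathrm{impurity}(S) = \frac{1}{|S|} \sum_{y \in S} (y - \bar{y})^{2}, \qquad \mathrm{gain} = \mathrm{impurity}(S) - \frac{|S_L|}{|S|}\,\mathrm{impurity}(S_L) - \frac{|S_R|}{|S|}\,\mathrm{impurity}(S_R)

with candidate thresholds placed midway between consecutive distinct feature values; the 'mae' criterion swaps in the mean absolute deviation around the mean.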
data/lib/svmkit/tree/node.rb
ADDED
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module SVMKit
+  module Tree
+    # Node is a class that implements node used for construction of decision tree.
+    # This class is used for internal data structures.
+    class Node
+      # @!visibility private
+      attr_accessor :depth, :impurity, :n_samples, :probs, :leaf, :leaf_id, :left, :right, :feature_id, :threshold
+
+      # Create a new node for decision tree.
+      #
+      # @param depth [Integer] The depth of the node in tree.
+      # @param impurity [Float] The impurity of the node.
+      # @param n_samples [Integer] The number of the samples in the node.
+      # @param probs [Float] The probability of the node.
+      # @param leaf [Boolean] The flag indicating whether the node is a leaf.
+      # @param leaf_id [Integer] The leaf index of the node.
+      # @param left [Node] The left node.
+      # @param right [Node] The right node.
+      # @param feature_id [Integer] The feature index used for evaluation.
+      # @param threshold [Float] The threshold value of the feature for splitting the node.
+      def initialize(depth: 0, impurity: 0.0, n_samples: 0, probs: 0.0,
+                     leaf: true, leaf_id: 0,
+                     left: nil, right: nil, feature_id: 0, threshold: 0.0)
+        @depth = depth
+        @impurity = impurity
+        @n_samples = n_samples
+        @probs = probs
+        @leaf = leaf
+        @leaf_id = leaf_id
+        @left = left
+        @right = right
+        @feature_id = feature_id
+        @threshold = threshold
+      end
+
+      # Dump marshal data.
+      # @return [Hash] The marshal data about Node
+      def marshal_dump
+        { depth: @depth,
+          impurity: @impurity,
+          n_samples: @n_samples,
+          probs: @probs,
+          leaf: @leaf,
+          leaf_id: @leaf_id,
+          left: @left,
+          right: @right,
+          feature_id: @feature_id,
+          threshold: @threshold }
+      end
+
+      # Load marshal data.
+      # @return [nil]
+      def marshal_load(obj)
+        @depth = obj[:depth]
+        @impurity = obj[:impurity]
+        @n_samples = obj[:n_samples]
+        @probs = obj[:probs]
+        @leaf = obj[:leaf]
+        @leaf_id = obj[:leaf_id]
+        @left = obj[:left]
+        @right = obj[:right]
+        @feature_id = obj[:feature_id]
+        @threshold = obj[:threshold]
+        nil
+      end
+    end
+  end
+end
data/lib/svmkit/version.rb
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -19,12 +19,16 @@ require 'svmkit/linear_model/svr'
 require 'svmkit/linear_model/logistic_regression'
 require 'svmkit/kernel_machine/kernel_svc'
 require 'svmkit/polynomial_model/factorization_machine_classifier'
+require 'svmkit/polynomial_model/factorization_machine_regressor'
 require 'svmkit/multiclass/one_vs_rest_classifier'
 require 'svmkit/nearest_neighbors/k_neighbors_classifier'
 require 'svmkit/nearest_neighbors/k_neighbors_regressor'
 require 'svmkit/naive_bayes/naive_bayes'
+require 'svmkit/tree/node'
 require 'svmkit/tree/decision_tree_classifier'
+require 'svmkit/tree/decision_tree_regressor'
 require 'svmkit/ensemble/random_forest_classifier'
+require 'svmkit/ensemble/random_forest_regressor'
 require 'svmkit/preprocessing/l2_normalizer'
 require 'svmkit/preprocessing/min_max_scaler'
 require 'svmkit/preprocessing/standard_scaler'
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: svmkit
 version: !ruby/object:Gem::Version
-  version: 0.3.1
+  version: 0.3.2
 platform: ruby
 authors:
 - yoshoku
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2018-05-
+date: 2018-05-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: numo-narray
@@ -115,6 +115,7 @@ files:
 - lib/svmkit/base/transformer.rb
 - lib/svmkit/dataset.rb
 - lib/svmkit/ensemble/random_forest_classifier.rb
+- lib/svmkit/ensemble/random_forest_regressor.rb
 - lib/svmkit/evaluation_measure/accuracy.rb
 - lib/svmkit/evaluation_measure/f_score.rb
 - lib/svmkit/evaluation_measure/log_loss.rb
@@ -138,6 +139,7 @@ files:
 - lib/svmkit/nearest_neighbors/k_neighbors_regressor.rb
 - lib/svmkit/pairwise_metric.rb
 - lib/svmkit/polynomial_model/factorization_machine_classifier.rb
+- lib/svmkit/polynomial_model/factorization_machine_regressor.rb
 - lib/svmkit/preprocessing/l2_normalizer.rb
 - lib/svmkit/preprocessing/label_encoder.rb
 - lib/svmkit/preprocessing/min_max_scaler.rb
@@ -145,6 +147,8 @@ files:
 - lib/svmkit/preprocessing/standard_scaler.rb
 - lib/svmkit/probabilistic_output.rb
 - lib/svmkit/tree/decision_tree_classifier.rb
+- lib/svmkit/tree/decision_tree_regressor.rb
+- lib/svmkit/tree/node.rb
 - lib/svmkit/validation.rb
 - lib/svmkit/version.rb
 - svmkit.gemspec