xgb 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +9 -2
- data/lib/xgb.rb +10 -1
- data/lib/xgb/booster.rb +11 -4
- data/lib/xgb/classifier.rb +14 -29
- data/lib/xgb/dmatrix.rb +18 -17
- data/lib/xgb/ffi.rb +14 -5
- data/lib/xgb/model.rb +35 -0
- data/lib/xgb/ranker.rb +14 -0
- data/lib/xgb/regressor.rb +12 -31
- data/lib/xgb/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 148980f8a4991f1f98cd1740188e763a3bd96c98bc69b13a9de9aa00132a12f1
|
4
|
+
data.tar.gz: 31d90a3a064d032a7d1f371c6928f11b103bfc6f9c92dd1697cc538ef33f15fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28fb08d373af3a3b198822ddea958ca0ee433145dbca091583b25945f27a048a3ec4a4ec43d88dc3c1dde67de7f72b7fb84e7668cfbd173f01d56992017ba4e2
|
7
|
+
data.tar.gz: 5bfb07db7c6b65d0a08010ab59328af75a97e74e025fcd4eb15ca88b05afec3303beb1569c99d59932a5426a64addce1c7fda90562342efcf52fc2c72d9b362a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ gem 'xgb'
|
|
18
18
|
|
19
19
|
## Getting Started
|
20
20
|
|
21
|
-
This library follows the [
|
21
|
+
This library follows the [Python API](https://xgboost.readthedocs.io/en/latest/python/python_api.html). Some methods and options are missing at the moment. PRs welcome!
|
22
22
|
|
23
23
|
## Learning API
|
24
24
|
|
@@ -33,7 +33,8 @@ booster = Xgb.train(params, dtrain)
|
|
33
33
|
Predict
|
34
34
|
|
35
35
|
```ruby
|
36
|
-
|
36
|
+
dtest = Xgb::DMatrix.new(x_test)
|
37
|
+
booster.predict(dtest)
|
37
38
|
```
|
38
39
|
|
39
40
|
Save the model to a file
|
@@ -110,6 +111,12 @@ Get the importance of features
|
|
110
111
|
model.feature_importances
|
111
112
|
```
|
112
113
|
|
114
|
+
Early stopping
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
|
118
|
+
```
|
119
|
+
|
113
120
|
## Data
|
114
121
|
|
115
122
|
Data can be an array of arrays
|
data/lib/xgb.rb
CHANGED
@@ -5,16 +5,25 @@ require "ffi"
|
|
5
5
|
require "xgb/utils"
|
6
6
|
require "xgb/booster"
|
7
7
|
require "xgb/dmatrix"
|
8
|
-
require "xgb/ffi"
|
9
8
|
require "xgb/version"
|
10
9
|
|
11
10
|
# scikit-learn API
|
11
|
+
require "xgb/model"
|
12
12
|
require "xgb/classifier"
|
13
|
+
require "xgb/ranker"
|
13
14
|
require "xgb/regressor"
|
14
15
|
|
15
16
|
module Xgb
|
16
17
|
class Error < StandardError; end
|
17
18
|
|
19
|
+
class << self
|
20
|
+
attr_accessor :ffi_lib
|
21
|
+
end
|
22
|
+
self.ffi_lib = ["xgboost"]
|
23
|
+
|
24
|
+
# friendlier error message
|
25
|
+
autoload :FFI,"xgb/ffi"
|
26
|
+
|
18
27
|
class << self
|
19
28
|
def train(params, dtrain, num_boost_round: 10, evals: nil, early_stopping_rounds: nil, verbose_eval: true)
|
20
29
|
booster = Booster.new(params: params)
|
data/lib/xgb/booster.rb
CHANGED
@@ -5,6 +5,8 @@ module Xgb
|
|
5
5
|
def initialize(params: nil, model_file: nil)
|
6
6
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
7
7
|
check_result FFI.XGBoosterCreate(nil, 0, @handle)
|
8
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
9
|
+
|
8
10
|
if model_file
|
9
11
|
check_result FFI.XGBoosterLoadModel(handle_pointer, model_file)
|
10
12
|
end
|
@@ -13,6 +15,11 @@ module Xgb
|
|
13
15
|
set_param(params)
|
14
16
|
end
|
15
17
|
|
18
|
+
def self.finalize(pointer)
|
19
|
+
# must use proc instead of stabby lambda
|
20
|
+
proc { FFI.XGBoosterFree(pointer) }
|
21
|
+
end
|
22
|
+
|
16
23
|
def update(dtrain, iteration)
|
17
24
|
check_result FFI.XGBoosterUpdateOneIter(handle_pointer, iteration, dtrain.handle_pointer)
|
18
25
|
end
|
@@ -43,10 +50,10 @@ module Xgb
|
|
43
50
|
|
44
51
|
def predict(data, ntree_limit: nil)
|
45
52
|
ntree_limit ||= 0
|
46
|
-
out_len = ::FFI::MemoryPointer.new(:
|
53
|
+
out_len = ::FFI::MemoryPointer.new(:uint64)
|
47
54
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
48
55
|
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
|
49
|
-
out = out_result.read_pointer.read_array_of_float(out_len.
|
56
|
+
out = out_result.read_pointer.read_array_of_float(out_len.read_uint64)
|
50
57
|
num_class = out.size / data.num_row
|
51
58
|
out = out.each_slice(num_class).to_a if num_class > 1
|
52
59
|
out
|
@@ -58,10 +65,10 @@ module Xgb
|
|
58
65
|
|
59
66
|
# returns an array of strings
|
60
67
|
def dump(fmap: "", with_stats: false, dump_format: "text")
|
61
|
-
out_len = ::FFI::MemoryPointer.new(:
|
68
|
+
out_len = ::FFI::MemoryPointer.new(:uint64)
|
62
69
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
63
70
|
check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
|
64
|
-
out_result.read_pointer.get_array_of_string(0, out_len.
|
71
|
+
out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
|
65
72
|
end
|
66
73
|
|
67
74
|
def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
|
data/lib/xgb/classifier.rb
CHANGED
@@ -1,16 +1,10 @@
|
|
1
1
|
module Xgb
|
2
|
-
class Classifier
|
3
|
-
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain")
|
4
|
-
|
5
|
-
max_depth: max_depth,
|
6
|
-
objective: objective,
|
7
|
-
learning_rate: learning_rate
|
8
|
-
}
|
9
|
-
@n_estimators = n_estimators
|
10
|
-
@importance_type = importance_type
|
2
|
+
class Classifier < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
|
4
|
+
super
|
11
5
|
end
|
12
6
|
|
13
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
|
14
8
|
n_classes = y.uniq.size
|
15
9
|
|
16
10
|
params = @params.dup
|
@@ -20,18 +14,24 @@ module Xgb
|
|
20
14
|
end
|
21
15
|
|
22
16
|
dtrain = DMatrix.new(x, label: y)
|
23
|
-
|
17
|
+
evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
|
18
|
+
|
19
|
+
@booster = Xgb.train(params, dtrain,
|
20
|
+
num_boost_round: @n_estimators,
|
21
|
+
early_stopping_rounds: early_stopping_rounds,
|
22
|
+
verbose_eval: verbose,
|
23
|
+
evals: evals
|
24
|
+
)
|
24
25
|
nil
|
25
26
|
end
|
26
27
|
|
27
28
|
def predict(data)
|
28
|
-
|
29
|
-
y_pred = @booster.predict(dmat)
|
29
|
+
y_pred = super(data)
|
30
30
|
|
31
31
|
if y_pred.first.is_a?(Array)
|
32
32
|
# multiple classes
|
33
33
|
y_pred.map do |v|
|
34
|
-
v.map.with_index.max_by { |v2,
|
34
|
+
v.map.with_index.max_by { |v2, _| v2 }.last
|
35
35
|
end
|
36
36
|
else
|
37
37
|
y_pred.map { |v| v > 0.5 ? 1 : 0 }
|
@@ -49,20 +49,5 @@ module Xgb
|
|
49
49
|
y_pred.map { |v| [1 - v, v] }
|
50
50
|
end
|
51
51
|
end
|
52
|
-
|
53
|
-
def save_model(fname)
|
54
|
-
@booster.save_model(fname)
|
55
|
-
end
|
56
|
-
|
57
|
-
def load_model(fname)
|
58
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
59
|
-
end
|
60
|
-
|
61
|
-
def feature_importances
|
62
|
-
score = @booster.score(importance_type: @importance_type)
|
63
|
-
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
64
|
-
total = scores.sum.to_f
|
65
|
-
scores.map { |s| s / total }
|
66
|
-
end
|
67
52
|
end
|
68
53
|
end
|
data/lib/xgb/dmatrix.rb
CHANGED
@@ -27,12 +27,19 @@ module Xgb
|
|
27
27
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
28
28
|
c_data.put_array_of_float(0, flat_data)
|
29
29
|
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
30
|
+
|
31
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
30
32
|
end
|
31
33
|
|
32
34
|
set_float_info("label", label) if label
|
33
35
|
set_float_info("weight", weight) if weight
|
34
36
|
end
|
35
37
|
|
38
|
+
def self.finalize(pointer)
|
39
|
+
# must use proc instead of stabby lambda
|
40
|
+
proc { FFI.XGDMatrixFree(pointer) }
|
41
|
+
end
|
42
|
+
|
36
43
|
def label
|
37
44
|
float_info("label")
|
38
45
|
end
|
@@ -41,16 +48,22 @@ module Xgb
|
|
41
48
|
float_info("weight")
|
42
49
|
end
|
43
50
|
|
51
|
+
def group=(group)
|
52
|
+
c_data = ::FFI::MemoryPointer.new(:int, group.size)
|
53
|
+
c_data.put_array_of_int(0, group)
|
54
|
+
check_result FFI.XGDMatrixSetGroup(handle_pointer, c_data, group.size)
|
55
|
+
end
|
56
|
+
|
44
57
|
def num_row
|
45
|
-
out = ::FFI::MemoryPointer.new(:
|
58
|
+
out = ::FFI::MemoryPointer.new(:uint64)
|
46
59
|
check_result FFI.XGDMatrixNumRow(handle_pointer, out)
|
47
|
-
out.
|
60
|
+
out.read_uint64
|
48
61
|
end
|
49
62
|
|
50
63
|
def num_col
|
51
|
-
out = ::FFI::MemoryPointer.new(:
|
64
|
+
out = ::FFI::MemoryPointer.new(:uint64)
|
52
65
|
check_result FFI.XGDMatrixNumCol(handle_pointer, out)
|
53
|
-
out.
|
66
|
+
out.read_uint64
|
54
67
|
end
|
55
68
|
|
56
69
|
def slice(rindex)
|
@@ -76,15 +89,7 @@ module Xgb
|
|
76
89
|
private
|
77
90
|
|
78
91
|
def set_float_info(field, data)
|
79
|
-
data =
|
80
|
-
if matrix?(data)
|
81
|
-
data.to_a[0]
|
82
|
-
elsif daru_vector?(data) || narray?(data)
|
83
|
-
data.to_a
|
84
|
-
else
|
85
|
-
data
|
86
|
-
end
|
87
|
-
|
92
|
+
data = data.to_a unless data.is_a?(Array)
|
88
93
|
c_data = ::FFI::MemoryPointer.new(:float, data.size)
|
89
94
|
c_data.put_array_of_float(0, data)
|
90
95
|
check_result FFI.XGDMatrixSetFloatInfo(handle_pointer, field.to_s, c_data, data.size)
|
@@ -106,10 +111,6 @@ module Xgb
|
|
106
111
|
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
107
112
|
end
|
108
113
|
|
109
|
-
def daru_vector?(data)
|
110
|
-
defined?(Daru::Vector) && data.is_a?(Daru::Vector)
|
111
|
-
end
|
112
|
-
|
113
114
|
def narray?(data)
|
114
115
|
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
115
116
|
end
|
data/lib/xgb/ffi.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module Xgb
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
|
-
|
4
|
+
|
5
|
+
begin
|
6
|
+
ffi_lib Xgb.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
raise e if ENV["XGB_DEBUG"]
|
9
|
+
raise LoadError, "Could not find XGBoost"
|
10
|
+
end
|
5
11
|
|
6
12
|
# https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
|
7
13
|
# keep same order
|
@@ -10,18 +16,21 @@ module Xgb
|
|
10
16
|
attach_function :XGBGetLastError, %i[], :string
|
11
17
|
|
12
18
|
# dmatrix
|
13
|
-
attach_function :XGDMatrixCreateFromMat, %i[pointer
|
19
|
+
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
20
|
+
attach_function :XGDMatrixSetGroup, %i[pointer pointer uint64], :int
|
14
21
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
15
22
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
16
|
-
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer
|
23
|
+
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
24
|
+
attach_function :XGDMatrixFree, %i[pointer], :int
|
17
25
|
attach_function :XGDMatrixSaveBinary, %i[pointer string int], :int
|
18
|
-
attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer
|
26
|
+
attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer uint64], :int
|
19
27
|
attach_function :XGDMatrixGetFloatInfo, %i[pointer string pointer pointer], :int
|
20
28
|
|
21
29
|
# booster
|
22
30
|
attach_function :XGBoosterCreate, %i[pointer int pointer], :int
|
23
31
|
attach_function :XGBoosterUpdateOneIter, %i[pointer int pointer], :int
|
24
|
-
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer
|
32
|
+
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
33
|
+
attach_function :XGBoosterFree, %i[pointer], :int
|
25
34
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
26
35
|
attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
|
27
36
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
data/lib/xgb/model.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Xgb
|
2
|
+
class Model
|
3
|
+
attr_reader :booster
|
4
|
+
|
5
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: nil, importance_type: "gain", **options)
|
6
|
+
@params = {
|
7
|
+
max_depth: max_depth,
|
8
|
+
objective: objective,
|
9
|
+
learning_rate: learning_rate
|
10
|
+
}.merge(options)
|
11
|
+
@n_estimators = n_estimators
|
12
|
+
@importance_type = importance_type
|
13
|
+
end
|
14
|
+
|
15
|
+
def predict(data)
|
16
|
+
dmat = DMatrix.new(data)
|
17
|
+
@booster.predict(dmat)
|
18
|
+
end
|
19
|
+
|
20
|
+
def save_model(fname)
|
21
|
+
@booster.save_model(fname)
|
22
|
+
end
|
23
|
+
|
24
|
+
def load_model(fname)
|
25
|
+
@booster = Booster.new(params: @params, model_file: fname)
|
26
|
+
end
|
27
|
+
|
28
|
+
def feature_importances
|
29
|
+
score = @booster.score(importance_type: @importance_type)
|
30
|
+
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
31
|
+
total = scores.sum.to_f
|
32
|
+
scores.map { |s| s / total }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/xgb/ranker.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Xgb
|
2
|
+
class Ranker < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
|
4
|
+
super
|
5
|
+
end
|
6
|
+
|
7
|
+
def fit(x, y, group)
|
8
|
+
dtrain = DMatrix.new(x, label: y)
|
9
|
+
dtrain.group = group
|
10
|
+
@booster = Xgb.train(@params, dtrain, num_boost_round: @n_estimators)
|
11
|
+
nil
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/xgb/regressor.rb
CHANGED
@@ -1,39 +1,20 @@
|
|
1
1
|
module Xgb
|
2
|
-
class Regressor
|
3
|
-
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain")
|
4
|
-
|
5
|
-
max_depth: max_depth,
|
6
|
-
objective: objective,
|
7
|
-
learning_rate: learning_rate
|
8
|
-
}
|
9
|
-
@n_estimators = n_estimators
|
10
|
-
@importance_type = importance_type
|
2
|
+
class Regressor < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
|
4
|
+
super
|
11
5
|
end
|
12
6
|
|
13
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
|
14
8
|
dtrain = DMatrix.new(x, label: y)
|
15
|
-
|
16
|
-
nil
|
17
|
-
end
|
9
|
+
evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
|
18
10
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
def load_model(fname)
|
29
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
30
|
-
end
|
31
|
-
|
32
|
-
def feature_importances
|
33
|
-
score = @booster.score(importance_type: @importance_type)
|
34
|
-
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
35
|
-
total = scores.sum.to_f
|
36
|
-
scores.map { |s| s / total }
|
11
|
+
@booster = Xgb.train(@params, dtrain,
|
12
|
+
num_boost_round: @n_estimators,
|
13
|
+
early_stopping_rounds: early_stopping_rounds,
|
14
|
+
verbose_eval: verbose,
|
15
|
+
evals: evals
|
16
|
+
)
|
17
|
+
nil
|
37
18
|
end
|
38
19
|
end
|
39
20
|
end
|
data/lib/xgb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xgb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -107,6 +107,8 @@ files:
|
|
107
107
|
- lib/xgb/classifier.rb
|
108
108
|
- lib/xgb/dmatrix.rb
|
109
109
|
- lib/xgb/ffi.rb
|
110
|
+
- lib/xgb/model.rb
|
111
|
+
- lib/xgb/ranker.rb
|
110
112
|
- lib/xgb/regressor.rb
|
111
113
|
- lib/xgb/utils.rb
|
112
114
|
- lib/xgb/version.rb
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.0.
|
134
|
+
rubygems_version: 3.0.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: XGBoost - the high performance machine learning library - for Ruby
|