xgb 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +9 -2
- data/lib/xgb.rb +10 -1
- data/lib/xgb/booster.rb +11 -4
- data/lib/xgb/classifier.rb +14 -29
- data/lib/xgb/dmatrix.rb +18 -17
- data/lib/xgb/ffi.rb +14 -5
- data/lib/xgb/model.rb +35 -0
- data/lib/xgb/ranker.rb +14 -0
- data/lib/xgb/regressor.rb +12 -31
- data/lib/xgb/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 148980f8a4991f1f98cd1740188e763a3bd96c98bc69b13a9de9aa00132a12f1
|
4
|
+
data.tar.gz: 31d90a3a064d032a7d1f371c6928f11b103bfc6f9c92dd1697cc538ef33f15fa
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 28fb08d373af3a3b198822ddea958ca0ee433145dbca091583b25945f27a048a3ec4a4ec43d88dc3c1dde67de7f72b7fb84e7668cfbd173f01d56992017ba4e2
|
7
|
+
data.tar.gz: 5bfb07db7c6b65d0a08010ab59328af75a97e74e025fcd4eb15ca88b05afec3303beb1569c99d59932a5426a64addce1c7fda90562342efcf52fc2c72d9b362a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ gem 'xgb'
|
|
18
18
|
|
19
19
|
## Getting Started
|
20
20
|
|
21
|
-
This library follows the [
|
21
|
+
This library follows the [Python API](https://xgboost.readthedocs.io/en/latest/python/python_api.html). Some methods and options are missing at the moment. PRs welcome!
|
22
22
|
|
23
23
|
## Learning API
|
24
24
|
|
@@ -33,7 +33,8 @@ booster = Xgb.train(params, dtrain)
|
|
33
33
|
Predict
|
34
34
|
|
35
35
|
```ruby
|
36
|
-
|
36
|
+
dtest = Xgb::DMatrix.new(x_test)
|
37
|
+
booster.predict(dtest)
|
37
38
|
```
|
38
39
|
|
39
40
|
Save the model to a file
|
@@ -110,6 +111,12 @@ Get the importance of features
|
|
110
111
|
model.feature_importances
|
111
112
|
```
|
112
113
|
|
114
|
+
Early stopping
|
115
|
+
|
116
|
+
```ruby
|
117
|
+
model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
|
118
|
+
```
|
119
|
+
|
113
120
|
## Data
|
114
121
|
|
115
122
|
Data can be an array of arrays
|
data/lib/xgb.rb
CHANGED
@@ -5,16 +5,25 @@ require "ffi"
|
|
5
5
|
require "xgb/utils"
|
6
6
|
require "xgb/booster"
|
7
7
|
require "xgb/dmatrix"
|
8
|
-
require "xgb/ffi"
|
9
8
|
require "xgb/version"
|
10
9
|
|
11
10
|
# scikit-learn API
|
11
|
+
require "xgb/model"
|
12
12
|
require "xgb/classifier"
|
13
|
+
require "xgb/ranker"
|
13
14
|
require "xgb/regressor"
|
14
15
|
|
15
16
|
module Xgb
|
16
17
|
class Error < StandardError; end
|
17
18
|
|
19
|
+
class << self
|
20
|
+
attr_accessor :ffi_lib
|
21
|
+
end
|
22
|
+
self.ffi_lib = ["xgboost"]
|
23
|
+
|
24
|
+
# friendlier error message
|
25
|
+
autoload :FFI,"xgb/ffi"
|
26
|
+
|
18
27
|
class << self
|
19
28
|
def train(params, dtrain, num_boost_round: 10, evals: nil, early_stopping_rounds: nil, verbose_eval: true)
|
20
29
|
booster = Booster.new(params: params)
|
data/lib/xgb/booster.rb
CHANGED
@@ -5,6 +5,8 @@ module Xgb
|
|
5
5
|
def initialize(params: nil, model_file: nil)
|
6
6
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
7
7
|
check_result FFI.XGBoosterCreate(nil, 0, @handle)
|
8
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
9
|
+
|
8
10
|
if model_file
|
9
11
|
check_result FFI.XGBoosterLoadModel(handle_pointer, model_file)
|
10
12
|
end
|
@@ -13,6 +15,11 @@ module Xgb
|
|
13
15
|
set_param(params)
|
14
16
|
end
|
15
17
|
|
18
|
+
def self.finalize(pointer)
|
19
|
+
# must use proc instead of stabby lambda
|
20
|
+
proc { FFI.XGBoosterFree(pointer) }
|
21
|
+
end
|
22
|
+
|
16
23
|
def update(dtrain, iteration)
|
17
24
|
check_result FFI.XGBoosterUpdateOneIter(handle_pointer, iteration, dtrain.handle_pointer)
|
18
25
|
end
|
@@ -43,10 +50,10 @@ module Xgb
|
|
43
50
|
|
44
51
|
def predict(data, ntree_limit: nil)
|
45
52
|
ntree_limit ||= 0
|
46
|
-
out_len = ::FFI::MemoryPointer.new(:
|
53
|
+
out_len = ::FFI::MemoryPointer.new(:uint64)
|
47
54
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
48
55
|
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
|
49
|
-
out = out_result.read_pointer.read_array_of_float(out_len.
|
56
|
+
out = out_result.read_pointer.read_array_of_float(out_len.read_uint64)
|
50
57
|
num_class = out.size / data.num_row
|
51
58
|
out = out.each_slice(num_class).to_a if num_class > 1
|
52
59
|
out
|
@@ -58,10 +65,10 @@ module Xgb
|
|
58
65
|
|
59
66
|
# returns an array of strings
|
60
67
|
def dump(fmap: "", with_stats: false, dump_format: "text")
|
61
|
-
out_len = ::FFI::MemoryPointer.new(:
|
68
|
+
out_len = ::FFI::MemoryPointer.new(:uint64)
|
62
69
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
63
70
|
check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
|
64
|
-
out_result.read_pointer.get_array_of_string(0, out_len.
|
71
|
+
out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
|
65
72
|
end
|
66
73
|
|
67
74
|
def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")
|
data/lib/xgb/classifier.rb
CHANGED
@@ -1,16 +1,10 @@
|
|
1
1
|
module Xgb
|
2
|
-
class Classifier
|
3
|
-
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain")
|
4
|
-
|
5
|
-
max_depth: max_depth,
|
6
|
-
objective: objective,
|
7
|
-
learning_rate: learning_rate
|
8
|
-
}
|
9
|
-
@n_estimators = n_estimators
|
10
|
-
@importance_type = importance_type
|
2
|
+
class Classifier < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
|
4
|
+
super
|
11
5
|
end
|
12
6
|
|
13
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
|
14
8
|
n_classes = y.uniq.size
|
15
9
|
|
16
10
|
params = @params.dup
|
@@ -20,18 +14,24 @@ module Xgb
|
|
20
14
|
end
|
21
15
|
|
22
16
|
dtrain = DMatrix.new(x, label: y)
|
23
|
-
|
17
|
+
evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
|
18
|
+
|
19
|
+
@booster = Xgb.train(params, dtrain,
|
20
|
+
num_boost_round: @n_estimators,
|
21
|
+
early_stopping_rounds: early_stopping_rounds,
|
22
|
+
verbose_eval: verbose,
|
23
|
+
evals: evals
|
24
|
+
)
|
24
25
|
nil
|
25
26
|
end
|
26
27
|
|
27
28
|
def predict(data)
|
28
|
-
|
29
|
-
y_pred = @booster.predict(dmat)
|
29
|
+
y_pred = super(data)
|
30
30
|
|
31
31
|
if y_pred.first.is_a?(Array)
|
32
32
|
# multiple classes
|
33
33
|
y_pred.map do |v|
|
34
|
-
v.map.with_index.max_by { |v2,
|
34
|
+
v.map.with_index.max_by { |v2, _| v2 }.last
|
35
35
|
end
|
36
36
|
else
|
37
37
|
y_pred.map { |v| v > 0.5 ? 1 : 0 }
|
@@ -49,20 +49,5 @@ module Xgb
|
|
49
49
|
y_pred.map { |v| [1 - v, v] }
|
50
50
|
end
|
51
51
|
end
|
52
|
-
|
53
|
-
def save_model(fname)
|
54
|
-
@booster.save_model(fname)
|
55
|
-
end
|
56
|
-
|
57
|
-
def load_model(fname)
|
58
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
59
|
-
end
|
60
|
-
|
61
|
-
def feature_importances
|
62
|
-
score = @booster.score(importance_type: @importance_type)
|
63
|
-
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
64
|
-
total = scores.sum.to_f
|
65
|
-
scores.map { |s| s / total }
|
66
|
-
end
|
67
52
|
end
|
68
53
|
end
|
data/lib/xgb/dmatrix.rb
CHANGED
@@ -27,12 +27,19 @@ module Xgb
|
|
27
27
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
28
28
|
c_data.put_array_of_float(0, flat_data)
|
29
29
|
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
30
|
+
|
31
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
30
32
|
end
|
31
33
|
|
32
34
|
set_float_info("label", label) if label
|
33
35
|
set_float_info("weight", weight) if weight
|
34
36
|
end
|
35
37
|
|
38
|
+
def self.finalize(pointer)
|
39
|
+
# must use proc instead of stabby lambda
|
40
|
+
proc { FFI.XGDMatrixFree(pointer) }
|
41
|
+
end
|
42
|
+
|
36
43
|
def label
|
37
44
|
float_info("label")
|
38
45
|
end
|
@@ -41,16 +48,22 @@ module Xgb
|
|
41
48
|
float_info("weight")
|
42
49
|
end
|
43
50
|
|
51
|
+
def group=(group)
|
52
|
+
c_data = ::FFI::MemoryPointer.new(:int, group.size)
|
53
|
+
c_data.put_array_of_int(0, group)
|
54
|
+
check_result FFI.XGDMatrixSetGroup(handle_pointer, c_data, group.size)
|
55
|
+
end
|
56
|
+
|
44
57
|
def num_row
|
45
|
-
out = ::FFI::MemoryPointer.new(:
|
58
|
+
out = ::FFI::MemoryPointer.new(:uint64)
|
46
59
|
check_result FFI.XGDMatrixNumRow(handle_pointer, out)
|
47
|
-
out.
|
60
|
+
out.read_uint64
|
48
61
|
end
|
49
62
|
|
50
63
|
def num_col
|
51
|
-
out = ::FFI::MemoryPointer.new(:
|
64
|
+
out = ::FFI::MemoryPointer.new(:uint64)
|
52
65
|
check_result FFI.XGDMatrixNumCol(handle_pointer, out)
|
53
|
-
out.
|
66
|
+
out.read_uint64
|
54
67
|
end
|
55
68
|
|
56
69
|
def slice(rindex)
|
@@ -76,15 +89,7 @@ module Xgb
|
|
76
89
|
private
|
77
90
|
|
78
91
|
def set_float_info(field, data)
|
79
|
-
data =
|
80
|
-
if matrix?(data)
|
81
|
-
data.to_a[0]
|
82
|
-
elsif daru_vector?(data) || narray?(data)
|
83
|
-
data.to_a
|
84
|
-
else
|
85
|
-
data
|
86
|
-
end
|
87
|
-
|
92
|
+
data = data.to_a unless data.is_a?(Array)
|
88
93
|
c_data = ::FFI::MemoryPointer.new(:float, data.size)
|
89
94
|
c_data.put_array_of_float(0, data)
|
90
95
|
check_result FFI.XGDMatrixSetFloatInfo(handle_pointer, field.to_s, c_data, data.size)
|
@@ -106,10 +111,6 @@ module Xgb
|
|
106
111
|
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
107
112
|
end
|
108
113
|
|
109
|
-
def daru_vector?(data)
|
110
|
-
defined?(Daru::Vector) && data.is_a?(Daru::Vector)
|
111
|
-
end
|
112
|
-
|
113
114
|
def narray?(data)
|
114
115
|
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
115
116
|
end
|
data/lib/xgb/ffi.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module Xgb
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
|
-
|
4
|
+
|
5
|
+
begin
|
6
|
+
ffi_lib Xgb.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
raise e if ENV["XGB_DEBUG"]
|
9
|
+
raise LoadError, "Could not find XGBoost"
|
10
|
+
end
|
5
11
|
|
6
12
|
# https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
|
7
13
|
# keep same order
|
@@ -10,18 +16,21 @@ module Xgb
|
|
10
16
|
attach_function :XGBGetLastError, %i[], :string
|
11
17
|
|
12
18
|
# dmatrix
|
13
|
-
attach_function :XGDMatrixCreateFromMat, %i[pointer
|
19
|
+
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
20
|
+
attach_function :XGDMatrixSetGroup, %i[pointer pointer uint64], :int
|
14
21
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
15
22
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
16
|
-
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer
|
23
|
+
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
24
|
+
attach_function :XGDMatrixFree, %i[pointer], :int
|
17
25
|
attach_function :XGDMatrixSaveBinary, %i[pointer string int], :int
|
18
|
-
attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer
|
26
|
+
attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer uint64], :int
|
19
27
|
attach_function :XGDMatrixGetFloatInfo, %i[pointer string pointer pointer], :int
|
20
28
|
|
21
29
|
# booster
|
22
30
|
attach_function :XGBoosterCreate, %i[pointer int pointer], :int
|
23
31
|
attach_function :XGBoosterUpdateOneIter, %i[pointer int pointer], :int
|
24
|
-
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer
|
32
|
+
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
33
|
+
attach_function :XGBoosterFree, %i[pointer], :int
|
25
34
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
26
35
|
attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
|
27
36
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
data/lib/xgb/model.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module Xgb
|
2
|
+
class Model
|
3
|
+
attr_reader :booster
|
4
|
+
|
5
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: nil, importance_type: "gain", **options)
|
6
|
+
@params = {
|
7
|
+
max_depth: max_depth,
|
8
|
+
objective: objective,
|
9
|
+
learning_rate: learning_rate
|
10
|
+
}.merge(options)
|
11
|
+
@n_estimators = n_estimators
|
12
|
+
@importance_type = importance_type
|
13
|
+
end
|
14
|
+
|
15
|
+
def predict(data)
|
16
|
+
dmat = DMatrix.new(data)
|
17
|
+
@booster.predict(dmat)
|
18
|
+
end
|
19
|
+
|
20
|
+
def save_model(fname)
|
21
|
+
@booster.save_model(fname)
|
22
|
+
end
|
23
|
+
|
24
|
+
def load_model(fname)
|
25
|
+
@booster = Booster.new(params: @params, model_file: fname)
|
26
|
+
end
|
27
|
+
|
28
|
+
def feature_importances
|
29
|
+
score = @booster.score(importance_type: @importance_type)
|
30
|
+
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
31
|
+
total = scores.sum.to_f
|
32
|
+
scores.map { |s| s / total }
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
data/lib/xgb/ranker.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
module Xgb
|
2
|
+
class Ranker < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
|
4
|
+
super
|
5
|
+
end
|
6
|
+
|
7
|
+
def fit(x, y, group)
|
8
|
+
dtrain = DMatrix.new(x, label: y)
|
9
|
+
dtrain.group = group
|
10
|
+
@booster = Xgb.train(@params, dtrain, num_boost_round: @n_estimators)
|
11
|
+
nil
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
data/lib/xgb/regressor.rb
CHANGED
@@ -1,39 +1,20 @@
|
|
1
1
|
module Xgb
|
2
|
-
class Regressor
|
3
|
-
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain")
|
4
|
-
|
5
|
-
max_depth: max_depth,
|
6
|
-
objective: objective,
|
7
|
-
learning_rate: learning_rate
|
8
|
-
}
|
9
|
-
@n_estimators = n_estimators
|
10
|
-
@importance_type = importance_type
|
2
|
+
class Regressor < Model
|
3
|
+
def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
|
4
|
+
super
|
11
5
|
end
|
12
6
|
|
13
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
|
14
8
|
dtrain = DMatrix.new(x, label: y)
|
15
|
-
|
16
|
-
nil
|
17
|
-
end
|
9
|
+
evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
|
18
10
|
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
27
|
-
|
28
|
-
def load_model(fname)
|
29
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
30
|
-
end
|
31
|
-
|
32
|
-
def feature_importances
|
33
|
-
score = @booster.score(importance_type: @importance_type)
|
34
|
-
scores = @booster.feature_names.map { |k| score[k] || 0.0 }
|
35
|
-
total = scores.sum.to_f
|
36
|
-
scores.map { |s| s / total }
|
11
|
+
@booster = Xgb.train(@params, dtrain,
|
12
|
+
num_boost_round: @n_estimators,
|
13
|
+
early_stopping_rounds: early_stopping_rounds,
|
14
|
+
verbose_eval: verbose,
|
15
|
+
evals: evals
|
16
|
+
)
|
17
|
+
nil
|
37
18
|
end
|
38
19
|
end
|
39
20
|
end
|
data/lib/xgb/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xgb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -107,6 +107,8 @@ files:
|
|
107
107
|
- lib/xgb/classifier.rb
|
108
108
|
- lib/xgb/dmatrix.rb
|
109
109
|
- lib/xgb/ffi.rb
|
110
|
+
- lib/xgb/model.rb
|
111
|
+
- lib/xgb/ranker.rb
|
110
112
|
- lib/xgb/regressor.rb
|
111
113
|
- lib/xgb/utils.rb
|
112
114
|
- lib/xgb/version.rb
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.0.
|
134
|
+
rubygems_version: 3.0.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: XGBoost - the high performance machine learning library - for Ruby
|