xgb 0.2.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/NOTICE.txt +1 -0
- data/README.md +22 -11
- data/lib/xgboost.rb +10 -1
- data/lib/xgboost/booster.rb +22 -11
- data/lib/xgboost/classifier.rb +1 -1
- data/lib/xgboost/dmatrix.rb +32 -7
- data/lib/xgboost/ffi.rb +10 -6
- data/lib/xgboost/model.rb +2 -6
- data/lib/xgboost/ranker.rb +1 -1
- data/lib/xgboost/regressor.rb +1 -1
- data/lib/xgboost/version.rb +1 -1
- data/vendor/libxgboost.dylib +0 -0
- data/vendor/libxgboost.so +0 -0
- data/vendor/xgboost.dll +0 -0
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bb82540880bbecdc88d82eb71b0d3fd3e7cf276f05205ddc0f1900684c5602a2
|
4
|
+
data.tar.gz: 1463e06dce0ae99fdee5ccc1887d1a24d537fcdac7d89fb685701566083d5600
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4869e2465af9d824e56dfd180eff0a7b8aacc0dd9788cfb9f5d429e83c51b5bceb93b8ad93fcdb0b7b2e0ec81231204d32f738988af97304b6d2eee33fa2f709
|
7
|
+
data.tar.gz: 4206063987a450cbb82bb9d2ea0cc7cfa8bf8bb9df483d4f088943cc41ba9d396afef81136f9090e65249ed2a3d78806c3fca70de5e198a15f6199ed8b3a01e8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,29 @@
|
|
1
|
+
## 0.4.1 (2020-08-26)
|
2
|
+
|
3
|
+
- Updated XGBoost to 1.2.0
|
4
|
+
|
5
|
+
## 0.4.0 (2020-05-17)
|
6
|
+
|
7
|
+
- Updated XGBoost to 1.1.0
|
8
|
+
- Changed default `learning_rate` and `max_depth` for Scikit-Learn API to match Python
|
9
|
+
- Added support for Rover
|
10
|
+
- Improved performance of Numo datasets
|
11
|
+
- Improved error message when OpenMP not found on Mac
|
12
|
+
|
13
|
+
## 0.3.1 (2020-04-16)
|
14
|
+
|
15
|
+
- Added `feature_names` and `feature_types` to `DMatrix`
|
16
|
+
- Added feature names to `dump`
|
17
|
+
|
18
|
+
## 0.3.0 (2020-02-19)
|
19
|
+
|
20
|
+
- Updated XGBoost to 1.0.0
|
21
|
+
|
22
|
+
## 0.2.1 (2020-02-11)
|
23
|
+
|
24
|
+
- Fixed `Could not find XGBoost` error on some Linux platforms
|
25
|
+
- Fixed `SignalException` on Windows
|
26
|
+
|
1
27
|
## 0.2.0 (2020-01-26)
|
2
28
|
|
3
29
|
- Prefer `XGBoost` over `Xgb`
|
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[XGBoost](https://github.com/dmlc/xgboost) - high performance gradient boosting - for Ruby
|
4
4
|
|
5
|
-
[Travis CI build status](https://travis-ci.org/ankane/xgboost)
|
5
|
+
[Travis CI build status](https://travis-ci.org/ankane/xgboost) [AppVeyor build status](https://ci.appveyor.com/project/ankane/xgboost/branch/master)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -12,9 +12,11 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem 'xgb'
|
13
13
|
```
|
14
14
|
|
15
|
-
|
15
|
+
On Mac, also install OpenMP:
|
16
16
|
|
17
|
-
|
17
|
+
```sh
|
18
|
+
brew install libomp
|
19
|
+
```
|
18
20
|
|
19
21
|
## Learning API
|
20
22
|
|
@@ -70,7 +72,7 @@ CV
|
|
70
72
|
XGBoost.cv(params, dtrain, nfold: 3, verbose_eval: true)
|
71
73
|
```
|
72
74
|
|
73
|
-
Set metadata about a model
|
75
|
+
Set metadata about a model
|
74
76
|
|
75
77
|
```ruby
|
76
78
|
booster["key"] = "value"
|
@@ -135,16 +137,22 @@ Data can be an array of arrays
|
|
135
137
|
[[1, 2, 3], [4, 5, 6]]
|
136
138
|
```
|
137
139
|
|
138
|
-
Or a
|
140
|
+
Or a Numo array
|
139
141
|
|
140
142
|
```ruby
|
141
|
-
|
143
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
142
144
|
```
|
143
145
|
|
144
|
-
Or a
|
146
|
+
Or a Rover data frame
|
145
147
|
|
146
148
|
```ruby
|
147
|
-
|
149
|
+
Rover.read_csv("houses.csv")
|
150
|
+
```
|
151
|
+
|
152
|
+
Or a Daru data frame
|
153
|
+
|
154
|
+
```ruby
|
155
|
+
Daru::DataFrame.from_csv("houses.csv")
|
148
156
|
```
|
149
157
|
|
150
158
|
## Helpful Resources
|
@@ -155,11 +163,13 @@ Numo::DFloat.new(3, 2).seq
|
|
155
163
|
## Related Projects
|
156
164
|
|
157
165
|
- [LightGBM](https://github.com/ankane/lightgbm) - LightGBM for Ruby
|
158
|
-
- [Eps](https://github.com/ankane/eps) - Machine
|
166
|
+
- [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
|
159
167
|
|
160
168
|
## Credits
|
161
169
|
|
162
|
-
|
170
|
+
This library follows the [Python API](https://xgboost.readthedocs.io/en/latest/python/python_api.html), with the `get_` and `set_` prefixes removed from methods to make it more Ruby-like.
|
171
|
+
|
172
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
|
163
173
|
|
164
174
|
## History
|
165
175
|
|
@@ -174,11 +184,12 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
174
184
|
- Write, clarify, or fix documentation
|
175
185
|
- Suggest or add new features
|
176
186
|
|
177
|
-
To get started with development
|
187
|
+
To get started with development:
|
178
188
|
|
179
189
|
```sh
|
180
190
|
git clone https://github.com/ankane/xgboost.git
|
181
191
|
cd xgboost
|
182
192
|
bundle install
|
193
|
+
bundle exec rake vendor:all
|
183
194
|
bundle exec rake test
|
184
195
|
```
|
data/lib/xgboost.rb
CHANGED
@@ -31,7 +31,8 @@ module XGBoost
|
|
31
31
|
booster = Booster.new(params: params)
|
32
32
|
num_feature = dtrain.num_col
|
33
33
|
booster.set_param("num_feature", num_feature)
|
34
|
-
booster.feature_names =
|
34
|
+
booster.feature_names = dtrain.feature_names
|
35
|
+
booster.feature_types = dtrain.feature_types
|
35
36
|
evals ||= []
|
36
37
|
|
37
38
|
if early_stopping_rounds
|
@@ -156,6 +157,14 @@ module XGBoost
|
|
156
157
|
eval_hist
|
157
158
|
end
|
158
159
|
|
160
|
+
def lib_version
|
161
|
+
major = ::FFI::MemoryPointer.new(:int)
|
162
|
+
minor = ::FFI::MemoryPointer.new(:int)
|
163
|
+
patch = ::FFI::MemoryPointer.new(:int)
|
164
|
+
FFI.XGBoostVersion(major, minor, patch)
|
165
|
+
"#{major.read_int}.#{minor.read_int}.#{patch.read_int}"
|
166
|
+
end
|
167
|
+
|
159
168
|
private
|
160
169
|
|
161
170
|
def mean(arr)
|
data/lib/xgboost/booster.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Booster
|
3
|
-
attr_accessor :best_iteration, :feature_names
|
3
|
+
attr_accessor :best_iteration, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(params: nil, model_file: nil)
|
6
6
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
@@ -25,11 +25,8 @@ module XGBoost
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def eval_set(evals, iteration)
|
28
|
-
dmats =
|
29
|
-
|
30
|
-
|
31
|
-
evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
|
32
|
-
evnames.write_array_of_pointer(evals.map { |v| ::FFI::MemoryPointer.from_string(v[1]) })
|
28
|
+
dmats = array_of_pointers(evals.map { |v| v[0].handle_pointer })
|
29
|
+
evnames = array_of_pointers(evals.map { |v| string_pointer(v[1]) })
|
33
30
|
|
34
31
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
35
32
|
|
@@ -52,7 +49,7 @@ module XGBoost
|
|
52
49
|
ntree_limit ||= 0
|
53
50
|
out_len = ::FFI::MemoryPointer.new(:uint64)
|
54
51
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
55
|
-
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
|
52
|
+
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, 0, out_len, out_result)
|
56
53
|
out = out_result.read_pointer.read_array_of_float(read_uint64(out_len))
|
57
54
|
num_class = out.size / data.num_row
|
58
55
|
out = out.each_slice(num_class).to_a if num_class > 1
|
@@ -67,7 +64,13 @@ module XGBoost
|
|
67
64
|
def dump(fmap: "", with_stats: false, dump_format: "text")
|
68
65
|
out_len = ::FFI::MemoryPointer.new(:uint64)
|
69
66
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
70
|
-
|
67
|
+
|
68
|
+
names = feature_names || []
|
69
|
+
fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
|
70
|
+
ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))
|
71
|
+
|
72
|
+
check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)
|
73
|
+
|
71
74
|
out_result.read_pointer.get_array_of_string(0, read_uint64(out_len))
|
72
75
|
end
|
73
76
|
|
@@ -155,7 +158,7 @@ module XGBoost
|
|
155
158
|
end
|
156
159
|
|
157
160
|
def [](key_name)
|
158
|
-
key =
|
161
|
+
key = string_pointer(key_name)
|
159
162
|
success = ::FFI::MemoryPointer.new(:int)
|
160
163
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
161
164
|
|
@@ -165,8 +168,8 @@ module XGBoost
|
|
165
168
|
end
|
166
169
|
|
167
170
|
def []=(key_name, raw_value)
|
168
|
-
key =
|
169
|
-
value = raw_value.nil? ? nil :
|
171
|
+
key = string_pointer(key_name)
|
172
|
+
value = raw_value.nil? ? nil : string_pointer(raw_value)
|
170
173
|
|
171
174
|
check_result FFI.XGBoosterSetAttr(handle_pointer, key, value)
|
172
175
|
end
|
@@ -188,6 +191,14 @@ module XGBoost
|
|
188
191
|
@handle.read_pointer
|
189
192
|
end
|
190
193
|
|
194
|
+
def array_of_pointers(values)
|
195
|
+
::FFI::MemoryPointer.new(:pointer, values.size).write_array_of_pointer(values)
|
196
|
+
end
|
197
|
+
|
198
|
+
def string_pointer(value)
|
199
|
+
::FFI::MemoryPointer.from_string(value.to_s)
|
200
|
+
end
|
201
|
+
|
191
202
|
include Utils
|
192
203
|
end
|
193
204
|
end
|
data/lib/xgboost/classifier.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Classifier < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/dmatrix.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class DMatrix
|
3
|
-
attr_reader :data
|
3
|
+
attr_reader :data, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
|
6
6
|
@data = data
|
@@ -15,21 +15,42 @@ module XGBoost
|
|
15
15
|
elsif daru?(data)
|
16
16
|
nrow, ncol = data.shape
|
17
17
|
flat_data = data.map_rows(&:to_a).flatten
|
18
|
-
|
18
|
+
@feature_names = data.each_vector.map(&:name)
|
19
|
+
@feature_types =
|
20
|
+
data.each_vector.map(&:db_type).map do |v|
|
21
|
+
case v
|
22
|
+
when "INTEGER"
|
23
|
+
"int"
|
24
|
+
when "DOUBLE"
|
25
|
+
"float"
|
26
|
+
else
|
27
|
+
raise Error, "Unknown feature type: #{v}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
elsif numo?(data)
|
19
31
|
nrow, ncol = data.shape
|
20
|
-
|
32
|
+
elsif rover?(data)
|
33
|
+
nrow, ncol = data.shape
|
34
|
+
@feature_names = data.keys
|
35
|
+
data = data.to_numo
|
21
36
|
else
|
22
37
|
nrow = data.count
|
23
38
|
ncol = data.first.count
|
24
39
|
flat_data = data.flatten
|
25
40
|
end
|
26
41
|
|
27
|
-
handle_missing(flat_data, missing)
|
28
42
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
29
|
-
|
43
|
+
if numo?(data)
|
44
|
+
c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
|
45
|
+
else
|
46
|
+
handle_missing(flat_data, missing)
|
47
|
+
c_data.write_array_of_float(flat_data)
|
48
|
+
end
|
30
49
|
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
31
50
|
|
32
51
|
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
52
|
+
|
53
|
+
@feature_names ||= ncol.times.map { |i| "f#{i}" }
|
33
54
|
end
|
34
55
|
|
35
56
|
self.label = label if label
|
@@ -60,7 +81,7 @@ module XGBoost
|
|
60
81
|
def group=(group)
|
61
82
|
c_data = ::FFI::MemoryPointer.new(:int, group.size)
|
62
83
|
c_data.write_array_of_int(group)
|
63
|
-
check_result FFI.
|
84
|
+
check_result FFI.XGDMatrixSetUIntInfo(handle_pointer, "group", c_data, group.size)
|
64
85
|
end
|
65
86
|
|
66
87
|
def num_row
|
@@ -120,10 +141,14 @@ module XGBoost
|
|
120
141
|
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
121
142
|
end
|
122
143
|
|
123
|
-
def
|
144
|
+
def numo?(data)
|
124
145
|
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
125
146
|
end
|
126
147
|
|
148
|
+
def rover?(data)
|
149
|
+
defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
150
|
+
end
|
151
|
+
|
127
152
|
def handle_missing(data, missing)
|
128
153
|
data.map! { |v| v.nil? ? missing : v }
|
129
154
|
end
|
data/lib/xgboost/ffi.rb
CHANGED
@@ -5,19 +5,23 @@ module XGBoost
|
|
5
5
|
begin
|
6
6
|
ffi_lib XGBoost.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
|
-
|
9
|
-
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
12
|
+
end
|
10
13
|
end
|
11
14
|
|
12
15
|
# https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
|
13
16
|
# keep same order
|
14
17
|
|
15
|
-
#
|
18
|
+
# general
|
19
|
+
attach_function :XGBoostVersion, %i[pointer pointer pointer], :void
|
16
20
|
attach_function :XGBGetLastError, %i[], :string
|
17
21
|
|
18
22
|
# dmatrix
|
19
23
|
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
20
|
-
attach_function :
|
24
|
+
attach_function :XGDMatrixSetUIntInfo, %i[pointer string pointer uint64], :int
|
21
25
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
22
26
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
23
27
|
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
@@ -32,10 +36,10 @@ module XGBoost
|
|
32
36
|
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
33
37
|
attach_function :XGBoosterFree, %i[pointer], :int
|
34
38
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
35
|
-
attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
|
39
|
+
attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
|
36
40
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
37
41
|
attach_function :XGBoosterSaveModel, %i[pointer string], :int
|
38
|
-
attach_function :
|
42
|
+
attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
|
39
43
|
attach_function :XGBoosterGetAttr, %i[pointer pointer pointer pointer], :int
|
40
44
|
attach_function :XGBoosterSetAttr, %i[pointer pointer pointer], :int
|
41
45
|
attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
|
data/lib/xgboost/model.rb
CHANGED
@@ -2,12 +2,8 @@ module XGBoost
|
|
2
2
|
class Model
|
3
3
|
attr_reader :booster
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@params =
|
7
|
-
max_depth: max_depth,
|
8
|
-
objective: objective,
|
9
|
-
learning_rate: learning_rate
|
10
|
-
}.merge(options)
|
5
|
+
def initialize(n_estimators: 100, importance_type: "gain", **options)
|
6
|
+
@params = options
|
11
7
|
@n_estimators = n_estimators
|
12
8
|
@importance_type = importance_type
|
13
9
|
end
|
data/lib/xgboost/ranker.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Ranker < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/regressor.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Regressor < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/version.rb
CHANGED
data/vendor/libxgboost.dylib
CHANGED
Binary file
|
data/vendor/libxgboost.so
CHANGED
Binary file
|
data/vendor/xgboost.dll
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xgb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -94,6 +94,20 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rover-df
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
97
111
|
description:
|
98
112
|
email: andrew@chartkick.com
|
99
113
|
executables: []
|