xgb 0.2.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/NOTICE.txt +1 -0
- data/README.md +17 -5
- data/lib/xgboost.rb +10 -1
- data/lib/xgboost/booster.rb +22 -11
- data/lib/xgboost/classifier.rb +1 -1
- data/lib/xgboost/dmatrix.rb +32 -7
- data/lib/xgboost/ffi.rb +14 -5
- data/lib/xgboost/model.rb +2 -6
- data/lib/xgboost/ranker.rb +1 -1
- data/lib/xgboost/regressor.rb +1 -1
- data/lib/xgboost/version.rb +1 -1
- data/vendor/libxgboost.dylib +0 -0
- data/vendor/libxgboost.so +0 -0
- data/vendor/xgboost.dll +0 -0
- metadata +21 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0afc5631e2925f19ef4da234aa20aca2247806af0823c7e7e7267424edd23973
|
4
|
+
data.tar.gz: 7a7a6b9e9860f4ebbf8ee5ddcff133f5614482b85fb93b0594864c18a0c1daf9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e886ffe5ff3d34055ca5e071fbabf7306cdcb3456513c342300f1ad65211bde7322db163e81c1a3a92e122c58d9b241388e5bfba00c11bc89c4cc056707a8bcd
|
7
|
+
data.tar.gz: e422b88f36d1a23b429cc44d1c496707272d176884d29265057878bfd0e3010cf4348730746d99cfc1ca7e6f42a858c697d4233c39e186df10c2d61df2280097
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1
|
+
## 0.5.0 (2020-12-12)
|
2
|
+
|
3
|
+
- Updated XGBoost to 1.3.0
|
4
|
+
|
5
|
+
## 0.4.1 (2020-08-26)
|
6
|
+
|
7
|
+
- Updated XGBoost to 1.2.0
|
8
|
+
|
9
|
+
## 0.4.0 (2020-05-17)
|
10
|
+
|
11
|
+
- Updated XGBoost to 1.1.0
|
12
|
+
- Changed default `learning_rate` and `max_depth` for Scikit-Learn API to match Python
|
13
|
+
- Added support for Rover
|
14
|
+
- Improved performance of Numo datasets
|
15
|
+
- Improved error message when OpenMP not found on Mac
|
16
|
+
|
17
|
+
## 0.3.1 (2020-04-16)
|
18
|
+
|
19
|
+
- Added `feature_names` and `feature_types` to `DMatrix`
|
20
|
+
- Added feature names to `dump`
|
21
|
+
|
22
|
+
## 0.3.0 (2020-02-19)
|
23
|
+
|
24
|
+
- Updated XGBoost to 1.0.0
|
25
|
+
|
1
26
|
## 0.2.1 (2020-02-11)
|
2
27
|
|
3
28
|
- Fixed `Could not find XGBoost` error on some Linux platforms
|
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[XGBoost](https://github.com/dmlc/xgboost) - high performance gradient boosting - for Ruby
|
4
4
|
|
5
|
-
[![Build Status](https://
|
5
|
+
[![Build Status](https://github.com/ankane/xgboost/workflows/build/badge.svg?branch=master)](https://github.com/ankane/xgboost/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -12,6 +12,12 @@ Add this line to your application’s Gemfile:
|
|
12
12
|
gem 'xgb'
|
13
13
|
```
|
14
14
|
|
15
|
+
On Mac, also install OpenMP:
|
16
|
+
|
17
|
+
```sh
|
18
|
+
brew install libomp
|
19
|
+
```
|
20
|
+
|
15
21
|
## Learning API
|
16
22
|
|
17
23
|
Prep your data
|
@@ -131,16 +137,22 @@ Data can be an array of arrays
|
|
131
137
|
[[1, 2, 3], [4, 5, 6]]
|
132
138
|
```
|
133
139
|
|
134
|
-
Or a
|
140
|
+
Or a Numo array
|
135
141
|
|
136
142
|
```ruby
|
137
|
-
|
143
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
138
144
|
```
|
139
145
|
|
140
|
-
Or a
|
146
|
+
Or a Rover data frame
|
141
147
|
|
142
148
|
```ruby
|
143
|
-
|
149
|
+
Rover.read_csv("houses.csv")
|
150
|
+
```
|
151
|
+
|
152
|
+
Or a Daru data frame
|
153
|
+
|
154
|
+
```ruby
|
155
|
+
Daru::DataFrame.from_csv("houses.csv")
|
144
156
|
```
|
145
157
|
|
146
158
|
## Helpful Resources
|
data/lib/xgboost.rb
CHANGED
@@ -31,7 +31,8 @@ module XGBoost
|
|
31
31
|
booster = Booster.new(params: params)
|
32
32
|
num_feature = dtrain.num_col
|
33
33
|
booster.set_param("num_feature", num_feature)
|
34
|
-
booster.feature_names =
|
34
|
+
booster.feature_names = dtrain.feature_names
|
35
|
+
booster.feature_types = dtrain.feature_types
|
35
36
|
evals ||= []
|
36
37
|
|
37
38
|
if early_stopping_rounds
|
@@ -156,6 +157,14 @@ module XGBoost
|
|
156
157
|
eval_hist
|
157
158
|
end
|
158
159
|
|
160
|
+
def lib_version
|
161
|
+
major = ::FFI::MemoryPointer.new(:int)
|
162
|
+
minor = ::FFI::MemoryPointer.new(:int)
|
163
|
+
patch = ::FFI::MemoryPointer.new(:int)
|
164
|
+
FFI.XGBoostVersion(major, minor, patch)
|
165
|
+
"#{major.read_int}.#{minor.read_int}.#{patch.read_int}"
|
166
|
+
end
|
167
|
+
|
159
168
|
private
|
160
169
|
|
161
170
|
def mean(arr)
|
data/lib/xgboost/booster.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Booster
|
3
|
-
attr_accessor :best_iteration, :feature_names
|
3
|
+
attr_accessor :best_iteration, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(params: nil, model_file: nil)
|
6
6
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
@@ -25,11 +25,8 @@ module XGBoost
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def eval_set(evals, iteration)
|
28
|
-
dmats =
|
29
|
-
|
30
|
-
|
31
|
-
evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
|
32
|
-
evnames.write_array_of_pointer(evals.map { |v| ::FFI::MemoryPointer.from_string(v[1]) })
|
28
|
+
dmats = array_of_pointers(evals.map { |v| v[0].handle_pointer })
|
29
|
+
evnames = array_of_pointers(evals.map { |v| string_pointer(v[1]) })
|
33
30
|
|
34
31
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
35
32
|
|
@@ -52,7 +49,7 @@ module XGBoost
|
|
52
49
|
ntree_limit ||= 0
|
53
50
|
out_len = ::FFI::MemoryPointer.new(:uint64)
|
54
51
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
55
|
-
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
|
52
|
+
check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, 0, out_len, out_result)
|
56
53
|
out = out_result.read_pointer.read_array_of_float(read_uint64(out_len))
|
57
54
|
num_class = out.size / data.num_row
|
58
55
|
out = out.each_slice(num_class).to_a if num_class > 1
|
@@ -67,7 +64,13 @@ module XGBoost
|
|
67
64
|
def dump(fmap: "", with_stats: false, dump_format: "text")
|
68
65
|
out_len = ::FFI::MemoryPointer.new(:uint64)
|
69
66
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
70
|
-
|
67
|
+
|
68
|
+
names = feature_names || []
|
69
|
+
fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
|
70
|
+
ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))
|
71
|
+
|
72
|
+
check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)
|
73
|
+
|
71
74
|
out_result.read_pointer.get_array_of_string(0, read_uint64(out_len))
|
72
75
|
end
|
73
76
|
|
@@ -155,7 +158,7 @@ module XGBoost
|
|
155
158
|
end
|
156
159
|
|
157
160
|
def [](key_name)
|
158
|
-
key =
|
161
|
+
key = string_pointer(key_name)
|
159
162
|
success = ::FFI::MemoryPointer.new(:int)
|
160
163
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
161
164
|
|
@@ -165,8 +168,8 @@ module XGBoost
|
|
165
168
|
end
|
166
169
|
|
167
170
|
def []=(key_name, raw_value)
|
168
|
-
key =
|
169
|
-
value = raw_value.nil? ? nil :
|
171
|
+
key = string_pointer(key_name)
|
172
|
+
value = raw_value.nil? ? nil : string_pointer(raw_value)
|
170
173
|
|
171
174
|
check_result FFI.XGBoosterSetAttr(handle_pointer, key, value)
|
172
175
|
end
|
@@ -188,6 +191,14 @@ module XGBoost
|
|
188
191
|
@handle.read_pointer
|
189
192
|
end
|
190
193
|
|
194
|
+
def array_of_pointers(values)
|
195
|
+
::FFI::MemoryPointer.new(:pointer, values.size).write_array_of_pointer(values)
|
196
|
+
end
|
197
|
+
|
198
|
+
def string_pointer(value)
|
199
|
+
::FFI::MemoryPointer.from_string(value.to_s)
|
200
|
+
end
|
201
|
+
|
191
202
|
include Utils
|
192
203
|
end
|
193
204
|
end
|
data/lib/xgboost/classifier.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Classifier < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/dmatrix.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class DMatrix
|
3
|
-
attr_reader :data
|
3
|
+
attr_reader :data, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
|
6
6
|
@data = data
|
@@ -15,21 +15,42 @@ module XGBoost
|
|
15
15
|
elsif daru?(data)
|
16
16
|
nrow, ncol = data.shape
|
17
17
|
flat_data = data.map_rows(&:to_a).flatten
|
18
|
-
|
18
|
+
@feature_names = data.each_vector.map(&:name)
|
19
|
+
@feature_types =
|
20
|
+
data.each_vector.map(&:db_type).map do |v|
|
21
|
+
case v
|
22
|
+
when "INTEGER"
|
23
|
+
"int"
|
24
|
+
when "DOUBLE"
|
25
|
+
"float"
|
26
|
+
else
|
27
|
+
raise Error, "Unknown feature type: #{v}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
elsif numo?(data)
|
19
31
|
nrow, ncol = data.shape
|
20
|
-
|
32
|
+
elsif rover?(data)
|
33
|
+
nrow, ncol = data.shape
|
34
|
+
@feature_names = data.keys
|
35
|
+
data = data.to_numo
|
21
36
|
else
|
22
37
|
nrow = data.count
|
23
38
|
ncol = data.first.count
|
24
39
|
flat_data = data.flatten
|
25
40
|
end
|
26
41
|
|
27
|
-
handle_missing(flat_data, missing)
|
28
42
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
29
|
-
|
43
|
+
if numo?(data)
|
44
|
+
c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
|
45
|
+
else
|
46
|
+
handle_missing(flat_data, missing)
|
47
|
+
c_data.write_array_of_float(flat_data)
|
48
|
+
end
|
30
49
|
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
31
50
|
|
32
51
|
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
52
|
+
|
53
|
+
@feature_names ||= ncol.times.map { |i| "f#{i}" }
|
33
54
|
end
|
34
55
|
|
35
56
|
self.label = label if label
|
@@ -60,7 +81,7 @@ module XGBoost
|
|
60
81
|
def group=(group)
|
61
82
|
c_data = ::FFI::MemoryPointer.new(:int, group.size)
|
62
83
|
c_data.write_array_of_int(group)
|
63
|
-
check_result FFI.
|
84
|
+
check_result FFI.XGDMatrixSetUIntInfo(handle_pointer, "group", c_data, group.size)
|
64
85
|
end
|
65
86
|
|
66
87
|
def num_row
|
@@ -120,10 +141,14 @@ module XGBoost
|
|
120
141
|
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
121
142
|
end
|
122
143
|
|
123
|
-
def
|
144
|
+
def numo?(data)
|
124
145
|
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
125
146
|
end
|
126
147
|
|
148
|
+
def rover?(data)
|
149
|
+
defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
150
|
+
end
|
151
|
+
|
127
152
|
def handle_missing(data, missing)
|
128
153
|
data.map! { |v| v.nil? ? missing : v }
|
129
154
|
end
|
data/lib/xgboost/ffi.rb
CHANGED
@@ -2,17 +2,26 @@ module XGBoost
|
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
4
|
|
5
|
-
|
5
|
+
begin
|
6
|
+
ffi_lib XGBoost.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
12
|
+
end
|
13
|
+
end
|
6
14
|
|
7
15
|
# https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
|
8
16
|
# keep same order
|
9
17
|
|
10
|
-
#
|
18
|
+
# general
|
19
|
+
attach_function :XGBoostVersion, %i[pointer pointer pointer], :void
|
11
20
|
attach_function :XGBGetLastError, %i[], :string
|
12
21
|
|
13
22
|
# dmatrix
|
14
23
|
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
15
|
-
attach_function :
|
24
|
+
attach_function :XGDMatrixSetUIntInfo, %i[pointer string pointer uint64], :int
|
16
25
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
17
26
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
18
27
|
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
@@ -27,10 +36,10 @@ module XGBoost
|
|
27
36
|
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
28
37
|
attach_function :XGBoosterFree, %i[pointer], :int
|
29
38
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
30
|
-
attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
|
39
|
+
attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
|
31
40
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
32
41
|
attach_function :XGBoosterSaveModel, %i[pointer string], :int
|
33
|
-
attach_function :
|
42
|
+
attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
|
34
43
|
attach_function :XGBoosterGetAttr, %i[pointer pointer pointer pointer], :int
|
35
44
|
attach_function :XGBoosterSetAttr, %i[pointer pointer pointer], :int
|
36
45
|
attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
|
data/lib/xgboost/model.rb
CHANGED
@@ -2,12 +2,8 @@ module XGBoost
|
|
2
2
|
class Model
|
3
3
|
attr_reader :booster
|
4
4
|
|
5
|
-
def initialize(
|
6
|
-
@params =
|
7
|
-
max_depth: max_depth,
|
8
|
-
objective: objective,
|
9
|
-
learning_rate: learning_rate
|
10
|
-
}.merge(options)
|
5
|
+
def initialize(n_estimators: 100, importance_type: "gain", **options)
|
6
|
+
@params = options
|
11
7
|
@n_estimators = n_estimators
|
12
8
|
@importance_type = importance_type
|
13
9
|
end
|
data/lib/xgboost/ranker.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Ranker < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/regressor.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Regressor < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/version.rb
CHANGED
data/vendor/libxgboost.dylib
CHANGED
Binary file
|
data/vendor/libxgboost.so
CHANGED
Binary file
|
data/vendor/xgboost.dll
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xgb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-12-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -94,7 +94,21 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
-
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rover-df
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
description:
|
98
112
|
email: andrew@chartkick.com
|
99
113
|
executables: []
|
100
114
|
extensions: []
|
@@ -123,7 +137,7 @@ homepage: https://github.com/ankane/xgboost
|
|
123
137
|
licenses:
|
124
138
|
- Apache-2.0
|
125
139
|
metadata: {}
|
126
|
-
post_install_message:
|
140
|
+
post_install_message:
|
127
141
|
rdoc_options: []
|
128
142
|
require_paths:
|
129
143
|
- lib
|
@@ -138,8 +152,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
152
|
- !ruby/object:Gem::Version
|
139
153
|
version: '0'
|
140
154
|
requirements: []
|
141
|
-
rubygems_version: 3.1.
|
142
|
-
signing_key:
|
155
|
+
rubygems_version: 3.1.4
|
156
|
+
signing_key:
|
143
157
|
specification_version: 4
|
144
158
|
summary: High performance gradient boosting for Ruby
|
145
159
|
test_files: []
|