xgb 0.3.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/NOTICE.txt +2 -1
- data/README.md +11 -5
- data/lib/xgboost.rb +3 -2
- data/lib/xgboost/booster.rb +21 -10
- data/lib/xgboost/classifier.rb +1 -1
- data/lib/xgboost/dmatrix.rb +31 -6
- data/lib/xgboost/ffi.rb +10 -2
- data/lib/xgboost/regressor.rb +1 -1
- data/lib/xgboost/version.rb +1 -1
- data/vendor/libxgboost.dylib +0 -0
- data/vendor/libxgboost.so +0 -0
- data/vendor/xgboost.dll +0 -0
- metadata +8 -78
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 73bd02bae172bccd30215402bc58a761f2f39c46002c6fee42d429e052470861
|
4
|
+
data.tar.gz: 2773ef6f7e0cb2cd2e71cf5c9329beb08aaca79806e1e359b448b78b684273c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04e3534c0a6cb921016a39b9331fea803ff8a54c5b9c16ea982f7f9cbde8503f51b088e2a77148ca516c215c505e7081b89e026cc7d230a607bbc03fc8a6873d
|
7
|
+
data.tar.gz: 38252580d26db1407d7e843b8c88210d3f65641bc9b0359d3318d22fc94fef8be834fba69f30e7cb35c24d8c364fb3ab18a0dc67f0141c73b30593c3a0154dd4
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1
|
+
## 0.5.1 (2021-02-08)
|
2
|
+
|
3
|
+
- Fixed error with validation sets without early stopping
|
4
|
+
|
5
|
+
## 0.5.0 (2020-12-12)
|
6
|
+
|
7
|
+
- Updated XGBoost to 1.3.0
|
8
|
+
|
9
|
+
## 0.4.1 (2020-08-26)
|
10
|
+
|
11
|
+
- Updated XGBoost to 1.2.0
|
12
|
+
|
13
|
+
## 0.4.0 (2020-05-17)
|
14
|
+
|
15
|
+
- Updated XGBoost to 1.1.0
|
16
|
+
- Changed default `learning_rate` and `max_depth` for Scikit-Learn API to match Python
|
17
|
+
- Added support for Rover
|
18
|
+
- Improved performance of Numo datasets
|
19
|
+
- Improved error message when OpenMP not found on Mac
|
20
|
+
|
21
|
+
## 0.3.1 (2020-04-16)
|
22
|
+
|
23
|
+
- Added `feature_names` and `feature_types` to `DMatrix`
|
24
|
+
- Added feature names to `dump`
|
25
|
+
|
1
26
|
## 0.3.0 (2020-02-19)
|
2
27
|
|
3
28
|
- Updated XGBoost to 1.0.0
|
data/NOTICE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[XGBoost](https://github.com/dmlc/xgboost) - high performance gradient boosting - for Ruby
|
4
4
|
|
5
|
-
[![Build Status](https://
|
5
|
+
[![Build Status](https://github.com/ankane/xgboost/workflows/build/badge.svg?branch=master)](https://github.com/ankane/xgboost/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -137,16 +137,22 @@ Data can be an array of arrays
|
|
137
137
|
[[1, 2, 3], [4, 5, 6]]
|
138
138
|
```
|
139
139
|
|
140
|
-
Or a
|
140
|
+
Or a Numo array
|
141
141
|
|
142
142
|
```ruby
|
143
|
-
|
143
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
144
|
+
```
|
145
|
+
|
146
|
+
Or a Rover data frame
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
Rover.read_csv("houses.csv")
|
144
150
|
```
|
145
151
|
|
146
|
-
Or a
|
152
|
+
Or a Daru data frame
|
147
153
|
|
148
154
|
```ruby
|
149
|
-
|
155
|
+
Daru::DataFrame.from_csv("houses.csv")
|
150
156
|
```
|
151
157
|
|
152
158
|
## Helpful Resources
|
data/lib/xgboost.rb
CHANGED
@@ -31,7 +31,8 @@ module XGBoost
|
|
31
31
|
booster = Booster.new(params: params)
|
32
32
|
num_feature = dtrain.num_col
|
33
33
|
booster.set_param("num_feature", num_feature)
|
34
|
-
booster.feature_names =
|
34
|
+
booster.feature_names = dtrain.feature_names
|
35
|
+
booster.feature_types = dtrain.feature_types
|
35
36
|
evals ||= []
|
36
37
|
|
37
38
|
if early_stopping_rounds
|
@@ -60,7 +61,7 @@ module XGBoost
|
|
60
61
|
best_score = score
|
61
62
|
best_iter = iteration
|
62
63
|
best_message = message
|
63
|
-
elsif iteration - best_iter >= early_stopping_rounds
|
64
|
+
elsif early_stopping_rounds && iteration - best_iter >= early_stopping_rounds
|
64
65
|
booster.best_iteration = best_iter
|
65
66
|
puts "Stopping. Best iteration:\n#{best_message}" if verbose_eval
|
66
67
|
break
|
data/lib/xgboost/booster.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Booster
|
3
|
-
attr_accessor :best_iteration, :feature_names
|
3
|
+
attr_accessor :best_iteration, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(params: nil, model_file: nil)
|
6
6
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
@@ -25,11 +25,8 @@ module XGBoost
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def eval_set(evals, iteration)
|
28
|
-
dmats =
|
29
|
-
|
30
|
-
|
31
|
-
evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
|
32
|
-
evnames.write_array_of_pointer(evals.map { |v| ::FFI::MemoryPointer.from_string(v[1]) })
|
28
|
+
dmats = array_of_pointers(evals.map { |v| v[0].handle_pointer })
|
29
|
+
evnames = array_of_pointers(evals.map { |v| string_pointer(v[1]) })
|
33
30
|
|
34
31
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
35
32
|
|
@@ -67,7 +64,13 @@ module XGBoost
|
|
67
64
|
def dump(fmap: "", with_stats: false, dump_format: "text")
|
68
65
|
out_len = ::FFI::MemoryPointer.new(:uint64)
|
69
66
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
70
|
-
|
67
|
+
|
68
|
+
names = feature_names || []
|
69
|
+
fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
|
70
|
+
ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))
|
71
|
+
|
72
|
+
check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)
|
73
|
+
|
71
74
|
out_result.read_pointer.get_array_of_string(0, read_uint64(out_len))
|
72
75
|
end
|
73
76
|
|
@@ -155,7 +158,7 @@ module XGBoost
|
|
155
158
|
end
|
156
159
|
|
157
160
|
def [](key_name)
|
158
|
-
key =
|
161
|
+
key = string_pointer(key_name)
|
159
162
|
success = ::FFI::MemoryPointer.new(:int)
|
160
163
|
out_result = ::FFI::MemoryPointer.new(:pointer)
|
161
164
|
|
@@ -165,8 +168,8 @@ module XGBoost
|
|
165
168
|
end
|
166
169
|
|
167
170
|
def []=(key_name, raw_value)
|
168
|
-
key =
|
169
|
-
value = raw_value.nil? ? nil :
|
171
|
+
key = string_pointer(key_name)
|
172
|
+
value = raw_value.nil? ? nil : string_pointer(raw_value)
|
170
173
|
|
171
174
|
check_result FFI.XGBoosterSetAttr(handle_pointer, key, value)
|
172
175
|
end
|
@@ -188,6 +191,14 @@ module XGBoost
|
|
188
191
|
@handle.read_pointer
|
189
192
|
end
|
190
193
|
|
194
|
+
def array_of_pointers(values)
|
195
|
+
::FFI::MemoryPointer.new(:pointer, values.size).write_array_of_pointer(values)
|
196
|
+
end
|
197
|
+
|
198
|
+
def string_pointer(value)
|
199
|
+
::FFI::MemoryPointer.from_string(value.to_s)
|
200
|
+
end
|
201
|
+
|
191
202
|
include Utils
|
192
203
|
end
|
193
204
|
end
|
data/lib/xgboost/classifier.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Classifier < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/dmatrix.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class DMatrix
|
3
|
-
attr_reader :data
|
3
|
+
attr_reader :data, :feature_names, :feature_types
|
4
4
|
|
5
5
|
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
|
6
6
|
@data = data
|
@@ -15,21 +15,42 @@ module XGBoost
|
|
15
15
|
elsif daru?(data)
|
16
16
|
nrow, ncol = data.shape
|
17
17
|
flat_data = data.map_rows(&:to_a).flatten
|
18
|
-
|
18
|
+
@feature_names = data.each_vector.map(&:name)
|
19
|
+
@feature_types =
|
20
|
+
data.each_vector.map(&:db_type).map do |v|
|
21
|
+
case v
|
22
|
+
when "INTEGER"
|
23
|
+
"int"
|
24
|
+
when "DOUBLE"
|
25
|
+
"float"
|
26
|
+
else
|
27
|
+
raise Error, "Unknown feature type: #{v}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
elsif numo?(data)
|
19
31
|
nrow, ncol = data.shape
|
20
|
-
|
32
|
+
elsif rover?(data)
|
33
|
+
nrow, ncol = data.shape
|
34
|
+
@feature_names = data.keys
|
35
|
+
data = data.to_numo
|
21
36
|
else
|
22
37
|
nrow = data.count
|
23
38
|
ncol = data.first.count
|
24
39
|
flat_data = data.flatten
|
25
40
|
end
|
26
41
|
|
27
|
-
handle_missing(flat_data, missing)
|
28
42
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
29
|
-
|
43
|
+
if numo?(data)
|
44
|
+
c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
|
45
|
+
else
|
46
|
+
handle_missing(flat_data, missing)
|
47
|
+
c_data.write_array_of_float(flat_data)
|
48
|
+
end
|
30
49
|
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
31
50
|
|
32
51
|
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
52
|
+
|
53
|
+
@feature_names ||= ncol.times.map { |i| "f#{i}" }
|
33
54
|
end
|
34
55
|
|
35
56
|
self.label = label if label
|
@@ -120,10 +141,14 @@ module XGBoost
|
|
120
141
|
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
121
142
|
end
|
122
143
|
|
123
|
-
def
|
144
|
+
def numo?(data)
|
124
145
|
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
125
146
|
end
|
126
147
|
|
148
|
+
def rover?(data)
|
149
|
+
defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
150
|
+
end
|
151
|
+
|
127
152
|
def handle_missing(data, missing)
|
128
153
|
data.map! { |v| v.nil? ? missing : v }
|
129
154
|
end
|
data/lib/xgboost/ffi.rb
CHANGED
@@ -2,7 +2,15 @@ module XGBoost
|
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
4
|
|
5
|
-
|
5
|
+
begin
|
6
|
+
ffi_lib XGBoost.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
12
|
+
end
|
13
|
+
end
|
6
14
|
|
7
15
|
# https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
|
8
16
|
# keep same order
|
@@ -31,7 +39,7 @@ module XGBoost
|
|
31
39
|
attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
|
32
40
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
33
41
|
attach_function :XGBoosterSaveModel, %i[pointer string], :int
|
34
|
-
attach_function :
|
42
|
+
attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
|
35
43
|
attach_function :XGBoosterGetAttr, %i[pointer pointer pointer pointer], :int
|
36
44
|
attach_function :XGBoosterSetAttr, %i[pointer pointer pointer], :int
|
37
45
|
attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
|
data/lib/xgboost/regressor.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class Regressor < Model
|
3
|
-
def initialize(
|
3
|
+
def initialize(n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
|
4
4
|
super
|
5
5
|
end
|
6
6
|
|
data/lib/xgboost/version.rb
CHANGED
data/vendor/libxgboost.dylib
CHANGED
Binary file
|
data/vendor/libxgboost.so
CHANGED
Binary file
|
data/vendor/xgboost.dll
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: xgb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -24,78 +24,8 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
-
|
28
|
-
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '0'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '0'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: minitest
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '5'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '5'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: daru
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: numo-narray
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
description:
|
98
|
-
email: andrew@chartkick.com
|
27
|
+
description:
|
28
|
+
email: andrew@ankane.org
|
99
29
|
executables: []
|
100
30
|
extensions: []
|
101
31
|
extra_rdoc_files: []
|
@@ -123,7 +53,7 @@ homepage: https://github.com/ankane/xgboost
|
|
123
53
|
licenses:
|
124
54
|
- Apache-2.0
|
125
55
|
metadata: {}
|
126
|
-
post_install_message:
|
56
|
+
post_install_message:
|
127
57
|
rdoc_options: []
|
128
58
|
require_paths:
|
129
59
|
- lib
|
@@ -138,8 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
68
|
- !ruby/object:Gem::Version
|
139
69
|
version: '0'
|
140
70
|
requirements: []
|
141
|
-
rubygems_version: 3.
|
142
|
-
signing_key:
|
71
|
+
rubygems_version: 3.2.3
|
72
|
+
signing_key:
|
143
73
|
specification_version: 4
|
144
74
|
summary: High performance gradient boosting for Ruby
|
145
75
|
test_files: []
|