xgb 0.8.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/NOTICE.txt +2 -2
- data/README.md +3 -2
- data/lib/xgboost/booster.rb +181 -65
- data/lib/xgboost/callback_container.rb +145 -0
- data/lib/xgboost/classifier.rb +1 -1
- data/lib/xgboost/cv_pack.rb +26 -0
- data/lib/xgboost/dmatrix.rb +190 -78
- data/lib/xgboost/early_stopping.rb +132 -0
- data/lib/xgboost/evaluation_monitor.rb +44 -0
- data/lib/xgboost/ffi.rb +12 -2
- data/lib/xgboost/model.rb +2 -1
- data/lib/xgboost/packed_booster.rb +51 -0
- data/lib/xgboost/regressor.rb +1 -1
- data/lib/xgboost/training_callback.rb +23 -0
- data/lib/xgboost/utils.rb +19 -4
- data/lib/xgboost/version.rb +1 -1
- data/lib/xgboost.rb +107 -112
- data/vendor/aarch64-linux/libxgboost.so +0 -0
- data/vendor/arm64-darwin/libxgboost.dylib +0 -0
- data/vendor/x64-mingw/xgboost.dll +0 -0
- data/vendor/x86_64-darwin/libxgboost.dylib +0 -0
- data/vendor/x86_64-linux/libxgboost.so +0 -0
- data/vendor/x86_64-linux-musl/libxgboost.so +0 -0
- metadata +10 -14
- data/vendor/aarch64-linux/LICENSE-rabit.txt +0 -28
- data/vendor/arm64-darwin/LICENSE-rabit.txt +0 -28
- data/vendor/x64-mingw/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-darwin/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-linux/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-linux-musl/LICENSE-rabit.txt +0 -28
data/lib/xgboost/dmatrix.rb
CHANGED
@@ -1,77 +1,70 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class DMatrix
|
3
|
-
|
3
|
+
include Utils
|
4
|
+
|
5
|
+
attr_reader :handle
|
4
6
|
|
5
7
|
# Builds a DMatrix from in-memory data, or wraps an existing native handle.
#
# data    - an ::FFI::AutoPointer (used directly as the handle), a Matrix,
#           or whatever the daru?/numo?/rover? predicates accept (those
#           helpers are defined outside this view); anything else is
#           treated as an array of equally sized rows.
# label   - optional labels, assigned through #label= when truthy.
# weight  - optional weights, assigned through #weight= when truthy.
# missing - value substituted for nil entries and passed to the native call.
#
# Raises ArgumentError when array rows differ in size, and Error when a
# Daru vector reports a db_type other than "INTEGER" or "DOUBLE".
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
  # An existing native handle is adopted as-is; nothing else is set up.
  if data.is_a?(::FFI::AutoPointer)
    @handle = data
    return
  end

  if matrix?(data)
    nrow = data.row_count
    ncol = data.column_count
    flat_data = data.to_a.flatten
  elsif daru?(data)
    nrow, ncol = data.shape
    flat_data = data.map_rows(&:to_a).flatten
    feature_names = data.each_vector.map(&:name)
    feature_types = data.each_vector.map(&:db_type).map do |db_type|
      case db_type
      when "INTEGER" then "int"
      when "DOUBLE" then "float"
      else raise Error, "Unknown feature type: #{db_type}"
      end
    end
  elsif numo?(data)
    nrow, ncol = data.shape
  elsif rover?(data)
    nrow, ncol = data.shape
    feature_names = data.keys
    # converted so the numo?(data) branch below writes the buffer
    data = data.to_numo
  else
    nrow = data.count
    ncol = data.first.count
    unless data.all? { |row| row.size == ncol }
      raise ArgumentError, "Rows have different sizes"
    end
    flat_data = data.flatten
  end

  # Copy the values into a float buffer of nrow * ncol entries.
  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
  if numo?(data)
    c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
  else
    handle_missing(flat_data, missing)
    c_data.write_array_of_float(flat_data)
  end

  out = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, out)
  # XGDMatrixFree is registered as the AutoPointer's release callback.
  @handle = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGDMatrixFree))

  # Default names are "f0", "f1", ... when the input supplied none.
  self.feature_names = feature_names || Array.new(ncol) { |i| "f#{i}" }
  self.feature_types = feature_types if feature_types

  self.label = label if label
  self.weight = weight if weight
end
|
63
65
|
|
64
|
-
def
|
65
|
-
|
66
|
-
proc { FFI.XGDMatrixFree(::FFI::Pointer.new(:pointer, addr)) }
|
67
|
-
end
|
68
|
-
|
69
|
-
def label
|
70
|
-
float_info("label")
|
71
|
-
end
|
72
|
-
|
73
|
-
def weight
|
74
|
-
float_info("weight")
|
66
|
+
# Passes this matrix to XGDMatrixSaveBinary with +fname+.
#
# fname  - destination file name handed to the native call.
# silent - truthy is passed as 1, falsy as 0.
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  check_call FFI.XGDMatrixSaveBinary(handle, fname, silent_flag)
end
|
76
69
|
|
77
70
|
def label=(label)
|
@@ -85,39 +78,146 @@ module XGBoost
|
|
85
78
|
# Sets the "group" field from an array of integers.
def group=(group)
  count = group.size
  buffer = ::FFI::MemoryPointer.new(:int, count)
  buffer.write_array_of_int(group)
  check_call FFI.XGDMatrixSetUIntInfo(handle, "group", buffer, count)
end
|
83
|
+
|
84
|
+
# Returns the "label" float field as read by float_info.
def label
  float_info("label")
end
|
87
|
+
|
88
|
+
# Returns the "weight" float field as read by float_info.
def weight
  float_info("weight")
end
|
90
91
|
|
91
92
|
# Returns the row count reported by XGDMatrixNumRow.
def num_row
  count = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumRow(handle, count)
  count.read_uint64
end
|
96
97
|
|
97
98
|
# Returns the column count reported by XGDMatrixNumCol.
def num_col
  count = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumCol(handle, count)
  count.read_uint64
end
|
103
|
+
|
104
|
+
# Returns the value reported by XGDMatrixNumNonMissing.
def num_nonmissing
  count = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixNumNonMissing(handle, count)
  count.read_uint64
end
|
109
|
+
|
110
|
+
# Returns :row when XGDMatrixDataSplitMode reports 0, and :col for any
# other value.
def data_split_mode
  mode = ::FFI::MemoryPointer.new(:uint64)
  check_call FFI.XGDMatrixDataSplitMode(handle, mode)
  mode.read_uint64.zero? ? :row : :col
end
|
102
115
|
|
103
116
|
# Returns a new DMatrix built by XGDMatrixSliceDMatrix from the row
# indices in +rindex+.
def slice(rindex)
  indices = ::FFI::MemoryPointer.new(:int, rindex.count)
  indices.write_array_of_int(rindex)

  out = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixSliceDMatrix(handle, indices, rindex.size, out)

  # The new handle is released through XGDMatrixFree, like the one
  # created in the constructor.
  sliced = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGDMatrixFree))
  DMatrix.new(sliced)
end
|
110
125
|
|
111
|
-
def
|
112
|
-
|
126
|
+
# Returns the stored "feature_name" strings, or nil when there are none.
def feature_names
  count = ::FFI::MemoryPointer.new(:uint64)
  strings = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_name", count, strings)

  names = from_cstr_to_rbstr(strings, count)
  names unless names.empty?
end
|
114
140
|
|
115
|
-
def
|
116
|
-
|
141
|
+
# Sets the feature names; nil clears them.
#
# Raises TypeError/ArgumentError from validate_feature_info, and
# ArgumentError when names repeat, are not Strings, or contain one of
# "[", "]" or "<".
def feature_names=(feature_names)
  if feature_names.nil?
    check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", nil, 0)
    return
  end

  feature_names = validate_feature_info(
    feature_names, num_col, data_split_mode == :col, "feature names"
  )
  unless feature_names.uniq.length == feature_names.length
    raise ArgumentError, "feature_names must be unique"
  end

  # symbols that may affect parsing are prohibited
  forbidden = ["[", "]", "<"]
  well_formed = feature_names.all? do |f|
    f.is_a?(String) && forbidden.none? { |symbol| f.include?(symbol) }
  end
  unless well_formed
    raise ArgumentError, "feature_names must be string, and may not contain [, ] or <"
  end

  c_feature_names = array_of_pointers(feature_names.map { |f| string_pointer(f) })
  check_call FFI.XGDMatrixSetStrFeatureInfo(
    handle, "feature_name", c_feature_names, feature_names.length
  )
end
|
178
|
+
|
179
|
+
# Returns the stored "feature_type" strings, or nil when there are none.
def feature_types
  count = ::FFI::MemoryPointer.new(:uint64)
  strings = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_type", count, strings)

  types = from_cstr_to_rbstr(strings, count)
  types unless types.empty?
end
|
118
193
|
|
119
|
-
def
|
120
|
-
|
194
|
+
# Sets the feature types; nil clears them.
#
# Raises TypeError/ArgumentError from validate_feature_info.
def feature_types=(feature_types)
  if feature_types.nil?
    check_call FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", nil, 0)
    return
  end

  feature_types = validate_feature_info(
    feature_types, num_col, data_split_mode == :col, "feature types"
  )

  c_feature_types = array_of_pointers(feature_types.map { |f| string_pointer(f) })
  check_call FFI.XGDMatrixSetStrFeatureInfo(
    handle, "feature_type", c_feature_types, feature_types.length
  )
end
|
122
222
|
|
123
223
|
private
|
@@ -126,17 +226,31 @@ module XGBoost
|
|
126
226
|
data = data.to_a unless data.is_a?(Array)
|
127
227
|
c_data = ::FFI::MemoryPointer.new(:float, data.size)
|
128
228
|
c_data.write_array_of_float(data)
|
129
|
-
|
229
|
+
check_call FFI.XGDMatrixSetFloatInfo(handle, field.to_s, c_data, data.size)
|
130
230
|
end
|
131
231
|
|
132
232
|
# Reads a float field (such as "label" or "weight") from the matrix.
#
# field - name of the field, passed to XGDMatrixGetFloatInfo.
#
# Returns an Array of Floats with as many entries as the native call
# reports, or an empty Array when it reports none.
def float_info(field)
  out_len = ::FFI::MemoryPointer.new(:uint64)
  # The native call stores a pointer here, so the slot must be
  # pointer-sized. Sizing it as num_row floats is too small whenever the
  # matrix has fewer rows than a pointer has float-sized words.
  out_dptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixGetFloatInfo(handle, field, out_len, out_dptr)

  # Trust the reported length rather than assuming one value per row, so
  # a field that was never set is not read through a null pointer.
  length = out_len.read_uint64
  values = out_dptr.read_pointer
  return [] if length.zero? || values.null?

  values.read_array_of_float(length)
end
|
139
239
|
|
240
|
+
# Checks that +feature_info+ is an Array whose length matches the column
# count, and returns it unchanged.
#
# The length check is skipped when +n_features+ is 0 or when
# +is_column_split+ is true. +name+ is only used in error messages.
#
# Raises TypeError for a non-Array and ArgumentError for a length mismatch.
def validate_feature_info(feature_info, n_features, is_column_split, name)
  unless feature_info.is_a?(Array)
    raise TypeError, "Expecting an array of strings for #{name}, got: #{feature_info.class.name}"
  end

  length_enforced = n_features != 0 && !is_column_split
  if length_enforced && feature_info.length != n_features
    raise ArgumentError,
      "#{name} must have the same length as the number of data columns, " \
      "expected #{n_features}, got #{feature_info.length}"
  end

  feature_info
end
|
253
|
+
|
140
254
|
# True when the Matrix constant is defined and +data+ is an instance of
# it. Returns nil when Matrix is not defined at all.
def matrix?(data)
  defined?(Matrix) && Matrix === data
end
|
@@ -156,7 +270,5 @@ module XGBoost
|
|
156
270
|
# Replaces every nil entry of +data+ with +missing+, in place, and
# returns +data+.
def handle_missing(data, missing)
  data.map! do |value|
    next missing if value.nil?

    value
  end
end
|
159
|
-
|
160
|
-
include Utils
|
161
273
|
end
|
162
274
|
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module XGBoost
  # Training callback that requests a stop once the monitored evaluation
  # score has gone +rounds+ consecutive iterations without improving.
  class EarlyStopping < TrainingCallback
    # Metric name prefixes for which a larger score is better. Consulted
    # only when +maximize+ is left as nil (same list as upstream XGBoost).
    MAXIMIZE_METRICS = %w[auc aucpr pre pre@ map ndcg auc@ aucpr@ map@ ndcg@].freeze

    # rounds      - number of non-improving iterations tolerated before stopping.
    # metric_name - metric to monitor; defaults to the last metric logged.
    # data_name   - dataset to monitor; defaults to the last dataset logged.
    # maximize    - true if larger scores are better, false if smaller;
    #               nil infers the direction from the metric name.
    # save_best   - when true, after_training re-records the best
    #               iteration and score on the model.
    # min_delta   - minimum change that counts as an improvement.
    #
    # Raises ArgumentError when min_delta is negative.
    def initialize(
      rounds:,
      metric_name: nil,
      data_name: nil,
      maximize: nil,
      save_best: false,
      min_delta: 0.0
    )
      if min_delta < 0
        raise ArgumentError, "min_delta must be greater or equal to 0."
      end

      @data = data_name
      @metric_name = metric_name
      @rounds = rounds
      @save_best = save_best
      @maximize = maximize
      @min_delta = min_delta

      @stopping_history = {}
      @best_scores = {}
      @current_rounds = 0
      @starting_round = 0
      super()
    end

    # Records how many rounds the model already has, so that epochs are
    # reported relative to the whole model.
    def before_training(model)
      @starting_round = model.num_boosted_rounds
      model
    end

    # Picks the monitored dataset and metric out of +evals_log+ and
    # updates the stopping state with the latest score.
    #
    # Returns true when training should stop.
    # Raises ArgumentError when there is no evaluation data, or the
    # requested dataset or metric is missing; TypeError when the dataset
    # name is not a String.
    def after_iteration(model, epoch, evals_log)
      epoch += @starting_round
      if evals_log.keys.empty?
        raise ArgumentError, "Must have at least 1 validation dataset for early stopping."
      end

      # The last dataset is used by default.
      data_name = @data || evals_log.keys.last
      unless evals_log.include?(data_name)
        raise ArgumentError, "No dataset named: #{data_name}"
      end
      unless data_name.is_a?(String)
        raise TypeError, "The name of the dataset should be a string. Got: #{data_name.class.name}"
      end
      data_log = evals_log[data_name]

      # The last metric is used by default.
      metric_name = @metric_name || data_log.keys.last
      unless data_log.include?(metric_name)
        raise ArgumentError, "No metric named: #{metric_name}"
      end

      latest_score = data_log[metric_name].last
      update_rounds(latest_score, data_name, metric_name, model, epoch)
    end

    # Returns the model. With save_best enabled, the best iteration and
    # score are read from the model and assigned back.
    # NOTE(review): the model is not truncated to the best iteration here.
    def after_training(model)
      return model unless @save_best

      best_iteration = model.best_iteration
      best_score = model.best_score
      model.best_iteration = best_iteration
      model.best_score = best_score
      model
    end

    private

    # Updates the best-score bookkeeping with +score+ and reports whether
    # the patience budget is exhausted.
    def update_rounds(score, name, metric, model, epoch)
      # Without an explicit direction, decide once from the metric name.
      # Defaulting to "minimize" would treat every gain in a
      # larger-is-better metric such as auc as a regression.
      if @maximize.nil?
        metric_label = metric.to_s
        @maximize = metric_label != "mape" &&
          MAXIMIZE_METRICS.any? { |prefix| metric_label.start_with?(prefix) }
      end

      if @stopping_history.empty?
        # First round
        @current_rounds = 0
        @stopping_history[name] = {metric => [score]}
        @best_scores[name] = {metric => [score]}
        model.set_attr(best_score: scalar_score(score), best_iteration: epoch)
      elsif improved?(score, @best_scores[name][metric].last)
        @stopping_history[name][metric] << score
        @best_scores[name][metric] << score
        model.set_attr(best_score: scalar_score(score), best_iteration: epoch)
        @current_rounds = 0
      else
        @stopping_history[name][metric] << score
        @current_rounds += 1
      end

      @current_rounds >= @rounds
    end

    # Log entries may be a bare score or an Array whose first element is
    # the score.
    def scalar_score(value)
      value.is_a?(Array) ? value[0] : value
    end

    # True when +candidate+ beats +best+ by more than min_delta in the
    # configured direction.
    def improved?(candidate, best)
      if @maximize
        scalar_score(candidate) - @min_delta > scalar_score(best)
      else
        scalar_score(best) - @min_delta > scalar_score(candidate)
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module XGBoost
  # Training callback that prints the latest evaluation scores whenever
  # the epoch number is a multiple of +period+.
  class EvaluationMonitor < TrainingCallback
    # period    - print when epoch % period is 0.
    # show_stdv - also print the second element of Array log entries.
    def initialize(period:, show_stdv: false)
      @period = period
      @show_stdv = show_stdv
    end

    # Formats one line with the latest score of every dataset/metric pair
    # and prints it when the epoch falls on the period. Always returns
    # false, so it never requests a stop.
    def after_iteration(model, epoch, evals_log)
      return false if evals_log.empty?

      msg = "[#{epoch}]"
      evals_log.each do |data, metric|
        metric.each do |metric_name, log|
          latest = log[-1]
          # Array entries carry [score, stdv]; anything else is the score.
          score, stdv = latest.is_a?(Array) ? [latest[0], latest[1]] : [latest, nil]
          msg += fmt_metric(data, metric_name, score, stdv)
        end
      end
      msg += "\n"

      puts msg if epoch % @period == 0
      false
    end

    private

    # Renders "\t<data>-<metric>:<score>", with "+<std>" appended when a
    # deviation is present and show_stdv is enabled.
    def fmt_metric(data, metric, score, std)
      label = data + "-" + metric
      if @show_stdv && !std.nil?
        format("\t%s:%.5f+%.5f", label, score, std)
      else
        format("\t%s:%.5f", label, score)
      end
    end
  end
end
|
data/lib/xgboost/ffi.rb
CHANGED
@@ -22,8 +22,12 @@ module XGBoost
|
|
22
22
|
# dmatrix
|
23
23
|
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
24
24
|
attach_function :XGDMatrixSetUIntInfo, %i[pointer string pointer uint64], :int
|
25
|
+
attach_function :XGDMatrixSetStrFeatureInfo, %i[pointer string pointer uint64], :int
|
26
|
+
attach_function :XGDMatrixGetStrFeatureInfo, %i[pointer string pointer pointer], :int
|
25
27
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
26
28
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
29
|
+
attach_function :XGDMatrixNumNonMissing, %i[pointer pointer], :int
|
30
|
+
attach_function :XGDMatrixDataSplitMode, %i[pointer pointer], :int
|
27
31
|
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
28
32
|
attach_function :XGDMatrixFree, %i[pointer], :int
|
29
33
|
attach_function :XGDMatrixSaveBinary, %i[pointer string int], :int
|
@@ -35,13 +39,19 @@ module XGBoost
|
|
35
39
|
attach_function :XGBoosterUpdateOneIter, %i[pointer int pointer], :int
|
36
40
|
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
37
41
|
attach_function :XGBoosterFree, %i[pointer], :int
|
42
|
+
attach_function :XGBoosterReset, %i[pointer], :int
|
43
|
+
attach_function :XGBoosterBoostedRounds, %i[pointer pointer], :int
|
38
44
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
45
|
+
attach_function :XGBoosterGetNumFeature, %i[pointer pointer], :int
|
39
46
|
attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
|
40
47
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
41
48
|
attach_function :XGBoosterSaveModel, %i[pointer string], :int
|
49
|
+
attach_function :XGBoosterSaveJsonConfig, %i[pointer pointer pointer], :int
|
42
50
|
attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
|
43
|
-
attach_function :XGBoosterGetAttr, %i[pointer
|
44
|
-
attach_function :XGBoosterSetAttr, %i[pointer
|
51
|
+
attach_function :XGBoosterGetAttr, %i[pointer string pointer pointer], :int
|
52
|
+
attach_function :XGBoosterSetAttr, %i[pointer string string], :int
|
45
53
|
attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
|
54
|
+
attach_function :XGBoosterSetStrFeatureInfo, %i[pointer string pointer uint64], :int
|
55
|
+
attach_function :XGBoosterGetStrFeatureInfo, %i[pointer string pointer pointer], :int
|
46
56
|
end
|
47
57
|
end
|
data/lib/xgboost/model.rb
CHANGED
@@ -2,10 +2,11 @@ module XGBoost
|
|
2
2
|
class Model
|
3
3
|
attr_reader :booster
|
4
4
|
|
5
|
-
def initialize(n_estimators: 100, importance_type: "gain", **options)
|
5
|
+
# Stores the estimator settings on the instance.
#
# n_estimators          - kept as @n_estimators.
# importance_type       - kept as @importance_type.
# early_stopping_rounds - kept as @early_stopping_rounds.
# options               - all remaining keywords, kept as @params.
def initialize(n_estimators: 100, importance_type: "gain", early_stopping_rounds: nil, **options)
  @n_estimators = n_estimators
  @importance_type = importance_type
  @early_stopping_rounds = early_stopping_rounds
  @params = options
end
|
10
11
|
|
11
12
|
def predict(data)
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module XGBoost
  # Presents a collection of cross-validation folds through one
  # booster-like interface: writes go to every fold, reads come from the
  # first fold.
  class PackedBooster
    # cvfolds - indexable collection of fold objects. Each fold must
    #           respond to the calls forwarded below (update, eval_set,
    #           bst, best_iteration=, best_score=, num_boosted_rounds).
    def initialize(cvfolds)
      @cvfolds = cvfolds
    end

    # Calls update(iteration) on every fold.
    def update(iteration)
      @cvfolds.each { |fold| fold.update(iteration) }
    end

    # Sets the given attributes on every fold's booster.
    def set_attr(**kwargs)
      @cvfolds.each { |fold| fold.bst.set_attr(**kwargs) }
    end

    # Reads attribute +key+ from the first fold's booster.
    def attr(key)
      first_booster.attr(key)
    end

    # Returns one eval_set(iteration) result per fold.
    def eval_set(iteration)
      @cvfolds.map { |fold| fold.eval_set(iteration) }
    end

    def best_iteration
      first_booster.best_iteration
    end

    def best_iteration=(iteration)
      @cvfolds.each { |fold| fold.best_iteration = iteration }
    end

    def best_score
      first_booster.best_score
    end

    def best_score=(score)
      @cvfolds.each { |fold| fold.best_score = score }
    end

    # Reported by the first fold itself rather than its booster.
    def num_boosted_rounds
      @cvfolds[0].num_boosted_rounds
    end

    private

    # Booster of the first fold; the source of every read.
    def first_booster
      @cvfolds[0].bst
    end
  end
end
|
data/lib/xgboost/regressor.rb
CHANGED
@@ -10,7 +10,7 @@ module XGBoost
|
|
10
10
|
|
11
11
|
@booster = XGBoost.train(@params, dtrain,
|
12
12
|
num_boost_round: @n_estimators,
|
13
|
-
early_stopping_rounds: early_stopping_rounds,
|
13
|
+
early_stopping_rounds: early_stopping_rounds || @early_stopping_rounds,
|
14
14
|
verbose_eval: verbose,
|
15
15
|
evals: evals
|
16
16
|
)
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module XGBoost
  # Base class for training callbacks. Every hook is a no-op default that
  # subclasses override as needed.
  class TrainingCallback
    # Hook run before training starts. Returns the model.
    def before_training(model)
      model
    end

    # Hook run after training is finished. Returns the model.
    def after_training(model)
      model
    end

    # Hook run before each iteration. Returns true when training should
    # stop; the default never stops.
    def before_iteration(model, epoch, evals_log)
      false
    end

    # Hook run after each iteration. Returns true when training should
    # stop; the default never stops.
    def after_iteration(model, epoch, evals_log)
      false
    end
  end
end
|
data/lib/xgboost/utils.rb
CHANGED
@@ -2,7 +2,7 @@ module XGBoost
|
|
2
2
|
module Utils
|
3
3
|
private
|
4
4
|
|
5
|
-
def
|
5
|
+
def check_call(err)
|
6
6
|
if err != 0
|
7
7
|
# make friendly
|
8
8
|
message = FFI.XGBGetLastError.split("\n").first.split(/:\d+: /, 2).last
|
@@ -10,9 +10,24 @@ module XGBoost
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
# Copies +values+ (pointers) into a newly allocated native pointer array
# and returns that array.
def array_of_pointers(values)
  ::FFI::MemoryPointer.new(:pointer, values.size).tap do |arr|
    arr.write_array_of_pointer(values)
    # keep reference for string pointers
    arr.instance_variable_set(:@xgboost_ref, values)
  end
end
|
20
|
+
|
21
|
+
# Returns a native memory pointer holding +value+ converted with to_s.
def string_pointer(value)
  text = value.to_s
  ::FFI::MemoryPointer.from_string(text)
end
|
24
|
+
|
25
|
+
# Converts a native array of C strings into an Array of UTF-8 tagged
# Ruby Strings.
#
# data   - pointer holding the address of the string array.
# length - pointer to a uint64 holding the number of entries.
def from_cstr_to_rbstr(data, length)
  Array.new(length.read_uint64) do |i|
    # step one pointer-width per entry through the array
    entry = data.read_pointer[i * ::FFI::Pointer.size]
    entry.read_pointer.read_string.force_encoding(Encoding::UTF_8)
  end
end
|
17
32
|
end
|
18
33
|
end
|
data/lib/xgboost/version.rb
CHANGED