xgb 0.7.3 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/NOTICE.txt +1 -1
- data/README.md +5 -5
- data/lib/xgboost/booster.rb +176 -65
- data/lib/xgboost/callback_container.rb +145 -0
- data/lib/xgboost/cv_pack.rb +26 -0
- data/lib/xgboost/dmatrix.rb +190 -78
- data/lib/xgboost/early_stopping.rb +132 -0
- data/lib/xgboost/evaluation_monitor.rb +44 -0
- data/lib/xgboost/ffi.rb +11 -2
- data/lib/xgboost/packed_booster.rb +51 -0
- data/lib/xgboost/ranker.rb +1 -1
- data/lib/xgboost/training_callback.rb +23 -0
- data/lib/xgboost/utils.rb +19 -4
- data/lib/xgboost/version.rb +1 -1
- data/lib/xgboost.rb +107 -112
- data/vendor/aarch64-linux/libxgboost.so +0 -0
- data/vendor/arm64-darwin/libxgboost.dylib +0 -0
- data/vendor/x64-mingw/xgboost.dll +0 -0
- data/vendor/x86_64-darwin/libxgboost.dylib +0 -0
- data/vendor/x86_64-linux/libxgboost.so +0 -0
- data/vendor/x86_64-linux-musl/libxgboost.so +0 -0
- metadata +11 -11
- data/vendor/aarch64-linux/LICENSE-rabit.txt +0 -28
- data/vendor/arm64-darwin/LICENSE-rabit.txt +0 -28
- data/vendor/x64-mingw/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-darwin/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-linux/LICENSE-rabit.txt +0 -28
- data/vendor/x86_64-linux-musl/LICENSE-rabit.txt +0 -28
data/lib/xgboost/dmatrix.rb
CHANGED
@@ -1,77 +1,70 @@
|
|
1
1
|
module XGBoost
|
2
2
|
class DMatrix
|
3
|
-
|
3
|
+
include Utils
|
4
|
+
|
5
|
+
attr_reader :handle
|
4
6
|
|
5
7
|
def initialize(data, label: nil, weight: nil, missing: Float::NAN)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
8
|
+
if data.is_a?(::FFI::AutoPointer)
|
9
|
+
@handle = data
|
10
|
+
return
|
11
|
+
end
|
12
|
+
|
13
|
+
if matrix?(data)
|
14
|
+
nrow = data.row_count
|
15
|
+
ncol = data.column_count
|
16
|
+
flat_data = data.to_a.flatten
|
17
|
+
elsif daru?(data)
|
18
|
+
nrow, ncol = data.shape
|
19
|
+
flat_data = data.map_rows(&:to_a).flatten
|
20
|
+
feature_names = data.each_vector.map(&:name)
|
21
|
+
feature_types =
|
22
|
+
data.each_vector.map(&:db_type).map do |v|
|
23
|
+
case v
|
24
|
+
when "INTEGER"
|
25
|
+
"int"
|
26
|
+
when "DOUBLE"
|
27
|
+
"float"
|
28
|
+
else
|
29
|
+
raise Error, "Unknown feature type: #{v}"
|
29
30
|
end
|
30
|
-
elsif numo?(data)
|
31
|
-
nrow, ncol = data.shape
|
32
|
-
elsif rover?(data)
|
33
|
-
nrow, ncol = data.shape
|
34
|
-
@feature_names = data.keys
|
35
|
-
data = data.to_numo
|
36
|
-
else
|
37
|
-
nrow = data.count
|
38
|
-
ncol = data.first.count
|
39
|
-
if !data.all? { |r| r.size == ncol }
|
40
|
-
# TODO raise ArgumentError in 0.8.0
|
41
|
-
raise IndexError, "Rows have different sizes"
|
42
31
|
end
|
43
|
-
|
32
|
+
elsif numo?(data)
|
33
|
+
nrow, ncol = data.shape
|
34
|
+
elsif rover?(data)
|
35
|
+
nrow, ncol = data.shape
|
36
|
+
feature_names = data.keys
|
37
|
+
data = data.to_numo
|
38
|
+
else
|
39
|
+
nrow = data.count
|
40
|
+
ncol = data.first.count
|
41
|
+
if !data.all? { |r| r.size == ncol }
|
42
|
+
raise ArgumentError, "Rows have different sizes"
|
44
43
|
end
|
44
|
+
flat_data = data.flatten
|
45
|
+
end
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
|
47
|
+
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
48
|
+
if numo?(data)
|
49
|
+
c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
|
50
|
+
else
|
51
|
+
handle_missing(flat_data, missing)
|
52
|
+
c_data.write_array_of_float(flat_data)
|
53
|
+
end
|
54
54
|
|
55
|
-
|
55
|
+
out = ::FFI::MemoryPointer.new(:pointer)
|
56
|
+
check_call FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, out)
|
57
|
+
@handle = ::FFI::AutoPointer.new(out.read_pointer, FFI.method(:XGDMatrixFree))
|
56
58
|
|
57
|
-
|
58
|
-
|
59
|
+
self.feature_names = feature_names || ncol.times.map { |i| "f#{i}" }
|
60
|
+
self.feature_types = feature_types if feature_types
|
59
61
|
|
60
62
|
self.label = label if label
|
61
63
|
self.weight = weight if weight
|
62
64
|
end
|
63
65
|
|
64
|
-
def
|
65
|
-
|
66
|
-
proc { FFI.XGDMatrixFree(::FFI::Pointer.new(:pointer, addr)) }
|
67
|
-
end
|
68
|
-
|
69
|
-
def label
|
70
|
-
float_info("label")
|
71
|
-
end
|
72
|
-
|
73
|
-
def weight
|
74
|
-
float_info("weight")
|
66
|
+
# Saves this matrix to +fname+ through the native XGDMatrixSaveBinary call.
#
# fname  - destination path handed to the native library
# silent - when truthy the native "silent" flag is sent as 1, otherwise 0
def save_binary(fname, silent: true)
  silent_flag = silent ? 1 : 0
  status = FFI.XGDMatrixSaveBinary(handle, fname, silent_flag)
  check_call status
end
|
76
69
|
|
77
70
|
def label=(label)
|
@@ -85,39 +78,146 @@ module XGBoost
|
|
85
78
|
# Stores +group+ under the "group" unsigned-int field of the matrix.
#
# group - collection of integers; it is copied into a native int buffer
def group=(group)
  count = group.size
  buffer = ::FFI::MemoryPointer.new(:int, count)
  buffer.write_array_of_int(group)
  status = FFI.XGDMatrixSetUIntInfo(handle, "group", buffer, count)
  check_call status
end
|
83
|
+
|
84
|
+
# Returns whatever float_info reads for the "label" field.
def label
  float_info "label"
end
|
87
|
+
|
88
|
+
# Returns whatever float_info reads for the "weight" field.
def weight
  float_info "weight"
end
|
90
91
|
|
91
92
|
# Returns the row count reported by XGDMatrixNumRow for this handle.
def num_row
  row_count = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumRow(handle, row_count)
  check_call status
  row_count.read_uint64
end
|
96
97
|
|
97
98
|
# Returns the column count reported by XGDMatrixNumCol for this handle.
def num_col
  column_count = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumCol(handle, column_count)
  check_call status
  column_count.read_uint64
end
|
103
|
+
|
104
|
+
# Returns the value reported by XGDMatrixNumNonMissing for this handle.
def num_nonmissing
  present_count = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixNumNonMissing(handle, present_count)
  check_call status
  present_count.read_uint64
end
|
109
|
+
|
110
|
+
# Returns :row when XGDMatrixDataSplitMode reports 0 and :col for any
# other value.
def data_split_mode
  mode = ::FFI::MemoryPointer.new(:uint64)
  status = FFI.XGDMatrixDataSplitMode(handle, mode)
  check_call status
  return :row if mode.read_uint64 == 0

  :col
end
|
102
115
|
|
103
116
|
# Builds a new DMatrix from the rows listed in +rindex+.
#
# rindex - collection of integer row indices, copied into a native int buffer
#
# The native handle returned by XGDMatrixSliceDMatrix is wrapped in an
# AutoPointer that releases it with XGDMatrixFree.
def slice(rindex)
  indices = ::FFI::MemoryPointer.new(:int, rindex.count)
  indices.write_array_of_int(rindex)

  sliced = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGDMatrixSliceDMatrix(handle, indices, rindex.size, sliced)
  check_call status

  DMatrix.new(::FFI::AutoPointer.new(sliced.read_pointer, FFI.method(:XGDMatrixFree)))
end
|
110
125
|
|
111
|
-
def
|
112
|
-
|
126
|
+
# Reads the "feature_name" string info from the matrix.
#
# Returns an array of strings, or nil when no names are stored.
def feature_names
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_name", count_ptr, strings_ptr)
  check_call(status)

  names = from_cstr_to_rbstr(strings_ptr, count_ptr)
  return nil if names.empty?

  names
end
|
114
140
|
|
115
|
-
def
|
116
|
-
|
141
|
+
# Sets or clears the "feature_name" string info of the matrix.
#
# feature_names - nil to clear the stored names, otherwise an array of
#                 unique strings
#
# Raises TypeError/ArgumentError from validate_feature_info, and
# ArgumentError when names repeat, are not strings, or contain "[", "]"
# or "<".
def feature_names=(feature_names)
  if feature_names.nil?
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", nil, 0))
    return
  end

  # Validate type and length first.
  feature_names = validate_feature_info(feature_names, num_col, data_split_mode == :col, "feature names")
  unless feature_names.uniq.length == feature_names.length
    raise ArgumentError, "feature_names must be unique"
  end

  # Prohibit the use of symbols that may affect parsing, e.g. [ ] <
  forbidden = ["[", "]", "<"]
  well_formed = feature_names.all? do |f|
    f.is_a?(String) && forbidden.none? { |x| f.include?(x) }
  end
  unless well_formed
    raise ArgumentError, "feature_names must be string, and may not contain [, ] or <"
  end

  name_pointers = feature_names.map { |f| string_pointer(f) }
  c_feature_names = array_of_pointers(name_pointers)
  status = FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_name", c_feature_names, feature_names.length)
  check_call(status)
end
|
178
|
+
|
179
|
+
# Reads the "feature_type" string info from the matrix.
#
# Returns an array of strings, or nil when no types are stored.
def feature_types
  count_ptr = ::FFI::MemoryPointer.new(:uint64)
  strings_ptr = ::FFI::MemoryPointer.new(:pointer)
  status = FFI.XGDMatrixGetStrFeatureInfo(handle, "feature_type", count_ptr, strings_ptr)
  check_call(status)

  types = from_cstr_to_rbstr(strings_ptr, count_ptr)
  return nil if types.empty?

  types
end
|
118
193
|
|
119
|
-
def
|
120
|
-
|
194
|
+
# Sets or clears the "feature_type" string info of the matrix.
#
# feature_types - nil to clear the stored types, otherwise an array that
#                 passes validate_feature_info
def feature_types=(feature_types)
  if feature_types.nil?
    check_call(FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", nil, 0))
    return
  end

  feature_types = validate_feature_info(feature_types, num_col, data_split_mode == :col, "feature types")

  type_pointers = feature_types.map { |f| string_pointer(f) }
  c_feature_types = array_of_pointers(type_pointers)
  status = FFI.XGDMatrixSetStrFeatureInfo(handle, "feature_type", c_feature_types, feature_types.length)
  check_call(status)
end
|
122
222
|
|
123
223
|
private
|
@@ -126,17 +226,31 @@ module XGBoost
|
|
126
226
|
data = data.to_a unless data.is_a?(Array)
|
127
227
|
c_data = ::FFI::MemoryPointer.new(:float, data.size)
|
128
228
|
c_data.write_array_of_float(data)
|
129
|
-
|
229
|
+
check_call FFI.XGDMatrixSetFloatInfo(handle, field.to_s, c_data, data.size)
|
130
230
|
end
|
131
231
|
|
132
232
|
# Reads a float field (e.g. "label" or "weight") from the matrix.
#
# field - name of the field, passed to XGDMatrixGetFloatInfo
#
# Returns an array of floats whose size is the length reported by the
# native call; an empty array when that length is 0.
def float_info(field)
  out_len = ::FFI::MemoryPointer.new(:uint64)
  # The native call writes a float* into this slot, so it must be
  # pointer-sized. Sizing it as num_row floats is too small when the matrix
  # has 0 or 1 rows.
  out_dptr = ::FFI::MemoryPointer.new(:pointer)
  check_call FFI.XGDMatrixGetFloatInfo(handle, field, out_len, out_dptr)

  # Trust the reported length rather than num_row: the field may be unset
  # or may have a different length than the number of rows.
  length = out_len.read_uint64
  return [] if length.zero?

  out_dptr.read_pointer.read_array_of_float(length)
end
|
139
239
|
|
240
|
+
# Checks that +feature_info+ is an array whose length fits the matrix.
#
# feature_info    - value to validate
# n_features      - expected length; 0 disables the length check
# is_column_split - when truthy the length check is skipped
# name            - label used in error messages
#
# Returns feature_info unchanged.
# Raises TypeError when feature_info is not an Array, ArgumentError when
# its length does not match.
def validate_feature_info(feature_info, n_features, is_column_split, name)
  unless feature_info.is_a?(Array)
    raise TypeError, "Expecting an array of strings for #{name}, got: #{feature_info.class.name}"
  end

  actual = feature_info.length
  length_ok = actual == n_features || n_features == 0 || is_column_split
  return feature_info if length_ok

  raise ArgumentError,
    "#{name} must have the same length as the number of data columns, " \
    "expected #{n_features}, got #{actual}"
end
|
253
|
+
|
140
254
|
# Tells whether +data+ is a Matrix. Returns nil when the Matrix constant
# is not defined, otherwise true or false.
def matrix?(data)
  return unless defined?(Matrix)

  data.is_a?(Matrix)
end
|
@@ -156,7 +270,5 @@ module XGBoost
|
|
156
270
|
# Replaces every nil entry of +data+ with +missing+, in place.
#
# Returns the same (mutated) collection.
def handle_missing(data, missing)
  data.map! do |entry|
    next missing if entry.nil?

    entry
  end
end
|
159
|
-
|
160
|
-
include Utils
|
161
273
|
end
|
162
274
|
end
|
@@ -0,0 +1,132 @@
|
|
1
|
+
module XGBoost
  # Training callback that asks training to stop once the monitored
  # evaluation score has not improved for +rounds+ consecutive iterations.
  class EarlyStopping < TrainingCallback
    # Metric-name prefixes for which a larger score is better. Only consulted
    # when +maximize+ was not given. "mape" is excluded explicitly because it
    # starts with "map" but is an error measure.
    MAXIMIZE_METRICS = %w[auc aucpr pre pre@ map ndcg auc@ aucpr@ map@ ndcg@].freeze

    # rounds      - number of consecutive non-improving iterations tolerated
    # metric_name - metric to monitor; nil uses the last metric in the log
    # data_name   - evaluation set to monitor; nil uses the last set in the log
    # maximize    - true if larger scores are better, false if smaller are;
    #               nil infers the direction from the metric name
    # save_best   - when true, after_training re-applies the best
    #               iteration/score on the model
    # min_delta   - minimum absolute change that counts as an improvement
    #
    # Raises ArgumentError when min_delta is negative.
    def initialize(
      rounds:,
      metric_name: nil,
      data_name: nil,
      maximize: nil,
      save_best: false,
      min_delta: 0.0
    )
      if min_delta < 0
        raise ArgumentError, "min_delta must be greater or equal to 0."
      end

      @data = data_name
      @metric_name = metric_name
      @rounds = rounds
      @save_best = save_best
      @maximize = maximize
      @min_delta = min_delta
      @stopping_history = {}
      @best_scores = {}
      @current_rounds = 0
      @starting_round = 0
      super()
    end

    # Remembers how many rounds the model already has so that reported
    # iterations are offset by it. Returns the model.
    def before_training(model)
      @starting_round = model.num_boosted_rounds
      model
    end

    # Looks up the latest score of the monitored data set and metric in
    # +evals_log+ and updates the stopping state.
    #
    # Returns true when training should stop.
    # Raises ArgumentError when the log is empty or lacks the requested data
    # set or metric, TypeError when the data set name is not a String.
    def after_iteration(model, epoch, evals_log)
      epoch += @starting_round
      if evals_log.keys.empty?
        raise ArgumentError, "Must have at least 1 validation dataset for early stopping."
      end

      # Use the last data set as default.
      data_name = @data || evals_log.keys.last
      unless evals_log.include?(data_name)
        raise ArgumentError, "No dataset named: #{data_name}"
      end
      unless data_name.is_a?(String)
        raise TypeError, "The name of the dataset should be a string. Got: #{data_name.class.name}"
      end
      data_log = evals_log[data_name]

      # Use the last metric as default.
      metric_name = @metric_name || data_log.keys.last
      unless data_log.include?(metric_name)
        raise ArgumentError, "No metric named: #{metric_name}"
      end

      latest_score = data_log[metric_name][-1]
      update_rounds(latest_score, data_name, metric_name, model, epoch)
    end

    # Returns the model. With save_best enabled the model's best_iteration
    # and best_score are read and written back; the model itself is not
    # sliced.
    def after_training(model)
      return model unless @save_best

      best_iteration = model.best_iteration
      best_score = model.best_score
      # model = model[..(best_iteration + 1)]
      model.best_iteration = best_iteration
      model.best_score = best_score
      model
    end

    private

    # Records +score+ and reports whether the patience budget is used up.
    def update_rounds(score, name, metric, model, epoch)
      # Without an explicit direction, always minimizing would stop on the
      # wrong signal for larger-is-better metrics such as auc or ndcg.
      @maximize = maximize_metric?(metric) if @maximize.nil?

      if @stopping_history.empty?
        # First round
        @current_rounds = 0
        @stopping_history[name] = {metric => [score]}
        @best_scores[name] = {metric => [score]}
        model.set_attr(best_score: scalar(score), best_iteration: epoch)
      elsif improved?(score, @best_scores[name][metric][-1])
        @stopping_history[name][metric] << score
        @best_scores[name][metric] << score
        model.set_attr(best_score: scalar(score), best_iteration: epoch)
        @current_rounds = 0
      else
        @stopping_history[name][metric] << score
        @current_rounds += 1
      end

      @current_rounds >= @rounds
    end

    # True when +metric+ is one for which larger values are better.
    def maximize_metric?(metric)
      metric_name = metric.to_s
      metric_name != "mape" && MAXIMIZE_METRICS.any? { |prefix| metric_name.start_with?(prefix) }
    end

    # A logged score may be a plain number or an array whose first element
    # is the score.
    def scalar(value)
      value.is_a?(Array) ? value[0] : value
    end

    # True when +candidate+ beats +best+ by more than min_delta in the
    # configured direction.
    def improved?(candidate, best)
      if @maximize
        scalar(candidate) - @min_delta > scalar(best)
      else
        scalar(best) - @min_delta > scalar(candidate)
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module XGBoost
  # Training callback that prints the most recent evaluation scores every
  # +period+ iterations.
  class EvaluationMonitor < TrainingCallback
    # period    - print when the iteration number is a multiple of this value
    # show_stdv - when true, also print the second element of array-valued
    #             scores
    def initialize(period:, show_stdv: false)
      @show_stdv = show_stdv
      @period = period
      super()
    end

    # Prints one line with the latest score of every data set/metric pair in
    # +evals_log+ when +epoch+ is a multiple of the period.
    #
    # Always returns false, i.e. never asks training to stop.
    def after_iteration(model, epoch, evals_log)
      return false if evals_log.empty?

      # Only format the scores on iterations that are actually reported.
      puts build_message(epoch, evals_log) if epoch % @period == 0
      false
    end

    private

    # Assembles "[epoch]\tdata-metric:score..." followed by a newline.
    def build_message(epoch, evals_log)
      parts = ["[#{epoch}]"]
      evals_log.each do |data, metric|
        metric.each do |metric_name, log|
          latest = log[-1]
          # Array-valued entries carry the score first, the deviation second.
          if latest.is_a?(Array)
            score = latest[0]
            stdv = latest[1]
          else
            score = latest
            stdv = nil
          end
          parts << fmt_metric(data, metric_name, score, stdv)
        end
      end
      parts.join + "\n"
    end

    # Formats a single score, appending "+std" only when a deviation is
    # present and show_stdv is enabled.
    def fmt_metric(data, metric, score, std)
      if !std.nil? && @show_stdv
        format("\t%s:%.5f+%.5f", "#{data}-#{metric}", score, std)
      else
        format("\t%s:%.5f", "#{data}-#{metric}", score)
      end
    end
  end
end
|
data/lib/xgboost/ffi.rb
CHANGED
@@ -22,8 +22,12 @@ module XGBoost
|
|
22
22
|
# dmatrix
|
23
23
|
attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
|
24
24
|
attach_function :XGDMatrixSetUIntInfo, %i[pointer string pointer uint64], :int
|
25
|
+
attach_function :XGDMatrixSetStrFeatureInfo, %i[pointer string pointer uint64], :int
|
26
|
+
attach_function :XGDMatrixGetStrFeatureInfo, %i[pointer string pointer pointer], :int
|
25
27
|
attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
|
26
28
|
attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
|
29
|
+
attach_function :XGDMatrixNumNonMissing, %i[pointer pointer], :int
|
30
|
+
attach_function :XGDMatrixDataSplitMode, %i[pointer pointer], :int
|
27
31
|
attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
|
28
32
|
attach_function :XGDMatrixFree, %i[pointer], :int
|
29
33
|
attach_function :XGDMatrixSaveBinary, %i[pointer string int], :int
|
@@ -35,13 +39,18 @@ module XGBoost
|
|
35
39
|
attach_function :XGBoosterUpdateOneIter, %i[pointer int pointer], :int
|
36
40
|
attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
|
37
41
|
attach_function :XGBoosterFree, %i[pointer], :int
|
42
|
+
attach_function :XGBoosterBoostedRounds, %i[pointer pointer], :int
|
38
43
|
attach_function :XGBoosterSetParam, %i[pointer string string], :int
|
44
|
+
attach_function :XGBoosterGetNumFeature, %i[pointer pointer], :int
|
39
45
|
attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
|
40
46
|
attach_function :XGBoosterLoadModel, %i[pointer string], :int
|
41
47
|
attach_function :XGBoosterSaveModel, %i[pointer string], :int
|
48
|
+
attach_function :XGBoosterSaveJsonConfig, %i[pointer pointer pointer], :int
|
42
49
|
attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
|
43
|
-
attach_function :XGBoosterGetAttr, %i[pointer
|
44
|
-
attach_function :XGBoosterSetAttr, %i[pointer
|
50
|
+
attach_function :XGBoosterGetAttr, %i[pointer string pointer pointer], :int
|
51
|
+
attach_function :XGBoosterSetAttr, %i[pointer string string], :int
|
45
52
|
attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
|
53
|
+
attach_function :XGBoosterSetStrFeatureInfo, %i[pointer string pointer uint64], :int
|
54
|
+
attach_function :XGBoosterGetStrFeatureInfo, %i[pointer string pointer pointer], :int
|
46
55
|
end
|
47
56
|
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
module XGBoost
  # Presents a collection of cross-validation folds through one
  # booster-like object: writes fan out to every fold, reads come from the
  # first fold.
  class PackedBooster
    # cvfolds - indexable, enumerable collection of fold objects
    def initialize(cvfolds)
      @cvfolds = cvfolds
    end

    # Calls update(iteration) on every fold.
    def update(iteration)
      @cvfolds.each { |fold| fold.update(iteration) }
    end

    # Forwards the attributes to the booster of every fold.
    def set_attr(**kwargs)
      @cvfolds.each { |fold| fold.bst.set_attr(**kwargs) }
    end

    # Reads attribute +key+ from the first fold's booster.
    def attr(key)
      first_booster.attr(key)
    end

    # Returns one eval_set(iteration) result per fold.
    def eval_set(iteration)
      @cvfolds.map { |fold| fold.eval_set(iteration) }
    end

    # Best iteration as stored on the first fold's booster.
    def best_iteration
      first_booster.best_iteration
    end

    # Assigns the best iteration on every fold.
    def best_iteration=(iteration)
      @cvfolds.each { |fold| fold.best_iteration = iteration }
    end

    # Best score as stored on the first fold's booster.
    def best_score
      first_booster.best_score
    end

    # Assigns the best score on every fold.
    def best_score=(score)
      @cvfolds.each { |fold| fold.best_score = score }
    end

    # Number of boosted rounds reported by the first fold.
    def num_boosted_rounds
      @cvfolds[0].num_boosted_rounds
    end

    private

    # Booster of the first fold; the source for all read accessors.
    def first_booster
      @cvfolds[0].bst
    end
  end
end
|
data/lib/xgboost/ranker.rb
CHANGED
@@ -0,0 +1,23 @@
|
|
1
|
+
module XGBoost
  # Base class for training callbacks. Every hook is a no-op here;
  # subclasses override the ones they need.
  class TrainingCallback
    # Hook run before training starts. Returns the model it was given.
    def before_training(model)
      model
    end

    # Hook run after training is finished. Returns the model it was given.
    def after_training(model)
      model
    end

    # Hook run before each iteration. Returns true when training should
    # stop; this default never stops.
    def before_iteration(model, epoch, evals_log)
      false
    end

    # Hook run after each iteration. Returns true when training should
    # stop; this default never stops.
    def after_iteration(model, epoch, evals_log)
      false
    end
  end
end
|
data/lib/xgboost/utils.rb
CHANGED
@@ -2,7 +2,7 @@ module XGBoost
|
|
2
2
|
module Utils
|
3
3
|
private
|
4
4
|
|
5
|
-
def
|
5
|
+
def check_call(err)
|
6
6
|
if err != 0
|
7
7
|
# make friendly
|
8
8
|
message = FFI.XGBGetLastError.split("\n").first.split(/:\d+: /, 2).last
|
@@ -10,9 +10,24 @@ module XGBoost
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
# Copies +values+ (a collection of pointers) into a native pointer array.
#
# Returns the MemoryPointer holding the array. +values+ is stored on it in
# an instance variable so the pointed-to string buffers stay referenced.
def array_of_pointers(values)
  ::FFI::MemoryPointer.new(:pointer, values.size).tap do |table|
    table.write_array_of_pointer(values)
    # keep reference for string pointers
    table.instance_variable_set(:@xgboost_ref, values)
  end
end
|
20
|
+
|
21
|
+
# Converts +value+ to a string and copies it into a new native buffer via
# MemoryPointer.from_string.
def string_pointer(value)
  text = value.to_s
  ::FFI::MemoryPointer.from_string(text)
end
|
24
|
+
|
25
|
+
# Converts a native array of C strings into Ruby strings.
#
# data   - pointer to a slot holding the address of the char* array
# length - pointer to a uint64 holding the number of entries
#
# Returns an array of strings tagged as UTF-8.
def from_cstr_to_rbstr(data, length)
  Array.new(length.read_uint64) do |index|
    # Each entry sits one pointer-width after the previous one.
    entry = data.read_pointer[index * ::FFI::Pointer.size].read_pointer
    entry.read_string.force_encoding(Encoding::UTF_8)
  end
end
|
17
32
|
end
|
18
33
|
end
|
data/lib/xgboost/version.rb
CHANGED