lightgbm 0.1.7 → 0.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +11 -17
- data/lib/lightgbm.rb +13 -0
- data/lib/lightgbm/dataset.rb +81 -48
- data/lib/lightgbm/ffi.rb +4 -4
- data/lib/lightgbm/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3b86686bc4575d069e469fb9c8911e93c7dcfb7622a415dc3e7ebee301947c3
|
4
|
+
data.tar.gz: a186cca11d4838fd13573b14ea154fff174804b31ae45236130c30a24f55ff7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32ef8f452075bcf4441b8e0fcf68e63332c42da61445301ecd0a85b51b2af1b7a730da0810e1b182ff5fdfe184884242a5489469d16e0e9a717d19bb145eb095
|
7
|
+
data.tar.gz: 41b76c5ac174b75ce4e4f21477e4d32a9607b550e70448dde2797a765813d08a6c75de2f7f6d04ea79b4e9fa8afd768d097be516781e27e4eee4f982af364dc0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
[LightGBM](https://github.com/microsoft/LightGBM) -
|
4
|
-
|
5
|
-
:fire: Uses the C API for blazing performance
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
|
6
4
|
|
7
5
|
[![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
|
8
6
|
|
@@ -20,16 +18,6 @@ On Mac, also install OpenMP:
|
|
20
18
|
brew install libomp
|
21
19
|
```
|
22
20
|
|
23
|
-
## Getting Started
|
24
|
-
|
25
|
-
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
26
|
-
|
27
|
-
- The `get_` and `set_` prefixes are removed from methods
|
28
|
-
- The default verbosity is `-1`
|
29
|
-
- With the `cv` method, `stratified` is set to `false`
|
30
|
-
|
31
|
-
Some methods and options are also missing at the moment. PRs welcome!
|
32
|
-
|
33
21
|
## Training API
|
34
22
|
|
35
23
|
Prep your data
|
@@ -160,12 +148,18 @@ Numo::DFloat.new(3, 2).seq
|
|
160
148
|
|
161
149
|
## Related Projects
|
162
150
|
|
163
|
-
- [
|
164
|
-
- [Eps](https://github.com/ankane/eps) - Machine
|
151
|
+
- [XGBoost](https://github.com/ankane/xgboost) - XGBoost for Ruby
|
152
|
+
- [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
|
165
153
|
|
166
154
|
## Credits
|
167
155
|
|
168
|
-
|
156
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
157
|
+
|
158
|
+
- The `get_` and `set_` prefixes are removed from methods
|
159
|
+
- The default verbosity is `-1`
|
160
|
+
- With the `cv` method, `stratified` is set to `false`
|
161
|
+
|
162
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
|
169
163
|
|
170
164
|
## History
|
171
165
|
|
@@ -180,7 +174,7 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
180
174
|
- Write, clarify, or fix documentation
|
181
175
|
- Suggest or add new features
|
182
176
|
|
183
|
-
To get started with development
|
177
|
+
To get started with development:
|
184
178
|
|
185
179
|
```sh
|
186
180
|
git clone https://github.com/ankane/lightgbm.git
|
data/lib/lightgbm.rb
CHANGED
@@ -36,6 +36,8 @@ module LightGBM
|
|
36
36
|
booster.train_data_name = name || "training"
|
37
37
|
valid_contain_train = true
|
38
38
|
else
|
39
|
+
# ensure the validation set references the training set
|
40
|
+
data.reference = train_set
|
39
41
|
booster.add_valid(data, name || "valid_#{i}")
|
40
42
|
end
|
41
43
|
end
|
@@ -133,6 +135,7 @@ module LightGBM
|
|
133
135
|
if early_stopping_rounds
|
134
136
|
best_score = {}
|
135
137
|
best_iter = {}
|
138
|
+
best_iteration = nil
|
136
139
|
end
|
137
140
|
|
138
141
|
num_boost_round.times do |iteration|
|
@@ -172,6 +175,7 @@ module LightGBM
|
|
172
175
|
best_score[k] = score
|
173
176
|
best_iter[k] = iteration
|
174
177
|
elsif iteration - best_iter[k] >= early_stopping_rounds
|
178
|
+
best_iteration = best_iter[k]
|
175
179
|
stop_early = true
|
176
180
|
break
|
177
181
|
end
|
@@ -180,6 +184,15 @@ module LightGBM
|
|
180
184
|
end
|
181
185
|
end
|
182
186
|
|
187
|
+
if early_stopping_rounds
|
188
|
+
# use best iteration from first metric if not stopped early
|
189
|
+
best_iteration ||= best_iter[best_iter.keys.first]
|
190
|
+
eval_hist.each_key do |k|
|
191
|
+
# TODO uncomment for 0.2.0
|
192
|
+
# eval_hist[k] = eval_hist[k].first(best_iteration + 1)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
183
196
|
eval_hist
|
184
197
|
end
|
185
198
|
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -4,51 +4,16 @@ module LightGBM
|
|
4
4
|
|
5
5
|
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
|
+
@label = label
|
8
|
+
@weight = weight
|
9
|
+
@group = group
|
10
|
+
@params = params
|
11
|
+
@reference = reference
|
12
|
+
@used_indices = used_indices
|
13
|
+
@categorical_feature = categorical_feature
|
14
|
+
@feature_names = feature_names
|
7
15
|
|
8
|
-
|
9
|
-
params ||= {}
|
10
|
-
if categorical_feature != "auto" && categorical_feature.any?
|
11
|
-
params["categorical_feature"] ||= categorical_feature.join(",")
|
12
|
-
end
|
13
|
-
set_verbosity(params)
|
14
|
-
|
15
|
-
@handle = ::FFI::MemoryPointer.new(:pointer)
|
16
|
-
parameters = params_str(params)
|
17
|
-
reference = reference.handle_pointer if reference
|
18
|
-
if used_indices
|
19
|
-
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
20
|
-
used_row_indices.write_array_of_int32(used_indices)
|
21
|
-
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
22
|
-
elsif data.is_a?(String)
|
23
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
24
|
-
else
|
25
|
-
if matrix?(data)
|
26
|
-
nrow = data.row_count
|
27
|
-
ncol = data.column_count
|
28
|
-
flat_data = data.to_a.flatten
|
29
|
-
elsif daru?(data)
|
30
|
-
nrow, ncol = data.shape
|
31
|
-
flat_data = data.map_rows(&:to_a).flatten
|
32
|
-
elsif narray?(data)
|
33
|
-
nrow, ncol = data.shape
|
34
|
-
flat_data = data.flatten.to_a
|
35
|
-
else
|
36
|
-
nrow = data.count
|
37
|
-
ncol = data.first.count
|
38
|
-
flat_data = data.flatten
|
39
|
-
end
|
40
|
-
|
41
|
-
handle_missing(flat_data)
|
42
|
-
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
43
|
-
c_data.write_array_of_double(flat_data)
|
44
|
-
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
45
|
-
end
|
46
|
-
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
47
|
-
|
48
|
-
self.label = label if label
|
49
|
-
self.weight = weight if weight
|
50
|
-
self.group = group if group
|
51
|
-
self.feature_names = feature_names if feature_names
|
16
|
+
construct
|
52
17
|
end
|
53
18
|
|
54
19
|
def label
|
@@ -59,10 +24,6 @@ module LightGBM
|
|
59
24
|
field("weight")
|
60
25
|
end
|
61
26
|
|
62
|
-
def label=(label)
|
63
|
-
set_field("label", label)
|
64
|
-
end
|
65
|
-
|
66
27
|
def feature_names
|
67
28
|
# must preallocate space
|
68
29
|
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
@@ -73,20 +34,37 @@ module LightGBM
|
|
73
34
|
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
74
35
|
end
|
75
36
|
|
37
|
+
def label=(label)
|
38
|
+
@label = label
|
39
|
+
set_field("label", label)
|
40
|
+
end
|
41
|
+
|
76
42
|
def weight=(weight)
|
43
|
+
@weight = weight
|
77
44
|
set_field("weight", weight)
|
78
45
|
end
|
79
46
|
|
80
47
|
def group=(group)
|
48
|
+
@group = group
|
81
49
|
set_field("group", group, type: :int32)
|
82
50
|
end
|
83
51
|
|
84
52
|
def feature_names=(feature_names)
|
53
|
+
@feature_names = feature_names
|
85
54
|
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
86
55
|
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
87
56
|
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
88
57
|
end
|
89
58
|
|
59
|
+
# TODO only update reference if not in chain
|
60
|
+
def reference=(reference)
|
61
|
+
if reference != @reference
|
62
|
+
@reference = reference
|
63
|
+
free_handle
|
64
|
+
construct
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
90
68
|
def num_data
|
91
69
|
out = ::FFI::MemoryPointer.new(:int)
|
92
70
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -124,6 +102,61 @@ module LightGBM
|
|
124
102
|
|
125
103
|
private
|
126
104
|
|
105
|
+
def construct
|
106
|
+
data = @data
|
107
|
+
used_indices = @used_indices
|
108
|
+
|
109
|
+
# TODO stringify params
|
110
|
+
params = @params || {}
|
111
|
+
if @categorical_feature != "auto" && @categorical_feature.any?
|
112
|
+
params["categorical_feature"] ||= @categorical_feature.join(",")
|
113
|
+
end
|
114
|
+
set_verbosity(params)
|
115
|
+
|
116
|
+
@handle = ::FFI::MemoryPointer.new(:pointer)
|
117
|
+
parameters = params_str(params)
|
118
|
+
reference = @reference.handle_pointer if @reference
|
119
|
+
if used_indices
|
120
|
+
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
121
|
+
used_row_indices.write_array_of_int32(used_indices)
|
122
|
+
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
123
|
+
elsif data.is_a?(String)
|
124
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
125
|
+
else
|
126
|
+
if matrix?(data)
|
127
|
+
nrow = data.row_count
|
128
|
+
ncol = data.column_count
|
129
|
+
flat_data = data.to_a.flatten
|
130
|
+
elsif daru?(data)
|
131
|
+
nrow, ncol = data.shape
|
132
|
+
flat_data = data.map_rows(&:to_a).flatten
|
133
|
+
elsif narray?(data)
|
134
|
+
nrow, ncol = data.shape
|
135
|
+
flat_data = data.flatten.to_a
|
136
|
+
else
|
137
|
+
nrow = data.count
|
138
|
+
ncol = data.first.count
|
139
|
+
flat_data = data.flatten
|
140
|
+
end
|
141
|
+
|
142
|
+
handle_missing(flat_data)
|
143
|
+
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
144
|
+
c_data.write_array_of_double(flat_data)
|
145
|
+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
146
|
+
end
|
147
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
148
|
+
|
149
|
+
self.label = @label if @label
|
150
|
+
self.weight = @weight if @weight
|
151
|
+
self.group = @group if @group
|
152
|
+
self.feature_names = @feature_names if @feature_names
|
153
|
+
end
|
154
|
+
|
155
|
+
def free_handle
|
156
|
+
FFI.LGBM_DatasetFree(handle_pointer)
|
157
|
+
ObjectSpace.undefine_finalizer(self)
|
158
|
+
end
|
159
|
+
|
127
160
|
def dump_text(filename)
|
128
161
|
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
129
162
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -5,11 +5,11 @@ module LightGBM
|
|
5
5
|
begin
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
11
12
|
end
|
12
|
-
raise LoadError, "Could not find LightGBM"
|
13
13
|
end
|
14
14
|
|
15
15
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -122,8 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
requirements: []
|
125
|
-
rubygems_version: 3.
|
125
|
+
rubygems_version: 3.1.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
|
-
summary:
|
128
|
+
summary: High performance gradient boosting for Ruby
|
129
129
|
test_files: []
|