lightgbm 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +11 -17
- data/lib/lightgbm.rb +13 -0
- data/lib/lightgbm/dataset.rb +81 -48
- data/lib/lightgbm/ffi.rb +4 -4
- data/lib/lightgbm/version.rb +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3b86686bc4575d069e469fb9c8911e93c7dcfb7622a415dc3e7ebee301947c3
|
4
|
+
data.tar.gz: a186cca11d4838fd13573b14ea154fff174804b31ae45236130c30a24f55ff7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32ef8f452075bcf4441b8e0fcf68e63332c42da61445301ecd0a85b51b2af1b7a730da0810e1b182ff5fdfe184884242a5489469d16e0e9a717d19bb145eb095
|
7
|
+
data.tar.gz: 41b76c5ac174b75ce4e4f21477e4d32a9607b550e70448dde2797a765813d08a6c75de2f7f6d04ea79b4e9fa8afd768d097be516781e27e4eee4f982af364dc0
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
[LightGBM](https://github.com/microsoft/LightGBM) -
|
4
|
-
|
5
|
-
:fire: Uses the C API for blazing performance
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
|
6
4
|
|
7
5
|
[Build Status](https://travis-ci.org/ankane/lightgbm)
|
8
6
|
|
@@ -20,16 +18,6 @@ On Mac, also install OpenMP:
|
|
20
18
|
brew install libomp
|
21
19
|
```
|
22
20
|
|
23
|
-
## Getting Started
|
24
|
-
|
25
|
-
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
26
|
-
|
27
|
-
- The `get_` and `set_` prefixes are removed from methods
|
28
|
-
- The default verbosity is `-1`
|
29
|
-
- With the `cv` method, `stratified` is set to `false`
|
30
|
-
|
31
|
-
Some methods and options are also missing at the moment. PRs welcome!
|
32
|
-
|
33
21
|
## Training API
|
34
22
|
|
35
23
|
Prep your data
|
@@ -160,12 +148,18 @@ Numo::DFloat.new(3, 2).seq
|
|
160
148
|
|
161
149
|
## Related Projects
|
162
150
|
|
163
|
-
- [
|
164
|
-
- [Eps](https://github.com/ankane/eps) - Machine
|
151
|
+
- [XGBoost](https://github.com/ankane/xgboost) - XGBoost for Ruby
|
152
|
+
- [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
|
165
153
|
|
166
154
|
## Credits
|
167
155
|
|
168
|
-
|
156
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
157
|
+
|
158
|
+
- The `get_` and `set_` prefixes are removed from methods
|
159
|
+
- The default verbosity is `-1`
|
160
|
+
- With the `cv` method, `stratified` is set to `false`
|
161
|
+
|
162
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
|
169
163
|
|
170
164
|
## History
|
171
165
|
|
@@ -180,7 +174,7 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
180
174
|
- Write, clarify, or fix documentation
|
181
175
|
- Suggest or add new features
|
182
176
|
|
183
|
-
To get started with development
|
177
|
+
To get started with development:
|
184
178
|
|
185
179
|
```sh
|
186
180
|
git clone https://github.com/ankane/lightgbm.git
|
data/lib/lightgbm.rb
CHANGED
@@ -36,6 +36,8 @@ module LightGBM
|
|
36
36
|
booster.train_data_name = name || "training"
|
37
37
|
valid_contain_train = true
|
38
38
|
else
|
39
|
+
# ensure the validation set references the training set
|
40
|
+
data.reference = train_set
|
39
41
|
booster.add_valid(data, name || "valid_#{i}")
|
40
42
|
end
|
41
43
|
end
|
@@ -133,6 +135,7 @@ module LightGBM
|
|
133
135
|
if early_stopping_rounds
|
134
136
|
best_score = {}
|
135
137
|
best_iter = {}
|
138
|
+
best_iteration = nil
|
136
139
|
end
|
137
140
|
|
138
141
|
num_boost_round.times do |iteration|
|
@@ -172,6 +175,7 @@ module LightGBM
|
|
172
175
|
best_score[k] = score
|
173
176
|
best_iter[k] = iteration
|
174
177
|
elsif iteration - best_iter[k] >= early_stopping_rounds
|
178
|
+
best_iteration = best_iter[k]
|
175
179
|
stop_early = true
|
176
180
|
break
|
177
181
|
end
|
@@ -180,6 +184,15 @@ module LightGBM
|
|
180
184
|
end
|
181
185
|
end
|
182
186
|
|
187
|
+
if early_stopping_rounds
|
188
|
+
# use best iteration from first metric if not stopped early
|
189
|
+
best_iteration ||= best_iter[best_iter.keys.first]
|
190
|
+
eval_hist.each_key do |k|
|
191
|
+
# TODO uncomment for 0.2.0
|
192
|
+
# eval_hist[k] = eval_hist[k].first(best_iteration + 1)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
183
196
|
eval_hist
|
184
197
|
end
|
185
198
|
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -4,51 +4,16 @@ module LightGBM
|
|
4
4
|
|
5
5
|
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
|
+
@label = label
|
8
|
+
@weight = weight
|
9
|
+
@group = group
|
10
|
+
@params = params
|
11
|
+
@reference = reference
|
12
|
+
@used_indices = used_indices
|
13
|
+
@categorical_feature = categorical_feature
|
14
|
+
@feature_names = feature_names
|
7
15
|
|
8
|
-
|
9
|
-
params ||= {}
|
10
|
-
if categorical_feature != "auto" && categorical_feature.any?
|
11
|
-
params["categorical_feature"] ||= categorical_feature.join(",")
|
12
|
-
end
|
13
|
-
set_verbosity(params)
|
14
|
-
|
15
|
-
@handle = ::FFI::MemoryPointer.new(:pointer)
|
16
|
-
parameters = params_str(params)
|
17
|
-
reference = reference.handle_pointer if reference
|
18
|
-
if used_indices
|
19
|
-
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
20
|
-
used_row_indices.write_array_of_int32(used_indices)
|
21
|
-
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
22
|
-
elsif data.is_a?(String)
|
23
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
24
|
-
else
|
25
|
-
if matrix?(data)
|
26
|
-
nrow = data.row_count
|
27
|
-
ncol = data.column_count
|
28
|
-
flat_data = data.to_a.flatten
|
29
|
-
elsif daru?(data)
|
30
|
-
nrow, ncol = data.shape
|
31
|
-
flat_data = data.map_rows(&:to_a).flatten
|
32
|
-
elsif narray?(data)
|
33
|
-
nrow, ncol = data.shape
|
34
|
-
flat_data = data.flatten.to_a
|
35
|
-
else
|
36
|
-
nrow = data.count
|
37
|
-
ncol = data.first.count
|
38
|
-
flat_data = data.flatten
|
39
|
-
end
|
40
|
-
|
41
|
-
handle_missing(flat_data)
|
42
|
-
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
43
|
-
c_data.write_array_of_double(flat_data)
|
44
|
-
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
45
|
-
end
|
46
|
-
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
47
|
-
|
48
|
-
self.label = label if label
|
49
|
-
self.weight = weight if weight
|
50
|
-
self.group = group if group
|
51
|
-
self.feature_names = feature_names if feature_names
|
16
|
+
construct
|
52
17
|
end
|
53
18
|
|
54
19
|
def label
|
@@ -59,10 +24,6 @@ module LightGBM
|
|
59
24
|
field("weight")
|
60
25
|
end
|
61
26
|
|
62
|
-
def label=(label)
|
63
|
-
set_field("label", label)
|
64
|
-
end
|
65
|
-
|
66
27
|
def feature_names
|
67
28
|
# must preallocate space
|
68
29
|
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
@@ -73,20 +34,37 @@ module LightGBM
|
|
73
34
|
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
74
35
|
end
|
75
36
|
|
37
|
+
def label=(label)
|
38
|
+
@label = label
|
39
|
+
set_field("label", label)
|
40
|
+
end
|
41
|
+
|
76
42
|
def weight=(weight)
|
43
|
+
@weight = weight
|
77
44
|
set_field("weight", weight)
|
78
45
|
end
|
79
46
|
|
80
47
|
def group=(group)
|
48
|
+
@group = group
|
81
49
|
set_field("group", group, type: :int32)
|
82
50
|
end
|
83
51
|
|
84
52
|
def feature_names=(feature_names)
|
53
|
+
@feature_names = feature_names
|
85
54
|
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
86
55
|
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
87
56
|
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
88
57
|
end
|
89
58
|
|
59
|
+
# TODO only update reference if not in chain
|
60
|
+
def reference=(reference)
|
61
|
+
if reference != @reference
|
62
|
+
@reference = reference
|
63
|
+
free_handle
|
64
|
+
construct
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
90
68
|
def num_data
|
91
69
|
out = ::FFI::MemoryPointer.new(:int)
|
92
70
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -124,6 +102,61 @@ module LightGBM
|
|
124
102
|
|
125
103
|
private
|
126
104
|
|
105
|
+
def construct
|
106
|
+
data = @data
|
107
|
+
used_indices = @used_indices
|
108
|
+
|
109
|
+
# TODO stringify params
|
110
|
+
params = @params || {}
|
111
|
+
if @categorical_feature != "auto" && @categorical_feature.any?
|
112
|
+
params["categorical_feature"] ||= @categorical_feature.join(",")
|
113
|
+
end
|
114
|
+
set_verbosity(params)
|
115
|
+
|
116
|
+
@handle = ::FFI::MemoryPointer.new(:pointer)
|
117
|
+
parameters = params_str(params)
|
118
|
+
reference = @reference.handle_pointer if @reference
|
119
|
+
if used_indices
|
120
|
+
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
121
|
+
used_row_indices.write_array_of_int32(used_indices)
|
122
|
+
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
123
|
+
elsif data.is_a?(String)
|
124
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
125
|
+
else
|
126
|
+
if matrix?(data)
|
127
|
+
nrow = data.row_count
|
128
|
+
ncol = data.column_count
|
129
|
+
flat_data = data.to_a.flatten
|
130
|
+
elsif daru?(data)
|
131
|
+
nrow, ncol = data.shape
|
132
|
+
flat_data = data.map_rows(&:to_a).flatten
|
133
|
+
elsif narray?(data)
|
134
|
+
nrow, ncol = data.shape
|
135
|
+
flat_data = data.flatten.to_a
|
136
|
+
else
|
137
|
+
nrow = data.count
|
138
|
+
ncol = data.first.count
|
139
|
+
flat_data = data.flatten
|
140
|
+
end
|
141
|
+
|
142
|
+
handle_missing(flat_data)
|
143
|
+
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
144
|
+
c_data.write_array_of_double(flat_data)
|
145
|
+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
146
|
+
end
|
147
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
148
|
+
|
149
|
+
self.label = @label if @label
|
150
|
+
self.weight = @weight if @weight
|
151
|
+
self.group = @group if @group
|
152
|
+
self.feature_names = @feature_names if @feature_names
|
153
|
+
end
|
154
|
+
|
155
|
+
def free_handle
|
156
|
+
FFI.LGBM_DatasetFree(handle_pointer)
|
157
|
+
ObjectSpace.undefine_finalizer(self)
|
158
|
+
end
|
159
|
+
|
127
160
|
def dump_text(filename)
|
128
161
|
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
129
162
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -5,11 +5,11 @@ module LightGBM
|
|
5
5
|
begin
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
|
-
|
9
|
-
|
10
|
-
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
11
12
|
end
|
12
|
-
raise LoadError, "Could not find LightGBM"
|
13
13
|
end
|
14
14
|
|
15
15
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.7
|
4
|
+
version: 0.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-05-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -122,8 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
124
|
requirements: []
|
125
|
-
rubygems_version: 3.
|
125
|
+
rubygems_version: 3.1.2
|
126
126
|
signing_key:
|
127
127
|
specification_version: 4
|
128
|
-
summary:
|
128
|
+
summary: High performance gradient boosting for Ruby
|
129
129
|
test_files: []
|