lightgbm 0.1.4 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -5
- data/LICENSE.txt +22 -0
- data/README.md +42 -21
- data/lib/lightgbm.rb +17 -1
- data/lib/lightgbm/booster.rb +30 -18
- data/lib/lightgbm/classifier.rb +2 -2
- data/lib/lightgbm/dataset.rb +105 -60
- data/lib/lightgbm/ffi.rb +8 -3
- data/lib/lightgbm/regressor.rb +2 -2
- data/lib/lightgbm/utils.rb +21 -0
- data/lib/lightgbm/version.rb +1 -1
- data/vendor/LICENSE +21 -0
- data/vendor/lib_lightgbm.dll +0 -0
- data/vendor/lib_lightgbm.dylib +0 -0
- data/vendor/lib_lightgbm.so +0 -0
- metadata +9 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a4aac9eac1ab0dadbf31d1e1fc2714e75a8c37075538aef5c53b42df1c34f658
|
4
|
+
data.tar.gz: ca3a1043c55184992b3fac611963062d01747449f6170719ef1f299d4f0474c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2f14ccc3b40690060d2ee533cfe46e137f40e91520aab3eb188a8a03a697a6956853b717e743cd357d0a059836fc32e9e0aa0fbe5a7dd4263b1ff3e94e79601
|
7
|
+
data.tar.gz: 64abdb43f4c45222dcbb39ddde1d21350b2b4958439dc84d8f13684f4e9bcd3aa2c0e49aa8fd9ae1dfc09b8bc0ce69592e95aab0542684f3c952e9e67ac4689f
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,31 @@
|
|
1
|
-
## 0.1.4
|
1
|
+
## 0.1.9 (2020-06-10)
|
2
|
+
|
3
|
+
- Added support for Rover
|
4
|
+
- Improved performance of Numo datasets
|
5
|
+
|
6
|
+
## 0.1.8 (2020-05-09)
|
7
|
+
|
8
|
+
- Improved error message when OpenMP not found on Mac
|
9
|
+
- Fixed `Cannot add validation data` error
|
10
|
+
|
11
|
+
## 0.1.7 (2019-12-05)
|
12
|
+
|
13
|
+
- Updated LightGBM to 2.3.1
|
14
|
+
- Switched to doubles for datasets and predictions
|
15
|
+
|
16
|
+
## 0.1.6 (2019-09-29)
|
17
|
+
|
18
|
+
- Updated LightGBM to 2.3.0
|
19
|
+
- Fixed error with JRuby
|
20
|
+
|
21
|
+
## 0.1.5 (2019-09-03)
|
22
|
+
|
23
|
+
- Packaged LightGBM with gem
|
24
|
+
- Added support for missing values
|
25
|
+
- Added `feature_names` to datasets
|
26
|
+
- Fixed Daru training and prediction
|
27
|
+
|
28
|
+
## 0.1.4 (2019-08-19)
|
2
29
|
|
3
30
|
- Friendlier message when LightGBM not found
|
4
31
|
- Added `Ranker`
|
@@ -6,22 +33,22 @@
|
|
6
33
|
- Free memory when objects are destroyed
|
7
34
|
- Removed unreleased `dump_text` method
|
8
35
|
|
9
|
-
## 0.1.3
|
36
|
+
## 0.1.3 (2019-08-16)
|
10
37
|
|
11
38
|
- Added Scikit-Learn API
|
12
39
|
- Added support for Daru and Numo::NArray
|
13
40
|
|
14
|
-
## 0.1.2
|
41
|
+
## 0.1.2 (2019-08-15)
|
15
42
|
|
16
43
|
- Added `cv` method
|
17
44
|
- Added early stopping
|
18
45
|
- Fixed multiclass classification
|
19
46
|
|
20
|
-
## 0.1.1
|
47
|
+
## 0.1.1 (2019-08-14)
|
21
48
|
|
22
49
|
- Added training API
|
23
50
|
- Added many methods
|
24
51
|
|
25
|
-
## 0.1.0
|
52
|
+
## 0.1.0 (2019-08-13)
|
26
53
|
|
27
54
|
- First release
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,45 +1,44 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
[LightGBM](https://github.com/microsoft/LightGBM) -
|
4
|
-
|
5
|
-
:fire: Uses the C API for blazing performance
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
|
6
4
|
|
7
5
|
[![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
11
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
12
|
-
|
13
9
|
Add this line to your application’s Gemfile:
|
14
10
|
|
15
11
|
```ruby
|
16
12
|
gem 'lightgbm'
|
17
13
|
```
|
18
14
|
|
19
|
-
|
15
|
+
On Mac, also install OpenMP:
|
20
16
|
|
21
|
-
|
17
|
+
```sh
|
18
|
+
brew install libomp
|
19
|
+
```
|
22
20
|
|
23
|
-
|
24
|
-
- The default verbosity is `-1`
|
25
|
-
- With the `cv` method, `stratified` is set to `false`
|
21
|
+
## Training API
|
26
22
|
|
27
|
-
|
23
|
+
Prep your data
|
28
24
|
|
29
|
-
|
25
|
+
```ruby
|
26
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
27
|
+
y = [1, 2, 3, 4]
|
28
|
+
```
|
30
29
|
|
31
30
|
Train a model
|
32
31
|
|
33
32
|
```ruby
|
34
33
|
params = {objective: "regression"}
|
35
|
-
train_set = LightGBM::Dataset.new(
|
34
|
+
train_set = LightGBM::Dataset.new(x, label: y)
|
36
35
|
booster = LightGBM.train(params, train_set)
|
37
36
|
```
|
38
37
|
|
39
38
|
Predict
|
40
39
|
|
41
40
|
```ruby
|
42
|
-
booster.predict(
|
41
|
+
booster.predict(x)
|
43
42
|
```
|
44
43
|
|
45
44
|
Save the model to a file
|
@@ -130,16 +129,22 @@ Data can be an array of arrays
|
|
130
129
|
[[1, 2, 3], [4, 5, 6]]
|
131
130
|
```
|
132
131
|
|
133
|
-
Or a
|
132
|
+
Or a Numo NArray
|
134
133
|
|
135
134
|
```ruby
|
136
|
-
|
135
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
137
136
|
```
|
138
137
|
|
139
|
-
Or a
|
138
|
+
Or a Rover data frame
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
Rover.read_csv("houses.csv")
|
142
|
+
```
|
143
|
+
|
144
|
+
Or a Daru data frame
|
140
145
|
|
141
146
|
```ruby
|
142
|
-
|
147
|
+
Daru::DataFrame.from_csv("houses.csv")
|
143
148
|
```
|
144
149
|
|
145
150
|
## Helpful Resources
|
@@ -149,12 +154,18 @@ Numo::DFloat.new(3, 2).seq
|
|
149
154
|
|
150
155
|
## Related Projects
|
151
156
|
|
152
|
-
- [
|
153
|
-
- [Eps](https://github.com/ankane/eps) - Machine
|
157
|
+
- [XGBoost](https://github.com/ankane/xgboost) - XGBoost for Ruby
|
158
|
+
- [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
|
154
159
|
|
155
160
|
## Credits
|
156
161
|
|
157
|
-
|
162
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
163
|
+
|
164
|
+
- The `get_` and `set_` prefixes are removed from methods
|
165
|
+
- The default verbosity is `-1`
|
166
|
+
- With the `cv` method, `stratified` is set to `false`
|
167
|
+
|
168
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
|
158
169
|
|
159
170
|
## History
|
160
171
|
|
@@ -168,3 +179,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
168
179
|
- Fix bugs and [submit pull requests](https://github.com/ankane/lightgbm/pulls)
|
169
180
|
- Write, clarify, or fix documentation
|
170
181
|
- Suggest or add new features
|
182
|
+
|
183
|
+
To get started with development:
|
184
|
+
|
185
|
+
```sh
|
186
|
+
git clone https://github.com/ankane/lightgbm.git
|
187
|
+
cd lightgbm
|
188
|
+
bundle install
|
189
|
+
bundle exec rake vendor:all
|
190
|
+
bundle exec rake test
|
191
|
+
```
|
data/lib/lightgbm.rb
CHANGED
@@ -20,7 +20,8 @@ module LightGBM
|
|
20
20
|
attr_accessor :ffi_lib
|
21
21
|
end
|
22
22
|
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
-
|
23
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
24
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so", vendor_lib]
|
24
25
|
|
25
26
|
# friendlier error message
|
26
27
|
autoload :FFI, "lightgbm/ffi"
|
@@ -35,10 +36,14 @@ module LightGBM
|
|
35
36
|
booster.train_data_name = name || "training"
|
36
37
|
valid_contain_train = true
|
37
38
|
else
|
39
|
+
# ensure the validation set references the training set
|
40
|
+
data.reference = train_set
|
38
41
|
booster.add_valid(data, name || "valid_#{i}")
|
39
42
|
end
|
40
43
|
end
|
41
44
|
|
45
|
+
raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
|
46
|
+
|
42
47
|
booster.best_iteration = 0
|
43
48
|
|
44
49
|
if early_stopping_rounds
|
@@ -130,6 +135,7 @@ module LightGBM
|
|
130
135
|
if early_stopping_rounds
|
131
136
|
best_score = {}
|
132
137
|
best_iter = {}
|
138
|
+
best_iteration = nil
|
133
139
|
end
|
134
140
|
|
135
141
|
num_boost_round.times do |iteration|
|
@@ -169,6 +175,7 @@ module LightGBM
|
|
169
175
|
best_score[k] = score
|
170
176
|
best_iter[k] = iteration
|
171
177
|
elsif iteration - best_iter[k] >= early_stopping_rounds
|
178
|
+
best_iteration = best_iter[k]
|
172
179
|
stop_early = true
|
173
180
|
break
|
174
181
|
end
|
@@ -177,6 +184,15 @@ module LightGBM
|
|
177
184
|
end
|
178
185
|
end
|
179
186
|
|
187
|
+
if early_stopping_rounds
|
188
|
+
# use best iteration from first metric if not stopped early
|
189
|
+
best_iteration ||= best_iter[best_iter.keys.first]
|
190
|
+
eval_hist.each_key do |k|
|
191
|
+
# TODO uncomment for 0.2.0
|
192
|
+
# eval_hist[k] = eval_hist[k].first(best_iteration + 1)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
180
196
|
eval_hist
|
181
197
|
end
|
182
198
|
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -30,7 +30,7 @@ module LightGBM
|
|
30
30
|
|
31
31
|
def current_iteration
|
32
32
|
out = ::FFI::MemoryPointer.new(:int)
|
33
|
-
check_result FFI
|
33
|
+
check_result FFI.LGBM_BoosterGetCurrentIteration(handle_pointer, out)
|
34
34
|
out.read_int
|
35
35
|
end
|
36
36
|
|
@@ -38,11 +38,11 @@ module LightGBM
|
|
38
38
|
num_iteration ||= best_iteration
|
39
39
|
buffer_len = 1 << 20
|
40
40
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
41
|
-
out_str = ::FFI::MemoryPointer.new(:
|
41
|
+
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
|
42
42
|
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
43
|
-
actual_len = out_len
|
43
|
+
actual_len = read_int64(out_len)
|
44
44
|
if actual_len > buffer_len
|
45
|
-
out_str = ::FFI::MemoryPointer.new(:
|
45
|
+
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
|
46
46
|
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
47
47
|
end
|
48
48
|
out_str.read_string
|
@@ -85,11 +85,11 @@ module LightGBM
|
|
85
85
|
num_iteration ||= best_iteration
|
86
86
|
buffer_len = 1 << 20
|
87
87
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
88
|
-
out_str = ::FFI::MemoryPointer.new(:
|
88
|
+
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
|
89
89
|
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
90
|
-
actual_len = out_len
|
90
|
+
actual_len = read_int64(out_len)
|
91
91
|
if actual_len > buffer_len
|
92
|
-
out_str = ::FFI::MemoryPointer.new(:
|
92
|
+
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
|
93
93
|
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
94
94
|
end
|
95
95
|
out_str.read_string
|
@@ -104,19 +104,24 @@ module LightGBM
|
|
104
104
|
|
105
105
|
def num_model_per_iteration
|
106
106
|
out = ::FFI::MemoryPointer.new(:int)
|
107
|
-
check_result FFI
|
107
|
+
check_result FFI.LGBM_BoosterNumModelPerIteration(handle_pointer, out)
|
108
108
|
out.read_int
|
109
109
|
end
|
110
110
|
|
111
111
|
def num_trees
|
112
112
|
out = ::FFI::MemoryPointer.new(:int)
|
113
|
-
check_result FFI
|
113
|
+
check_result FFI.LGBM_BoosterNumberOfTotalModel(handle_pointer, out)
|
114
114
|
out.read_int
|
115
115
|
end
|
116
116
|
|
117
117
|
# TODO support different prediction types
|
118
118
|
def predict(input, num_iteration: nil, **params)
|
119
|
-
|
119
|
+
input =
|
120
|
+
if daru?(input)
|
121
|
+
input.map_rows(&:to_a)
|
122
|
+
else
|
123
|
+
input.to_a
|
124
|
+
end
|
120
125
|
|
121
126
|
singular = !input.first.is_a?(Array)
|
122
127
|
input = [input] if singular
|
@@ -124,13 +129,15 @@ module LightGBM
|
|
124
129
|
num_iteration ||= best_iteration
|
125
130
|
num_class ||= num_class()
|
126
131
|
|
127
|
-
|
128
|
-
|
132
|
+
flat_input = input.flatten
|
133
|
+
handle_missing(flat_input)
|
134
|
+
data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
|
135
|
+
data.write_array_of_double(flat_input)
|
129
136
|
|
130
137
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
131
138
|
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
|
132
|
-
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data,
|
133
|
-
out = out_result.read_array_of_double(out_len
|
139
|
+
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, num_iteration, params_str(params), out_len, out_result)
|
140
|
+
out = out_result.read_array_of_double(read_int64(out_len))
|
134
141
|
out = out.each_slice(num_class).to_a if num_class > 1
|
135
142
|
|
136
143
|
singular ? out.first : out
|
@@ -161,7 +168,7 @@ module LightGBM
|
|
161
168
|
|
162
169
|
def eval_counts
|
163
170
|
out = ::FFI::MemoryPointer.new(:int)
|
164
|
-
check_result FFI
|
171
|
+
check_result FFI.LGBM_BoosterGetEvalCounts(handle_pointer, out)
|
165
172
|
out.read_int
|
166
173
|
end
|
167
174
|
|
@@ -169,8 +176,8 @@ module LightGBM
|
|
169
176
|
eval_counts ||= eval_counts()
|
170
177
|
out_len = ::FFI::MemoryPointer.new(:int)
|
171
178
|
out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
|
172
|
-
str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:
|
173
|
-
out_strs.
|
179
|
+
str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, 255) }
|
180
|
+
out_strs.write_array_of_pointer(str_ptrs)
|
174
181
|
check_result FFI.LGBM_BoosterGetEvalNames(handle_pointer, out_len, out_strs)
|
175
182
|
str_ptrs.map(&:read_string)
|
176
183
|
end
|
@@ -191,10 +198,15 @@ module LightGBM
|
|
191
198
|
|
192
199
|
def num_class
|
193
200
|
out = ::FFI::MemoryPointer.new(:int)
|
194
|
-
check_result FFI
|
201
|
+
check_result FFI.LGBM_BoosterGetNumClasses(handle_pointer, out)
|
195
202
|
out.read_int
|
196
203
|
end
|
197
204
|
|
205
|
+
# read_int64 not available on JRuby
|
206
|
+
def read_int64(ptr)
|
207
|
+
ptr.read_array_of_int64(1).first
|
208
|
+
end
|
209
|
+
|
198
210
|
include Utils
|
199
211
|
end
|
200
212
|
end
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -15,8 +15,8 @@ module LightGBM
|
|
15
15
|
params[:objective] ||= "binary"
|
16
16
|
end
|
17
17
|
|
18
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
|
20
20
|
|
21
21
|
@booster = LightGBM.train(params, train_set,
|
22
22
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,49 +2,18 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
|
+
@label = label
|
8
|
+
@weight = weight
|
9
|
+
@group = group
|
10
|
+
@params = params
|
11
|
+
@reference = reference
|
12
|
+
@used_indices = used_indices
|
13
|
+
@categorical_feature = categorical_feature
|
14
|
+
@feature_names = feature_names
|
7
15
|
|
8
|
-
|
9
|
-
params ||= {}
|
10
|
-
params["categorical_feature"] ||= categorical_feature.join(",") if categorical_feature != "auto"
|
11
|
-
set_verbosity(params)
|
12
|
-
|
13
|
-
@handle = ::FFI::MemoryPointer.new(:pointer)
|
14
|
-
parameters = params_str(params)
|
15
|
-
reference = reference.handle_pointer if reference
|
16
|
-
if used_indices
|
17
|
-
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
18
|
-
used_row_indices.put_array_of_int32(0, used_indices)
|
19
|
-
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
20
|
-
elsif data.is_a?(String)
|
21
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
22
|
-
else
|
23
|
-
if matrix?(data)
|
24
|
-
nrow = data.row_count
|
25
|
-
ncol = data.column_count
|
26
|
-
flat_data = data.to_a.flatten
|
27
|
-
elsif daru?(data)
|
28
|
-
nrow, ncol = data.shape
|
29
|
-
flat_data = data.each_vector.map(&:to_a).flatten
|
30
|
-
elsif narray?(data)
|
31
|
-
nrow, ncol = data.shape
|
32
|
-
flat_data = data.flatten.to_a
|
33
|
-
else
|
34
|
-
nrow = data.count
|
35
|
-
ncol = data.first.count
|
36
|
-
flat_data = data.flatten
|
37
|
-
end
|
38
|
-
|
39
|
-
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
40
|
-
c_data.put_array_of_float(0, flat_data)
|
41
|
-
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
42
|
-
end
|
43
|
-
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
44
|
-
|
45
|
-
self.label = label if label
|
46
|
-
self.weight = weight if weight
|
47
|
-
self.group = group if group
|
16
|
+
construct
|
48
17
|
end
|
49
18
|
|
50
19
|
def label
|
@@ -55,18 +24,47 @@ module LightGBM
|
|
55
24
|
field("weight")
|
56
25
|
end
|
57
26
|
|
27
|
+
def feature_names
|
28
|
+
# must preallocate space
|
29
|
+
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
30
|
+
out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
|
31
|
+
str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:char, 255) }
|
32
|
+
out_strs.write_array_of_pointer(str_ptrs)
|
33
|
+
check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
|
34
|
+
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
35
|
+
end
|
36
|
+
|
58
37
|
def label=(label)
|
38
|
+
@label = label
|
59
39
|
set_field("label", label)
|
60
40
|
end
|
61
41
|
|
62
42
|
def weight=(weight)
|
43
|
+
@weight = weight
|
63
44
|
set_field("weight", weight)
|
64
45
|
end
|
65
46
|
|
66
47
|
def group=(group)
|
48
|
+
@group = group
|
67
49
|
set_field("group", group, type: :int32)
|
68
50
|
end
|
69
51
|
|
52
|
+
def feature_names=(feature_names)
|
53
|
+
@feature_names = feature_names
|
54
|
+
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
55
|
+
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
56
|
+
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
57
|
+
end
|
58
|
+
|
59
|
+
# TODO only update reference if not in chain
|
60
|
+
def reference=(reference)
|
61
|
+
if reference != @reference
|
62
|
+
@reference = reference
|
63
|
+
free_handle
|
64
|
+
construct
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
70
68
|
def num_data
|
71
69
|
out = ::FFI::MemoryPointer.new(:int)
|
72
70
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -83,11 +81,6 @@ module LightGBM
|
|
83
81
|
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
|
84
82
|
end
|
85
83
|
|
86
|
-
# not released yet
|
87
|
-
# def dump_text(filename)
|
88
|
-
# check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
89
|
-
# end
|
90
|
-
|
91
84
|
def subset(used_indices, params: nil)
|
92
85
|
# categorical_feature passed via params
|
93
86
|
params ||= self.params
|
@@ -109,6 +102,70 @@ module LightGBM
|
|
109
102
|
|
110
103
|
private
|
111
104
|
|
105
|
+
def construct
|
106
|
+
data = @data
|
107
|
+
used_indices = @used_indices
|
108
|
+
|
109
|
+
# TODO stringify params
|
110
|
+
params = @params || {}
|
111
|
+
if @categorical_feature != "auto" && @categorical_feature.any?
|
112
|
+
params["categorical_feature"] ||= @categorical_feature.join(",")
|
113
|
+
end
|
114
|
+
set_verbosity(params)
|
115
|
+
|
116
|
+
@handle = ::FFI::MemoryPointer.new(:pointer)
|
117
|
+
parameters = params_str(params)
|
118
|
+
reference = @reference.handle_pointer if @reference
|
119
|
+
if used_indices
|
120
|
+
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
121
|
+
used_row_indices.write_array_of_int32(used_indices)
|
122
|
+
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
123
|
+
elsif data.is_a?(String)
|
124
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
125
|
+
else
|
126
|
+
if matrix?(data)
|
127
|
+
nrow = data.row_count
|
128
|
+
ncol = data.column_count
|
129
|
+
flat_data = data.to_a.flatten
|
130
|
+
elsif daru?(data)
|
131
|
+
nrow, ncol = data.shape
|
132
|
+
flat_data = data.map_rows(&:to_a).flatten
|
133
|
+
elsif numo?(data) || rover?(data)
|
134
|
+
data = data.to_numo if rover?(data)
|
135
|
+
nrow, ncol = data.shape
|
136
|
+
else
|
137
|
+
nrow = data.count
|
138
|
+
ncol = data.first.count
|
139
|
+
flat_data = data.flatten
|
140
|
+
end
|
141
|
+
|
142
|
+
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
143
|
+
if numo?(data)
|
144
|
+
c_data.write_bytes(data.cast_to(Numo::DFloat).to_string)
|
145
|
+
else
|
146
|
+
handle_missing(flat_data)
|
147
|
+
c_data.write_array_of_double(flat_data)
|
148
|
+
end
|
149
|
+
|
150
|
+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
151
|
+
end
|
152
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
153
|
+
|
154
|
+
self.label = @label if @label
|
155
|
+
self.weight = @weight if @weight
|
156
|
+
self.group = @group if @group
|
157
|
+
self.feature_names = @feature_names if @feature_names
|
158
|
+
end
|
159
|
+
|
160
|
+
def free_handle
|
161
|
+
FFI.LGBM_DatasetFree(handle_pointer)
|
162
|
+
ObjectSpace.undefine_finalizer(self)
|
163
|
+
end
|
164
|
+
|
165
|
+
def dump_text(filename)
|
166
|
+
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
167
|
+
end
|
168
|
+
|
112
169
|
def field(field_name)
|
113
170
|
num_data = self.num_data
|
114
171
|
out_len = ::FFI::MemoryPointer.new(:int)
|
@@ -122,27 +179,15 @@ module LightGBM
|
|
122
179
|
data = data.to_a unless data.is_a?(Array)
|
123
180
|
if type == :int32
|
124
181
|
c_data = ::FFI::MemoryPointer.new(:int32, data.count)
|
125
|
-
c_data.
|
182
|
+
c_data.write_array_of_int32(data)
|
126
183
|
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
|
127
184
|
else
|
128
185
|
c_data = ::FFI::MemoryPointer.new(:float, data.count)
|
129
|
-
c_data.
|
186
|
+
c_data.write_array_of_float(data)
|
130
187
|
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
|
131
188
|
end
|
132
189
|
end
|
133
190
|
|
134
|
-
def matrix?(data)
|
135
|
-
defined?(Matrix) && data.is_a?(Matrix)
|
136
|
-
end
|
137
|
-
|
138
|
-
def daru?(data)
|
139
|
-
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
140
|
-
end
|
141
|
-
|
142
|
-
def narray?(data)
|
143
|
-
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
144
|
-
end
|
145
|
-
|
146
191
|
include Utils
|
147
192
|
end
|
148
193
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -5,8 +5,11 @@ module LightGBM
|
|
5
5
|
begin
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
|
-
|
9
|
-
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
12
|
+
end
|
10
13
|
end
|
11
14
|
|
12
15
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
@@ -19,9 +22,11 @@ module LightGBM
|
|
19
22
|
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
20
23
|
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
21
24
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
25
|
+
attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
|
26
|
+
attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
|
22
27
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
23
28
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
24
|
-
|
29
|
+
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
25
30
|
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
26
31
|
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
27
32
|
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -5,8 +5,8 @@ module LightGBM
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
|
10
10
|
|
11
11
|
@booster = LightGBM.train(@params, train_set,
|
12
12
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/utils.rb
CHANGED
@@ -23,5 +23,26 @@ module LightGBM
|
|
23
23
|
params["verbosity"] = -1
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# for categorical, NaN and negative values are the same
|
28
|
+
def handle_missing(data)
|
29
|
+
data.map! { |v| v.nil? ? Float::NAN : v }
|
30
|
+
end
|
31
|
+
|
32
|
+
def matrix?(data)
|
33
|
+
defined?(Matrix) && data.is_a?(Matrix)
|
34
|
+
end
|
35
|
+
|
36
|
+
def daru?(data)
|
37
|
+
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
38
|
+
end
|
39
|
+
|
40
|
+
def numo?(data)
|
41
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
42
|
+
end
|
43
|
+
|
44
|
+
def rover?(data)
|
45
|
+
defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
46
|
+
end
|
26
47
|
end
|
27
48
|
end
|
data/lib/lightgbm/version.rb
CHANGED
data/vendor/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) Microsoft Corporation
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: numo-narray
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
83
|
description:
|
98
84
|
email: andrew@chartkick.com
|
99
85
|
executables: []
|
@@ -101,6 +87,7 @@ extensions: []
|
|
101
87
|
extra_rdoc_files: []
|
102
88
|
files:
|
103
89
|
- CHANGELOG.md
|
90
|
+
- LICENSE.txt
|
104
91
|
- README.md
|
105
92
|
- lib/lightgbm.rb
|
106
93
|
- lib/lightgbm/booster.rb
|
@@ -112,6 +99,10 @@ files:
|
|
112
99
|
- lib/lightgbm/regressor.rb
|
113
100
|
- lib/lightgbm/utils.rb
|
114
101
|
- lib/lightgbm/version.rb
|
102
|
+
- vendor/LICENSE
|
103
|
+
- vendor/lib_lightgbm.dll
|
104
|
+
- vendor/lib_lightgbm.dylib
|
105
|
+
- vendor/lib_lightgbm.so
|
115
106
|
homepage: https://github.com/ankane/lightgbm
|
116
107
|
licenses:
|
117
108
|
- MIT
|
@@ -131,8 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
122
|
- !ruby/object:Gem::Version
|
132
123
|
version: '0'
|
133
124
|
requirements: []
|
134
|
-
rubygems_version: 3.
|
125
|
+
rubygems_version: 3.1.2
|
135
126
|
signing_key:
|
136
127
|
specification_version: 4
|
137
|
-
summary:
|
128
|
+
summary: High performance gradient boosting for Ruby
|
138
129
|
test_files: []
|