lightgbm 0.1.4 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +32 -5
- data/LICENSE.txt +22 -0
- data/README.md +42 -21
- data/lib/lightgbm.rb +17 -1
- data/lib/lightgbm/booster.rb +30 -18
- data/lib/lightgbm/classifier.rb +2 -2
- data/lib/lightgbm/dataset.rb +105 -60
- data/lib/lightgbm/ffi.rb +8 -3
- data/lib/lightgbm/regressor.rb +2 -2
- data/lib/lightgbm/utils.rb +21 -0
- data/lib/lightgbm/version.rb +1 -1
- data/vendor/LICENSE +21 -0
- data/vendor/lib_lightgbm.dll +0 -0
- data/vendor/lib_lightgbm.dylib +0 -0
- data/vendor/lib_lightgbm.so +0 -0
- metadata +9 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a4aac9eac1ab0dadbf31d1e1fc2714e75a8c37075538aef5c53b42df1c34f658
|
4
|
+
data.tar.gz: ca3a1043c55184992b3fac611963062d01747449f6170719ef1f299d4f0474c9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c2f14ccc3b40690060d2ee533cfe46e137f40e91520aab3eb188a8a03a697a6956853b717e743cd357d0a059836fc32e9e0aa0fbe5a7dd4263b1ff3e94e79601
|
7
|
+
data.tar.gz: 64abdb43f4c45222dcbb39ddde1d21350b2b4958439dc84d8f13684f4e9bcd3aa2c0e49aa8fd9ae1dfc09b8bc0ce69592e95aab0542684f3c952e9e67ac4689f
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,31 @@
|
|
1
|
-
## 0.1.4
|
1
|
+
## 0.1.9 (2020-06-10)
|
2
|
+
|
3
|
+
- Added support for Rover
|
4
|
+
- Improved performance of Numo datasets
|
5
|
+
|
6
|
+
## 0.1.8 (2020-05-09)
|
7
|
+
|
8
|
+
- Improved error message when OpenMP not found on Mac
|
9
|
+
- Fixed `Cannot add validation data` error
|
10
|
+
|
11
|
+
## 0.1.7 (2019-12-05)
|
12
|
+
|
13
|
+
- Updated LightGBM to 2.3.1
|
14
|
+
- Switched to doubles for datasets and predictions
|
15
|
+
|
16
|
+
## 0.1.6 (2019-09-29)
|
17
|
+
|
18
|
+
- Updated LightGBM to 2.3.0
|
19
|
+
- Fixed error with JRuby
|
20
|
+
|
21
|
+
## 0.1.5 (2019-09-03)
|
22
|
+
|
23
|
+
- Packaged LightGBM with gem
|
24
|
+
- Added support for missing values
|
25
|
+
- Added `feature_names` to datasets
|
26
|
+
- Fixed Daru training and prediction
|
27
|
+
|
28
|
+
## 0.1.4 (2019-08-19)
|
2
29
|
|
3
30
|
- Friendlier message when LightGBM not found
|
4
31
|
- Added `Ranker`
|
@@ -6,22 +33,22 @@
|
|
6
33
|
- Free memory when objects are destroyed
|
7
34
|
- Removed unreleased `dump_text` method
|
8
35
|
|
9
|
-
## 0.1.3
|
36
|
+
## 0.1.3 (2019-08-16)
|
10
37
|
|
11
38
|
- Added Scikit-Learn API
|
12
39
|
- Added support for Daru and Numo::NArray
|
13
40
|
|
14
|
-
## 0.1.2
|
41
|
+
## 0.1.2 (2019-08-15)
|
15
42
|
|
16
43
|
- Added `cv` method
|
17
44
|
- Added early stopping
|
18
45
|
- Fixed multiclass classification
|
19
46
|
|
20
|
-
## 0.1.1
|
47
|
+
## 0.1.1 (2019-08-14)
|
21
48
|
|
22
49
|
- Added training API
|
23
50
|
- Added many methods
|
24
51
|
|
25
|
-
## 0.1.0
|
52
|
+
## 0.1.0 (2019-08-13)
|
26
53
|
|
27
54
|
- First release
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,45 +1,44 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
[LightGBM](https://github.com/microsoft/LightGBM) -
|
4
|
-
|
5
|
-
:fire: Uses the C API for blazing performance
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
|
6
4
|
|
7
5
|
[Build Status](https://travis-ci.org/ankane/lightgbm)
|
8
6
|
|
9
7
|
## Installation
|
10
8
|
|
11
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
12
|
-
|
13
9
|
Add this line to your application’s Gemfile:
|
14
10
|
|
15
11
|
```ruby
|
16
12
|
gem 'lightgbm'
|
17
13
|
```
|
18
14
|
|
19
|
-
|
15
|
+
On Mac, also install OpenMP:
|
20
16
|
|
21
|
-
|
17
|
+
```sh
|
18
|
+
brew install libomp
|
19
|
+
```
|
22
20
|
|
23
|
-
|
24
|
-
- The default verbosity is `-1`
|
25
|
-
- With the `cv` method, `stratified` is set to `false`
|
21
|
+
## Training API
|
26
22
|
|
27
|
-
|
23
|
+
Prep your data
|
28
24
|
|
29
|
-
|
25
|
+
```ruby
|
26
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
27
|
+
y = [1, 2, 3, 4]
|
28
|
+
```
|
30
29
|
|
31
30
|
Train a model
|
32
31
|
|
33
32
|
```ruby
|
34
33
|
params = {objective: "regression"}
|
35
|
-
train_set = LightGBM::Dataset.new(
|
34
|
+
train_set = LightGBM::Dataset.new(x, label: y)
|
36
35
|
booster = LightGBM.train(params, train_set)
|
37
36
|
```
|
38
37
|
|
39
38
|
Predict
|
40
39
|
|
41
40
|
```ruby
|
42
|
-
booster.predict(
|
41
|
+
booster.predict(x)
|
43
42
|
```
|
44
43
|
|
45
44
|
Save the model to a file
|
@@ -130,16 +129,22 @@ Data can be an array of arrays
|
|
130
129
|
[[1, 2, 3], [4, 5, 6]]
|
131
130
|
```
|
132
131
|
|
133
|
-
Or a
|
132
|
+
Or a Numo NArray
|
134
133
|
|
135
134
|
```ruby
|
136
|
-
|
135
|
+
Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
|
137
136
|
```
|
138
137
|
|
139
|
-
Or a
|
138
|
+
Or a Rover data frame
|
139
|
+
|
140
|
+
```ruby
|
141
|
+
Rover.read_csv("houses.csv")
|
142
|
+
```
|
143
|
+
|
144
|
+
Or a Daru data frame
|
140
145
|
|
141
146
|
```ruby
|
142
|
-
|
147
|
+
Daru::DataFrame.from_csv("houses.csv")
|
143
148
|
```
|
144
149
|
|
145
150
|
## Helpful Resources
|
@@ -149,12 +154,18 @@ Numo::DFloat.new(3, 2).seq
|
|
149
154
|
|
150
155
|
## Related Projects
|
151
156
|
|
152
|
-
- [
|
153
|
-
- [Eps](https://github.com/ankane/eps) - Machine
|
157
|
+
- [XGBoost](https://github.com/ankane/xgboost) - XGBoost for Ruby
|
158
|
+
- [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
|
154
159
|
|
155
160
|
## Credits
|
156
161
|
|
157
|
-
|
162
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
163
|
+
|
164
|
+
- The `get_` and `set_` prefixes are removed from methods
|
165
|
+
- The default verbosity is `-1`
|
166
|
+
- With the `cv` method, `stratified` is set to `false`
|
167
|
+
|
168
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
|
158
169
|
|
159
170
|
## History
|
160
171
|
|
@@ -168,3 +179,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
168
179
|
- Fix bugs and [submit pull requests](https://github.com/ankane/lightgbm/pulls)
|
169
180
|
- Write, clarify, or fix documentation
|
170
181
|
- Suggest or add new features
|
182
|
+
|
183
|
+
To get started with development:
|
184
|
+
|
185
|
+
```sh
|
186
|
+
git clone https://github.com/ankane/lightgbm.git
|
187
|
+
cd lightgbm
|
188
|
+
bundle install
|
189
|
+
bundle exec rake vendor:all
|
190
|
+
bundle exec rake test
|
191
|
+
```
|
data/lib/lightgbm.rb
CHANGED
@@ -20,7 +20,8 @@ module LightGBM
|
|
20
20
|
attr_accessor :ffi_lib
|
21
21
|
end
|
22
22
|
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
-
|
23
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
24
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so", vendor_lib]
|
24
25
|
|
25
26
|
# friendlier error message
|
26
27
|
autoload :FFI, "lightgbm/ffi"
|
@@ -35,10 +36,14 @@ module LightGBM
|
|
35
36
|
booster.train_data_name = name || "training"
|
36
37
|
valid_contain_train = true
|
37
38
|
else
|
39
|
+
# ensure the validation set references the training set
|
40
|
+
data.reference = train_set
|
38
41
|
booster.add_valid(data, name || "valid_#{i}")
|
39
42
|
end
|
40
43
|
end
|
41
44
|
|
45
|
+
raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
|
46
|
+
|
42
47
|
booster.best_iteration = 0
|
43
48
|
|
44
49
|
if early_stopping_rounds
|
@@ -130,6 +135,7 @@ module LightGBM
|
|
130
135
|
if early_stopping_rounds
|
131
136
|
best_score = {}
|
132
137
|
best_iter = {}
|
138
|
+
best_iteration = nil
|
133
139
|
end
|
134
140
|
|
135
141
|
num_boost_round.times do |iteration|
|
@@ -169,6 +175,7 @@ module LightGBM
|
|
169
175
|
best_score[k] = score
|
170
176
|
best_iter[k] = iteration
|
171
177
|
elsif iteration - best_iter[k] >= early_stopping_rounds
|
178
|
+
best_iteration = best_iter[k]
|
172
179
|
stop_early = true
|
173
180
|
break
|
174
181
|
end
|
@@ -177,6 +184,15 @@ module LightGBM
|
|
177
184
|
end
|
178
185
|
end
|
179
186
|
|
187
|
+
if early_stopping_rounds
|
188
|
+
# use best iteration from first metric if not stopped early
|
189
|
+
best_iteration ||= best_iter[best_iter.keys.first]
|
190
|
+
eval_hist.each_key do |k|
|
191
|
+
# TODO uncomment for 0.2.0
|
192
|
+
# eval_hist[k] = eval_hist[k].first(best_iteration + 1)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
180
196
|
eval_hist
|
181
197
|
end
|
182
198
|
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -30,7 +30,7 @@ module LightGBM
|
|
30
30
|
|
31
31
|
def current_iteration
|
32
32
|
out = ::FFI::MemoryPointer.new(:int)
|
33
|
-
check_result FFI
|
33
|
+
check_result FFI.LGBM_BoosterGetCurrentIteration(handle_pointer, out)
|
34
34
|
out.read_int
|
35
35
|
end
|
36
36
|
|
@@ -38,11 +38,11 @@ module LightGBM
|
|
38
38
|
num_iteration ||= best_iteration
|
39
39
|
buffer_len = 1 << 20
|
40
40
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
41
|
-
out_str = ::FFI::MemoryPointer.new(:
|
41
|
+
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
|
42
42
|
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
43
|
-
actual_len = out_len
|
43
|
+
actual_len = read_int64(out_len)
|
44
44
|
if actual_len > buffer_len
|
45
|
-
out_str = ::FFI::MemoryPointer.new(:
|
45
|
+
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
|
46
46
|
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
47
47
|
end
|
48
48
|
out_str.read_string
|
@@ -85,11 +85,11 @@ module LightGBM
|
|
85
85
|
num_iteration ||= best_iteration
|
86
86
|
buffer_len = 1 << 20
|
87
87
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
88
|
-
out_str = ::FFI::MemoryPointer.new(:
|
88
|
+
out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
|
89
89
|
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
90
|
-
actual_len = out_len
|
90
|
+
actual_len = read_int64(out_len)
|
91
91
|
if actual_len > buffer_len
|
92
|
-
out_str = ::FFI::MemoryPointer.new(:
|
92
|
+
out_str = ::FFI::MemoryPointer.new(:char, actual_len)
|
93
93
|
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
94
94
|
end
|
95
95
|
out_str.read_string
|
@@ -104,19 +104,24 @@ module LightGBM
|
|
104
104
|
|
105
105
|
def num_model_per_iteration
|
106
106
|
out = ::FFI::MemoryPointer.new(:int)
|
107
|
-
check_result FFI
|
107
|
+
check_result FFI.LGBM_BoosterNumModelPerIteration(handle_pointer, out)
|
108
108
|
out.read_int
|
109
109
|
end
|
110
110
|
|
111
111
|
def num_trees
|
112
112
|
out = ::FFI::MemoryPointer.new(:int)
|
113
|
-
check_result FFI
|
113
|
+
check_result FFI.LGBM_BoosterNumberOfTotalModel(handle_pointer, out)
|
114
114
|
out.read_int
|
115
115
|
end
|
116
116
|
|
117
117
|
# TODO support different prediction types
|
118
118
|
def predict(input, num_iteration: nil, **params)
|
119
|
-
|
119
|
+
input =
|
120
|
+
if daru?(input)
|
121
|
+
input.map_rows(&:to_a)
|
122
|
+
else
|
123
|
+
input.to_a
|
124
|
+
end
|
120
125
|
|
121
126
|
singular = !input.first.is_a?(Array)
|
122
127
|
input = [input] if singular
|
@@ -124,13 +129,15 @@ module LightGBM
|
|
124
129
|
num_iteration ||= best_iteration
|
125
130
|
num_class ||= num_class()
|
126
131
|
|
127
|
-
|
128
|
-
|
132
|
+
flat_input = input.flatten
|
133
|
+
handle_missing(flat_input)
|
134
|
+
data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
|
135
|
+
data.write_array_of_double(flat_input)
|
129
136
|
|
130
137
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
131
138
|
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
|
132
|
-
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data,
|
133
|
-
out = out_result.read_array_of_double(out_len
|
139
|
+
check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, num_iteration, params_str(params), out_len, out_result)
|
140
|
+
out = out_result.read_array_of_double(read_int64(out_len))
|
134
141
|
out = out.each_slice(num_class).to_a if num_class > 1
|
135
142
|
|
136
143
|
singular ? out.first : out
|
@@ -161,7 +168,7 @@ module LightGBM
|
|
161
168
|
|
162
169
|
def eval_counts
|
163
170
|
out = ::FFI::MemoryPointer.new(:int)
|
164
|
-
check_result FFI
|
171
|
+
check_result FFI.LGBM_BoosterGetEvalCounts(handle_pointer, out)
|
165
172
|
out.read_int
|
166
173
|
end
|
167
174
|
|
@@ -169,8 +176,8 @@ module LightGBM
|
|
169
176
|
eval_counts ||= eval_counts()
|
170
177
|
out_len = ::FFI::MemoryPointer.new(:int)
|
171
178
|
out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
|
172
|
-
str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:
|
173
|
-
out_strs.
|
179
|
+
str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, 255) }
|
180
|
+
out_strs.write_array_of_pointer(str_ptrs)
|
174
181
|
check_result FFI.LGBM_BoosterGetEvalNames(handle_pointer, out_len, out_strs)
|
175
182
|
str_ptrs.map(&:read_string)
|
176
183
|
end
|
@@ -191,10 +198,15 @@ module LightGBM
|
|
191
198
|
|
192
199
|
def num_class
|
193
200
|
out = ::FFI::MemoryPointer.new(:int)
|
194
|
-
check_result FFI
|
201
|
+
check_result FFI.LGBM_BoosterGetNumClasses(handle_pointer, out)
|
195
202
|
out.read_int
|
196
203
|
end
|
197
204
|
|
205
|
+
# read_int64 not available on JRuby
|
206
|
+
def read_int64(ptr)
|
207
|
+
ptr.read_array_of_int64(1).first
|
208
|
+
end
|
209
|
+
|
198
210
|
include Utils
|
199
211
|
end
|
200
212
|
end
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -15,8 +15,8 @@ module LightGBM
|
|
15
15
|
params[:objective] ||= "binary"
|
16
16
|
end
|
17
17
|
|
18
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
|
20
20
|
|
21
21
|
@booster = LightGBM.train(params, train_set,
|
22
22
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,49 +2,18 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
|
+
@label = label
|
8
|
+
@weight = weight
|
9
|
+
@group = group
|
10
|
+
@params = params
|
11
|
+
@reference = reference
|
12
|
+
@used_indices = used_indices
|
13
|
+
@categorical_feature = categorical_feature
|
14
|
+
@feature_names = feature_names
|
7
15
|
|
8
|
-
|
9
|
-
params ||= {}
|
10
|
-
params["categorical_feature"] ||= categorical_feature.join(",") if categorical_feature != "auto"
|
11
|
-
set_verbosity(params)
|
12
|
-
|
13
|
-
@handle = ::FFI::MemoryPointer.new(:pointer)
|
14
|
-
parameters = params_str(params)
|
15
|
-
reference = reference.handle_pointer if reference
|
16
|
-
if used_indices
|
17
|
-
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
18
|
-
used_row_indices.put_array_of_int32(0, used_indices)
|
19
|
-
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
20
|
-
elsif data.is_a?(String)
|
21
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
22
|
-
else
|
23
|
-
if matrix?(data)
|
24
|
-
nrow = data.row_count
|
25
|
-
ncol = data.column_count
|
26
|
-
flat_data = data.to_a.flatten
|
27
|
-
elsif daru?(data)
|
28
|
-
nrow, ncol = data.shape
|
29
|
-
flat_data = data.each_vector.map(&:to_a).flatten
|
30
|
-
elsif narray?(data)
|
31
|
-
nrow, ncol = data.shape
|
32
|
-
flat_data = data.flatten.to_a
|
33
|
-
else
|
34
|
-
nrow = data.count
|
35
|
-
ncol = data.first.count
|
36
|
-
flat_data = data.flatten
|
37
|
-
end
|
38
|
-
|
39
|
-
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
40
|
-
c_data.put_array_of_float(0, flat_data)
|
41
|
-
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
42
|
-
end
|
43
|
-
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
44
|
-
|
45
|
-
self.label = label if label
|
46
|
-
self.weight = weight if weight
|
47
|
-
self.group = group if group
|
16
|
+
construct
|
48
17
|
end
|
49
18
|
|
50
19
|
def label
|
@@ -55,18 +24,47 @@ module LightGBM
|
|
55
24
|
field("weight")
|
56
25
|
end
|
57
26
|
|
27
|
+
def feature_names
|
28
|
+
# must preallocate space
|
29
|
+
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
30
|
+
out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
|
31
|
+
str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:char, 255) }
|
32
|
+
out_strs.write_array_of_pointer(str_ptrs)
|
33
|
+
check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
|
34
|
+
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
35
|
+
end
|
36
|
+
|
58
37
|
def label=(label)
|
38
|
+
@label = label
|
59
39
|
set_field("label", label)
|
60
40
|
end
|
61
41
|
|
62
42
|
def weight=(weight)
|
43
|
+
@weight = weight
|
63
44
|
set_field("weight", weight)
|
64
45
|
end
|
65
46
|
|
66
47
|
def group=(group)
|
48
|
+
@group = group
|
67
49
|
set_field("group", group, type: :int32)
|
68
50
|
end
|
69
51
|
|
52
|
+
def feature_names=(feature_names)
|
53
|
+
@feature_names = feature_names
|
54
|
+
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
55
|
+
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
56
|
+
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
57
|
+
end
|
58
|
+
|
59
|
+
# TODO only update reference if not in chain
|
60
|
+
def reference=(reference)
|
61
|
+
if reference != @reference
|
62
|
+
@reference = reference
|
63
|
+
free_handle
|
64
|
+
construct
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
70
68
|
def num_data
|
71
69
|
out = ::FFI::MemoryPointer.new(:int)
|
72
70
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -83,11 +81,6 @@ module LightGBM
|
|
83
81
|
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
|
84
82
|
end
|
85
83
|
|
86
|
-
# not released yet
|
87
|
-
# def dump_text(filename)
|
88
|
-
# check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
89
|
-
# end
|
90
|
-
|
91
84
|
def subset(used_indices, params: nil)
|
92
85
|
# categorical_feature passed via params
|
93
86
|
params ||= self.params
|
@@ -109,6 +102,70 @@ module LightGBM
|
|
109
102
|
|
110
103
|
private
|
111
104
|
|
105
|
+
def construct
|
106
|
+
data = @data
|
107
|
+
used_indices = @used_indices
|
108
|
+
|
109
|
+
# TODO stringify params
|
110
|
+
params = @params || {}
|
111
|
+
if @categorical_feature != "auto" && @categorical_feature.any?
|
112
|
+
params["categorical_feature"] ||= @categorical_feature.join(",")
|
113
|
+
end
|
114
|
+
set_verbosity(params)
|
115
|
+
|
116
|
+
@handle = ::FFI::MemoryPointer.new(:pointer)
|
117
|
+
parameters = params_str(params)
|
118
|
+
reference = @reference.handle_pointer if @reference
|
119
|
+
if used_indices
|
120
|
+
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
121
|
+
used_row_indices.write_array_of_int32(used_indices)
|
122
|
+
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
123
|
+
elsif data.is_a?(String)
|
124
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
125
|
+
else
|
126
|
+
if matrix?(data)
|
127
|
+
nrow = data.row_count
|
128
|
+
ncol = data.column_count
|
129
|
+
flat_data = data.to_a.flatten
|
130
|
+
elsif daru?(data)
|
131
|
+
nrow, ncol = data.shape
|
132
|
+
flat_data = data.map_rows(&:to_a).flatten
|
133
|
+
elsif numo?(data) || rover?(data)
|
134
|
+
data = data.to_numo if rover?(data)
|
135
|
+
nrow, ncol = data.shape
|
136
|
+
else
|
137
|
+
nrow = data.count
|
138
|
+
ncol = data.first.count
|
139
|
+
flat_data = data.flatten
|
140
|
+
end
|
141
|
+
|
142
|
+
c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
|
143
|
+
if numo?(data)
|
144
|
+
c_data.write_bytes(data.cast_to(Numo::DFloat).to_string)
|
145
|
+
else
|
146
|
+
handle_missing(flat_data)
|
147
|
+
c_data.write_array_of_double(flat_data)
|
148
|
+
end
|
149
|
+
|
150
|
+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
|
151
|
+
end
|
152
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
153
|
+
|
154
|
+
self.label = @label if @label
|
155
|
+
self.weight = @weight if @weight
|
156
|
+
self.group = @group if @group
|
157
|
+
self.feature_names = @feature_names if @feature_names
|
158
|
+
end
|
159
|
+
|
160
|
+
def free_handle
|
161
|
+
FFI.LGBM_DatasetFree(handle_pointer)
|
162
|
+
ObjectSpace.undefine_finalizer(self)
|
163
|
+
end
|
164
|
+
|
165
|
+
def dump_text(filename)
|
166
|
+
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
167
|
+
end
|
168
|
+
|
112
169
|
def field(field_name)
|
113
170
|
num_data = self.num_data
|
114
171
|
out_len = ::FFI::MemoryPointer.new(:int)
|
@@ -122,27 +179,15 @@ module LightGBM
|
|
122
179
|
data = data.to_a unless data.is_a?(Array)
|
123
180
|
if type == :int32
|
124
181
|
c_data = ::FFI::MemoryPointer.new(:int32, data.count)
|
125
|
-
c_data.
|
182
|
+
c_data.write_array_of_int32(data)
|
126
183
|
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
|
127
184
|
else
|
128
185
|
c_data = ::FFI::MemoryPointer.new(:float, data.count)
|
129
|
-
c_data.
|
186
|
+
c_data.write_array_of_float(data)
|
130
187
|
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
|
131
188
|
end
|
132
189
|
end
|
133
190
|
|
134
|
-
def matrix?(data)
|
135
|
-
defined?(Matrix) && data.is_a?(Matrix)
|
136
|
-
end
|
137
|
-
|
138
|
-
def daru?(data)
|
139
|
-
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
140
|
-
end
|
141
|
-
|
142
|
-
def narray?(data)
|
143
|
-
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
144
|
-
end
|
145
|
-
|
146
191
|
include Utils
|
147
192
|
end
|
148
193
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -5,8 +5,11 @@ module LightGBM
|
|
5
5
|
begin
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
|
-
|
9
|
-
|
8
|
+
if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
|
9
|
+
raise LoadError, "OpenMP not found. Run `brew install libomp`"
|
10
|
+
else
|
11
|
+
raise e
|
12
|
+
end
|
10
13
|
end
|
11
14
|
|
12
15
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
@@ -19,9 +22,11 @@ module LightGBM
|
|
19
22
|
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
20
23
|
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
21
24
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
25
|
+
attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
|
26
|
+
attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
|
22
27
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
23
28
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
24
|
-
|
29
|
+
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
25
30
|
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
26
31
|
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
27
32
|
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -5,8 +5,8 @@ module LightGBM
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
|
10
10
|
|
11
11
|
@booster = LightGBM.train(@params, train_set,
|
12
12
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/utils.rb
CHANGED
@@ -23,5 +23,26 @@ module LightGBM
|
|
23
23
|
params["verbosity"] = -1
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# for categorical, NaN and negative value are the same
|
28
|
+
def handle_missing(data)
|
29
|
+
data.map! { |v| v.nil? ? Float::NAN : v }
|
30
|
+
end
|
31
|
+
|
32
|
+
def matrix?(data)
|
33
|
+
defined?(Matrix) && data.is_a?(Matrix)
|
34
|
+
end
|
35
|
+
|
36
|
+
def daru?(data)
|
37
|
+
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
38
|
+
end
|
39
|
+
|
40
|
+
def numo?(data)
|
41
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
42
|
+
end
|
43
|
+
|
44
|
+
def rover?(data)
|
45
|
+
defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
|
46
|
+
end
|
26
47
|
end
|
27
48
|
end
|
data/lib/lightgbm/version.rb
CHANGED
data/vendor/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) Microsoft Corporation
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-06-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -80,20 +80,6 @@ dependencies:
|
|
80
80
|
- - ">="
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: numo-narray
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
83
|
description:
|
98
84
|
email: andrew@chartkick.com
|
99
85
|
executables: []
|
@@ -101,6 +87,7 @@ extensions: []
|
|
101
87
|
extra_rdoc_files: []
|
102
88
|
files:
|
103
89
|
- CHANGELOG.md
|
90
|
+
- LICENSE.txt
|
104
91
|
- README.md
|
105
92
|
- lib/lightgbm.rb
|
106
93
|
- lib/lightgbm/booster.rb
|
@@ -112,6 +99,10 @@ files:
|
|
112
99
|
- lib/lightgbm/regressor.rb
|
113
100
|
- lib/lightgbm/utils.rb
|
114
101
|
- lib/lightgbm/version.rb
|
102
|
+
- vendor/LICENSE
|
103
|
+
- vendor/lib_lightgbm.dll
|
104
|
+
- vendor/lib_lightgbm.dylib
|
105
|
+
- vendor/lib_lightgbm.so
|
115
106
|
homepage: https://github.com/ankane/lightgbm
|
116
107
|
licenses:
|
117
108
|
- MIT
|
@@ -131,8 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
131
122
|
- !ruby/object:Gem::Version
|
132
123
|
version: '0'
|
133
124
|
requirements: []
|
134
|
-
rubygems_version: 3.
|
125
|
+
rubygems_version: 3.1.2
|
135
126
|
signing_key:
|
136
127
|
specification_version: 4
|
137
|
-
summary:
|
128
|
+
summary: High performance gradient boosting for Ruby
|
138
129
|
test_files: []
|