lightgbm 0.1.4 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +16 -5
- data/lib/lightgbm.rb +4 -1
- data/lib/lightgbm/booster.rb +9 -2
- data/lib/lightgbm/classifier.rb +2 -2
- data/lib/lightgbm/dataset.rb +23 -15
- data/lib/lightgbm/ffi.rb +5 -0
- data/lib/lightgbm/regressor.rb +2 -2
- data/lib/lightgbm/utils.rb +17 -0
- data/lib/lightgbm/version.rb +1 -1
- data/vendor/LICENSE +21 -0
- data/vendor/lib_lightgbm.dll +0 -0
- data/vendor/lib_lightgbm.dylib +0 -0
- data/vendor/lib_lightgbm.so +0 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dad09a50916e229119d99de66aa676868224af44d8958916284718edb7fb3a3f
|
4
|
+
data.tar.gz: 2f7d299a7037455a4ebd1ccaf41bc102ec66808b9e806a59194fd14699204364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa415946a82be07f1b9b8823b60fc75030f9e5547859078459583fa64d9212aad11db39d30e8e327b1fe23f48fc2ce9c6ddbd0445b52f73b44374a07c4724f5c
|
7
|
+
data.tar.gz: 99357888fde412672ecd804396cbd3a524b7dee6318d46d92a07064b858cd5f62f144f6b680a25ae0f1ae567df7d4670fcf38ba706fa31cbdfe58fd79eabc62d
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -8,19 +8,23 @@
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
12
|
-
|
13
11
|
Add this line to your application’s Gemfile:
|
14
12
|
|
15
13
|
```ruby
|
16
14
|
gem 'lightgbm'
|
17
15
|
```
|
18
16
|
|
17
|
+
LightGBM is packaged with the gem, so no need to install it separately. On Mac, also run:
|
18
|
+
|
19
|
+
```sh
|
20
|
+
brew install libomp
|
21
|
+
```
|
22
|
+
|
19
23
|
## Getting Started
|
20
24
|
|
21
25
|
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
22
26
|
|
23
|
-
- The `get_`
|
27
|
+
- The `get_` and `set_` prefixes are removed from methods
|
24
28
|
- The default verbosity is `-1`
|
25
29
|
- With the `cv` method, `stratified` is set to `false`
|
26
30
|
|
@@ -28,18 +32,25 @@ Some methods and options are also missing at the moment. PRs welcome!
|
|
28
32
|
|
29
33
|
## Training API
|
30
34
|
|
35
|
+
Prep your data
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
39
|
+
y = [1, 2, 3, 4]
|
40
|
+
```
|
41
|
+
|
31
42
|
Train a model
|
32
43
|
|
33
44
|
```ruby
|
34
45
|
params = {objective: "regression"}
|
35
|
-
train_set = LightGBM::Dataset.new(
|
46
|
+
train_set = LightGBM::Dataset.new(x, label: y)
|
36
47
|
booster = LightGBM.train(params, train_set)
|
37
48
|
```
|
38
49
|
|
39
50
|
Predict
|
40
51
|
|
41
52
|
```ruby
|
42
|
-
booster.predict(
|
53
|
+
booster.predict(x)
|
43
54
|
```
|
44
55
|
|
45
56
|
Save the model to a file
|
data/lib/lightgbm.rb
CHANGED
@@ -20,7 +20,8 @@ module LightGBM
|
|
20
20
|
attr_accessor :ffi_lib
|
21
21
|
end
|
22
22
|
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
-
|
23
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
24
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so", vendor_lib]
|
24
25
|
|
25
26
|
# friendlier error message
|
26
27
|
autoload :FFI, "lightgbm/ffi"
|
@@ -39,6 +40,8 @@ module LightGBM
|
|
39
40
|
end
|
40
41
|
end
|
41
42
|
|
43
|
+
raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
|
44
|
+
|
42
45
|
booster.best_iteration = 0
|
43
46
|
|
44
47
|
if early_stopping_rounds
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -116,7 +116,12 @@ module LightGBM
|
|
116
116
|
|
117
117
|
# TODO support different prediction types
|
118
118
|
def predict(input, num_iteration: nil, **params)
|
119
|
-
|
119
|
+
input =
|
120
|
+
if daru?(input)
|
121
|
+
input.map_rows(&:to_a)
|
122
|
+
else
|
123
|
+
input.to_a
|
124
|
+
end
|
120
125
|
|
121
126
|
singular = !input.first.is_a?(Array)
|
122
127
|
input = [input] if singular
|
@@ -124,8 +129,10 @@ module LightGBM
|
|
124
129
|
num_iteration ||= best_iteration
|
125
130
|
num_class ||= num_class()
|
126
131
|
|
132
|
+
flat_input = input.flatten
|
133
|
+
handle_missing(flat_input)
|
127
134
|
data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
|
128
|
-
data.put_array_of_float(0,
|
135
|
+
data.put_array_of_float(0, flat_input)
|
129
136
|
|
130
137
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
131
138
|
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -15,8 +15,8 @@ module LightGBM
|
|
15
15
|
params[:objective] ||= "binary"
|
16
16
|
end
|
17
17
|
|
18
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
|
20
20
|
|
21
21
|
@booster = LightGBM.train(params, train_set,
|
22
22
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,12 +2,14 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
7
|
|
8
8
|
# TODO stringify params
|
9
9
|
params ||= {}
|
10
|
-
|
10
|
+
if categorical_feature != "auto" && categorical_feature.any?
|
11
|
+
params["categorical_feature"] ||= categorical_feature.join(",")
|
12
|
+
end
|
11
13
|
set_verbosity(params)
|
12
14
|
|
13
15
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
@@ -26,7 +28,7 @@ module LightGBM
|
|
26
28
|
flat_data = data.to_a.flatten
|
27
29
|
elsif daru?(data)
|
28
30
|
nrow, ncol = data.shape
|
29
|
-
flat_data = data.
|
31
|
+
flat_data = data.map_rows(&:to_a).flatten
|
30
32
|
elsif narray?(data)
|
31
33
|
nrow, ncol = data.shape
|
32
34
|
flat_data = data.flatten.to_a
|
@@ -36,6 +38,7 @@ module LightGBM
|
|
36
38
|
flat_data = data.flatten
|
37
39
|
end
|
38
40
|
|
41
|
+
handle_missing(flat_data)
|
39
42
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
40
43
|
c_data.put_array_of_float(0, flat_data)
|
41
44
|
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
@@ -45,6 +48,7 @@ module LightGBM
|
|
45
48
|
self.label = label if label
|
46
49
|
self.weight = weight if weight
|
47
50
|
self.group = group if group
|
51
|
+
self.feature_names = feature_names if feature_names
|
48
52
|
end
|
49
53
|
|
50
54
|
def label
|
@@ -59,6 +63,16 @@ module LightGBM
|
|
59
63
|
set_field("label", label)
|
60
64
|
end
|
61
65
|
|
66
|
+
def feature_names
|
67
|
+
# must preallocate space
|
68
|
+
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
69
|
+
out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
|
70
|
+
str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:string, 255) }
|
71
|
+
out_strs.put_array_of_pointer(0, str_ptrs)
|
72
|
+
check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
|
73
|
+
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
74
|
+
end
|
75
|
+
|
62
76
|
def weight=(weight)
|
63
77
|
set_field("weight", weight)
|
64
78
|
end
|
@@ -67,6 +81,12 @@ module LightGBM
|
|
67
81
|
set_field("group", group, type: :int32)
|
68
82
|
end
|
69
83
|
|
84
|
+
def feature_names=(feature_names)
|
85
|
+
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
86
|
+
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
87
|
+
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
88
|
+
end
|
89
|
+
|
70
90
|
def num_data
|
71
91
|
out = ::FFI::MemoryPointer.new(:int)
|
72
92
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -131,18 +151,6 @@ module LightGBM
|
|
131
151
|
end
|
132
152
|
end
|
133
153
|
|
134
|
-
def matrix?(data)
|
135
|
-
defined?(Matrix) && data.is_a?(Matrix)
|
136
|
-
end
|
137
|
-
|
138
|
-
def daru?(data)
|
139
|
-
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
140
|
-
end
|
141
|
-
|
142
|
-
def narray?(data)
|
143
|
-
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
144
|
-
end
|
145
|
-
|
146
154
|
include Utils
|
147
155
|
end
|
148
156
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -6,6 +6,9 @@ module LightGBM
|
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
8
|
raise e if ENV["LIGHTGBM_DEBUG"]
|
9
|
+
if e.message.include?("libomp")
|
10
|
+
raise LoadError, "Could not find OpenMP"
|
11
|
+
end
|
9
12
|
raise LoadError, "Could not find LightGBM"
|
10
13
|
end
|
11
14
|
|
@@ -19,6 +22,8 @@ module LightGBM
|
|
19
22
|
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
20
23
|
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
21
24
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
25
|
+
attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
|
26
|
+
attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
|
22
27
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
23
28
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
24
29
|
# attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -5,8 +5,8 @@ module LightGBM
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
|
10
10
|
|
11
11
|
@booster = LightGBM.train(@params, train_set,
|
12
12
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/utils.rb
CHANGED
@@ -23,5 +23,22 @@ module LightGBM
|
|
23
23
|
params["verbosity"] = -1
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# for categorical, NaN and negative value are the same
|
28
|
+
def handle_missing(data)
|
29
|
+
data.map! { |v| v.nil? ? Float::NAN : v }
|
30
|
+
end
|
31
|
+
|
32
|
+
def matrix?(data)
|
33
|
+
defined?(Matrix) && data.is_a?(Matrix)
|
34
|
+
end
|
35
|
+
|
36
|
+
def daru?(data)
|
37
|
+
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
38
|
+
end
|
39
|
+
|
40
|
+
def narray?(data)
|
41
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
42
|
+
end
|
26
43
|
end
|
27
44
|
end
|
data/lib/lightgbm/version.rb
CHANGED
data/vendor/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) Microsoft Corporation
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -101,6 +101,7 @@ extensions: []
|
|
101
101
|
extra_rdoc_files: []
|
102
102
|
files:
|
103
103
|
- CHANGELOG.md
|
104
|
+
- LICENSE.txt
|
104
105
|
- README.md
|
105
106
|
- lib/lightgbm.rb
|
106
107
|
- lib/lightgbm/booster.rb
|
@@ -112,6 +113,10 @@ files:
|
|
112
113
|
- lib/lightgbm/regressor.rb
|
113
114
|
- lib/lightgbm/utils.rb
|
114
115
|
- lib/lightgbm/version.rb
|
116
|
+
- vendor/LICENSE
|
117
|
+
- vendor/lib_lightgbm.dll
|
118
|
+
- vendor/lib_lightgbm.dylib
|
119
|
+
- vendor/lib_lightgbm.so
|
115
120
|
homepage: https://github.com/ankane/lightgbm
|
116
121
|
licenses:
|
117
122
|
- MIT
|