lightgbm 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +16 -5
- data/lib/lightgbm.rb +4 -1
- data/lib/lightgbm/booster.rb +9 -2
- data/lib/lightgbm/classifier.rb +2 -2
- data/lib/lightgbm/dataset.rb +23 -15
- data/lib/lightgbm/ffi.rb +5 -0
- data/lib/lightgbm/regressor.rb +2 -2
- data/lib/lightgbm/utils.rb +17 -0
- data/lib/lightgbm/version.rb +1 -1
- data/vendor/LICENSE +21 -0
- data/vendor/lib_lightgbm.dll +0 -0
- data/vendor/lib_lightgbm.dylib +0 -0
- data/vendor/lib_lightgbm.so +0 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dad09a50916e229119d99de66aa676868224af44d8958916284718edb7fb3a3f
|
4
|
+
data.tar.gz: 2f7d299a7037455a4ebd1ccaf41bc102ec66808b9e806a59194fd14699204364
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aa415946a82be07f1b9b8823b60fc75030f9e5547859078459583fa64d9212aad11db39d30e8e327b1fe23f48fc2ce9c6ddbd0445b52f73b44374a07c4724f5c
|
7
|
+
data.tar.gz: 99357888fde412672ecd804396cbd3a524b7dee6318d46d92a07064b858cd5f62f144f6b680a25ae0f1ae567df7d4670fcf38ba706fa31cbdfe58fd79eabc62d
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
CHANGED
@@ -8,19 +8,23 @@
|
|
8
8
|
|
9
9
|
## Installation
|
10
10
|
|
11
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
12
|
-
|
13
11
|
Add this line to your application’s Gemfile:
|
14
12
|
|
15
13
|
```ruby
|
16
14
|
gem 'lightgbm'
|
17
15
|
```
|
18
16
|
|
17
|
+
LightGBM is packaged with the gem, so no need to install it separately. On Mac, also run:
|
18
|
+
|
19
|
+
```sh
|
20
|
+
brew install libomp
|
21
|
+
```
|
22
|
+
|
19
23
|
## Getting Started
|
20
24
|
|
21
25
|
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
22
26
|
|
23
|
-
- The `get_`
|
27
|
+
- The `get_` and `set_` prefixes are removed from methods
|
24
28
|
- The default verbosity is `-1`
|
25
29
|
- With the `cv` method, `stratified` is set to `false`
|
26
30
|
|
@@ -28,18 +32,25 @@ Some methods and options are also missing at the moment. PRs welcome!
|
|
28
32
|
|
29
33
|
## Training API
|
30
34
|
|
35
|
+
Prep your data
|
36
|
+
|
37
|
+
```ruby
|
38
|
+
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
|
39
|
+
y = [1, 2, 3, 4]
|
40
|
+
```
|
41
|
+
|
31
42
|
Train a model
|
32
43
|
|
33
44
|
```ruby
|
34
45
|
params = {objective: "regression"}
|
35
|
-
train_set = LightGBM::Dataset.new(
|
46
|
+
train_set = LightGBM::Dataset.new(x, label: y)
|
36
47
|
booster = LightGBM.train(params, train_set)
|
37
48
|
```
|
38
49
|
|
39
50
|
Predict
|
40
51
|
|
41
52
|
```ruby
|
42
|
-
booster.predict(
|
53
|
+
booster.predict(x)
|
43
54
|
```
|
44
55
|
|
45
56
|
Save the model to a file
|
data/lib/lightgbm.rb
CHANGED
@@ -20,7 +20,8 @@ module LightGBM
|
|
20
20
|
attr_accessor :ffi_lib
|
21
21
|
end
|
22
22
|
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
-
|
23
|
+
vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
|
24
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so", vendor_lib]
|
24
25
|
|
25
26
|
# friendlier error message
|
26
27
|
autoload :FFI, "lightgbm/ffi"
|
@@ -39,6 +40,8 @@ module LightGBM
|
|
39
40
|
end
|
40
41
|
end
|
41
42
|
|
43
|
+
raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
|
44
|
+
|
42
45
|
booster.best_iteration = 0
|
43
46
|
|
44
47
|
if early_stopping_rounds
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -116,7 +116,12 @@ module LightGBM
|
|
116
116
|
|
117
117
|
# TODO support different prediction types
|
118
118
|
def predict(input, num_iteration: nil, **params)
|
119
|
-
|
119
|
+
input =
|
120
|
+
if daru?(input)
|
121
|
+
input.map_rows(&:to_a)
|
122
|
+
else
|
123
|
+
input.to_a
|
124
|
+
end
|
120
125
|
|
121
126
|
singular = !input.first.is_a?(Array)
|
122
127
|
input = [input] if singular
|
@@ -124,8 +129,10 @@ module LightGBM
|
|
124
129
|
num_iteration ||= best_iteration
|
125
130
|
num_class ||= num_class()
|
126
131
|
|
132
|
+
flat_input = input.flatten
|
133
|
+
handle_missing(flat_input)
|
127
134
|
data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
|
128
|
-
data.put_array_of_float(0,
|
135
|
+
data.put_array_of_float(0, flat_input)
|
129
136
|
|
130
137
|
out_len = ::FFI::MemoryPointer.new(:int64)
|
131
138
|
out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -15,8 +15,8 @@ module LightGBM
|
|
15
15
|
params[:objective] ||= "binary"
|
16
16
|
end
|
17
17
|
|
18
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
|
20
20
|
|
21
21
|
@booster = LightGBM.train(params, train_set,
|
22
22
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,12 +2,14 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
|
6
6
|
@data = data
|
7
7
|
|
8
8
|
# TODO stringify params
|
9
9
|
params ||= {}
|
10
|
-
|
10
|
+
if categorical_feature != "auto" && categorical_feature.any?
|
11
|
+
params["categorical_feature"] ||= categorical_feature.join(",")
|
12
|
+
end
|
11
13
|
set_verbosity(params)
|
12
14
|
|
13
15
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
@@ -26,7 +28,7 @@ module LightGBM
|
|
26
28
|
flat_data = data.to_a.flatten
|
27
29
|
elsif daru?(data)
|
28
30
|
nrow, ncol = data.shape
|
29
|
-
flat_data = data.
|
31
|
+
flat_data = data.map_rows(&:to_a).flatten
|
30
32
|
elsif narray?(data)
|
31
33
|
nrow, ncol = data.shape
|
32
34
|
flat_data = data.flatten.to_a
|
@@ -36,6 +38,7 @@ module LightGBM
|
|
36
38
|
flat_data = data.flatten
|
37
39
|
end
|
38
40
|
|
41
|
+
handle_missing(flat_data)
|
39
42
|
c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
|
40
43
|
c_data.put_array_of_float(0, flat_data)
|
41
44
|
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
@@ -45,6 +48,7 @@ module LightGBM
|
|
45
48
|
self.label = label if label
|
46
49
|
self.weight = weight if weight
|
47
50
|
self.group = group if group
|
51
|
+
self.feature_names = feature_names if feature_names
|
48
52
|
end
|
49
53
|
|
50
54
|
def label
|
@@ -59,6 +63,16 @@ module LightGBM
|
|
59
63
|
set_field("label", label)
|
60
64
|
end
|
61
65
|
|
66
|
+
def feature_names
|
67
|
+
# must preallocate space
|
68
|
+
num_feature_names = ::FFI::MemoryPointer.new(:int)
|
69
|
+
out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
|
70
|
+
str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:string, 255) }
|
71
|
+
out_strs.put_array_of_pointer(0, str_ptrs)
|
72
|
+
check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
|
73
|
+
str_ptrs[0, num_feature_names.read_int].map(&:read_string)
|
74
|
+
end
|
75
|
+
|
62
76
|
def weight=(weight)
|
63
77
|
set_field("weight", weight)
|
64
78
|
end
|
@@ -67,6 +81,12 @@ module LightGBM
|
|
67
81
|
set_field("group", group, type: :int32)
|
68
82
|
end
|
69
83
|
|
84
|
+
def feature_names=(feature_names)
|
85
|
+
c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
|
86
|
+
c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
|
87
|
+
check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
|
88
|
+
end
|
89
|
+
|
70
90
|
def num_data
|
71
91
|
out = ::FFI::MemoryPointer.new(:int)
|
72
92
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -131,18 +151,6 @@ module LightGBM
|
|
131
151
|
end
|
132
152
|
end
|
133
153
|
|
134
|
-
def matrix?(data)
|
135
|
-
defined?(Matrix) && data.is_a?(Matrix)
|
136
|
-
end
|
137
|
-
|
138
|
-
def daru?(data)
|
139
|
-
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
140
|
-
end
|
141
|
-
|
142
|
-
def narray?(data)
|
143
|
-
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
144
|
-
end
|
145
|
-
|
146
154
|
include Utils
|
147
155
|
end
|
148
156
|
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -6,6 +6,9 @@ module LightGBM
|
|
6
6
|
ffi_lib LightGBM.ffi_lib
|
7
7
|
rescue LoadError => e
|
8
8
|
raise e if ENV["LIGHTGBM_DEBUG"]
|
9
|
+
if e.message.include?("libomp")
|
10
|
+
raise LoadError, "Could not find OpenMP"
|
11
|
+
end
|
9
12
|
raise LoadError, "Could not find LightGBM"
|
10
13
|
end
|
11
14
|
|
@@ -19,6 +22,8 @@ module LightGBM
|
|
19
22
|
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
20
23
|
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
21
24
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
25
|
+
attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
|
26
|
+
attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
|
22
27
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
23
28
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
24
29
|
# attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -5,8 +5,8 @@ module LightGBM
|
|
5
5
|
end
|
6
6
|
|
7
7
|
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
-
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
-
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
|
10
10
|
|
11
11
|
@booster = LightGBM.train(@params, train_set,
|
12
12
|
num_boost_round: @n_estimators,
|
data/lib/lightgbm/utils.rb
CHANGED
@@ -23,5 +23,22 @@ module LightGBM
|
|
23
23
|
params["verbosity"] = -1
|
24
24
|
end
|
25
25
|
end
|
26
|
+
|
27
|
+
# for categorical, NaN and negative value are the same
|
28
|
+
def handle_missing(data)
|
29
|
+
data.map! { |v| v.nil? ? Float::NAN : v }
|
30
|
+
end
|
31
|
+
|
32
|
+
def matrix?(data)
|
33
|
+
defined?(Matrix) && data.is_a?(Matrix)
|
34
|
+
end
|
35
|
+
|
36
|
+
def daru?(data)
|
37
|
+
defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
|
38
|
+
end
|
39
|
+
|
40
|
+
def narray?(data)
|
41
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
42
|
+
end
|
26
43
|
end
|
27
44
|
end
|
data/lib/lightgbm/version.rb
CHANGED
data/vendor/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) Microsoft Corporation
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
Binary file
|
Binary file
|
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-
|
11
|
+
date: 2019-09-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -101,6 +101,7 @@ extensions: []
|
|
101
101
|
extra_rdoc_files: []
|
102
102
|
files:
|
103
103
|
- CHANGELOG.md
|
104
|
+
- LICENSE.txt
|
104
105
|
- README.md
|
105
106
|
- lib/lightgbm.rb
|
106
107
|
- lib/lightgbm/booster.rb
|
@@ -112,6 +113,10 @@ files:
|
|
112
113
|
- lib/lightgbm/regressor.rb
|
113
114
|
- lib/lightgbm/utils.rb
|
114
115
|
- lib/lightgbm/version.rb
|
116
|
+
- vendor/LICENSE
|
117
|
+
- vendor/lib_lightgbm.dll
|
118
|
+
- vendor/lib_lightgbm.dylib
|
119
|
+
- vendor/lib_lightgbm.so
|
115
120
|
homepage: https://github.com/ankane/lightgbm
|
116
121
|
licenses:
|
117
122
|
- MIT
|