lightgbm 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +8 -2
- data/lib/lightgbm.rb +11 -1
- data/lib/lightgbm/booster.rb +6 -6
- data/lib/lightgbm/classifier.rb +19 -28
- data/lib/lightgbm/dataset.rb +39 -19
- data/lib/lightgbm/ffi.rb +8 -2
- data/lib/lightgbm/model.rb +30 -0
- data/lib/lightgbm/ranker.rb +21 -0
- data/lib/lightgbm/regressor.rb +16 -25
- data/lib/lightgbm/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81f1f695112234bb576afaab35f4bf276d1f9c4a4adf0c74831cd1bb73f6baa0
|
4
|
+
data.tar.gz: 59ef1f3c581f83e108ce2a6f2c847bb7488fc2ea7f39ba217aaeffbf46e99351
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21297d26e88957dd60d0aa61da19aa53aa632958adaf069b0efe2c8dae35e2e21e74c374da3509e337ca3268613b14dc541aee5012df086af7e8f784adb5063d
|
7
|
+
data.tar.gz: 7dbdc0fccaf256a1a835aea3eaa51fe326a0cd4b8cde168a6b8c27ff00c6412a5b5d82583fb1c14749f001f002a4fa3c3e156b9c4b2174b1e91f9449a4fa9ba1
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ gem 'lightgbm'
|
|
18
18
|
|
19
19
|
## Getting Started
|
20
20
|
|
21
|
-
This library follows the [
|
21
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
22
22
|
|
23
23
|
- The `get_` prefix is removed from methods
|
24
24
|
- The default verbosity is `-1`
|
@@ -63,7 +63,7 @@ booster.feature_importance
|
|
63
63
|
Early stopping
|
64
64
|
|
65
65
|
```ruby
|
66
|
-
LightGBM.train(params, train_set,
|
66
|
+
LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5)
|
67
67
|
```
|
68
68
|
|
69
69
|
CV
|
@@ -116,6 +116,12 @@ Get the importance of features
|
|
116
116
|
model.feature_importances
|
117
117
|
```
|
118
118
|
|
119
|
+
Early stopping
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
|
123
|
+
```
|
124
|
+
|
119
125
|
## Data
|
120
126
|
|
121
127
|
Data can be an array of arrays
|
data/lib/lightgbm.rb
CHANGED
@@ -5,16 +5,26 @@ require "ffi"
|
|
5
5
|
require "lightgbm/utils"
|
6
6
|
require "lightgbm/booster"
|
7
7
|
require "lightgbm/dataset"
|
8
|
-
require "lightgbm/ffi"
|
9
8
|
require "lightgbm/version"
|
10
9
|
|
11
10
|
# scikit-learn API
|
11
|
+
require "lightgbm/model"
|
12
12
|
require "lightgbm/classifier"
|
13
|
+
require "lightgbm/ranker"
|
13
14
|
require "lightgbm/regressor"
|
14
15
|
|
15
16
|
module LightGBM
|
16
17
|
class Error < StandardError; end
|
17
18
|
|
19
|
+
class << self
|
20
|
+
attr_accessor :ffi_lib
|
21
|
+
end
|
22
|
+
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so"]
|
24
|
+
|
25
|
+
# friendlier error message
|
26
|
+
autoload :FFI, "lightgbm/ffi"
|
27
|
+
|
18
28
|
class << self
|
19
29
|
def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
|
20
30
|
booster = Booster.new(params: params, train_set: train_set)
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -14,8 +14,7 @@ module LightGBM
|
|
14
14
|
set_verbosity(params)
|
15
15
|
check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
|
16
16
|
end
|
17
|
-
|
18
|
-
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
17
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
19
18
|
|
20
19
|
self.best_iteration = -1
|
21
20
|
|
@@ -23,10 +22,6 @@ module LightGBM
|
|
23
22
|
@name_valid_sets = []
|
24
23
|
end
|
25
24
|
|
26
|
-
def self.finalize(pointer)
|
27
|
-
-> { FFI.LGBM_BoosterFree(pointer) }
|
28
|
-
end
|
29
|
-
|
30
25
|
def add_valid(data, name)
|
31
26
|
check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
|
32
27
|
@name_valid_sets << name
|
@@ -153,6 +148,11 @@ module LightGBM
|
|
153
148
|
finished.read_int == 1
|
154
149
|
end
|
155
150
|
|
151
|
+
def self.finalize(pointer)
|
152
|
+
# must use proc instead of stabby lambda
|
153
|
+
proc { FFI.LGBM_BoosterFree(pointer) }
|
154
|
+
end
|
155
|
+
|
156
156
|
private
|
157
157
|
|
158
158
|
def handle_pointer
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -1,15 +1,10 @@
|
|
1
1
|
module LightGBM
|
2
|
-
class Classifier
|
3
|
-
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
|
4
|
-
|
5
|
-
num_leaves: num_leaves,
|
6
|
-
learning_rate: learning_rate
|
7
|
-
}
|
8
|
-
@params[:objective] = objective if objective
|
9
|
-
@n_estimators = n_estimators
|
2
|
+
class Classifier < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
|
4
|
+
super
|
10
5
|
end
|
11
6
|
|
12
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
|
13
8
|
n_classes = y.uniq.size
|
14
9
|
|
15
10
|
params = @params.dup
|
@@ -20,26 +15,34 @@ module LightGBM
|
|
20
15
|
params[:objective] ||= "binary"
|
21
16
|
end
|
22
17
|
|
23
|
-
train_set = Dataset.new(x, label: y)
|
24
|
-
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
20
|
+
|
21
|
+
@booster = LightGBM.train(params, train_set,
|
22
|
+
num_boost_round: @n_estimators,
|
23
|
+
early_stopping_rounds: early_stopping_rounds,
|
24
|
+
verbose_eval: verbose,
|
25
|
+
valid_sets: valid_sets,
|
26
|
+
valid_names: eval_names
|
27
|
+
)
|
25
28
|
nil
|
26
29
|
end
|
27
30
|
|
28
|
-
def predict(data)
|
29
|
-
y_pred = @booster.predict(data)
|
31
|
+
def predict(data, num_iteration: nil)
|
32
|
+
y_pred = @booster.predict(data, num_iteration: num_iteration)
|
30
33
|
|
31
34
|
if y_pred.first.is_a?(Array)
|
32
35
|
# multiple classes
|
33
36
|
y_pred.map do |v|
|
34
|
-
v.map.with_index.max_by { |v2,
|
37
|
+
v.map.with_index.max_by { |v2, _| v2 }.last
|
35
38
|
end
|
36
39
|
else
|
37
40
|
y_pred.map { |v| v > 0.5 ? 1 : 0 }
|
38
41
|
end
|
39
42
|
end
|
40
43
|
|
41
|
-
def predict_proba(data)
|
42
|
-
y_pred = @booster.predict(data)
|
44
|
+
def predict_proba(data, num_iteration: nil)
|
45
|
+
y_pred = @booster.predict(data, num_iteration: num_iteration)
|
43
46
|
|
44
47
|
if y_pred.first.is_a?(Array)
|
45
48
|
# multiple classes
|
@@ -48,17 +51,5 @@ module LightGBM
|
|
48
51
|
y_pred.map { |v| [1 - v, v] }
|
49
52
|
end
|
50
53
|
end
|
51
|
-
|
52
|
-
def save_model(fname)
|
53
|
-
@booster.save_model(fname)
|
54
|
-
end
|
55
|
-
|
56
|
-
def load_model(fname)
|
57
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
58
|
-
end
|
59
|
-
|
60
|
-
def feature_importances
|
61
|
-
@booster.feature_importance
|
62
|
-
end
|
63
54
|
end
|
64
55
|
end
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,7 +2,7 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
6
6
|
@data = data
|
7
7
|
|
8
8
|
# TODO stringify params
|
@@ -13,12 +13,12 @@ module LightGBM
|
|
13
13
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
14
14
|
parameters = params_str(params)
|
15
15
|
reference = reference.handle_pointer if reference
|
16
|
-
if data.is_a?(String)
|
17
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
18
|
-
elsif used_indices
|
16
|
+
if used_indices
|
19
17
|
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
20
18
|
used_row_indices.put_array_of_int32(0, used_indices)
|
21
19
|
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
20
|
+
elsif data.is_a?(String)
|
21
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
22
22
|
else
|
23
23
|
if matrix?(data)
|
24
24
|
nrow = data.row_count
|
@@ -40,11 +40,11 @@ module LightGBM
|
|
40
40
|
c_data.put_array_of_float(0, flat_data)
|
41
41
|
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
42
42
|
end
|
43
|
-
|
44
|
-
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
43
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
45
44
|
|
46
|
-
|
47
|
-
|
45
|
+
self.label = label if label
|
46
|
+
self.weight = weight if weight
|
47
|
+
self.group = group if group
|
48
48
|
end
|
49
49
|
|
50
50
|
def label
|
@@ -55,6 +55,18 @@ module LightGBM
|
|
55
55
|
field("weight")
|
56
56
|
end
|
57
57
|
|
58
|
+
def label=(label)
|
59
|
+
set_field("label", label)
|
60
|
+
end
|
61
|
+
|
62
|
+
def weight=(weight)
|
63
|
+
set_field("weight", weight)
|
64
|
+
end
|
65
|
+
|
66
|
+
def group=(group)
|
67
|
+
set_field("group", group, type: :int32)
|
68
|
+
end
|
69
|
+
|
58
70
|
def num_data
|
59
71
|
out = ::FFI::MemoryPointer.new(:int)
|
60
72
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -71,9 +83,10 @@ module LightGBM
|
|
71
83
|
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
|
72
84
|
end
|
73
85
|
|
74
|
-
|
75
|
-
|
76
|
-
|
86
|
+
# not released yet
|
87
|
+
# def dump_text(filename)
|
88
|
+
# check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
89
|
+
# end
|
77
90
|
|
78
91
|
def subset(used_indices, params: nil)
|
79
92
|
# categorical_feature passed via params
|
@@ -85,14 +98,15 @@ module LightGBM
|
|
85
98
|
)
|
86
99
|
end
|
87
100
|
|
88
|
-
def self.finalize(pointer)
|
89
|
-
-> { FFI.LGBM_DatasetFree(pointer) }
|
90
|
-
end
|
91
|
-
|
92
101
|
def handle_pointer
|
93
102
|
@handle.read_pointer
|
94
103
|
end
|
95
104
|
|
105
|
+
def self.finalize(pointer)
|
106
|
+
# must use proc instead of stabby lambda
|
107
|
+
proc { FFI.LGBM_DatasetFree(pointer) }
|
108
|
+
end
|
109
|
+
|
96
110
|
private
|
97
111
|
|
98
112
|
def field(field_name)
|
@@ -104,11 +118,17 @@ module LightGBM
|
|
104
118
|
out_ptr.read_pointer.read_array_of_float(num_data)
|
105
119
|
end
|
106
120
|
|
107
|
-
def set_field(field_name, data)
|
121
|
+
def set_field(field_name, data, type: :float)
|
108
122
|
data = data.to_a unless data.is_a?(Array)
|
109
|
-
|
110
|
-
|
111
|
-
|
123
|
+
if type == :int32
|
124
|
+
c_data = ::FFI::MemoryPointer.new(:int32, data.count)
|
125
|
+
c_data.put_array_of_int32(0, data)
|
126
|
+
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
|
127
|
+
else
|
128
|
+
c_data = ::FFI::MemoryPointer.new(:float, data.count)
|
129
|
+
c_data.put_array_of_float(0, data)
|
130
|
+
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
|
131
|
+
end
|
112
132
|
end
|
113
133
|
|
114
134
|
def matrix?(data)
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module LightGBM
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
|
-
|
4
|
+
|
5
|
+
begin
|
6
|
+
ffi_lib LightGBM.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
raise e if ENV["LIGHTGBM_DEBUG"]
|
9
|
+
raise LoadError, "Could not find LightGBM"
|
10
|
+
end
|
5
11
|
|
6
12
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
7
13
|
# keep same order
|
@@ -15,7 +21,7 @@ module LightGBM
|
|
15
21
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
16
22
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
17
23
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
18
|
-
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
24
|
+
# attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
19
25
|
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
20
26
|
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
21
27
|
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module LightGBM
|
2
|
+
class Model
|
3
|
+
attr_reader :booster
|
4
|
+
|
5
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
|
6
|
+
@params = {
|
7
|
+
num_leaves: num_leaves,
|
8
|
+
learning_rate: learning_rate
|
9
|
+
}.merge(options)
|
10
|
+
@params[:objective] = objective if objective
|
11
|
+
@n_estimators = n_estimators
|
12
|
+
end
|
13
|
+
|
14
|
+
def save_model(fname)
|
15
|
+
@booster.save_model(fname)
|
16
|
+
end
|
17
|
+
|
18
|
+
def load_model(fname)
|
19
|
+
@booster = Booster.new(params: @params, model_file: fname)
|
20
|
+
end
|
21
|
+
|
22
|
+
def best_iteration
|
23
|
+
@booster.best_iteration
|
24
|
+
end
|
25
|
+
|
26
|
+
def feature_importances
|
27
|
+
@booster.feature_importance
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module LightGBM
|
2
|
+
class Ranker < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
|
4
|
+
super
|
5
|
+
end
|
6
|
+
|
7
|
+
def fit(x, y, group:, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
|
8
|
+
train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature)
|
9
|
+
@booster = LightGBM.train(@params, train_set,
|
10
|
+
num_boost_round: @n_estimators,
|
11
|
+
early_stopping_rounds: early_stopping_rounds,
|
12
|
+
verbose_eval: verbose
|
13
|
+
)
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
|
17
|
+
def predict(data, num_iteration: nil)
|
18
|
+
@booster.predict(data, num_iteration: num_iteration)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -1,34 +1,25 @@
|
|
1
1
|
module LightGBM
|
2
|
-
class Regressor
|
3
|
-
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective:
|
4
|
-
|
5
|
-
num_leaves: num_leaves,
|
6
|
-
learning_rate: learning_rate
|
7
|
-
}
|
8
|
-
@params[:objective] = objective if objective
|
9
|
-
@n_estimators = n_estimators
|
2
|
+
class Regressor < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
|
4
|
+
super
|
10
5
|
end
|
11
6
|
|
12
|
-
def fit(x, y)
|
13
|
-
train_set = Dataset.new(x, label: y)
|
14
|
-
|
15
|
-
nil
|
16
|
-
end
|
17
|
-
|
18
|
-
def predict(data)
|
19
|
-
@booster.predict(data)
|
20
|
-
end
|
7
|
+
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
21
10
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
11
|
+
@booster = LightGBM.train(@params, train_set,
|
12
|
+
num_boost_round: @n_estimators,
|
13
|
+
early_stopping_rounds: early_stopping_rounds,
|
14
|
+
verbose_eval: verbose,
|
15
|
+
valid_sets: valid_sets,
|
16
|
+
valid_names: eval_names
|
17
|
+
)
|
18
|
+
nil
|
28
19
|
end
|
29
20
|
|
30
|
-
def
|
31
|
-
@booster.
|
21
|
+
def predict(data, num_iteration: nil)
|
22
|
+
@booster.predict(data, num_iteration: num_iteration)
|
32
23
|
end
|
33
24
|
end
|
34
25
|
end
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -107,6 +107,8 @@ files:
|
|
107
107
|
- lib/lightgbm/classifier.rb
|
108
108
|
- lib/lightgbm/dataset.rb
|
109
109
|
- lib/lightgbm/ffi.rb
|
110
|
+
- lib/lightgbm/model.rb
|
111
|
+
- lib/lightgbm/ranker.rb
|
110
112
|
- lib/lightgbm/regressor.rb
|
111
113
|
- lib/lightgbm/utils.rb
|
112
114
|
- lib/lightgbm/version.rb
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.0.
|
134
|
+
rubygems_version: 3.0.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: LightGBM - the high performance machine learning library - for Ruby
|