lightgbm 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +8 -2
- data/lib/lightgbm.rb +11 -1
- data/lib/lightgbm/booster.rb +6 -6
- data/lib/lightgbm/classifier.rb +19 -28
- data/lib/lightgbm/dataset.rb +39 -19
- data/lib/lightgbm/ffi.rb +8 -2
- data/lib/lightgbm/model.rb +30 -0
- data/lib/lightgbm/ranker.rb +21 -0
- data/lib/lightgbm/regressor.rb +16 -25
- data/lib/lightgbm/version.rb +1 -1
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81f1f695112234bb576afaab35f4bf276d1f9c4a4adf0c74831cd1bb73f6baa0
|
4
|
+
data.tar.gz: 59ef1f3c581f83e108ce2a6f2c847bb7488fc2ea7f39ba217aaeffbf46e99351
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21297d26e88957dd60d0aa61da19aa53aa632958adaf069b0efe2c8dae35e2e21e74c374da3509e337ca3268613b14dc541aee5012df086af7e8f784adb5063d
|
7
|
+
data.tar.gz: 7dbdc0fccaf256a1a835aea3eaa51fe326a0cd4b8cde168a6b8c27ff00c6412a5b5d82583fb1c14749f001f002a4fa3c3e156b9c4b2174b1e91f9449a4fa9ba1
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -18,7 +18,7 @@ gem 'lightgbm'
|
|
18
18
|
|
19
19
|
## Getting Started
|
20
20
|
|
21
|
-
This library follows the [
|
21
|
+
This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
|
22
22
|
|
23
23
|
- The `get_` prefix is removed from methods
|
24
24
|
- The default verbosity is `-1`
|
@@ -63,7 +63,7 @@ booster.feature_importance
|
|
63
63
|
Early stopping
|
64
64
|
|
65
65
|
```ruby
|
66
|
-
LightGBM.train(params, train_set,
|
66
|
+
LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5)
|
67
67
|
```
|
68
68
|
|
69
69
|
CV
|
@@ -116,6 +116,12 @@ Get the importance of features
|
|
116
116
|
model.feature_importances
|
117
117
|
```
|
118
118
|
|
119
|
+
Early stopping
|
120
|
+
|
121
|
+
```ruby
|
122
|
+
model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
|
123
|
+
```
|
124
|
+
|
119
125
|
## Data
|
120
126
|
|
121
127
|
Data can be an array of arrays
|
data/lib/lightgbm.rb
CHANGED
@@ -5,16 +5,26 @@ require "ffi"
|
|
5
5
|
require "lightgbm/utils"
|
6
6
|
require "lightgbm/booster"
|
7
7
|
require "lightgbm/dataset"
|
8
|
-
require "lightgbm/ffi"
|
9
8
|
require "lightgbm/version"
|
10
9
|
|
11
10
|
# scikit-learn API
|
11
|
+
require "lightgbm/model"
|
12
12
|
require "lightgbm/classifier"
|
13
|
+
require "lightgbm/ranker"
|
13
14
|
require "lightgbm/regressor"
|
14
15
|
|
15
16
|
module LightGBM
|
16
17
|
class Error < StandardError; end
|
17
18
|
|
19
|
+
class << self
|
20
|
+
attr_accessor :ffi_lib
|
21
|
+
end
|
22
|
+
lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
|
23
|
+
self.ffi_lib = [lib_name, "lib_lightgbm.so"]
|
24
|
+
|
25
|
+
# friendlier error message
|
26
|
+
autoload :FFI, "lightgbm/ffi"
|
27
|
+
|
18
28
|
class << self
|
19
29
|
def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
|
20
30
|
booster = Booster.new(params: params, train_set: train_set)
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -14,8 +14,7 @@ module LightGBM
|
|
14
14
|
set_verbosity(params)
|
15
15
|
check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
|
16
16
|
end
|
17
|
-
|
18
|
-
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
17
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
19
18
|
|
20
19
|
self.best_iteration = -1
|
21
20
|
|
@@ -23,10 +22,6 @@ module LightGBM
|
|
23
22
|
@name_valid_sets = []
|
24
23
|
end
|
25
24
|
|
26
|
-
def self.finalize(pointer)
|
27
|
-
-> { FFI.LGBM_BoosterFree(pointer) }
|
28
|
-
end
|
29
|
-
|
30
25
|
def add_valid(data, name)
|
31
26
|
check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
|
32
27
|
@name_valid_sets << name
|
@@ -153,6 +148,11 @@ module LightGBM
|
|
153
148
|
finished.read_int == 1
|
154
149
|
end
|
155
150
|
|
151
|
+
def self.finalize(pointer)
|
152
|
+
# must use proc instead of stabby lambda
|
153
|
+
proc { FFI.LGBM_BoosterFree(pointer) }
|
154
|
+
end
|
155
|
+
|
156
156
|
private
|
157
157
|
|
158
158
|
def handle_pointer
|
data/lib/lightgbm/classifier.rb
CHANGED
@@ -1,15 +1,10 @@
|
|
1
1
|
module LightGBM
|
2
|
-
class Classifier
|
3
|
-
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
|
4
|
-
|
5
|
-
num_leaves: num_leaves,
|
6
|
-
learning_rate: learning_rate
|
7
|
-
}
|
8
|
-
@params[:objective] = objective if objective
|
9
|
-
@n_estimators = n_estimators
|
2
|
+
class Classifier < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
|
4
|
+
super
|
10
5
|
end
|
11
6
|
|
12
|
-
def fit(x, y)
|
7
|
+
def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
|
13
8
|
n_classes = y.uniq.size
|
14
9
|
|
15
10
|
params = @params.dup
|
@@ -20,26 +15,34 @@ module LightGBM
|
|
20
15
|
params[:objective] ||= "binary"
|
21
16
|
end
|
22
17
|
|
23
|
-
train_set = Dataset.new(x, label: y)
|
24
|
-
|
18
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
19
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
20
|
+
|
21
|
+
@booster = LightGBM.train(params, train_set,
|
22
|
+
num_boost_round: @n_estimators,
|
23
|
+
early_stopping_rounds: early_stopping_rounds,
|
24
|
+
verbose_eval: verbose,
|
25
|
+
valid_sets: valid_sets,
|
26
|
+
valid_names: eval_names
|
27
|
+
)
|
25
28
|
nil
|
26
29
|
end
|
27
30
|
|
28
|
-
def predict(data)
|
29
|
-
y_pred = @booster.predict(data)
|
31
|
+
def predict(data, num_iteration: nil)
|
32
|
+
y_pred = @booster.predict(data, num_iteration: num_iteration)
|
30
33
|
|
31
34
|
if y_pred.first.is_a?(Array)
|
32
35
|
# multiple classes
|
33
36
|
y_pred.map do |v|
|
34
|
-
v.map.with_index.max_by { |v2,
|
37
|
+
v.map.with_index.max_by { |v2, _| v2 }.last
|
35
38
|
end
|
36
39
|
else
|
37
40
|
y_pred.map { |v| v > 0.5 ? 1 : 0 }
|
38
41
|
end
|
39
42
|
end
|
40
43
|
|
41
|
-
def predict_proba(data)
|
42
|
-
y_pred = @booster.predict(data)
|
44
|
+
def predict_proba(data, num_iteration: nil)
|
45
|
+
y_pred = @booster.predict(data, num_iteration: num_iteration)
|
43
46
|
|
44
47
|
if y_pred.first.is_a?(Array)
|
45
48
|
# multiple classes
|
@@ -48,17 +51,5 @@ module LightGBM
|
|
48
51
|
y_pred.map { |v| [1 - v, v] }
|
49
52
|
end
|
50
53
|
end
|
51
|
-
|
52
|
-
def save_model(fname)
|
53
|
-
@booster.save_model(fname)
|
54
|
-
end
|
55
|
-
|
56
|
-
def load_model(fname)
|
57
|
-
@booster = Booster.new(params: @params, model_file: fname)
|
58
|
-
end
|
59
|
-
|
60
|
-
def feature_importances
|
61
|
-
@booster.feature_importance
|
62
|
-
end
|
63
54
|
end
|
64
55
|
end
|
data/lib/lightgbm/dataset.rb
CHANGED
@@ -2,7 +2,7 @@ module LightGBM
|
|
2
2
|
class Dataset
|
3
3
|
attr_reader :data, :params
|
4
4
|
|
5
|
-
def initialize(data, label: nil, weight: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
5
|
+
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
|
6
6
|
@data = data
|
7
7
|
|
8
8
|
# TODO stringify params
|
@@ -13,12 +13,12 @@ module LightGBM
|
|
13
13
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
14
14
|
parameters = params_str(params)
|
15
15
|
reference = reference.handle_pointer if reference
|
16
|
-
if data.is_a?(String)
|
17
|
-
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
18
|
-
elsif used_indices
|
16
|
+
if used_indices
|
19
17
|
used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
|
20
18
|
used_row_indices.put_array_of_int32(0, used_indices)
|
21
19
|
check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
|
20
|
+
elsif data.is_a?(String)
|
21
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
|
22
22
|
else
|
23
23
|
if matrix?(data)
|
24
24
|
nrow = data.row_count
|
@@ -40,11 +40,11 @@ module LightGBM
|
|
40
40
|
c_data.put_array_of_float(0, flat_data)
|
41
41
|
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
|
42
42
|
end
|
43
|
-
|
44
|
-
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
43
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
|
45
44
|
|
46
|
-
|
47
|
-
|
45
|
+
self.label = label if label
|
46
|
+
self.weight = weight if weight
|
47
|
+
self.group = group if group
|
48
48
|
end
|
49
49
|
|
50
50
|
def label
|
@@ -55,6 +55,18 @@ module LightGBM
|
|
55
55
|
field("weight")
|
56
56
|
end
|
57
57
|
|
58
|
+
def label=(label)
|
59
|
+
set_field("label", label)
|
60
|
+
end
|
61
|
+
|
62
|
+
def weight=(weight)
|
63
|
+
set_field("weight", weight)
|
64
|
+
end
|
65
|
+
|
66
|
+
def group=(group)
|
67
|
+
set_field("group", group, type: :int32)
|
68
|
+
end
|
69
|
+
|
58
70
|
def num_data
|
59
71
|
out = ::FFI::MemoryPointer.new(:int)
|
60
72
|
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
@@ -71,9 +83,10 @@ module LightGBM
|
|
71
83
|
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
|
72
84
|
end
|
73
85
|
|
74
|
-
|
75
|
-
|
76
|
-
|
86
|
+
# not released yet
|
87
|
+
# def dump_text(filename)
|
88
|
+
# check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
89
|
+
# end
|
77
90
|
|
78
91
|
def subset(used_indices, params: nil)
|
79
92
|
# categorical_feature passed via params
|
@@ -85,14 +98,15 @@ module LightGBM
|
|
85
98
|
)
|
86
99
|
end
|
87
100
|
|
88
|
-
def self.finalize(pointer)
|
89
|
-
-> { FFI.LGBM_DatasetFree(pointer) }
|
90
|
-
end
|
91
|
-
|
92
101
|
def handle_pointer
|
93
102
|
@handle.read_pointer
|
94
103
|
end
|
95
104
|
|
105
|
+
def self.finalize(pointer)
|
106
|
+
# must use proc instead of stabby lambda
|
107
|
+
proc { FFI.LGBM_DatasetFree(pointer) }
|
108
|
+
end
|
109
|
+
|
96
110
|
private
|
97
111
|
|
98
112
|
def field(field_name)
|
@@ -104,11 +118,17 @@ module LightGBM
|
|
104
118
|
out_ptr.read_pointer.read_array_of_float(num_data)
|
105
119
|
end
|
106
120
|
|
107
|
-
def set_field(field_name, data)
|
121
|
+
def set_field(field_name, data, type: :float)
|
108
122
|
data = data.to_a unless data.is_a?(Array)
|
109
|
-
|
110
|
-
|
111
|
-
|
123
|
+
if type == :int32
|
124
|
+
c_data = ::FFI::MemoryPointer.new(:int32, data.count)
|
125
|
+
c_data.put_array_of_int32(0, data)
|
126
|
+
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
|
127
|
+
else
|
128
|
+
c_data = ::FFI::MemoryPointer.new(:float, data.count)
|
129
|
+
c_data.put_array_of_float(0, data)
|
130
|
+
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
|
131
|
+
end
|
112
132
|
end
|
113
133
|
|
114
134
|
def matrix?(data)
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module LightGBM
|
2
2
|
module FFI
|
3
3
|
extend ::FFI::Library
|
4
|
-
|
4
|
+
|
5
|
+
begin
|
6
|
+
ffi_lib LightGBM.ffi_lib
|
7
|
+
rescue LoadError => e
|
8
|
+
raise e if ENV["LIGHTGBM_DEBUG"]
|
9
|
+
raise LoadError, "Could not find LightGBM"
|
10
|
+
end
|
5
11
|
|
6
12
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
7
13
|
# keep same order
|
@@ -15,7 +21,7 @@ module LightGBM
|
|
15
21
|
attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
|
16
22
|
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
17
23
|
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
18
|
-
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
24
|
+
# attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
19
25
|
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
20
26
|
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
21
27
|
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module LightGBM
|
2
|
+
class Model
|
3
|
+
attr_reader :booster
|
4
|
+
|
5
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
|
6
|
+
@params = {
|
7
|
+
num_leaves: num_leaves,
|
8
|
+
learning_rate: learning_rate
|
9
|
+
}.merge(options)
|
10
|
+
@params[:objective] = objective if objective
|
11
|
+
@n_estimators = n_estimators
|
12
|
+
end
|
13
|
+
|
14
|
+
def save_model(fname)
|
15
|
+
@booster.save_model(fname)
|
16
|
+
end
|
17
|
+
|
18
|
+
def load_model(fname)
|
19
|
+
@booster = Booster.new(params: @params, model_file: fname)
|
20
|
+
end
|
21
|
+
|
22
|
+
def best_iteration
|
23
|
+
@booster.best_iteration
|
24
|
+
end
|
25
|
+
|
26
|
+
def feature_importances
|
27
|
+
@booster.feature_importance
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module LightGBM
|
2
|
+
class Ranker < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
|
4
|
+
super
|
5
|
+
end
|
6
|
+
|
7
|
+
def fit(x, y, group:, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
|
8
|
+
train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature)
|
9
|
+
@booster = LightGBM.train(@params, train_set,
|
10
|
+
num_boost_round: @n_estimators,
|
11
|
+
early_stopping_rounds: early_stopping_rounds,
|
12
|
+
verbose_eval: verbose
|
13
|
+
)
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
|
17
|
+
def predict(data, num_iteration: nil)
|
18
|
+
@booster.predict(data, num_iteration: num_iteration)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/lightgbm/regressor.rb
CHANGED
@@ -1,34 +1,25 @@
|
|
1
1
|
module LightGBM
|
2
|
-
class Regressor
|
3
|
-
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective:
|
4
|
-
|
5
|
-
num_leaves: num_leaves,
|
6
|
-
learning_rate: learning_rate
|
7
|
-
}
|
8
|
-
@params[:objective] = objective if objective
|
9
|
-
@n_estimators = n_estimators
|
2
|
+
class Regressor < Model
|
3
|
+
def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
|
4
|
+
super
|
10
5
|
end
|
11
6
|
|
12
|
-
def fit(x, y)
|
13
|
-
train_set = Dataset.new(x, label: y)
|
14
|
-
|
15
|
-
nil
|
16
|
-
end
|
17
|
-
|
18
|
-
def predict(data)
|
19
|
-
@booster.predict(data)
|
20
|
-
end
|
7
|
+
def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
|
8
|
+
train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
|
9
|
+
valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
|
21
10
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
11
|
+
@booster = LightGBM.train(@params, train_set,
|
12
|
+
num_boost_round: @n_estimators,
|
13
|
+
early_stopping_rounds: early_stopping_rounds,
|
14
|
+
verbose_eval: verbose,
|
15
|
+
valid_sets: valid_sets,
|
16
|
+
valid_names: eval_names
|
17
|
+
)
|
18
|
+
nil
|
28
19
|
end
|
29
20
|
|
30
|
-
def
|
31
|
-
@booster.
|
21
|
+
def predict(data, num_iteration: nil)
|
22
|
+
@booster.predict(data, num_iteration: num_iteration)
|
32
23
|
end
|
33
24
|
end
|
34
25
|
end
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.3
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ffi
|
@@ -107,6 +107,8 @@ files:
|
|
107
107
|
- lib/lightgbm/classifier.rb
|
108
108
|
- lib/lightgbm/dataset.rb
|
109
109
|
- lib/lightgbm/ffi.rb
|
110
|
+
- lib/lightgbm/model.rb
|
111
|
+
- lib/lightgbm/ranker.rb
|
110
112
|
- lib/lightgbm/regressor.rb
|
111
113
|
- lib/lightgbm/utils.rb
|
112
114
|
- lib/lightgbm/version.rb
|
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
129
131
|
- !ruby/object:Gem::Version
|
130
132
|
version: '0'
|
131
133
|
requirements: []
|
132
|
-
rubygems_version: 3.0.
|
134
|
+
rubygems_version: 3.0.3
|
133
135
|
signing_key:
|
134
136
|
specification_version: 4
|
135
137
|
summary: LightGBM - the high performance machine learning library - for Ruby
|