lightgbm 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +54 -6
- data/lib/lightgbm.rb +17 -0
- data/lib/lightgbm/booster.rb +89 -7
- data/lib/lightgbm/dataset.rb +78 -0
- data/lib/lightgbm/ffi.rb +25 -0
- data/lib/lightgbm/utils.rb +19 -0
- data/lib/lightgbm/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49e0eef0a10a444e0cc24c8188268d349037fb12054b3f3f73ab14ed54fae3d7
|
4
|
+
data.tar.gz: 9ee78189ec31bfb3dc9cea6fe5836f97a010097cc821819d05236a899d0654af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d95050754e85ee004df08c4761f31f1bfc97e3efbcd3ea0ae2251f5a84eeff2978e16118411ddeced74a9c7d3fd731176488cbbac4ed2bdd840e55e4dd6172db
|
7
|
+
data.tar.gz: 52dcca52827fffca3d638c814eec359c4f2d397b025cc9ee99323bebbbf5436d3a7fcf569390f0c71eb9623c68a711b0e32ba38a53364873fe9d0de99b2f3f66
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
LightGBM for Ruby
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
[![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
7
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html).
|
11
|
+
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
8
12
|
|
9
13
|
Add this line to your application’s Gemfile:
|
10
14
|
|
@@ -12,21 +16,65 @@ Add this line to your application’s Gemfile:
|
|
12
16
|
gem 'lightgbm'
|
13
17
|
```
|
14
18
|
|
15
|
-
|
19
|
+
Train a model
|
16
20
|
|
17
21
|
```ruby
|
18
|
-
|
22
|
+
params = {objective: "regression"}
|
23
|
+
train_set = LightGBM::Dataset.new(x_train, label: y_train)
|
24
|
+
booster = LightGBM.train(params, train_set)
|
19
25
|
```
|
20
26
|
|
21
27
|
Predict
|
22
28
|
|
23
29
|
```ruby
|
24
|
-
booster.predict(
|
30
|
+
booster.predict(x_test)
|
31
|
+
```
|
32
|
+
|
33
|
+
Save the model
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
booster.save_model("model.txt")
|
37
|
+
```
|
38
|
+
|
39
|
+
Load a model from a file
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
booster = LightGBM::Booster.new(model_file: "model.txt")
|
43
|
+
```
|
44
|
+
|
45
|
+
Get feature importance
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
booster.feature_importance
|
49
|
+
```
|
50
|
+
|
51
|
+
## Reference
|
52
|
+
|
53
|
+
### Booster
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
booster = LightGBM::Booster.new(model_str: "tree...")
|
57
|
+
booster.to_json
|
58
|
+
booster.model_to_string
|
59
|
+
booster.current_iteration
|
60
|
+
```
|
61
|
+
|
62
|
+
### Dataset
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
dataset = LightGBM::Dataset.new(data, label: label, weight: weight, params: params)
|
66
|
+
dataset.num_data
|
67
|
+
dataset.num_feature
|
68
|
+
|
69
|
+
# note: only works with unquoted CSVs
|
70
|
+
dataset = LightGBM::Dataset.new("data.csv", params: {headers: true, label: "name:label"})
|
71
|
+
dataset.save_binary("train.bin")
|
72
|
+
dataset.dump_text("train.txt")
|
25
73
|
```
|
26
74
|
|
27
75
|
## Credits
|
28
76
|
|
29
|
-
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference.
|
77
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
|
30
78
|
|
31
79
|
## History
|
32
80
|
|
data/lib/lightgbm.rb
CHANGED
@@ -2,6 +2,23 @@
|
|
2
2
|
require "ffi"
|
3
3
|
|
4
4
|
# modules
|
5
|
+
require "lightgbm/utils"
|
5
6
|
require "lightgbm/booster"
|
7
|
+
require "lightgbm/dataset"
|
6
8
|
require "lightgbm/ffi"
|
7
9
|
require "lightgbm/version"
|
10
|
+
|
11
|
+
module LightGBM
  # Error type raised by this library (see Utils#check_result).
  class Error < StandardError; end

  # Builds a Booster for +train_set+ and runs a fixed number of boosting rounds.
  #
  # params          - hash of parameters handed to Booster.new
  # train_set       - training dataset handed to Booster.new
  # num_boost_round - number of times Booster#update is called (default 100)
  # valid_sets      - datasets registered through Booster#add_valid
  # valid_names     - names paired positionally with valid_sets
  #
  # Returns the trained Booster.
  def self.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [])
    model = Booster.new(params: params, train_set: train_set)

    # Pair each validation set with its name; missing names come through as nil.
    valid_sets.zip(valid_names).each { |set, label| model.add_valid(set, label) }

    num_boost_round.times { model.update }
    model
  end
end
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -1,18 +1,30 @@
|
|
1
1
|
module LightGBM
|
2
2
|
class Booster
|
3
|
-
def initialize(model_file:)
|
3
|
+
def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
|
4
4
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
5
|
-
if
|
5
|
+
if model_str
|
6
|
+
out_num_iterations = ::FFI::MemoryPointer.new(:int)
|
7
|
+
check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
|
8
|
+
elsif model_file
|
6
9
|
out_num_iterations = ::FFI::MemoryPointer.new(:int)
|
7
10
|
check_result FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, @handle)
|
11
|
+
else
|
12
|
+
check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
|
8
13
|
end
|
9
|
-
|
14
|
+
# causes "Stack consistency error"
|
15
|
+
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
10
16
|
end
|
11
17
|
|
12
18
|
def self.finalize(pointer)
|
13
19
|
-> { FFI.LGBM_BoosterFree(pointer) }
|
14
20
|
end
|
15
21
|
|
22
|
+
# TODO handle name
|
23
|
+
# Registers +data+ as validation data through LGBM_BoosterAddValidData.
# +name+ is accepted but not used yet.
# Raises via check_result when the C call returns a non-zero status.
# Returns self, consistent with the Python API.
def add_valid(data, name)
  status = FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
  check_result(status)
  self
end
|
27
|
+
|
16
28
|
def predict(input)
|
17
29
|
raise TypeError unless input.is_a?(Array)
|
18
30
|
|
@@ -31,16 +43,86 @@ module LightGBM
|
|
31
43
|
singular ? out : out.first
|
32
44
|
end
|
33
45
|
|
34
|
-
|
46
|
+
# Writes the model to +filename+ through LGBM_BoosterSaveModel.
# NOTE(review): the two literal zeros are presumably start_iteration and
# num_iteration from c_api.h — confirm.
# Raises via check_result when the C call returns a non-zero status.
# Returns self, consistent with the Python API.
def save_model(filename)
  status = FFI.LGBM_BoosterSaveModel(handle_pointer, 0, 0, filename)
  check_result(status)
  self
end
|
50
|
+
|
51
|
+
# Runs one boosting iteration through LGBM_BoosterUpdateOneIter.
# Raises via check_result when the C call returns a non-zero status.
# Returns true when the int written by the C call equals 1, false otherwise.
def update
  done_flag = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_BoosterUpdateOneIter(handle_pointer, done_flag)
  check_result(status)
  done_flag.read_int == 1
end
|
56
|
+
|
57
|
+
# Returns one Float per feature from LGBM_BoosterFeatureImportance.
#
# iteration       - falls back to best_iteration when nil
# importance_type - "split" is sent as 0 and "gain" as 1; anything else is
#                   sent as -1 and left for the C API to judge
#
# Raises via check_result when the C call returns a non-zero status.
def feature_importance(iteration: nil, importance_type: "split")
  iteration ||= best_iteration
  type_code = {"split" => 0, "gain" => 1}.fetch(importance_type, -1)

  feature_count = num_features
  scores = ::FFI::MemoryPointer.new(:double, feature_count)
  status = FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, type_code, scores)
  check_result(status)
  scores.read_array_of_double(feature_count)
end
|
74
|
+
|
75
|
+
# Returns the feature count reported by LGBM_BoosterGetNumFeature.
# Raises via check_result when the C call returns a non-zero status.
def num_features
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_BoosterGetNumFeature(handle_pointer, count_ptr)
  check_result(status)
  count_ptr.read_int
end
|
35
80
|
|
36
|
-
def
|
37
|
-
|
38
|
-
|
81
|
+
# Returns the iteration counter reported by LGBM_BoosterGetCurrentIteration.
# Raises via check_result when the C call returns a non-zero status.
def current_iteration
  out = ::FFI::MemoryPointer.new(:int)
  # Use the same `FFI.` call form as every other C API call in this class.
  check_result FFI.LGBM_BoosterGetCurrentIteration(handle_pointer, out)
  out.read_int
end
|
86
|
+
|
87
|
+
# TODO fix
|
88
|
+
def best_iteration
|
89
|
+
-1
|
90
|
+
end
|
91
|
+
|
92
|
+
# Serializes the model through LGBM_BoosterSaveModelToString and returns it
# as a String.
#
# num_iteration   - falls back to best_iteration when nil
# start_iteration - forwarded unchanged to the C API (default 0)
#
# The call is first made with a 1 MiB buffer. If the length written to
# out_len is larger than that, the call is repeated with a buffer of exactly
# the reported length.
# Raises via check_result when a C call returns a non-zero status.
def model_to_string(num_iteration: nil, start_iteration: 0)
  num_iteration ||= best_iteration
  buffer_len = 1 << 20
  out_len = ::FFI::MemoryPointer.new(:int64)
  # :char gives a buffer of buffer_len bytes; :string elements are
  # pointer-sized and would reserve several times more than requested.
  out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
  check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
  actual_len = out_len.read_int64
  if actual_len > buffer_len
    out_str = ::FFI::MemoryPointer.new(:char, actual_len)
    check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
  end
  out_str.read_string
end
|
105
|
+
|
106
|
+
# Dumps the model through LGBM_BoosterDumpModel and returns the resulting
# String.
#
# num_iteration   - falls back to best_iteration when nil
# start_iteration - forwarded unchanged to the C API (default 0)
#
# The call is first made with a 1 MiB buffer. If the length written to
# out_len is larger than that, the call is repeated with a buffer of exactly
# the reported length.
# Raises via check_result when a C call returns a non-zero status.
def to_json(num_iteration: nil, start_iteration: 0)
  num_iteration ||= best_iteration
  buffer_len = 1 << 20
  out_len = ::FFI::MemoryPointer.new(:int64)
  # :char gives a buffer of buffer_len bytes; :string elements are
  # pointer-sized and would reserve several times more than requested.
  out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
  check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
  actual_len = out_len.read_int64
  if actual_len > buffer_len
    out_str = ::FFI::MemoryPointer.new(:char, actual_len)
    check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
  end
  out_str.read_string
end
|
41
119
|
|
120
|
+
private
|
121
|
+
|
42
122
|
def handle_pointer
|
43
123
|
@handle.read_pointer
|
44
124
|
end
|
125
|
+
|
126
|
+
include Utils
|
45
127
|
end
|
46
128
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
module LightGBM
  # Ruby wrapper around a native LightGBM dataset handle.
  #
  # The dataset is created from a file when +data+ is a String, otherwise
  # from an in-memory array of rows. Optional label and weight vectors are
  # attached with LGBM_DatasetSetField.
  class Dataset
    include Utils

    attr_reader :data, :params

    # data   - a String file path, or an array of rows; every row is assumed
    #          to have data.first.count entries
    # label  - optional array stored in the "label" field
    # weight - optional array stored in the "weight" field
    # params - optional hash serialized with params_str for the C API
    #
    # Raises LightGBM::Error (via check_result) when a C API call fails.
    def initialize(data, label: nil, weight: nil, params: nil)
      @data = data
      # Stored so the public +params+ reader returns what the caller passed.
      @params = params

      @handle = ::FFI::MemoryPointer.new(:pointer)
      if data.is_a?(String)
        check_result FFI.LGBM_DatasetCreateFromFile(data, params_str(params), nil, @handle)
      else
        # Rows are flattened into one contiguous float buffer.
        # NOTE(review): the literal 0 and 1 are presumably the float32 data
        # type and the row-major flag from c_api.h — confirm.
        c_data = ::FFI::MemoryPointer.new(:float, data.count * data.first.count)
        c_data.put_array_of_float(0, data.flatten)
        check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, params_str(params), nil, @handle)
      end
      # causes "Stack consistency error"
      # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))

      set_field("label", label) if label
      set_field("weight", weight) if weight
    end

    # Returns the "label" field as an array of floats, or nil when the
    # library hands back no data for it.
    def label
      field("label")
    end

    # Returns the "weight" field as an array of floats, or nil when the
    # library hands back no data for it.
    def weight
      field("weight")
    end

    # Returns the row count reported by LGBM_DatasetGetNumData.
    def num_data
      out = ::FFI::MemoryPointer.new(:int)
      check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
      out.read_int
    end

    # Returns the feature count reported by LGBM_DatasetGetNumFeature.
    def num_feature
      out = ::FFI::MemoryPointer.new(:int)
      check_result FFI.LGBM_DatasetGetNumFeature(handle_pointer, out)
      out.read_int
    end

    # Saves the dataset to +filename+ through LGBM_DatasetSaveBinary.
    def save_binary(filename)
      check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
    end

    # Dumps the dataset to +filename+ through LGBM_DatasetDumpText.
    def dump_text(filename)
      check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
    end

    # Returns a lambda that frees the native dataset behind +pointer+.
    # Not currently registered as a finalizer (see the note in #initialize).
    def self.finalize(pointer)
      -> { FFI.LGBM_DatasetFree(pointer) }
    end

    # Returns the native dataset handle stored by #initialize.
    def handle_pointer
      @handle.read_pointer
    end

    private

    # Reads a float field from the native dataset.
    #
    # The C API writes a pointer into out_ptr and the element count into
    # out_len, so out_ptr is a single pointer slot rather than a float buffer.
    # NOTE(review): values are read as 32-bit floats without checking
    # out_type — fine for "label"/"weight", confirm before using other fields.
    def field(field_name)
      out_len = ::FFI::MemoryPointer.new(:int)
      out_ptr = ::FFI::MemoryPointer.new(:pointer)
      out_type = ::FFI::MemoryPointer.new(:int)
      check_result FFI.LGBM_DatasetGetField(handle_pointer, field_name, out_len, out_ptr, out_type)
      values = out_ptr.read_pointer
      # A NULL result means there is nothing to read for this field.
      return nil if values.null?

      values.read_array_of_float(out_len.read_int)
    end

    # Copies +data+ into a float buffer and stores it under +field_name+.
    # NOTE(review): the trailing 0 is presumably the float32 type code from
    # c_api.h — confirm.
    def set_field(field_name, data)
      c_data = ::FFI::MemoryPointer.new(:float, data.count)
      c_data.put_array_of_float(0, data)
      check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
    end
  end
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -4,10 +4,35 @@ module LightGBM
|
|
4
4
|
ffi_lib ["lightgbm", "lib_lightgbm.so"]
|
5
5
|
|
6
6
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
7
|
+
# keep same order
|
8
|
+
|
9
|
+
# error
|
7
10
|
attach_function :LGBM_GetLastError, %i[], :string
|
11
|
+
|
12
|
+
# dataset
|
13
|
+
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
14
|
+
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
15
|
+
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
16
|
+
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
17
|
+
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
18
|
+
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
19
|
+
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
20
|
+
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
21
|
+
attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int
|
22
|
+
|
23
|
+
# booster
|
8
24
|
attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
|
9
25
|
attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
|
26
|
+
attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
|
10
27
|
attach_function :LGBM_BoosterFree, %i[pointer], :int
|
28
|
+
attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
|
29
|
+
attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int
|
30
|
+
attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int
|
31
|
+
attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
|
11
32
|
attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int string pointer pointer], :int
|
33
|
+
attach_function :LGBM_BoosterSaveModel, %i[pointer int int string], :int
|
34
|
+
attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int64 pointer pointer], :int
|
35
|
+
attach_function :LGBM_BoosterDumpModel, %i[pointer int int int64 pointer pointer], :int
|
36
|
+
attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
|
12
37
|
end
|
13
38
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module LightGBM
  # Private helpers mixed into the classes that call the LightGBM C API.
  module Utils
    private

    # Raises LightGBM::Error carrying LGBM_GetLastError's message when a
    # C API status code is non-zero; returns nil otherwise.
    def check_result(err)
      raise LightGBM::Error, FFI.LGBM_GetLastError if err != 0
    end

    # Serializes a params hash into the space-separated "key=value" string
    # passed to the C API. nil is treated as an empty hash.
    #
    # Array values are joined with commas (e.g. metric: ["l2", "auc"] becomes
    # "metric=l2,auc"). Keys and values containing whitespace are rejected,
    # not stripped, so a value cannot smuggle in an extra parameter.
    #
    # Raises ArgumentError when a key or value contains whitespace.
    def params_str(params)
      (params || {}).map do |key, value|
        value = value.join(",") if value.is_a?(Array)
        "#{check_param(key.to_s)}=#{check_param(value.to_s)}"
      end.join(" ")
    end

    # Returns +v+ unchanged, or raises ArgumentError if it contains whitespace.
    def check_param(v)
      raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match?(v)
      v
    end
  end
end
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -76,7 +76,9 @@ files:
|
|
76
76
|
- README.md
|
77
77
|
- lib/lightgbm.rb
|
78
78
|
- lib/lightgbm/booster.rb
|
79
|
+
- lib/lightgbm/dataset.rb
|
79
80
|
- lib/lightgbm/ffi.rb
|
81
|
+
- lib/lightgbm/utils.rb
|
80
82
|
- lib/lightgbm/version.rb
|
81
83
|
homepage: https://github.com/ankane/lightgbm
|
82
84
|
licenses:
|