lightgbm 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +54 -6
- data/lib/lightgbm.rb +17 -0
- data/lib/lightgbm/booster.rb +89 -7
- data/lib/lightgbm/dataset.rb +78 -0
- data/lib/lightgbm/ffi.rb +25 -0
- data/lib/lightgbm/utils.rb +19 -0
- data/lib/lightgbm/version.rb +1 -1
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 49e0eef0a10a444e0cc24c8188268d349037fb12054b3f3f73ab14ed54fae3d7
|
4
|
+
data.tar.gz: 9ee78189ec31bfb3dc9cea6fe5836f97a010097cc821819d05236a899d0654af
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d95050754e85ee004df08c4761f31f1bfc97e3efbcd3ea0ae2251f5a84eeff2978e16118411ddeced74a9c7d3fd731176488cbbac4ed2bdd840e55e4dd6172db
|
7
|
+
data.tar.gz: 52dcca52827fffca3d638c814eec359c4f2d397b025cc9ee99323bebbbf5436d3a7fcf569390f0c71eb9623c68a711b0e32ba38a53364873fe9d0de99b2f3f66
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
# LightGBM
|
2
2
|
|
3
|
-
LightGBM for Ruby
|
3
|
+
[LightGBM](https://github.com/microsoft/LightGBM) for Ruby
|
4
|
+
|
5
|
+
:fire: Uses the C API for blazing performance
|
6
|
+
|
7
|
+
[![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
|
4
8
|
|
5
9
|
## Installation
|
6
10
|
|
7
|
-
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html).
|
11
|
+
First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
|
8
12
|
|
9
13
|
Add this line to your application’s Gemfile:
|
10
14
|
|
@@ -12,21 +16,65 @@ Add this line to your application’s Gemfile:
|
|
12
16
|
gem 'lightgbm'
|
13
17
|
```
|
14
18
|
|
15
|
-
|
19
|
+
Train a model
|
16
20
|
|
17
21
|
```ruby
|
18
|
-
|
22
|
+
params = {objective: "regression"}
|
23
|
+
train_set = LightGBM::Dataset.new(x_train, label: y_train)
|
24
|
+
booster = LightGBM.train(params, train_set)
|
19
25
|
```
|
20
26
|
|
21
27
|
Predict
|
22
28
|
|
23
29
|
```ruby
|
24
|
-
booster.predict(
|
30
|
+
booster.predict(x_test)
|
31
|
+
```
|
32
|
+
|
33
|
+
Save the model
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
booster.save_model("model.txt")
|
37
|
+
```
|
38
|
+
|
39
|
+
Load a model from a file
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
booster = LightGBM::Booster.new(model_file: "model.txt")
|
43
|
+
```
|
44
|
+
|
45
|
+
Get feature importance
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
booster.feature_importance
|
49
|
+
```
|
50
|
+
|
51
|
+
## Reference
|
52
|
+
|
53
|
+
### Booster
|
54
|
+
|
55
|
+
```ruby
|
56
|
+
booster = LightGBM::Booster.new(model_str: "tree...")
|
57
|
+
booster.to_json
|
58
|
+
booster.model_to_string
|
59
|
+
booster.current_iteration
|
60
|
+
```
|
61
|
+
|
62
|
+
### Dataset
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
dataset = LightGBM::Dataset.new(data, label: label, weight: weight, params: params)
|
66
|
+
dataset.num_data
|
67
|
+
dataset.num_feature
|
68
|
+
|
69
|
+
# note: only works with unquoted CSVs
|
70
|
+
dataset = LightGBM::Dataset.new("data.csv", params: {headers: true, label: "name:label"})
|
71
|
+
dataset.save_binary("train.bin")
|
72
|
+
dataset.dump_text("train.txt")
|
25
73
|
```
|
26
74
|
|
27
75
|
## Credits
|
28
76
|
|
29
|
-
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference.
|
77
|
+
Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
|
30
78
|
|
31
79
|
## History
|
32
80
|
|
data/lib/lightgbm.rb
CHANGED
@@ -2,6 +2,23 @@
|
|
2
2
|
require "ffi"
|
3
3
|
|
4
4
|
# modules
|
5
|
+
require "lightgbm/utils"
|
5
6
|
require "lightgbm/booster"
|
7
|
+
require "lightgbm/dataset"
|
6
8
|
require "lightgbm/ffi"
|
7
9
|
require "lightgbm/version"
|
10
|
+
|
11
|
+
module LightGBM
|
12
|
+
class Error < StandardError; end
|
13
|
+
|
14
|
+
def self.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [])
|
15
|
+
booster = Booster.new(params: params, train_set: train_set)
|
16
|
+
valid_sets.zip(valid_names) do |data, name|
|
17
|
+
booster.add_valid(data, name)
|
18
|
+
end
|
19
|
+
num_boost_round.times do
|
20
|
+
booster.update
|
21
|
+
end
|
22
|
+
booster
|
23
|
+
end
|
24
|
+
end
|
data/lib/lightgbm/booster.rb
CHANGED
@@ -1,18 +1,30 @@
|
|
1
1
|
module LightGBM
|
2
2
|
class Booster
|
3
|
-
def initialize(model_file:)
|
3
|
+
def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
|
4
4
|
@handle = ::FFI::MemoryPointer.new(:pointer)
|
5
|
-
if
|
5
|
+
if model_str
|
6
|
+
out_num_iterations = ::FFI::MemoryPointer.new(:int)
|
7
|
+
check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
|
8
|
+
elsif model_file
|
6
9
|
out_num_iterations = ::FFI::MemoryPointer.new(:int)
|
7
10
|
check_result FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, @handle)
|
11
|
+
else
|
12
|
+
check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
|
8
13
|
end
|
9
|
-
|
14
|
+
# causes "Stack consistency error"
|
15
|
+
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
10
16
|
end
|
11
17
|
|
12
18
|
def self.finalize(pointer)
|
13
19
|
-> { FFI.LGBM_BoosterFree(pointer) }
|
14
20
|
end
|
15
21
|
|
22
|
+
# TODO handle name
|
23
|
+
def add_valid(data, name)
|
24
|
+
check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
|
25
|
+
self # consistent with Python API
|
26
|
+
end
|
27
|
+
|
16
28
|
def predict(input)
|
17
29
|
raise TypeError unless input.is_a?(Array)
|
18
30
|
|
@@ -31,16 +43,86 @@ module LightGBM
|
|
31
43
|
singular ? out : out.first
|
32
44
|
end
|
33
45
|
|
34
|
-
|
46
|
+
def save_model(filename)
|
47
|
+
check_result FFI.LGBM_BoosterSaveModel(handle_pointer, 0, 0, filename)
|
48
|
+
self # consistent with Python API
|
49
|
+
end
|
50
|
+
|
51
|
+
def update
|
52
|
+
finished = ::FFI::MemoryPointer.new(:int)
|
53
|
+
check_result FFI.LGBM_BoosterUpdateOneIter(handle_pointer, finished)
|
54
|
+
finished.read_int == 1
|
55
|
+
end
|
56
|
+
|
57
|
+
def feature_importance(iteration: nil, importance_type: "split")
|
58
|
+
iteration ||= best_iteration
|
59
|
+
importance_type =
|
60
|
+
case importance_type
|
61
|
+
when "split"
|
62
|
+
0
|
63
|
+
when "gain"
|
64
|
+
1
|
65
|
+
else
|
66
|
+
-1
|
67
|
+
end
|
68
|
+
|
69
|
+
num_features = self.num_features
|
70
|
+
out_result = ::FFI::MemoryPointer.new(:double, num_features)
|
71
|
+
check_result FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, importance_type, out_result)
|
72
|
+
out_result.read_array_of_double(num_features)
|
73
|
+
end
|
74
|
+
|
75
|
+
def num_features
|
76
|
+
out = ::FFI::MemoryPointer.new(:int)
|
77
|
+
check_result FFI.LGBM_BoosterGetNumFeature(handle_pointer, out)
|
78
|
+
out.read_int
|
79
|
+
end
|
35
80
|
|
36
|
-
def
|
37
|
-
|
38
|
-
|
81
|
+
def current_iteration
|
82
|
+
out = ::FFI::MemoryPointer.new(:int)
|
83
|
+
check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
|
84
|
+
out.read_int
|
85
|
+
end
|
86
|
+
|
87
|
+
# TODO fix
|
88
|
+
def best_iteration
|
89
|
+
-1
|
90
|
+
end
|
91
|
+
|
92
|
+
def model_to_string(num_iteration: nil, start_iteration: 0)
|
93
|
+
num_iteration ||= best_iteration
|
94
|
+
buffer_len = 1 << 20
|
95
|
+
out_len = ::FFI::MemoryPointer.new(:int64)
|
96
|
+
out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
|
97
|
+
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
98
|
+
actual_len = out_len.read_int64
|
99
|
+
if actual_len > buffer_len
|
100
|
+
out_str = ::FFI::MemoryPointer.new(:string, actual_len)
|
101
|
+
check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
102
|
+
end
|
103
|
+
out_str.read_string
|
104
|
+
end
|
105
|
+
|
106
|
+
def to_json(num_iteration: nil, start_iteration: 0)
|
107
|
+
num_iteration ||= best_iteration
|
108
|
+
buffer_len = 1 << 20
|
109
|
+
out_len = ::FFI::MemoryPointer.new(:int64)
|
110
|
+
out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
|
111
|
+
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
|
112
|
+
actual_len = out_len.read_int64
|
113
|
+
if actual_len > buffer_len
|
114
|
+
out_str = ::FFI::MemoryPointer.new(:string, actual_len)
|
115
|
+
check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
|
39
116
|
end
|
117
|
+
out_str.read_string
|
40
118
|
end
|
41
119
|
|
120
|
+
private
|
121
|
+
|
42
122
|
def handle_pointer
|
43
123
|
@handle.read_pointer
|
44
124
|
end
|
125
|
+
|
126
|
+
include Utils
|
45
127
|
end
|
46
128
|
end
|
data/lib/lightgbm/dataset.rb
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
module LightGBM
|
2
|
+
class Dataset
|
3
|
+
attr_reader :data, :params
|
4
|
+
|
5
|
+
def initialize(data, label: nil, weight: nil, params: nil)
|
6
|
+
@data = data
|
7
|
+
|
8
|
+
@handle = ::FFI::MemoryPointer.new(:pointer)
|
9
|
+
if data.is_a?(String)
|
10
|
+
check_result FFI.LGBM_DatasetCreateFromFile(data, params_str(params), nil, @handle)
|
11
|
+
else
|
12
|
+
c_data = ::FFI::MemoryPointer.new(:float, data.count * data.first.count)
|
13
|
+
c_data.put_array_of_float(0, data.flatten)
|
14
|
+
check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, params_str(params), nil, @handle)
|
15
|
+
end
|
16
|
+
# causes "Stack consistency error"
|
17
|
+
# ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
|
18
|
+
|
19
|
+
set_field("label", label) if label
|
20
|
+
set_field("weight", weight) if weight
|
21
|
+
end
|
22
|
+
|
23
|
+
def label
|
24
|
+
field("label")
|
25
|
+
end
|
26
|
+
|
27
|
+
def weight
|
28
|
+
field("weight")
|
29
|
+
end
|
30
|
+
|
31
|
+
def num_data
|
32
|
+
out = ::FFI::MemoryPointer.new(:int)
|
33
|
+
check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
|
34
|
+
out.read_int
|
35
|
+
end
|
36
|
+
|
37
|
+
def num_feature
|
38
|
+
out = ::FFI::MemoryPointer.new(:int)
|
39
|
+
check_result FFI.LGBM_DatasetGetNumFeature(handle_pointer, out)
|
40
|
+
out.read_int
|
41
|
+
end
|
42
|
+
|
43
|
+
def save_binary(filename)
|
44
|
+
check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
|
45
|
+
end
|
46
|
+
|
47
|
+
def dump_text(filename)
|
48
|
+
check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
|
49
|
+
end
|
50
|
+
|
51
|
+
def self.finalize(pointer)
|
52
|
+
-> { FFI.LGBM_DatasetFree(pointer) }
|
53
|
+
end
|
54
|
+
|
55
|
+
def handle_pointer
|
56
|
+
@handle.read_pointer
|
57
|
+
end
|
58
|
+
|
59
|
+
private
|
60
|
+
|
61
|
+
def field(field_name)
|
62
|
+
num_data = self.num_data
|
63
|
+
out_len = ::FFI::MemoryPointer.new(:int)
|
64
|
+
out_ptr = ::FFI::MemoryPointer.new(:float, num_data)
|
65
|
+
out_type = ::FFI::MemoryPointer.new(:int)
|
66
|
+
check_result FFI.LGBM_DatasetGetField(handle_pointer, field_name, out_len, out_ptr, out_type)
|
67
|
+
out_ptr.read_pointer.read_array_of_float(num_data)
|
68
|
+
end
|
69
|
+
|
70
|
+
def set_field(field_name, data)
|
71
|
+
c_data = ::FFI::MemoryPointer.new(:float, data.count)
|
72
|
+
c_data.put_array_of_float(0, data)
|
73
|
+
check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
|
74
|
+
end
|
75
|
+
|
76
|
+
include Utils
|
77
|
+
end
|
78
|
+
end
|
data/lib/lightgbm/ffi.rb
CHANGED
@@ -4,10 +4,35 @@ module LightGBM
|
|
4
4
|
ffi_lib ["lightgbm", "lib_lightgbm.so"]
|
5
5
|
|
6
6
|
# https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
|
7
|
+
# keep same order
|
8
|
+
|
9
|
+
# error
|
7
10
|
attach_function :LGBM_GetLastError, %i[], :string
|
11
|
+
|
12
|
+
# dataset
|
13
|
+
attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
|
14
|
+
attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
|
15
|
+
attach_function :LGBM_DatasetFree, %i[pointer], :int
|
16
|
+
attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
|
17
|
+
attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
|
18
|
+
attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
|
19
|
+
attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
|
20
|
+
attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
|
21
|
+
attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int
|
22
|
+
|
23
|
+
# booster
|
8
24
|
attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
|
9
25
|
attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
|
26
|
+
attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
|
10
27
|
attach_function :LGBM_BoosterFree, %i[pointer], :int
|
28
|
+
attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
|
29
|
+
attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int
|
30
|
+
attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int
|
31
|
+
attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
|
11
32
|
attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int string pointer pointer], :int
|
33
|
+
attach_function :LGBM_BoosterSaveModel, %i[pointer int int string], :int
|
34
|
+
attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int64 pointer pointer], :int
|
35
|
+
attach_function :LGBM_BoosterDumpModel, %i[pointer int int int64 pointer pointer], :int
|
36
|
+
attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
|
12
37
|
end
|
13
38
|
end
|
data/lib/lightgbm/utils.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module LightGBM
|
2
|
+
module Utils
|
3
|
+
private
|
4
|
+
|
5
|
+
def check_result(err)
|
6
|
+
raise LightGBM::Error, FFI.LGBM_GetLastError if err != 0
|
7
|
+
end
|
8
|
+
|
9
|
+
# remove spaces in keys and values to prevent injection
|
10
|
+
def params_str(params)
|
11
|
+
(params || {}).map { |k, v| [check_param(k.to_s), check_param(v.to_s)].join("=") }.join(" ")
|
12
|
+
end
|
13
|
+
|
14
|
+
def check_param(v)
|
15
|
+
raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match(v)
|
16
|
+
v
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lightgbm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
@@ -76,7 +76,9 @@ files:
|
|
76
76
|
- README.md
|
77
77
|
- lib/lightgbm.rb
|
78
78
|
- lib/lightgbm/booster.rb
|
79
|
+
- lib/lightgbm/dataset.rb
|
79
80
|
- lib/lightgbm/ffi.rb
|
81
|
+
- lib/lightgbm/utils.rb
|
80
82
|
- lib/lightgbm/version.rb
|
81
83
|
homepage: https://github.com/ankane/lightgbm
|
82
84
|
licenses:
|