lightgbm 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +79 -9
- data/lib/lightgbm.rb +6 -1
- data/lib/lightgbm/booster.rb +1 -1
- data/lib/lightgbm/classifier.rb +64 -0
- data/lib/lightgbm/dataset.rb +32 -3
- data/lib/lightgbm/regressor.rb +34 -0
- data/lib/lightgbm/version.rb +1 -1
- metadata +33 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3d841acf71e8af7111178da8c2062b47900ec953a94154a0cdf9f28bf7d61714
+  data.tar.gz: 6ed019f4094803a06be77008e48870fb8db3acac4b83f3675eaeae4e20c27fdb
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 477e25066789028e7b8a8a78107c1ed823bd06d96d97afdda41b502e2e3e4a9e0065888c414effe4ace4097baa4d4b18988c4ee6b4a9d06347992afa201a52b5
+  data.tar.gz: eabb924994ffcafce6cb9038a60e3327528d2308d39c62bc336a06191e471ff412e141f9117446abc068aabbe9d1d16be59cc8bdca889270219895e85ec9e57b
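The SHA256 and SHA512 values above can be reproduced locally. A minimal sketch using Ruby's standard `digest` library, assuming the `.gem` archive has already been unpacked so that `metadata.gz` and `data.tar.gz` sit in the current directory (hypothetical paths):

```ruby
require "digest"

# Hypothetical local copies of the two archives contained in the .gem file
%w[metadata.gz data.tar.gz].each do |name|
  next unless File.exist?(name)

  puts "#{name} SHA256: #{Digest::SHA256.file(name).hexdigest}"
  puts "#{name} SHA512: #{Digest::SHA512.file(name).hexdigest}"
end
```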
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,6 @@
 # LightGBM
 
-[LightGBM](https://github.com/microsoft/LightGBM) for Ruby
+[LightGBM](https://github.com/microsoft/LightGBM) - the high performance machine learning library - for Ruby
 
 :fire: Uses the C API for blazing performance
 
@@ -18,6 +18,16 @@ gem 'lightgbm'
 
 ## Getting Started
 
+This library follows the [Data Structure, Training, and Scikit-Learn APIs](https://lightgbm.readthedocs.io/en/latest/Python-API.html) of the Python library. A few differences are:
+
+- The `get_` prefix is removed from methods
+- The default verbosity is `-1`
+- With the `cv` method, `stratified` is set to `false`
+
+Some methods and options are also missing at the moment. PRs welcome!
+
+## Training API
+
 Train a model
 
 ```ruby
@@ -44,38 +54,98 @@ Load the model from a file
 booster = LightGBM::Booster.new(model_file: "model.txt")
 ```
 
-Get
+Get the importance of features
 
 ```ruby
 booster.feature_importance
 ```
 
-
+Early stopping
 
 ```ruby
 LightGBM.train(params, train_set, valid_set: [train_set, test_set], early_stopping_rounds: 5)
 ```
 
-
+CV
 
 ```ruby
 LightGBM.cv(params, train_set, nfold: 5, verbose_eval: true)
 ```
 
-##
+## Scikit-Learn API
 
-
+Prep your data
 
-
-
+```ruby
+x = [[1, 2], [3, 4], [5, 6], [7, 8]]
+y = [1, 2, 3, 4]
+```
 
-
+Train a model
+
+```ruby
+model = LightGBM::Regressor.new
+model.fit(x, y)
+```
+
+> For classification, use `LightGBM::Classifier`
+
+Predict
+
+```ruby
+model.predict(x)
+```
+
+> For classification, use `predict_proba` for probabilities
+
+Save the model to a file
+
+```ruby
+model.save_model("model.txt")
+```
+
+Load the model from a file
+
+```ruby
+model.load_model("model.txt")
+```
+
+Get the importance of features
+
+```ruby
+model.feature_importances
+```
+
+## Data
+
+Data can be an array of arrays
+
+```ruby
+[[1, 2, 3], [4, 5, 6]]
+```
+
+Or a Daru data frame
+
+```ruby
+Daru::DataFrame.from_csv("houses.csv")
+```
+
+Or a Numo NArray
+
+```ruby
+Numo::DFloat.new(3, 2).seq
+```
 
 ## Helpful Resources
 
 - [Parameters](https://lightgbm.readthedocs.io/en/latest/Parameters.html)
 - [Parameter Tuning](https://lightgbm.readthedocs.io/en/latest/Parameters-Tuning.html)
 
+## Related Projects
+
+- [Xgb](https://github.com/ankane/xgb) - XGBoost for Ruby
+- [Eps](https://github.com/ankane/eps) - Machine Learning for Ruby
+
 ## Credits
 
 Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
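Pieced together, the new Scikit-Learn API section of the README describes a full round trip. A hedged sketch using the toy data from the README snippets above (behavior assumed from the `Regressor` class added later in this diff):

```ruby
require "lightgbm"

# Toy data from the README: each row of x is a feature vector
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [1, 2, 3, 4]

model = LightGBM::Regressor.new(num_leaves: 31, learning_rate: 0.1, n_estimators: 100)
model.fit(x, y)

model.predict(x)              # one prediction per row
model.feature_importances     # per-feature importance values

model.save_model("model.txt") # write the booster to a text file
model.load_model("model.txt") # reload it into the same wrapper
```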
data/lib/lightgbm.rb
CHANGED
@@ -8,11 +8,15 @@ require "lightgbm/dataset"
 require "lightgbm/ffi"
 require "lightgbm/version"
 
+# scikit-learn API
+require "lightgbm/classifier"
+require "lightgbm/regressor"
+
 module LightGBM
   class Error < StandardError; end
 
   class << self
-    def train(params, train_set,num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
+    def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
       booster = Booster.new(params: params, train_set: train_set)
 
       valid_contain_train = false
@@ -150,6 +154,7 @@ module LightGBM
         if early_stopping_rounds
           stop_early = false
           means.each do |k, score|
+            # TODO fix higher better
            if best_score[k].nil? || score < best_score[k]
              best_score[k] = score
              best_iter[k] = iteration
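The corrected `train` signature above takes `num_boost_round:`, `valid_sets:`, `valid_names:`, `early_stopping_rounds:`, and `verbose_eval:` keywords (note the README snippet earlier in this diff spells the keyword `valid_set:`). A hedged sketch of a call against this signature, with placeholder data and parameters:

```ruby
require "lightgbm"

params = { objective: "regression" }

x_train = [[1, 2], [3, 4], [5, 6], [7, 8]]
y_train = [0.5, 1.5, 2.5, 3.5]
x_test  = [[2, 3], [4, 5]]
y_test  = [1.0, 2.0]

train_set = LightGBM::Dataset.new(x_train, label: y_train)
test_set  = LightGBM::Dataset.new(x_test, label: y_test)

booster = LightGBM.train(
  params,
  train_set,
  num_boost_round: 100,
  valid_sets: [train_set, test_set],
  valid_names: ["training", "valid"],
  early_stopping_rounds: 5,
  verbose_eval: false
)
```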
data/lib/lightgbm/booster.rb
CHANGED
@@ -77,7 +77,7 @@ module LightGBM
       num_feature = self.num_feature
       out_result = ::FFI::MemoryPointer.new(:double, num_feature)
       check_result FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, importance_type, out_result)
-      out_result.read_array_of_double(num_feature)
+      out_result.read_array_of_double(num_feature).map(&:to_i)
     end
 
     def model_from_string(model_str)
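The one-line change above casts the doubles returned by `LGBM_BoosterFeatureImportance` to integers, so importance values come back as whole numbers. A small illustration of the cast itself, with made-up values standing in for what the C API returns:

```ruby
raw = [140.0, 57.0, 9.0]  # hypothetical doubles read out of the FFI buffer
raw.map(&:to_i)           # => [140, 57, 9] - what feature_importance now returns
```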
data/lib/lightgbm/classifier.rb
ADDED
@@ -0,0 +1,64 @@
+module LightGBM
+  class Classifier
+    def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
+      @params = {
+        num_leaves: num_leaves,
+        learning_rate: learning_rate
+      }
+      @params[:objective] = objective if objective
+      @n_estimators = n_estimators
+    end
+
+    def fit(x, y)
+      n_classes = y.uniq.size
+
+      params = @params.dup
+      if n_classes > 2
+        params[:objective] ||= "multiclass"
+        params[:num_class] = n_classes
+      else
+        params[:objective] ||= "binary"
+      end
+
+      train_set = Dataset.new(x, label: y)
+      @booster = LightGBM.train(params, train_set, num_boost_round: @n_estimators)
+      nil
+    end
+
+    def predict(data)
+      y_pred = @booster.predict(data)
+
+      if y_pred.first.is_a?(Array)
+        # multiple classes
+        y_pred.map do |v|
+          v.map.with_index.max_by { |v2, i| v2 }.last
+        end
+      else
+        y_pred.map { |v| v > 0.5 ? 1 : 0 }
+      end
+    end
+
+    def predict_proba(data)
+      y_pred = @booster.predict(data)
+
+      if y_pred.first.is_a?(Array)
+        # multiple classes
+        y_pred
+      else
+        y_pred.map { |v| [1 - v, v] }
+      end
+    end
+
+    def save_model(fname)
+      @booster.save_model(fname)
+    end
+
+    def load_model(fname)
+      @booster = Booster.new(params: @params, model_file: fname)
+    end
+
+    def feature_importances
+      @booster.feature_importance
+    end
+  end
+end
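`Classifier#predict` and `#predict_proba` branch on the shape of the raw booster output: a flat array of probabilities for binary objectives (thresholded at 0.5) versus an array of per-class probability arrays for multiclass (argmax per row). A sketch of that post-processing in isolation, with made-up prediction values:

```ruby
# Binary case: one probability per row, thresholded at 0.5
binary_pred = [0.1, 0.7, 0.4]
binary_pred.map { |v| v > 0.5 ? 1 : 0 }
# => [0, 1, 0]

# Multiclass case: one probability array per row; argmax picks the class index
multi_pred = [[0.2, 0.7, 0.1], [0.6, 0.3, 0.1]]
multi_pred.map { |v| v.map.with_index.max_by { |v2, _i| v2 }.last }
# => [1, 0]
```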
data/lib/lightgbm/dataset.rb
CHANGED
@@ -20,9 +20,25 @@ module LightGBM
       used_row_indices.put_array_of_int32(0, used_indices)
       check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
     else
-
-
-
+      if matrix?(data)
+        nrow = data.row_count
+        ncol = data.column_count
+        flat_data = data.to_a.flatten
+      elsif daru?(data)
+        nrow, ncol = data.shape
+        flat_data = data.each_vector.map(&:to_a).flatten
+      elsif narray?(data)
+        nrow, ncol = data.shape
+        flat_data = data.flatten.to_a
+      else
+        nrow = data.count
+        ncol = data.first.count
+        flat_data = data.flatten
+      end
+
+      c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
+      c_data.put_array_of_float(0, flat_data)
+      check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
     end
     # causes "Stack consistency error"
     # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
@@ -89,11 +105,24 @@ module LightGBM
     end
 
     def set_field(field_name, data)
+      data = data.to_a unless data.is_a?(Array)
       c_data = ::FFI::MemoryPointer.new(:float, data.count)
       c_data.put_array_of_float(0, data)
       check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
     end
 
+    def matrix?(data)
+      defined?(Matrix) && data.is_a?(Matrix)
+    end
+
+    def daru?(data)
+      defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
+    end
+
+    def narray?(data)
+      defined?(Numo::NArray) && data.is_a?(Numo::NArray)
+    end
+
     include Utils
   end
 end
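With the branching added above, `Dataset.new` flattens whichever structure it receives - a `Matrix`, a `Daru::DataFrame`, a `Numo::NArray`, or a plain array of arrays - into a row-major float buffer before calling `LGBM_DatasetCreateFromMat`. A hedged sketch of building the same dataset from a few of those types (assumes the optional `daru` / `numo-narray` gems, added below as development dependencies, are installed):

```ruby
require "lightgbm"
require "matrix"
require "numo/narray"

label = [1.0, 2.0, 3.0]

# Plain array of arrays
LightGBM::Dataset.new([[1, 2], [3, 4], [5, 6]], label: label)

# Ruby's stdlib Matrix (row_count/column_count path)
LightGBM::Dataset.new(Matrix[[1, 2], [3, 4], [5, 6]], label: label)

# Numo NArray, 3 rows x 2 columns (shape path)
LightGBM::Dataset.new(Numo::DFloat.new(3, 2).seq, label: label)
```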
data/lib/lightgbm/regressor.rb
ADDED
@@ -0,0 +1,34 @@
+module LightGBM
+  class Regressor
+    def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
+      @params = {
+        num_leaves: num_leaves,
+        learning_rate: learning_rate
+      }
+      @params[:objective] = objective if objective
+      @n_estimators = n_estimators
+    end
+
+    def fit(x, y)
+      train_set = Dataset.new(x, label: y)
+      @booster = LightGBM.train(@params, train_set, num_boost_round: @n_estimators)
+      nil
+    end
+
+    def predict(data)
+      @booster.predict(data)
+    end
+
+    def save_model(fname)
+      @booster.save_model(fname)
+    end
+
+    def load_model(fname)
+      @booster = Booster.new(params: @params, model_file: fname)
+    end
+
+    def feature_importances
+      @booster.feature_importance
+    end
+  end
+end
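`Regressor` and the `Classifier` above expose the same small surface: `fit`, `predict`, `save_model`, `load_model`, and `feature_importances`. A brief hedged classification example with toy binary labels (so `fit` falls through to the `"binary"` objective):

```ruby
require "lightgbm"

x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [0, 0, 1, 1]  # two classes

model = LightGBM::Classifier.new
model.fit(x, y)

model.predict(x)        # class labels, e.g. [0, 0, 1, 1]
model.predict_proba(x)  # [[p(class 0), p(class 1)], ...] per row
```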
data/lib/lightgbm/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: lightgbm
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.3
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-08-
+date: 2019-08-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi
@@ -66,6 +66,34 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '5'
+- !ruby/object:Gem::Dependency
+  name: daru
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: numo-narray
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description:
 email: andrew@chartkick.com
 executables: []
@@ -76,8 +104,10 @@ files:
 - README.md
 - lib/lightgbm.rb
 - lib/lightgbm/booster.rb
+- lib/lightgbm/classifier.rb
 - lib/lightgbm/dataset.rb
 - lib/lightgbm/ffi.rb
+- lib/lightgbm/regressor.rb
 - lib/lightgbm/utils.rb
 - lib/lightgbm/version.rb
 homepage: https://github.com/ankane/lightgbm
@@ -102,5 +132,5 @@ requirements: []
 rubygems_version: 3.0.4
 signing_key:
 specification_version: 4
-summary: LightGBM for Ruby
+summary: LightGBM - the high performance machine learning library - for Ruby
 test_files: []