lightgbm 0.1.3 → 0.1.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3d841acf71e8af7111178da8c2062b47900ec953a94154a0cdf9f28bf7d61714
- data.tar.gz: 6ed019f4094803a06be77008e48870fb8db3acac4b83f3675eaeae4e20c27fdb
+ metadata.gz: 81f1f695112234bb576afaab35f4bf276d1f9c4a4adf0c74831cd1bb73f6baa0
+ data.tar.gz: 59ef1f3c581f83e108ce2a6f2c847bb7488fc2ea7f39ba217aaeffbf46e99351
  SHA512:
- metadata.gz: 477e25066789028e7b8a8a78107c1ed823bd06d96d97afdda41b502e2e3e4a9e0065888c414effe4ace4097baa4d4b18988c4ee6b4a9d06347992afa201a52b5
- data.tar.gz: eabb924994ffcafce6cb9038a60e3327528d2308d39c62bc336a06191e471ff412e141f9117446abc068aabbe9d1d16be59cc8bdca889270219895e85ec9e57b
+ metadata.gz: 21297d26e88957dd60d0aa61da19aa53aa632958adaf069b0efe2c8dae35e2e21e74c374da3509e337ca3268613b14dc541aee5012df086af7e8f784adb5063d
+ data.tar.gz: 7dbdc0fccaf256a1a835aea3eaa51fe326a0cd4b8cde168a6b8c27ff00c6412a5b5d82583fb1c14749f001f002a4fa3c3e156b9c4b2174b1e91f9449a4fa9ba1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
+ ## 0.1.4
+
+ - Friendlier message when LightGBM not found
+ - Added `Ranker`
+ - Added early stopping to Scikit-Learn API
+ - Free memory when objects are destroyed
+ - Removed unreleased `dump_text` method
+
  ## 0.1.3

  - Added Scikit-Learn API
data/README.md CHANGED
@@ -18,7 +18,7 @@ gem 'lightgbm'

  ## Getting Started

- This library follows the [Data Structure, Training, and Scikit-Learn APIs](https://lightgbm.readthedocs.io/en/latest/Python-API.html) of the Python library. A few differences are:
+ This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:

  - The `get_` prefix is removed from methods
  - The default verbosity is `-1`
@@ -63,7 +63,7 @@ booster.feature_importance
  Early stopping

  ```ruby
- LightGBM.train(params, train_set, valid_set: [train_set, test_set], early_stopping_rounds: 5)
+ LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5)
  ```

  CV
@@ -116,6 +116,12 @@ Get the importance of features
  model.feature_importances
  ```

+ Early stopping
+
+ ```ruby
+ model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
+ ```
+
  ## Data

  Data can be an array of arrays
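
A minimal sketch of the Scikit-Learn-style early stopping added in this release, combining the README example above with the new `best_iteration` and `num_iteration:` additions; the data and variable names are placeholders:

```ruby
model = LightGBM::Regressor.new
model.fit(x_train, y_train, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
model.best_iteration                                        # iteration picked by early stopping
model.predict(x_test, num_iteration: model.best_iteration)
```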
data/lib/lightgbm.rb CHANGED
@@ -5,16 +5,26 @@ require "ffi"
  require "lightgbm/utils"
  require "lightgbm/booster"
  require "lightgbm/dataset"
- require "lightgbm/ffi"
  require "lightgbm/version"

  # scikit-learn API
+ require "lightgbm/model"
  require "lightgbm/classifier"
+ require "lightgbm/ranker"
  require "lightgbm/regressor"

  module LightGBM
  class Error < StandardError; end

+ class << self
+ attr_accessor :ffi_lib
+ end
+ lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
+ self.ffi_lib = [lib_name, "lib_lightgbm.so"]
+
+ # friendlier error message
+ autoload :FFI, "lightgbm/ffi"
+
  class << self
  def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
  booster = Booster.new(params: params, train_set: train_set)
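
Because `FFI` is now autoloaded and `ffi_lib` is exposed as a writable module attribute, the shared-library location can be overridden before the first FFI call is made. A rough sketch; the path below is a made-up example:

```ruby
require "lightgbm"

# must run before anything references LightGBM::FFI (hypothetical path)
LightGBM.ffi_lib = ["/opt/lightgbm/lib_lightgbm.so"]
```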
data/lib/lightgbm/booster.rb CHANGED
@@ -14,8 +14,7 @@ module LightGBM
  set_verbosity(params)
  check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
  end
- # causes "Stack consistency error"
- # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
+ ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))

  self.best_iteration = -1

@@ -23,10 +22,6 @@ module LightGBM
  @name_valid_sets = []
  end

- def self.finalize(pointer)
- -> { FFI.LGBM_BoosterFree(pointer) }
- end
-
  def add_valid(data, name)
  check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
  @name_valid_sets << name
@@ -153,6 +148,11 @@ module LightGBM
  finished.read_int == 1
  end

+ def self.finalize(pointer)
+ # must use proc instead of stabby lambda
+ proc { FFI.LGBM_BoosterFree(pointer) }
+ end
+
  private

  def handle_pointer
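
The "must use proc instead of stabby lambda" comment reflects how `ObjectSpace.define_finalizer` works: the finalizer is invoked with the object's id as an argument, which a zero-arity lambda rejects while a proc silently ignores. A quick illustration, not part of the gem:

```ruby
free = proc { puts "freed" }
free.call(1234)   # proc ignores the extra argument => prints "freed"

free = -> { puts "freed" }
free.call(1234)   # lambda enforces arity => raises ArgumentError
```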
data/lib/lightgbm/classifier.rb CHANGED
@@ -1,15 +1,10 @@
  module LightGBM
- class Classifier
- def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
- @params = {
- num_leaves: num_leaves,
- learning_rate: learning_rate
- }
- @params[:objective] = objective if objective
- @n_estimators = n_estimators
+ class Classifier < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
+ super
  end

- def fit(x, y)
+ def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
  n_classes = y.uniq.size

  params = @params.dup
@@ -20,26 +15,34 @@ module LightGBM
  params[:objective] ||= "binary"
  end

- train_set = Dataset.new(x, label: y)
- @booster = LightGBM.train(params, train_set, num_boost_round: @n_estimators)
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
+
+ @booster = LightGBM.train(params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose,
+ valid_sets: valid_sets,
+ valid_names: eval_names
+ )
  nil
  end

- def predict(data)
- y_pred = @booster.predict(data)
+ def predict(data, num_iteration: nil)
+ y_pred = @booster.predict(data, num_iteration: num_iteration)

  if y_pred.first.is_a?(Array)
  # multiple classes
  y_pred.map do |v|
- v.map.with_index.max_by { |v2, i| v2 }.last
+ v.map.with_index.max_by { |v2, _| v2 }.last
  end
  else
  y_pred.map { |v| v > 0.5 ? 1 : 0 }
  end
  end

- def predict_proba(data)
- y_pred = @booster.predict(data)
+ def predict_proba(data, num_iteration: nil)
+ y_pred = @booster.predict(data, num_iteration: num_iteration)

  if y_pred.first.is_a?(Array)
  # multiple classes
@@ -48,17 +51,5 @@ module LightGBM
  y_pred.map { |v| [1 - v, v] }
  end
  end
-
- def save_model(fname)
- @booster.save_model(fname)
- end
-
- def load_model(fname)
- @booster = Booster.new(params: @params, model_file: fname)
- end
-
- def feature_importances
- @booster.feature_importance
- end
  end
  end
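
Sketch of the expanded `Classifier` interface, with the new evaluation-set and `num_iteration:` options; data and variable names are placeholders:

```ruby
model = LightGBM::Classifier.new
model.fit(x_train, y_train, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
model.predict(x_test)                                              # class labels
model.predict_proba(x_test, num_iteration: model.best_iteration)   # per-class probabilities
```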
data/lib/lightgbm/dataset.rb CHANGED
@@ -2,7 +2,7 @@ module LightGBM
  class Dataset
  attr_reader :data, :params

- def initialize(data, label: nil, weight: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
+ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
  @data = data

  # TODO stringify params
@@ -13,12 +13,12 @@ module LightGBM
  @handle = ::FFI::MemoryPointer.new(:pointer)
  parameters = params_str(params)
  reference = reference.handle_pointer if reference
- if data.is_a?(String)
- check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
- elsif used_indices
+ if used_indices
  used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
  used_row_indices.put_array_of_int32(0, used_indices)
  check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
+ elsif data.is_a?(String)
+ check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
  else
  if matrix?(data)
  nrow = data.row_count
@@ -40,11 +40,11 @@ module LightGBM
  c_data.put_array_of_float(0, flat_data)
  check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
  end
- # causes "Stack consistency error"
- # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
+ ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices

- set_field("label", label) if label
- set_field("weight", weight) if weight
+ self.label = label if label
+ self.weight = weight if weight
+ self.group = group if group
  end

  def label
@@ -55,6 +55,18 @@ module LightGBM
  field("weight")
  end

+ def label=(label)
+ set_field("label", label)
+ end
+
+ def weight=(weight)
+ set_field("weight", weight)
+ end
+
+ def group=(group)
+ set_field("group", group, type: :int32)
+ end
+
  def num_data
  out = ::FFI::MemoryPointer.new(:int)
  check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
@@ -71,9 +83,10 @@ module LightGBM
  check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
  end

- def dump_text(filename)
- check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
- end
+ # not released yet
+ # def dump_text(filename)
+ # check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
+ # end

  def subset(used_indices, params: nil)
  # categorical_feature passed via params
@@ -85,14 +98,15 @@ module LightGBM
  )
  end

- def self.finalize(pointer)
- -> { FFI.LGBM_DatasetFree(pointer) }
- end
-
  def handle_pointer
  @handle.read_pointer
  end

+ def self.finalize(pointer)
+ # must use proc instead of stabby lambda
+ proc { FFI.LGBM_DatasetFree(pointer) }
+ end
+
  private

  def field(field_name)
@@ -104,11 +118,17 @@ module LightGBM
  out_ptr.read_pointer.read_array_of_float(num_data)
  end

- def set_field(field_name, data)
+ def set_field(field_name, data, type: :float)
  data = data.to_a unless data.is_a?(Array)
- c_data = ::FFI::MemoryPointer.new(:float, data.count)
- c_data.put_array_of_float(0, data)
- check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
+ if type == :int32
+ c_data = ::FFI::MemoryPointer.new(:int32, data.count)
+ c_data.put_array_of_int32(0, data)
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
+ else
+ c_data = ::FFI::MemoryPointer.new(:float, data.count)
+ c_data.put_array_of_float(0, data)
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
+ end
  end

  def matrix?(data)
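
The new `group=` setter stores the query group sizes needed for ranking. The last argument to `LGBM_DatasetSetField` is the C API data type, which appears to correspond to `C_API_DTYPE_FLOAT32` (0) and `C_API_DTYPE_INT32` (2) in LightGBM's c_api.h, hence `type: :int32` for the group field. A usage sketch with placeholder data:

```ruby
train_set = LightGBM::Dataset.new(x_train, label: y_train)
train_set.group = [100, 200]  # first 100 rows form query 1, next 200 rows form query 2
```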
data/lib/lightgbm/ffi.rb CHANGED
@@ -1,7 +1,13 @@
  module LightGBM
  module FFI
  extend ::FFI::Library
- ffi_lib ["lightgbm", "lib_lightgbm.so"]
+
+ begin
+ ffi_lib LightGBM.ffi_lib
+ rescue LoadError => e
+ raise e if ENV["LIGHTGBM_DEBUG"]
+ raise LoadError, "Could not find LightGBM"
+ end

  # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
  # keep same order
@@ -15,7 +21,7 @@ module LightGBM
  attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
  attach_function :LGBM_DatasetFree, %i[pointer], :int
  attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
- attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
+ # attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
  attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
  attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
  attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
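
With the rescue above, a missing shared library now raises a short "Could not find LightGBM" message; setting `LIGHTGBM_DEBUG` re-raises the original `LoadError` so the paths FFI tried remain visible. For example, assuming the variable is set before the gem is used:

```ruby
ENV["LIGHTGBM_DEBUG"] = "1"
require "lightgbm"
LightGBM::FFI  # first reference triggers the autoload; a failure re-raises the original LoadError
```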
data/lib/lightgbm/model.rb ADDED
@@ -0,0 +1,30 @@
+ module LightGBM
+ class Model
+ attr_reader :booster
+
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
+ @params = {
+ num_leaves: num_leaves,
+ learning_rate: learning_rate
+ }.merge(options)
+ @params[:objective] = objective if objective
+ @n_estimators = n_estimators
+ end
+
+ def save_model(fname)
+ @booster.save_model(fname)
+ end
+
+ def load_model(fname)
+ @booster = Booster.new(params: @params, model_file: fname)
+ end
+
+ def best_iteration
+ @booster.best_iteration
+ end
+
+ def feature_importances
+ @booster.feature_importance
+ end
+ end
+ end
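
The new `Model` base class centralizes persistence and introspection for `Classifier`, `Ranker`, and `Regressor`. A rough sketch of the shared interface; the filename is arbitrary:

```ruby
model = LightGBM::Regressor.new
model.fit(x_train, y_train)
model.feature_importances
model.save_model("model.txt")

model = LightGBM::Regressor.new
model.load_model("model.txt")
```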
data/lib/lightgbm/ranker.rb ADDED
@@ -0,0 +1,21 @@
+ module LightGBM
+ class Ranker < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
+ super
+ end
+
+ def fit(x, y, group:, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
+ train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature)
+ @booster = LightGBM.train(@params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose
+ )
+ nil
+ end
+
+ def predict(data, num_iteration: nil)
+ @booster.predict(data, num_iteration: num_iteration)
+ end
+ end
+ end
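
A minimal sketch of the new `Ranker`, which defaults to the `lambdarank` objective; `group` lists how many consecutive rows belong to each query, and the data here is placeholder:

```ruby
model = LightGBM::Ranker.new
model.fit(x_train, y_train, group: [100, 200])
model.predict(x_test)
```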
data/lib/lightgbm/regressor.rb CHANGED
@@ -1,34 +1,25 @@
  module LightGBM
- class Regressor
- def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
- @params = {
- num_leaves: num_leaves,
- learning_rate: learning_rate
- }
- @params[:objective] = objective if objective
- @n_estimators = n_estimators
+ class Regressor < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
+ super
  end

- def fit(x, y)
- train_set = Dataset.new(x, label: y)
- @booster = LightGBM.train(@params, train_set, num_boost_round: @n_estimators)
- nil
- end
-
- def predict(data)
- @booster.predict(data)
- end
+ def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }

- def save_model(fname)
- @booster.save_model(fname)
- end
-
- def load_model(fname)
- @booster = Booster.new(params: @params, model_file: fname)
+ @booster = LightGBM.train(@params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose,
+ valid_sets: valid_sets,
+ valid_names: eval_names
+ )
+ nil
  end

- def feature_importances
- @booster.feature_importance
+ def predict(data, num_iteration: nil)
+ @booster.predict(data, num_iteration: num_iteration)
  end
  end
  end
data/lib/lightgbm/version.rb CHANGED
@@ -1,3 +1,3 @@
  module LightGBM
- VERSION = "0.1.3"
+ VERSION = "0.1.4"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: lightgbm
  version: !ruby/object:Gem::Version
- version: 0.1.3
+ version: 0.1.4
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-16 00:00:00.000000000 Z
+ date: 2019-08-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ffi
@@ -107,6 +107,8 @@ files:
  - lib/lightgbm/classifier.rb
  - lib/lightgbm/dataset.rb
  - lib/lightgbm/ffi.rb
+ - lib/lightgbm/model.rb
+ - lib/lightgbm/ranker.rb
  - lib/lightgbm/regressor.rb
  - lib/lightgbm/utils.rb
  - lib/lightgbm/version.rb
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.4
+ rubygems_version: 3.0.3
  signing_key:
  specification_version: 4
  summary: LightGBM - the high performance machine learning library - for Ruby