lightgbm 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 3d841acf71e8af7111178da8c2062b47900ec953a94154a0cdf9f28bf7d61714
- data.tar.gz: 6ed019f4094803a06be77008e48870fb8db3acac4b83f3675eaeae4e20c27fdb
+ metadata.gz: 81f1f695112234bb576afaab35f4bf276d1f9c4a4adf0c74831cd1bb73f6baa0
+ data.tar.gz: 59ef1f3c581f83e108ce2a6f2c847bb7488fc2ea7f39ba217aaeffbf46e99351
  SHA512:
- metadata.gz: 477e25066789028e7b8a8a78107c1ed823bd06d96d97afdda41b502e2e3e4a9e0065888c414effe4ace4097baa4d4b18988c4ee6b4a9d06347992afa201a52b5
- data.tar.gz: eabb924994ffcafce6cb9038a60e3327528d2308d39c62bc336a06191e471ff412e141f9117446abc068aabbe9d1d16be59cc8bdca889270219895e85ec9e57b
+ metadata.gz: 21297d26e88957dd60d0aa61da19aa53aa632958adaf069b0efe2c8dae35e2e21e74c374da3509e337ca3268613b14dc541aee5012df086af7e8f784adb5063d
+ data.tar.gz: 7dbdc0fccaf256a1a835aea3eaa51fe326a0cd4b8cde168a6b8c27ff00c6412a5b5d82583fb1c14749f001f002a4fa3c3e156b9c4b2174b1e91f9449a4fa9ba1
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
+ ## 0.1.4
+
+ - Friendlier message when LightGBM not found
+ - Added `Ranker`
+ - Added early stopping to Scikit-Learn API
+ - Free memory when objects are destroyed
+ - Removed unreleased `dump_text` method
+
  ## 0.1.3

  - Added Scikit-Learn API
data/README.md CHANGED
@@ -18,7 +18,7 @@ gem 'lightgbm'

  ## Getting Started

- This library follows the [Data Structure, Training, and Scikit-Learn APIs](https://lightgbm.readthedocs.io/en/latest/Python-API.html) of the Python library. A few differences are:
+ This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:

  - The `get_` prefix is removed from methods
  - The default verbosity is `-1`
@@ -63,7 +63,7 @@ booster.feature_importance
  Early stopping

  ```ruby
- LightGBM.train(params, train_set, valid_set: [train_set, test_set], early_stopping_rounds: 5)
+ LightGBM.train(params, train_set, valid_sets: [train_set, test_set], early_stopping_rounds: 5)
  ```

  CV
@@ -116,6 +116,12 @@ Get the importance of features
  model.feature_importances
  ```

+ Early stopping
+
+ ```ruby
+ model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
+ ```
+
  ## Data

  Data can be an array of arrays
@@ -5,16 +5,26 @@ require "ffi"
5
5
  require "lightgbm/utils"
6
6
  require "lightgbm/booster"
7
7
  require "lightgbm/dataset"
8
- require "lightgbm/ffi"
9
8
  require "lightgbm/version"
10
9
 
11
10
  # scikit-learn API
11
+ require "lightgbm/model"
12
12
  require "lightgbm/classifier"
13
+ require "lightgbm/ranker"
13
14
  require "lightgbm/regressor"
14
15
 
15
16
  module LightGBM
16
17
  class Error < StandardError; end
17
18
 
19
+ class << self
20
+ attr_accessor :ffi_lib
21
+ end
22
+ lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
23
+ self.ffi_lib = [lib_name, "lib_lightgbm.so"]
24
+
25
+ # friendlier error message
26
+ autoload :FFI, "lightgbm/ffi"
27
+
18
28
  class << self
19
29
  def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
20
30
  booster = Booster.new(params: params, train_set: train_set)
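The new `ffi_lib` accessor is assigned before `FFI` is autoloaded, so the shared-library search path can in principle be overridden from application code. A minimal sketch, assuming a custom build at a made-up path:

```ruby
require "lightgbm"

# Hypothetical path to a custom LightGBM build; must be set before the first
# LightGBM call, since lightgbm/ffi is only autoloaded on first use.
LightGBM.ffi_lib = ["/opt/lightgbm/lib_lightgbm.so"]
```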
data/lib/lightgbm/booster.rb CHANGED
@@ -14,8 +14,7 @@ module LightGBM
  set_verbosity(params)
  check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
  end
- # causes "Stack consistency error"
- # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
+ ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))

  self.best_iteration = -1

@@ -23,10 +22,6 @@ module LightGBM
  @name_valid_sets = []
  end

- def self.finalize(pointer)
- -> { FFI.LGBM_BoosterFree(pointer) }
- end
-
  def add_valid(data, name)
  check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
  @name_valid_sets << name
@@ -153,6 +148,11 @@ module LightGBM
  finished.read_int == 1
  end

+ def self.finalize(pointer)
+ # must use proc instead of stabby lambda
+ proc { FFI.LGBM_BoosterFree(pointer) }
+ end
+
  private

  def handle_pointer
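The comment in the diff only says a proc must be used instead of a stabby lambda. A plausible reason (an inference, not stated in the source) is that Ruby invokes finalizers with the object id, and lambdas enforce arity while procs ignore extra arguments. A standalone sketch:

```ruby
# Standalone illustration, not code from the gem.
obj = Object.new

# A proc ignores the object id argument passed at finalization time.
ObjectSpace.define_finalizer(obj, proc { puts "freed" })

# A zero-arity lambda would raise ArgumentError when the GC invokes it:
# ObjectSpace.define_finalizer(obj, -> { puts "freed" })

obj = nil
GC.start # finalizers also run at process exit if the object survives GC here
```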
data/lib/lightgbm/classifier.rb CHANGED
@@ -1,15 +1,10 @@
  module LightGBM
- class Classifier
- def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
- @params = {
- num_leaves: num_leaves,
- learning_rate: learning_rate
- }
- @params[:objective] = objective if objective
- @n_estimators = n_estimators
+ class Classifier < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
+ super
  end

- def fit(x, y)
+ def fit(x, y, eval_set: nil, eval_names: [], categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
  n_classes = y.uniq.size

  params = @params.dup
@@ -20,26 +15,34 @@ module LightGBM
  params[:objective] ||= "binary"
  end

- train_set = Dataset.new(x, label: y)
- @booster = LightGBM.train(params, train_set, num_boost_round: @n_estimators)
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
+
+ @booster = LightGBM.train(params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose,
+ valid_sets: valid_sets,
+ valid_names: eval_names
+ )
  nil
  end

- def predict(data)
- y_pred = @booster.predict(data)
+ def predict(data, num_iteration: nil)
+ y_pred = @booster.predict(data, num_iteration: num_iteration)

  if y_pred.first.is_a?(Array)
  # multiple classes
  y_pred.map do |v|
- v.map.with_index.max_by { |v2, i| v2 }.last
+ v.map.with_index.max_by { |v2, _| v2 }.last
  end
  else
  y_pred.map { |v| v > 0.5 ? 1 : 0 }
  end
  end

- def predict_proba(data)
- y_pred = @booster.predict(data)
+ def predict_proba(data, num_iteration: nil)
+ y_pred = @booster.predict(data, num_iteration: num_iteration)

  if y_pred.first.is_a?(Array)
  # multiple classes
@@ -48,17 +51,5 @@ module LightGBM
  y_pred.map { |v| [1 - v, v] }
  end
  end
-
- def save_model(fname)
- @booster.save_model(fname)
- end
-
- def load_model(fname)
- @booster = Booster.new(params: @params, model_file: fname)
- end
-
- def feature_importances
- @booster.feature_importance
- end
  end
  end
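Taken together, the new `fit`, `predict`, and `predict_proba` signatures allow validation data and early stopping from the Scikit-Learn-style API. An illustrative sketch with made-up toy data:

```ruby
# Made-up toy data, just to show the new keyword arguments.
x_train = [[1, 2], [3, 4], [5, 6], [7, 8]]
y_train = [0, 0, 1, 1]
x_test  = [[2, 3], [6, 7]]
y_test  = [0, 1]

model = LightGBM::Classifier.new(learning_rate: 0.05)
model.fit(x_train, y_train, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)

model.predict(x_test)                                       # class labels
model.predict_proba(x_test)                                 # class probabilities
model.predict(x_test, num_iteration: model.best_iteration)  # stop at the best round
```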
data/lib/lightgbm/dataset.rb CHANGED
@@ -2,7 +2,7 @@ module LightGBM
  class Dataset
  attr_reader :data, :params

- def initialize(data, label: nil, weight: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
+ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
  @data = data

  # TODO stringify params
@@ -13,12 +13,12 @@ module LightGBM
  @handle = ::FFI::MemoryPointer.new(:pointer)
  parameters = params_str(params)
  reference = reference.handle_pointer if reference
- if data.is_a?(String)
- check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
- elsif used_indices
+ if used_indices
  used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
  used_row_indices.put_array_of_int32(0, used_indices)
  check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
+ elsif data.is_a?(String)
+ check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
  else
  if matrix?(data)
  nrow = data.row_count
@@ -40,11 +40,11 @@ module LightGBM
  c_data.put_array_of_float(0, flat_data)
  check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
  end
- # causes "Stack consistency error"
- # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
+ ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices

- set_field("label", label) if label
- set_field("weight", weight) if weight
+ self.label = label if label
+ self.weight = weight if weight
+ self.group = group if group
  end

  def label
@@ -55,6 +55,18 @@ module LightGBM
  field("weight")
  end

+ def label=(label)
+ set_field("label", label)
+ end
+
+ def weight=(weight)
+ set_field("weight", weight)
+ end
+
+ def group=(group)
+ set_field("group", group, type: :int32)
+ end
+
  def num_data
  out = ::FFI::MemoryPointer.new(:int)
  check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
@@ -71,9 +83,10 @@ module LightGBM
  check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
  end

- def dump_text(filename)
- check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
- end
+ # not released yet
+ # def dump_text(filename)
+ #   check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
+ # end

  def subset(used_indices, params: nil)
  # categorical_feature passed via params
@@ -85,14 +98,15 @@ module LightGBM
  )
  end

- def self.finalize(pointer)
- -> { FFI.LGBM_DatasetFree(pointer) }
- end
-
  def handle_pointer
  @handle.read_pointer
  end

+ def self.finalize(pointer)
+ # must use proc instead of stabby lambda
+ proc { FFI.LGBM_DatasetFree(pointer) }
+ end
+
  private

  def field(field_name)
@@ -104,11 +118,17 @@ module LightGBM
  out_ptr.read_pointer.read_array_of_float(num_data)
  end

- def set_field(field_name, data)
+ def set_field(field_name, data, type: :float)
  data = data.to_a unless data.is_a?(Array)
- c_data = ::FFI::MemoryPointer.new(:float, data.count)
- c_data.put_array_of_float(0, data)
- check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
+ if type == :int32
+ c_data = ::FFI::MemoryPointer.new(:int32, data.count)
+ c_data.put_array_of_int32(0, data)
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
+ else
+ c_data = ::FFI::MemoryPointer.new(:float, data.count)
+ c_data.put_array_of_float(0, data)
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
+ end
  end

  def matrix?(data)
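With the new `group` keyword and writer (stored as an int32 field), a ranking dataset can be built directly. A sketch with toy data, where `group` lists how many consecutive rows belong to each query, per LightGBM's usual group-field semantics:

```ruby
# Toy ranking data: the first 2 rows are one query, the next 3 another.
x = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
y = [1, 0, 2, 1, 0]

train_set = LightGBM::Dataset.new(x, label: y, group: [2, 3])

# equivalent, using the new writer after construction
train_set.group = [2, 3]
```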
data/lib/lightgbm/ffi.rb CHANGED
@@ -1,7 +1,13 @@
  module LightGBM
  module FFI
  extend ::FFI::Library
- ffi_lib ["lightgbm", "lib_lightgbm.so"]
+
+ begin
+ ffi_lib LightGBM.ffi_lib
+ rescue LoadError => e
+ raise e if ENV["LIGHTGBM_DEBUG"]
+ raise LoadError, "Could not find LightGBM"
+ end

  # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
  # keep same order
@@ -15,7 +21,7 @@ module LightGBM
  attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
  attach_function :LGBM_DatasetFree, %i[pointer], :int
  attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
- attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
+ # attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
  attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
  attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
  attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
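Per the rescue above, the original LoadError is re-raised only when LIGHTGBM_DEBUG is set. One way to surface it when troubleshooting, assuming the variable is set before the library is first used:

```ruby
ENV["LIGHTGBM_DEBUG"] = "1" # or run: LIGHTGBM_DEBUG=1 ruby script.rb
require "lightgbm"
# the detailed LoadError now surfaces if lightgbm/ffi fails to load
```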
data/lib/lightgbm/model.rb ADDED
@@ -0,0 +1,30 @@
+ module LightGBM
+ class Model
+ attr_reader :booster
+
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil, **options)
+ @params = {
+ num_leaves: num_leaves,
+ learning_rate: learning_rate
+ }.merge(options)
+ @params[:objective] = objective if objective
+ @n_estimators = n_estimators
+ end
+
+ def save_model(fname)
+ @booster.save_model(fname)
+ end
+
+ def load_model(fname)
+ @booster = Booster.new(params: @params, model_file: fname)
+ end
+
+ def best_iteration
+ @booster.best_iteration
+ end
+
+ def feature_importances
+ @booster.feature_importance
+ end
+ end
+ end
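The shared `Model` base class gives every estimator the same persistence and inspection helpers. A sketch with placeholder data and file name:

```ruby
# Placeholder data and file name.
x = [[1, 2], [3, 4], [5, 6], [7, 8]]
y = [1.0, 2.0, 3.0, 4.0]

model = LightGBM::Regressor.new
model.fit(x, y)
model.save_model("model.txt")
model.feature_importances

loaded = LightGBM::Regressor.new
loaded.load_model("model.txt")
loaded.predict(x)
```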
data/lib/lightgbm/ranker.rb ADDED
@@ -0,0 +1,21 @@
+ module LightGBM
+ class Ranker < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "lambdarank", **options)
+ super
+ end
+
+ def fit(x, y, group:, categorical_feature: "auto", early_stopping_rounds: nil, verbose: true)
+ train_set = Dataset.new(x, label: y, group: group, categorical_feature: categorical_feature)
+ @booster = LightGBM.train(@params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose
+ )
+ nil
+ end
+
+ def predict(data, num_iteration: nil)
+ @booster.predict(data, num_iteration: num_iteration)
+ end
+ end
+ end
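Usage of the new `Ranker` follows its `fit` signature above; an illustrative sketch with toy data:

```ruby
# Toy data: two queries with 2 and 3 documents; y holds relevance grades.
x = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]]
y = [1, 0, 2, 1, 0]

model = LightGBM::Ranker.new
model.fit(x, y, group: [2, 3])
model.predict(x)
```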
data/lib/lightgbm/regressor.rb CHANGED
@@ -1,34 +1,25 @@
  module LightGBM
- class Regressor
- def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: nil)
- @params = {
- num_leaves: num_leaves,
- learning_rate: learning_rate
- }
- @params[:objective] = objective if objective
- @n_estimators = n_estimators
+ class Regressor < Model
+ def initialize(num_leaves: 31, learning_rate: 0.1, n_estimators: 100, objective: "regression", **options)
+ super
  end

- def fit(x, y)
- train_set = Dataset.new(x, label: y)
- @booster = LightGBM.train(@params, train_set, num_boost_round: @n_estimators)
- nil
- end
-
- def predict(data)
- @booster.predict(data)
- end
+ def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }

- def save_model(fname)
- @booster.save_model(fname)
- end
-
- def load_model(fname)
- @booster = Booster.new(params: @params, model_file: fname)
+ @booster = LightGBM.train(@params, train_set,
+ num_boost_round: @n_estimators,
+ early_stopping_rounds: early_stopping_rounds,
+ verbose_eval: verbose,
+ valid_sets: valid_sets,
+ valid_names: eval_names
+ )
+ nil
  end

- def feature_importances
- @booster.feature_importance
+ def predict(data, num_iteration: nil)
+ @booster.predict(data, num_iteration: num_iteration)
  end
  end
  end
data/lib/lightgbm/version.rb CHANGED
@@ -1,3 +1,3 @@
  module LightGBM
- VERSION = "0.1.3"
+ VERSION = "0.1.4"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: lightgbm
  version: !ruby/object:Gem::Version
- version: 0.1.3
+ version: 0.1.4
  platform: ruby
  authors:
  - Andrew Kane
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2019-08-16 00:00:00.000000000 Z
+ date: 2019-08-19 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ffi
@@ -107,6 +107,8 @@ files:
  - lib/lightgbm/classifier.rb
  - lib/lightgbm/dataset.rb
  - lib/lightgbm/ffi.rb
+ - lib/lightgbm/model.rb
+ - lib/lightgbm/ranker.rb
  - lib/lightgbm/regressor.rb
  - lib/lightgbm/utils.rb
  - lib/lightgbm/version.rb
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
  - !ruby/object:Gem::Version
  version: '0'
  requirements: []
- rubygems_version: 3.0.4
+ rubygems_version: 3.0.3
  signing_key:
  specification_version: 4
  summary: LightGBM - the high performance machine learning library - for Ruby