lightgbm 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: dad09a50916e229119d99de66aa676868224af44d8958916284718edb7fb3a3f
4
- data.tar.gz: 2f7d299a7037455a4ebd1ccaf41bc102ec66808b9e806a59194fd14699204364
3
+ metadata.gz: 2b3405aa832aaa365ce38d59bf5a36b318f8d02c1a236ea144eb9b102bf7f07e
4
+ data.tar.gz: 5be378b94486a668fffdc276a73ff62f24a24b644932bd776fd877f4b69b157a
5
5
  SHA512:
6
- metadata.gz: aa415946a82be07f1b9b8823b60fc75030f9e5547859078459583fa64d9212aad11db39d30e8e327b1fe23f48fc2ce9c6ddbd0445b52f73b44374a07c4724f5c
7
- data.tar.gz: 99357888fde412672ecd804396cbd3a524b7dee6318d46d92a07064b858cd5f62f144f6b680a25ae0f1ae567df7d4670fcf38ba706fa31cbdfe58fd79eabc62d
6
+ metadata.gz: 914379fe9271306117fc222534915f3c19de1c6b165ecc053f49b9089da0b85594c8430958f03f28a23d98fcf2879c4948c8d3e5fecee920c2984700928380ec
7
+ data.tar.gz: 4f93ae6b4a2c3123d452ecf41d7ada8473af397f12ddafb4e34b4e552aae4b6ef23ecb75b54ce3f7f64ba0985402c6172070de4afaca2ea614875f6d91c2de1a
@@ -1,11 +1,35 @@
1
- ## 0.1.5
1
+ ## 0.2.0 (2020-08-31)
2
+
3
+ - Updated LightGBM to 3.0.0
4
+
5
+ ## 0.1.9 (2020-06-10)
6
+
7
+ - Added support for Rover
8
+ - Improved performance of Numo datasets
9
+
10
+ ## 0.1.8 (2020-05-09)
11
+
12
+ - Improved error message when OpenMP not found on Mac
13
+ - Fixed `Cannot add validation data` error
14
+
15
+ ## 0.1.7 (2019-12-05)
16
+
17
+ - Updated LightGBM to 2.3.1
18
+ - Switched to doubles for datasets and predictions
19
+
20
+ ## 0.1.6 (2019-09-29)
21
+
22
+ - Updated LightGBM to 2.3.0
23
+ - Fixed error with JRuby
24
+
25
+ ## 0.1.5 (2019-09-03)
2
26
 
3
27
  - Packaged LightGBM with gem
4
28
  - Added support for missing values
5
29
  - Added `feature_names` to datasets
6
30
  - Fixed Daru training and prediction
7
31
 
8
- ## 0.1.4
32
+ ## 0.1.4 (2019-08-19)
9
33
 
10
34
  - Friendlier message when LightGBM not found
11
35
  - Added `Ranker`
@@ -13,22 +37,22 @@
13
37
  - Free memory when objects are destroyed
14
38
  - Removed unreleased `dump_text` method
15
39
 
16
- ## 0.1.3
40
+ ## 0.1.3 (2019-08-16)
17
41
 
18
42
  - Added Scikit-Learn API
19
43
  - Added support for Daru and Numo::NArray
20
44
 
21
- ## 0.1.2
45
+ ## 0.1.2 (2019-08-15)
22
46
 
23
47
  - Added `cv` method
24
48
  - Added early stopping
25
49
  - Fixed multiclass classification
26
50
 
27
- ## 0.1.1
51
+ ## 0.1.1 (2019-08-14)
28
52
 
29
53
  - Added training API
30
54
  - Added many methods
31
55
 
32
- ## 0.1.0
56
+ ## 0.1.0 (2019-08-13)
33
57
 
34
58
  - First release
@@ -1,22 +1,22 @@
1
- Copyright (c) 2019 Andrew Kane
1
+ The MIT License (MIT)
2
2
 
3
- MIT License
3
+ Copyright (c) Microsoft Corporation
4
+ Copyright (c) 2019-2020 Andrew Kane
4
5
 
5
- Permission is hereby granted, free of charge, to any person obtaining
6
- a copy of this software and associated documentation files (the
7
- "Software"), to deal in the Software without restriction, including
8
- without limitation the rights to use, copy, modify, merge, publish,
9
- distribute, sublicense, and/or sell copies of the Software, and to
10
- permit persons to whom the Software is furnished to do so, subject to
11
- the following conditions:
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
12
 
13
- The above copyright notice and this permission notice shall be
14
- included in all copies or substantial portions of the Software.
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
15
 
16
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
data/README.md CHANGED
@@ -1,8 +1,6 @@
1
1
  # LightGBM
2
2
 
3
- [LightGBM](https://github.com/microsoft/LightGBM) - the high performance machine learning library - for Ruby
4
-
5
- :fire: Uses the C API for blazing performance
3
+ [LightGBM](https://github.com/microsoft/LightGBM) - high performance gradient boosting - for Ruby
6
4
 
7
5
  [![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
8
6
 
@@ -14,22 +12,12 @@ Add this line to your application’s Gemfile:
14
12
  gem 'lightgbm'
15
13
  ```
16
14
 
17
- LightGBM is packaged with the gem, so no need to install it separately. On Mac, also run:
15
+ On Mac, also install OpenMP:
18
16
 
19
17
  ```sh
20
18
  brew install libomp
21
19
  ```
22
20
 
23
- ## Getting Started
24
-
25
- This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
26
-
27
- - The `get_` and `set_` prefixes are removed from methods
28
- - The default verbosity is `-1`
29
- - With the `cv` method, `stratified` is set to `false`
30
-
31
- Some methods and options are also missing at the moment. PRs welcome!
32
-
33
21
  ## Training API
34
22
 
35
23
  Prep your data
@@ -141,16 +129,22 @@ Data can be an array of arrays
141
129
  [[1, 2, 3], [4, 5, 6]]
142
130
  ```
143
131
 
144
- Or a Daru data frame
132
+ Or a Numo array
145
133
 
146
134
  ```ruby
147
- Daru::DataFrame.from_csv("houses.csv")
135
+ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
148
136
  ```
149
137
 
150
- Or a Numo NArray
138
+ Or a Rover data frame
151
139
 
152
140
  ```ruby
153
- Numo::DFloat.new(3, 2).seq
141
+ Rover.read_csv("houses.csv")
142
+ ```
143
+
144
+ Or a Daru data frame
145
+
146
+ ```ruby
147
+ Daru::DataFrame.from_csv("houses.csv")
154
148
  ```
155
149
 
156
150
  ## Helpful Resources
@@ -160,12 +154,18 @@ Numo::DFloat.new(3, 2).seq
160
154
 
161
155
  ## Related Projects
162
156
 
163
- - [Xgb](https://github.com/ankane/xgb) - XGBoost for Ruby
164
- - [Eps](https://github.com/ankane/eps) - Machine Learning for Ruby
157
+ - [XGBoost](https://github.com/ankane/xgboost) - XGBoost for Ruby
158
+ - [Eps](https://github.com/ankane/eps) - Machine learning for Ruby
165
159
 
166
160
  ## Credits
167
161
 
168
- Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
162
+ This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
163
+
164
+ - The `get_` and `set_` prefixes are removed from methods
165
+ - The default verbosity is `-1`
166
+ - With the `cv` method, `stratified` is set to `false`
167
+
168
+ Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for showing how to use FFI.
169
169
 
170
170
  ## History
171
171
 
@@ -179,3 +179,13 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
179
179
  - Fix bugs and [submit pull requests](https://github.com/ankane/lightgbm/pulls)
180
180
  - Write, clarify, or fix documentation
181
181
  - Suggest or add new features
182
+
183
+ To get started with development:
184
+
185
+ ```sh
186
+ git clone https://github.com/ankane/lightgbm.git
187
+ cd lightgbm
188
+ bundle install
189
+ bundle exec rake vendor:all
190
+ bundle exec rake test
191
+ ```
@@ -36,6 +36,8 @@ module LightGBM
36
36
  booster.train_data_name = name || "training"
37
37
  valid_contain_train = true
38
38
  else
39
+ # ensure the validation set references the training set
40
+ data.reference = train_set
39
41
  booster.add_valid(data, name || "valid_#{i}")
40
42
  end
41
43
  end
@@ -59,16 +61,14 @@ module LightGBM
59
61
  # print results
60
62
  messages = []
61
63
 
64
+ eval_valid = booster.eval_valid
62
65
  if valid_contain_train
63
- # not sure why reversed in output
64
- booster.eval_train.reverse.each do |res|
65
- messages << "%s's %s: %g" % [res[0], res[1], res[2]]
66
- end
66
+ eval_valid = eval_valid + booster.eval_train
67
67
  end
68
-
69
- eval_valid = booster.eval_valid
70
68
  # not sure why reversed in output
71
- eval_valid.reverse.each do |res|
69
+ eval_valid.reverse!
70
+
71
+ eval_valid.each do |res|
72
72
  messages << "%s's %s: %g" % [res[0], res[1], res[2]]
73
73
  end
74
74
 
@@ -133,6 +133,7 @@ module LightGBM
133
133
  if early_stopping_rounds
134
134
  best_score = {}
135
135
  best_iter = {}
136
+ best_iteration = nil
136
137
  end
137
138
 
138
139
  num_boost_round.times do |iteration|
@@ -172,6 +173,7 @@ module LightGBM
172
173
  best_score[k] = score
173
174
  best_iter[k] = iteration
174
175
  elsif iteration - best_iter[k] >= early_stopping_rounds
176
+ best_iteration = best_iter[k]
175
177
  stop_early = true
176
178
  break
177
179
  end
@@ -180,6 +182,14 @@ module LightGBM
180
182
  end
181
183
  end
182
184
 
185
+ if early_stopping_rounds
186
+ # use best iteration from first metric if not stopped early
187
+ best_iteration ||= best_iter[best_iter.keys.first]
188
+ eval_hist.each_key do |k|
189
+ eval_hist[k] = eval_hist[k].first(best_iteration + 1)
190
+ end
191
+ end
192
+
183
193
  eval_hist
184
194
  end
185
195
 
@@ -30,7 +30,7 @@ module LightGBM
30
30
 
31
31
  def current_iteration
32
32
  out = ::FFI::MemoryPointer.new(:int)
33
- check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
33
+ check_result FFI.LGBM_BoosterGetCurrentIteration(handle_pointer, out)
34
34
  out.read_int
35
35
  end
36
36
 
@@ -38,12 +38,13 @@ module LightGBM
38
38
  num_iteration ||= best_iteration
39
39
  buffer_len = 1 << 20
40
40
  out_len = ::FFI::MemoryPointer.new(:int64)
41
- out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
42
- check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
43
- actual_len = out_len.read_int64
41
+ out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
42
+ feature_importance_type = 0 # TODO add option
43
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
44
+ actual_len = read_int64(out_len)
44
45
  if actual_len > buffer_len
45
- out_str = ::FFI::MemoryPointer.new(:string, actual_len)
46
- check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
46
+ out_str = ::FFI::MemoryPointer.new(:char, actual_len)
47
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
47
48
  end
48
49
  out_str.read_string
49
50
  end
@@ -85,12 +86,13 @@ module LightGBM
85
86
  num_iteration ||= best_iteration
86
87
  buffer_len = 1 << 20
87
88
  out_len = ::FFI::MemoryPointer.new(:int64)
88
- out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
89
- check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
90
- actual_len = out_len.read_int64
89
+ out_str = ::FFI::MemoryPointer.new(:char, buffer_len)
90
+ feature_importance_type = 0 # TODO add option
91
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, buffer_len, out_len, out_str)
92
+ actual_len = read_int64(out_len)
91
93
  if actual_len > buffer_len
92
- out_str = ::FFI::MemoryPointer.new(:string, actual_len)
93
- check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
94
+ out_str = ::FFI::MemoryPointer.new(:char, actual_len)
95
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, feature_importance_type, actual_len, out_len, out_str)
94
96
  end
95
97
  out_str.read_string
96
98
  end
@@ -104,18 +106,18 @@ module LightGBM
104
106
 
105
107
  def num_model_per_iteration
106
108
  out = ::FFI::MemoryPointer.new(:int)
107
- check_result FFI::LGBM_BoosterNumModelPerIteration(handle_pointer, out)
109
+ check_result FFI.LGBM_BoosterNumModelPerIteration(handle_pointer, out)
108
110
  out.read_int
109
111
  end
110
112
 
111
113
  def num_trees
112
114
  out = ::FFI::MemoryPointer.new(:int)
113
- check_result FFI::LGBM_BoosterNumberOfTotalModel(handle_pointer, out)
115
+ check_result FFI.LGBM_BoosterNumberOfTotalModel(handle_pointer, out)
114
116
  out.read_int
115
117
  end
116
118
 
117
119
  # TODO support different prediction types
118
- def predict(input, num_iteration: nil, **params)
120
+ def predict(input, start_iteration: nil, num_iteration: nil, **params)
119
121
  input =
120
122
  if daru?(input)
121
123
  input.map_rows(&:to_a)
@@ -126,18 +128,19 @@ module LightGBM
126
128
  singular = !input.first.is_a?(Array)
127
129
  input = [input] if singular
128
130
 
131
+ start_iteration ||= 0
129
132
  num_iteration ||= best_iteration
130
133
  num_class ||= num_class()
131
134
 
132
135
  flat_input = input.flatten
133
136
  handle_missing(flat_input)
134
- data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
135
- data.put_array_of_float(0, flat_input)
137
+ data = ::FFI::MemoryPointer.new(:double, input.count * input.first.count)
138
+ data.write_array_of_double(flat_input)
136
139
 
137
140
  out_len = ::FFI::MemoryPointer.new(:int64)
138
141
  out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
139
- check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 0, input.count, input.first.count, 1, 0, num_iteration, params_str(params), out_len, out_result)
140
- out = out_result.read_array_of_double(out_len.read_int64)
142
+ check_result FFI.LGBM_BoosterPredictForMat(handle_pointer, data, 1, input.count, input.first.count, 1, 0, start_iteration, num_iteration, params_str(params), out_len, out_result)
143
+ out = out_result.read_array_of_double(read_int64(out_len))
141
144
  out = out.each_slice(num_class).to_a if num_class > 1
142
145
 
143
146
  singular ? out.first : out
@@ -145,7 +148,8 @@ module LightGBM
145
148
 
146
149
  def save_model(filename, num_iteration: nil, start_iteration: 0)
147
150
  num_iteration ||= best_iteration
148
- check_result FFI.LGBM_BoosterSaveModel(handle_pointer, start_iteration, num_iteration, filename)
151
+ feature_importance_type = 0 # TODO add
152
+ check_result FFI.LGBM_BoosterSaveModel(handle_pointer, start_iteration, num_iteration, feature_importance_type, filename)
149
153
  self # consistent with Python API
150
154
  end
151
155
 
@@ -168,17 +172,19 @@ module LightGBM
168
172
 
169
173
  def eval_counts
170
174
  out = ::FFI::MemoryPointer.new(:int)
171
- check_result FFI::LGBM_BoosterGetEvalCounts(handle_pointer, out)
175
+ check_result FFI.LGBM_BoosterGetEvalCounts(handle_pointer, out)
172
176
  out.read_int
173
177
  end
174
178
 
175
179
  def eval_names
176
180
  eval_counts ||= eval_counts()
177
181
  out_len = ::FFI::MemoryPointer.new(:int)
182
+ out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
178
183
  out_strs = ::FFI::MemoryPointer.new(:pointer, eval_counts)
179
- str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:string, 255) }
180
- out_strs.put_array_of_pointer(0, str_ptrs)
181
- check_result FFI.LGBM_BoosterGetEvalNames(handle_pointer, out_len, out_strs)
184
+ buffer_len = 255
185
+ str_ptrs = eval_counts.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
186
+ out_strs.write_array_of_pointer(str_ptrs)
187
+ check_result FFI.LGBM_BoosterGetEvalNames(handle_pointer, eval_counts, out_len, buffer_len, out_buffer_len, out_strs)
182
188
  str_ptrs.map(&:read_string)
183
189
  end
184
190
 
@@ -198,10 +204,15 @@ module LightGBM
198
204
 
199
205
  def num_class
200
206
  out = ::FFI::MemoryPointer.new(:int)
201
- check_result FFI::LGBM_BoosterGetNumClasses(handle_pointer, out)
207
+ check_result FFI.LGBM_BoosterGetNumClasses(handle_pointer, out)
202
208
  out.read_int
203
209
  end
204
210
 
211
+ # read_int64 not available on JRuby
212
+ def read_int64(ptr)
213
+ ptr.read_array_of_int64(1).first
214
+ end
215
+
205
216
  include Utils
206
217
  end
207
218
  end
@@ -4,51 +4,16 @@ module LightGBM
4
4
 
5
5
  def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
6
6
  @data = data
7
+ @label = label
8
+ @weight = weight
9
+ @group = group
10
+ @params = params
11
+ @reference = reference
12
+ @used_indices = used_indices
13
+ @categorical_feature = categorical_feature
14
+ @feature_names = feature_names
7
15
 
8
- # TODO stringify params
9
- params ||= {}
10
- if categorical_feature != "auto" && categorical_feature.any?
11
- params["categorical_feature"] ||= categorical_feature.join(",")
12
- end
13
- set_verbosity(params)
14
-
15
- @handle = ::FFI::MemoryPointer.new(:pointer)
16
- parameters = params_str(params)
17
- reference = reference.handle_pointer if reference
18
- if used_indices
19
- used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
20
- used_row_indices.put_array_of_int32(0, used_indices)
21
- check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
22
- elsif data.is_a?(String)
23
- check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
24
- else
25
- if matrix?(data)
26
- nrow = data.row_count
27
- ncol = data.column_count
28
- flat_data = data.to_a.flatten
29
- elsif daru?(data)
30
- nrow, ncol = data.shape
31
- flat_data = data.map_rows(&:to_a).flatten
32
- elsif narray?(data)
33
- nrow, ncol = data.shape
34
- flat_data = data.flatten.to_a
35
- else
36
- nrow = data.count
37
- ncol = data.first.count
38
- flat_data = data.flatten
39
- end
40
-
41
- handle_missing(flat_data)
42
- c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
43
- c_data.put_array_of_float(0, flat_data)
44
- check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
45
- end
46
- ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
47
-
48
- self.label = label if label
49
- self.weight = weight if weight
50
- self.group = group if group
51
- self.feature_names = feature_names if feature_names
16
+ construct
52
17
  end
53
18
 
54
19
  def label
@@ -59,34 +24,50 @@ module LightGBM
59
24
  field("weight")
60
25
  end
61
26
 
62
- def label=(label)
63
- set_field("label", label)
64
- end
65
-
66
27
  def feature_names
67
28
  # must preallocate space
68
29
  num_feature_names = ::FFI::MemoryPointer.new(:int)
69
- out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
70
- str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:string, 255) }
71
- out_strs.put_array_of_pointer(0, str_ptrs)
72
- check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
30
+ out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
31
+ len = 1000
32
+ out_strs = ::FFI::MemoryPointer.new(:pointer, len)
33
+ buffer_len = 255
34
+ str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
35
+ out_strs.write_array_of_pointer(str_ptrs)
36
+ check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, len, num_feature_names, buffer_len, out_buffer_len, out_strs)
73
37
  str_ptrs[0, num_feature_names.read_int].map(&:read_string)
74
38
  end
75
39
 
40
+ def label=(label)
41
+ @label = label
42
+ set_field("label", label)
43
+ end
44
+
76
45
  def weight=(weight)
46
+ @weight = weight
77
47
  set_field("weight", weight)
78
48
  end
79
49
 
80
50
  def group=(group)
51
+ @group = group
81
52
  set_field("group", group, type: :int32)
82
53
  end
83
54
 
84
55
  def feature_names=(feature_names)
56
+ @feature_names = feature_names
85
57
  c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
86
58
  c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
87
59
  check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
88
60
  end
89
61
 
62
+ # TODO only update reference if not in chain
63
+ def reference=(reference)
64
+ if reference != @reference
65
+ @reference = reference
66
+ free_handle
67
+ construct
68
+ end
69
+ end
70
+
90
71
  def num_data
91
72
  out = ::FFI::MemoryPointer.new(:int)
92
73
  check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
@@ -103,11 +84,6 @@ module LightGBM
103
84
  check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
104
85
  end
105
86
 
106
- # not released yet
107
- # def dump_text(filename)
108
- # check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
109
- # end
110
-
111
87
  def subset(used_indices, params: nil)
112
88
  # categorical_feature passed via params
113
89
  params ||= self.params
@@ -129,6 +105,70 @@ module LightGBM
129
105
 
130
106
  private
131
107
 
108
+ def construct
109
+ data = @data
110
+ used_indices = @used_indices
111
+
112
+ # TODO stringify params
113
+ params = @params || {}
114
+ if @categorical_feature != "auto" && @categorical_feature.any?
115
+ params["categorical_feature"] ||= @categorical_feature.join(",")
116
+ end
117
+ set_verbosity(params)
118
+
119
+ @handle = ::FFI::MemoryPointer.new(:pointer)
120
+ parameters = params_str(params)
121
+ reference = @reference.handle_pointer if @reference
122
+ if used_indices
123
+ used_row_indices = ::FFI::MemoryPointer.new(:int32, used_indices.count)
124
+ used_row_indices.write_array_of_int32(used_indices)
125
+ check_result FFI.LGBM_DatasetGetSubset(reference, used_row_indices, used_indices.count, parameters, @handle)
126
+ elsif data.is_a?(String)
127
+ check_result FFI.LGBM_DatasetCreateFromFile(data, parameters, reference, @handle)
128
+ else
129
+ if matrix?(data)
130
+ nrow = data.row_count
131
+ ncol = data.column_count
132
+ flat_data = data.to_a.flatten
133
+ elsif daru?(data)
134
+ nrow, ncol = data.shape
135
+ flat_data = data.map_rows(&:to_a).flatten
136
+ elsif numo?(data) || rover?(data)
137
+ data = data.to_numo if rover?(data)
138
+ nrow, ncol = data.shape
139
+ else
140
+ nrow = data.count
141
+ ncol = data.first.count
142
+ flat_data = data.flatten
143
+ end
144
+
145
+ c_data = ::FFI::MemoryPointer.new(:double, nrow * ncol)
146
+ if numo?(data)
147
+ c_data.write_bytes(data.cast_to(Numo::DFloat).to_string)
148
+ else
149
+ handle_missing(flat_data)
150
+ c_data.write_array_of_double(flat_data)
151
+ end
152
+
153
+ check_result FFI.LGBM_DatasetCreateFromMat(c_data, 1, nrow, ncol, 1, parameters, reference, @handle)
154
+ end
155
+ ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer)) unless used_indices
156
+
157
+ self.label = @label if @label
158
+ self.weight = @weight if @weight
159
+ self.group = @group if @group
160
+ self.feature_names = @feature_names if @feature_names
161
+ end
162
+
163
+ def free_handle
164
+ FFI.LGBM_DatasetFree(handle_pointer)
165
+ ObjectSpace.undefine_finalizer(self)
166
+ end
167
+
168
+ def dump_text(filename)
169
+ check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
170
+ end
171
+
132
172
  def field(field_name)
133
173
  num_data = self.num_data
134
174
  out_len = ::FFI::MemoryPointer.new(:int)
@@ -142,11 +182,11 @@ module LightGBM
142
182
  data = data.to_a unless data.is_a?(Array)
143
183
  if type == :int32
144
184
  c_data = ::FFI::MemoryPointer.new(:int32, data.count)
145
- c_data.put_array_of_int32(0, data)
185
+ c_data.write_array_of_int32(data)
146
186
  check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 2)
147
187
  else
148
188
  c_data = ::FFI::MemoryPointer.new(:float, data.count)
149
- c_data.put_array_of_float(0, data)
189
+ c_data.write_array_of_float(data)
150
190
  check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
151
191
  end
152
192
  end
@@ -5,11 +5,11 @@ module LightGBM
5
5
  begin
6
6
  ffi_lib LightGBM.ffi_lib
7
7
  rescue LoadError => e
8
- raise e if ENV["LIGHTGBM_DEBUG"]
9
- if e.message.include?("libomp")
10
- raise LoadError, "Could not find OpenMP"
8
+ if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
9
+ raise LoadError, "OpenMP not found. Run `brew install libomp`"
10
+ else
11
+ raise e
11
12
  end
12
- raise LoadError, "Could not find LightGBM"
13
13
  end
14
14
 
15
15
  # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
@@ -23,10 +23,10 @@ module LightGBM
23
23
  attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
24
24
  attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
25
25
  attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
26
- attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
26
+ attach_function :LGBM_DatasetGetFeatureNames, %i[pointer int pointer size_t pointer pointer], :int
27
27
  attach_function :LGBM_DatasetFree, %i[pointer], :int
28
28
  attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
29
- # attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
29
+ attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
30
30
  attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
31
31
  attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
32
32
  attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
@@ -44,13 +44,13 @@ module LightGBM
44
44
  attach_function :LGBM_BoosterNumModelPerIteration, %i[pointer pointer], :int
45
45
  attach_function :LGBM_BoosterNumberOfTotalModel, %i[pointer pointer], :int
46
46
  attach_function :LGBM_BoosterGetEvalCounts, %i[pointer pointer], :int
47
- attach_function :LGBM_BoosterGetEvalNames, %i[pointer pointer pointer], :int
47
+ attach_function :LGBM_BoosterGetEvalNames, %i[pointer int pointer size_t pointer pointer], :int
48
48
  attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
49
49
  attach_function :LGBM_BoosterGetEval, %i[pointer int pointer pointer], :int
50
- attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int string pointer pointer], :int
51
- attach_function :LGBM_BoosterSaveModel, %i[pointer int int string], :int
52
- attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int64 pointer pointer], :int
53
- attach_function :LGBM_BoosterDumpModel, %i[pointer int int int64 pointer pointer], :int
50
+ attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int int string pointer pointer], :int
51
+ attach_function :LGBM_BoosterSaveModel, %i[pointer int int int string], :int
52
+ attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int int64 pointer pointer], :int
53
+ attach_function :LGBM_BoosterDumpModel, %i[pointer int int int int64 pointer pointer], :int
54
54
  attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
55
55
  end
56
56
  end
@@ -37,8 +37,12 @@ module LightGBM
37
37
  defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
38
38
  end
39
39
 
40
- def narray?(data)
40
+ def numo?(data)
41
41
  defined?(Numo::NArray) && data.is_a?(Numo::NArray)
42
42
  end
43
+
44
+ def rover?(data)
45
+ defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
46
+ end
43
47
  end
44
48
  end
@@ -1,3 +1,3 @@
1
1
  module LightGBM
2
- VERSION = "0.1.5"
2
+ VERSION = "0.2.0"
3
3
  end
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lightgbm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-03 00:00:00.000000000 Z
11
+ date: 2020-09-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -80,20 +80,6 @@ dependencies:
80
80
  - - ">="
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: numo-narray
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
83
  description:
98
84
  email: andrew@chartkick.com
99
85
  executables: []
@@ -136,8 +122,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
136
122
  - !ruby/object:Gem::Version
137
123
  version: '0'
138
124
  requirements: []
139
- rubygems_version: 3.0.3
125
+ rubygems_version: 3.1.2
140
126
  signing_key:
141
127
  specification_version: 4
142
- summary: LightGBM - the high performance machine learning library - for Ruby
128
+ summary: High performance gradient boosting for Ruby
143
129
  test_files: []