lightgbm 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 81f1f695112234bb576afaab35f4bf276d1f9c4a4adf0c74831cd1bb73f6baa0
4
- data.tar.gz: 59ef1f3c581f83e108ce2a6f2c847bb7488fc2ea7f39ba217aaeffbf46e99351
3
+ metadata.gz: dad09a50916e229119d99de66aa676868224af44d8958916284718edb7fb3a3f
4
+ data.tar.gz: 2f7d299a7037455a4ebd1ccaf41bc102ec66808b9e806a59194fd14699204364
5
5
  SHA512:
6
- metadata.gz: 21297d26e88957dd60d0aa61da19aa53aa632958adaf069b0efe2c8dae35e2e21e74c374da3509e337ca3268613b14dc541aee5012df086af7e8f784adb5063d
7
- data.tar.gz: 7dbdc0fccaf256a1a835aea3eaa51fe326a0cd4b8cde168a6b8c27ff00c6412a5b5d82583fb1c14749f001f002a4fa3c3e156b9c4b2174b1e91f9449a4fa9ba1
6
+ metadata.gz: aa415946a82be07f1b9b8823b60fc75030f9e5547859078459583fa64d9212aad11db39d30e8e327b1fe23f48fc2ce9c6ddbd0445b52f73b44374a07c4724f5c
7
+ data.tar.gz: 99357888fde412672ecd804396cbd3a524b7dee6318d46d92a07064b858cd5f62f144f6b680a25ae0f1ae567df7d4670fcf38ba706fa31cbdfe58fd79eabc62d
@@ -1,3 +1,10 @@
1
+ ## 0.1.5
2
+
3
+ - Packaged LightGBM with gem
4
+ - Added support for missing values
5
+ - Added `feature_names` to datasets
6
+ - Fixed Daru training and prediction
7
+
1
8
  ## 0.1.4
2
9
 
3
10
  - Friendlier message when LightGBM not found
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2019 Andrew Kane
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md CHANGED
@@ -8,19 +8,23 @@
8
8
 
9
9
  ## Installation
10
10
 
11
- First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
12
-
13
11
  Add this line to your application’s Gemfile:
14
12
 
15
13
  ```ruby
16
14
  gem 'lightgbm'
17
15
  ```
18
16
 
17
+ LightGBM is packaged with the gem, so there is no need to install it separately. On Mac, also run:
18
+
19
+ ```sh
20
+ brew install libomp
21
+ ```
22
+
19
23
  ## Getting Started
20
24
 
21
25
  This library follows the [Python API](https://lightgbm.readthedocs.io/en/latest/Python-API.html). A few differences are:
22
26
 
23
- - The `get_` prefix is removed from methods
27
+ - The `get_` and `set_` prefixes are removed from methods
24
28
  - The default verbosity is `-1`
25
29
  - With the `cv` method, `stratified` is set to `false`
26
30
 
@@ -28,18 +32,25 @@ Some methods and options are also missing at the moment. PRs welcome!
28
32
 
29
33
  ## Training API
30
34
 
35
+ Prep your data
36
+
37
+ ```ruby
38
+ x = [[1, 2], [3, 4], [5, 6], [7, 8]]
39
+ y = [1, 2, 3, 4]
40
+ ```
41
+
31
42
  Train a model
32
43
 
33
44
  ```ruby
34
45
  params = {objective: "regression"}
35
- train_set = LightGBM::Dataset.new(x_train, label: y_train)
46
+ train_set = LightGBM::Dataset.new(x, label: y)
36
47
  booster = LightGBM.train(params, train_set)
37
48
  ```
38
49
 
39
50
  Predict
40
51
 
41
52
  ```ruby
42
- booster.predict(x_test)
53
+ booster.predict(x)
43
54
  ```
44
55
 
45
56
  Save the model to a file
@@ -20,7 +20,8 @@ module LightGBM
20
20
  attr_accessor :ffi_lib
21
21
  end
22
22
  lib_name = "lib_lightgbm.#{::FFI::Platform::LIBSUFFIX}"
23
- self.ffi_lib = [lib_name, "lib_lightgbm.so"]
23
+ vendor_lib = File.expand_path("../vendor/#{lib_name}", __dir__)
24
+ self.ffi_lib = [lib_name, "lib_lightgbm.so", vendor_lib]
24
25
 
25
26
  # friendlier error message
26
27
  autoload :FFI, "lightgbm/ffi"
@@ -39,6 +40,8 @@ module LightGBM
39
40
  end
40
41
  end
41
42
 
43
+ raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && !valid_sets.any? { |v| v != train_set }
44
+
42
45
  booster.best_iteration = 0
43
46
 
44
47
  if early_stopping_rounds
@@ -116,7 +116,12 @@ module LightGBM
116
116
 
117
117
  # TODO support different prediction types
118
118
  def predict(input, num_iteration: nil, **params)
119
- raise TypeError unless input.is_a?(Array)
119
+ input =
120
+ if daru?(input)
121
+ input.map_rows(&:to_a)
122
+ else
123
+ input.to_a
124
+ end
120
125
 
121
126
  singular = !input.first.is_a?(Array)
122
127
  input = [input] if singular
@@ -124,8 +129,10 @@ module LightGBM
124
129
  num_iteration ||= best_iteration
125
130
  num_class ||= num_class()
126
131
 
132
+ flat_input = input.flatten
133
+ handle_missing(flat_input)
127
134
  data = ::FFI::MemoryPointer.new(:float, input.count * input.first.count)
128
- data.put_array_of_float(0, input.flatten)
135
+ data.put_array_of_float(0, flat_input)
129
136
 
130
137
  out_len = ::FFI::MemoryPointer.new(:int64)
131
138
  out_result = ::FFI::MemoryPointer.new(:double, num_class * input.count)
@@ -15,8 +15,8 @@ module LightGBM
15
15
  params[:objective] ||= "binary"
16
16
  end
17
17
 
18
- train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
19
- valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
18
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: params)
19
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: params) }
20
20
 
21
21
  @booster = LightGBM.train(params, train_set,
22
22
  num_boost_round: @n_estimators,
@@ -2,12 +2,14 @@ module LightGBM
2
2
  class Dataset
3
3
  attr_reader :data, :params
4
4
 
5
- def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto")
5
+ def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
6
6
  @data = data
7
7
 
8
8
  # TODO stringify params
9
9
  params ||= {}
10
- params["categorical_feature"] ||= categorical_feature.join(",") if categorical_feature != "auto"
10
+ if categorical_feature != "auto" && categorical_feature.any?
11
+ params["categorical_feature"] ||= categorical_feature.join(",")
12
+ end
11
13
  set_verbosity(params)
12
14
 
13
15
  @handle = ::FFI::MemoryPointer.new(:pointer)
@@ -26,7 +28,7 @@ module LightGBM
26
28
  flat_data = data.to_a.flatten
27
29
  elsif daru?(data)
28
30
  nrow, ncol = data.shape
29
- flat_data = data.each_vector.map(&:to_a).flatten
31
+ flat_data = data.map_rows(&:to_a).flatten
30
32
  elsif narray?(data)
31
33
  nrow, ncol = data.shape
32
34
  flat_data = data.flatten.to_a
@@ -36,6 +38,7 @@ module LightGBM
36
38
  flat_data = data.flatten
37
39
  end
38
40
 
41
+ handle_missing(flat_data)
39
42
  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
40
43
  c_data.put_array_of_float(0, flat_data)
41
44
  check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, nrow, ncol, 1, parameters, reference, @handle)
@@ -45,6 +48,7 @@ module LightGBM
45
48
  self.label = label if label
46
49
  self.weight = weight if weight
47
50
  self.group = group if group
51
+ self.feature_names = feature_names if feature_names
48
52
  end
49
53
 
50
54
  def label
@@ -59,6 +63,16 @@ module LightGBM
59
63
  set_field("label", label)
60
64
  end
61
65
 
66
+ def feature_names
67
+ # must preallocate space
68
+ num_feature_names = ::FFI::MemoryPointer.new(:int)
69
+ out_strs = ::FFI::MemoryPointer.new(:pointer, 1000)
70
+ str_ptrs = 1000.times.map { ::FFI::MemoryPointer.new(:string, 255) }
71
+ out_strs.put_array_of_pointer(0, str_ptrs)
72
+ check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, out_strs, num_feature_names)
73
+ str_ptrs[0, num_feature_names.read_int].map(&:read_string)
74
+ end
75
+
62
76
  def weight=(weight)
63
77
  set_field("weight", weight)
64
78
  end
@@ -67,6 +81,12 @@ module LightGBM
67
81
  set_field("group", group, type: :int32)
68
82
  end
69
83
 
84
+ def feature_names=(feature_names)
85
+ c_feature_names = ::FFI::MemoryPointer.new(:pointer, feature_names.size)
86
+ c_feature_names.write_array_of_pointer(feature_names.map { |v| ::FFI::MemoryPointer.from_string(v) })
87
+ check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
88
+ end
89
+
70
90
  def num_data
71
91
  out = ::FFI::MemoryPointer.new(:int)
72
92
  check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
@@ -131,18 +151,6 @@ module LightGBM
131
151
  end
132
152
  end
133
153
 
134
- def matrix?(data)
135
- defined?(Matrix) && data.is_a?(Matrix)
136
- end
137
-
138
- def daru?(data)
139
- defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
140
- end
141
-
142
- def narray?(data)
143
- defined?(Numo::NArray) && data.is_a?(Numo::NArray)
144
- end
145
-
146
154
  include Utils
147
155
  end
148
156
  end
@@ -6,6 +6,9 @@ module LightGBM
6
6
  ffi_lib LightGBM.ffi_lib
7
7
  rescue LoadError => e
8
8
  raise e if ENV["LIGHTGBM_DEBUG"]
9
+ if e.message.include?("libomp")
10
+ raise LoadError, "Could not find OpenMP"
11
+ end
9
12
  raise LoadError, "Could not find LightGBM"
10
13
  end
11
14
 
@@ -19,6 +22,8 @@ module LightGBM
19
22
  attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
20
23
  attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
21
24
  attach_function :LGBM_DatasetGetSubset, %i[pointer pointer int32 string pointer], :int
25
+ attach_function :LGBM_DatasetSetFeatureNames, %i[pointer pointer int], :int
26
+ attach_function :LGBM_DatasetGetFeatureNames, %i[pointer pointer pointer], :int
22
27
  attach_function :LGBM_DatasetFree, %i[pointer], :int
23
28
  attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
24
29
  # attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
@@ -5,8 +5,8 @@ module LightGBM
5
5
  end
6
6
 
7
7
  def fit(x, y, categorical_feature: "auto", eval_set: nil, eval_names: [], early_stopping_rounds: nil, verbose: true)
8
- train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature)
9
- valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set) }
8
+ train_set = Dataset.new(x, label: y, categorical_feature: categorical_feature, params: @params)
9
+ valid_sets = Array(eval_set).map { |v| Dataset.new(v[0], label: v[1], reference: train_set, params: @params) }
10
10
 
11
11
  @booster = LightGBM.train(@params, train_set,
12
12
  num_boost_round: @n_estimators,
@@ -23,5 +23,22 @@ module LightGBM
23
23
  params["verbosity"] = -1
24
24
  end
25
25
  end
26
+
27
+ # for categorical features, NaN and negative values are treated the same
28
+ def handle_missing(data)
29
+ data.map! { |v| v.nil? ? Float::NAN : v }
30
+ end
31
+
32
+ def matrix?(data)
33
+ defined?(Matrix) && data.is_a?(Matrix)
34
+ end
35
+
36
+ def daru?(data)
37
+ defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
38
+ end
39
+
40
+ def narray?(data)
41
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
42
+ end
26
43
  end
27
44
  end
@@ -1,3 +1,3 @@
1
1
  module LightGBM
2
- VERSION = "0.1.4"
2
+ VERSION = "0.1.5"
3
3
  end
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) Microsoft Corporation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lightgbm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-08-19 00:00:00.000000000 Z
11
+ date: 2019-09-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -101,6 +101,7 @@ extensions: []
101
101
  extra_rdoc_files: []
102
102
  files:
103
103
  - CHANGELOG.md
104
+ - LICENSE.txt
104
105
  - README.md
105
106
  - lib/lightgbm.rb
106
107
  - lib/lightgbm/booster.rb
@@ -112,6 +113,10 @@ files:
112
113
  - lib/lightgbm/regressor.rb
113
114
  - lib/lightgbm/utils.rb
114
115
  - lib/lightgbm/version.rb
116
+ - vendor/LICENSE
117
+ - vendor/lib_lightgbm.dll
118
+ - vendor/lib_lightgbm.dylib
119
+ - vendor/lib_lightgbm.so
115
120
  homepage: https://github.com/ankane/lightgbm
116
121
  licenses:
117
122
  - MIT