lightgbm 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c7222cd44123f8c35d26b91dbf0288e3d0f7a87e3204a6a501f7590648896d2f
4
- data.tar.gz: 2a288731615d9042b0b60184c534bd00d3d64d1d8be1b8dbc2aecb782fd3332b
3
+ metadata.gz: 49e0eef0a10a444e0cc24c8188268d349037fb12054b3f3f73ab14ed54fae3d7
4
+ data.tar.gz: 9ee78189ec31bfb3dc9cea6fe5836f97a010097cc821819d05236a899d0654af
5
5
  SHA512:
6
- metadata.gz: 290d729343c88d2054082692cff9dc948332933439b11915bd507338dc28354fec936b7eded198c8a6bd18d9821ff32d1dccc146c71db7567ddc6037838c0e4b
7
- data.tar.gz: 24cd193a72188d43b71cc223d45cc943d710149d42b7ee0fb11eeab8fdf0a9e006ae021313d0881d91cebfbc510a08864721af42f4976e5cd0663ca1a6534f39
6
+ metadata.gz: d95050754e85ee004df08c4761f31f1bfc97e3efbcd3ea0ae2251f5a84eeff2978e16118411ddeced74a9c7d3fd731176488cbbac4ed2bdd840e55e4dd6172db
7
+ data.tar.gz: 52dcca52827fffca3d638c814eec359c4f2d397b025cc9ee99323bebbbf5436d3a7fcf569390f0c71eb9623c68a711b0e32ba38a53364873fe9d0de99b2f3f66
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.1
2
+
3
+ - Added training API
4
+ - Added many methods
5
+
1
6
  ## 0.1.0
2
7
 
3
8
  - First release
data/README.md CHANGED
@@ -1,10 +1,14 @@
1
1
  # LightGBM
2
2
 
3
- LightGBM for Ruby
3
+ [LightGBM](https://github.com/microsoft/LightGBM) for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ [![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
4
8
 
5
9
  ## Installation
6
10
 
7
- First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html).
11
+ First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
8
12
 
9
13
  Add this line to your application’s Gemfile:
10
14
 
@@ -12,21 +16,65 @@ Add this line to your application’s Gemfile:
12
16
  gem 'lightgbm'
13
17
  ```
14
18
 
15
- Load a model
19
+ Train a model
16
20
 
17
21
  ```ruby
18
- booster = LightGBM::Booster.new(model_file: "model.txt")
22
+ params = {objective: "regression"}
23
+ train_set = LightGBM::Dataset.new(x_train, label: y_train)
24
+ booster = LightGBM.train(params, train_set)
19
25
  ```
20
26
 
21
27
  Predict
22
28
 
23
29
  ```ruby
24
- booster.predict([[1, 2], [3, 4]])
30
+ booster.predict(x_test)
31
+ ```
32
+
33
+ Save the model
34
+
35
+ ```ruby
36
+ booster.save_model("model.txt")
37
+ ```
38
+
39
+ Load a model from a file
40
+
41
+ ```ruby
42
+ booster = LightGBM::Booster.new(model_file: "model.txt")
43
+ ```
44
+
45
+ Get feature importance
46
+
47
+ ```ruby
48
+ booster.feature_importance
49
+ ```
50
+
51
+ ## Reference
52
+
53
+ ### Booster
54
+
55
+ ```ruby
56
+ booster = LightGBM::Booster.new(model_str: "tree...")
57
+ booster.to_json
58
+ booster.model_to_string
59
+ booster.current_iteration
60
+ ```
61
+
62
+ ### Dataset
63
+
64
+ ```ruby
65
+ dataset = LightGBM::Dataset.new(data, label: label, weight: weight, params: params)
66
+ dataset.num_data
67
+ dataset.num_feature
68
+
69
+ # note: only works with unquoted CSVs
70
+ dataset = LightGBM::Dataset.new("data.csv", params: {headers: true, label: "name:label"})
71
+ dataset.save_binary("train.bin")
72
+ dataset.dump_text("train.txt")
25
73
  ```
26
74
 
27
75
  ## Credits
28
76
 
29
- Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference.
77
+ Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
30
78
 
31
79
  ## History
32
80
 
data/lib/lightgbm.rb CHANGED
@@ -2,6 +2,23 @@
2
2
  require "ffi"
3
3
 
4
4
  # modules
5
+ require "lightgbm/utils"
5
6
  require "lightgbm/booster"
7
+ require "lightgbm/dataset"
6
8
  require "lightgbm/ffi"
7
9
  require "lightgbm/version"
10
+
11
+ module LightGBM
12
+ class Error < StandardError; end
13
+
14
+ def self.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [])
15
+ booster = Booster.new(params: params, train_set: train_set)
16
+ valid_sets.zip(valid_names) do |data, name|
17
+ booster.add_valid(data, name)
18
+ end
19
+ num_boost_round.times do
20
+ booster.update
21
+ end
22
+ booster
23
+ end
24
+ end
@@ -1,18 +1,30 @@
1
1
  module LightGBM
2
2
  class Booster
3
- def initialize(model_file:)
3
+ def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
4
4
  @handle = ::FFI::MemoryPointer.new(:pointer)
5
- if model_file
5
+ if model_str
6
+ out_num_iterations = ::FFI::MemoryPointer.new(:int)
7
+ check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
8
+ elsif model_file
6
9
  out_num_iterations = ::FFI::MemoryPointer.new(:int)
7
10
  check_result FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, @handle)
11
+ else
12
+ check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
8
13
  end
9
- ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
14
+ # finalizer disabled: registering it causes "Stack consistency error"
15
+ # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
10
16
  end
11
17
 
12
18
  def self.finalize(pointer)
13
19
  -> { FFI.LGBM_BoosterFree(pointer) }
14
20
  end
15
21
 
22
+ # TODO: handle name (the argument is currently ignored)
23
+ def add_valid(data, name)
24
+ check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
25
+ self # consistent with Python API
26
+ end
27
+
16
28
  def predict(input)
17
29
  raise TypeError unless input.is_a?(Array)
18
30
 
@@ -31,16 +43,86 @@ module LightGBM
31
43
  singular ? out : out.first
32
44
  end
33
45
 
34
- private
46
+ def save_model(filename)
47
+ check_result FFI.LGBM_BoosterSaveModel(handle_pointer, 0, 0, filename)
48
+ self # consistent with Python API
49
+ end
50
+
51
+ def update
52
+ finished = ::FFI::MemoryPointer.new(:int)
53
+ check_result FFI.LGBM_BoosterUpdateOneIter(handle_pointer, finished)
54
+ finished.read_int == 1
55
+ end
56
+
57
+ def feature_importance(iteration: nil, importance_type: "split")
58
+ iteration ||= best_iteration
59
+ importance_type =
60
+ case importance_type
61
+ when "split"
62
+ 0
63
+ when "gain"
64
+ 1
65
+ else
66
+ -1
67
+ end
68
+
69
+ num_features = self.num_features
70
+ out_result = ::FFI::MemoryPointer.new(:double, num_features)
71
+ check_result FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, importance_type, out_result)
72
+ out_result.read_array_of_double(num_features)
73
+ end
74
+
75
+ def num_features
76
+ out = ::FFI::MemoryPointer.new(:int)
77
+ check_result FFI.LGBM_BoosterGetNumFeature(handle_pointer, out)
78
+ out.read_int
79
+ end
35
80
 
36
- def check_result(err)
37
- if err != 0
38
- raise FFI.LGBM_GetLastError
81
+ def current_iteration
82
+ out = ::FFI::MemoryPointer.new(:int)
83
+ check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
84
+ out.read_int
85
+ end
86
+
87
+ # TODO: fix; currently always returns -1 rather than a tracked best iteration
88
+ def best_iteration
89
+ -1
90
+ end
91
+
92
+ def model_to_string(num_iteration: nil, start_iteration: 0)
93
+ num_iteration ||= best_iteration
94
+ buffer_len = 1 << 20
95
+ out_len = ::FFI::MemoryPointer.new(:int64)
96
+ out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
97
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
98
+ actual_len = out_len.read_int64
99
+ if actual_len > buffer_len
100
+ out_str = ::FFI::MemoryPointer.new(:string, actual_len)
101
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
102
+ end
103
+ out_str.read_string
104
+ end
105
+
106
+ def to_json(num_iteration: nil, start_iteration: 0)
107
+ num_iteration ||= best_iteration
108
+ buffer_len = 1 << 20
109
+ out_len = ::FFI::MemoryPointer.new(:int64)
110
+ out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
111
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
112
+ actual_len = out_len.read_int64
113
+ if actual_len > buffer_len
114
+ out_str = ::FFI::MemoryPointer.new(:string, actual_len)
115
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
39
116
  end
117
+ out_str.read_string
40
118
  end
41
119
 
120
+ private
121
+
42
122
  def handle_pointer
43
123
  @handle.read_pointer
44
124
  end
125
+
126
+ include Utils
45
127
  end
46
128
  end
@@ -0,0 +1,78 @@
1
+ module LightGBM
2
+ class Dataset
3
+ attr_reader :data, :params
4
+
5
+ def initialize(data, label: nil, weight: nil, params: nil)
6
+ @data = data
7
+
8
+ @handle = ::FFI::MemoryPointer.new(:pointer)
9
+ if data.is_a?(String)
10
+ check_result FFI.LGBM_DatasetCreateFromFile(data, params_str(params), nil, @handle)
11
+ else
12
+ c_data = ::FFI::MemoryPointer.new(:float, data.count * data.first.count)
13
+ c_data.put_array_of_float(0, data.flatten)
14
+ check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, params_str(params), nil, @handle)
15
+ end
16
+ # finalizer disabled: registering it causes "Stack consistency error"
17
+ # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
18
+
19
+ set_field("label", label) if label
20
+ set_field("weight", weight) if weight
21
+ end
22
+
23
+ def label
24
+ field("label")
25
+ end
26
+
27
+ def weight
28
+ field("weight")
29
+ end
30
+
31
+ def num_data
32
+ out = ::FFI::MemoryPointer.new(:int)
33
+ check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
34
+ out.read_int
35
+ end
36
+
37
+ def num_feature
38
+ out = ::FFI::MemoryPointer.new(:int)
39
+ check_result FFI.LGBM_DatasetGetNumFeature(handle_pointer, out)
40
+ out.read_int
41
+ end
42
+
43
+ def save_binary(filename)
44
+ check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
45
+ end
46
+
47
+ def dump_text(filename)
48
+ check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
49
+ end
50
+
51
+ def self.finalize(pointer)
52
+ -> { FFI.LGBM_DatasetFree(pointer) }
53
+ end
54
+
55
+ def handle_pointer
56
+ @handle.read_pointer
57
+ end
58
+
59
+ private
60
+
61
+ def field(field_name)
62
+ num_data = self.num_data
63
+ out_len = ::FFI::MemoryPointer.new(:int)
64
+ out_ptr = ::FFI::MemoryPointer.new(:float, num_data)
65
+ out_type = ::FFI::MemoryPointer.new(:int)
66
+ check_result FFI.LGBM_DatasetGetField(handle_pointer, field_name, out_len, out_ptr, out_type)
67
+ out_ptr.read_pointer.read_array_of_float(num_data)
68
+ end
69
+
70
+ def set_field(field_name, data)
71
+ c_data = ::FFI::MemoryPointer.new(:float, data.count)
72
+ c_data.put_array_of_float(0, data)
73
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
74
+ end
75
+
76
+ include Utils
77
+ end
78
+ end
data/lib/lightgbm/ffi.rb CHANGED
@@ -4,10 +4,35 @@ module LightGBM
4
4
  ffi_lib ["lightgbm", "lib_lightgbm.so"]
5
5
 
6
6
  # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
7
+ # keep same order
8
+
9
+ # error
7
10
  attach_function :LGBM_GetLastError, %i[], :string
11
+
12
+ # dataset
13
+ attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
14
+ attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
15
+ attach_function :LGBM_DatasetFree, %i[pointer], :int
16
+ attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
17
+ attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
18
+ attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
19
+ attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
20
+ attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
21
+ attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int
22
+
23
+ # booster
8
24
  attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
9
25
  attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
26
+ attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
10
27
  attach_function :LGBM_BoosterFree, %i[pointer], :int
28
+ attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
29
+ attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int
30
+ attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int
31
+ attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
11
32
  attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int string pointer pointer], :int
33
+ attach_function :LGBM_BoosterSaveModel, %i[pointer int int string], :int
34
+ attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int64 pointer pointer], :int
35
+ attach_function :LGBM_BoosterDumpModel, %i[pointer int int int64 pointer pointer], :int
36
+ attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
12
37
  end
13
38
  end
@@ -0,0 +1,19 @@
1
+ module LightGBM
2
+ module Utils
3
+ private
4
+
5
+ def check_result(err)
6
+ raise LightGBM::Error, FFI.LGBM_GetLastError if err != 0
7
+ end
8
+
9
+ # reject keys and values that contain whitespace to prevent parameter injection
10
+ def params_str(params)
11
+ (params || {}).map { |k, v| [check_param(k.to_s), check_param(v.to_s)].join("=") }.join(" ")
12
+ end
13
+
14
+ def check_param(v)
15
+ raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match(v)
16
+ v
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,3 @@
1
1
  module LightGBM
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lightgbm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -76,7 +76,9 @@ files:
76
76
  - README.md
77
77
  - lib/lightgbm.rb
78
78
  - lib/lightgbm/booster.rb
79
+ - lib/lightgbm/dataset.rb
79
80
  - lib/lightgbm/ffi.rb
81
+ - lib/lightgbm/utils.rb
80
82
  - lib/lightgbm/version.rb
81
83
  homepage: https://github.com/ankane/lightgbm
82
84
  licenses: