lightgbm 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c7222cd44123f8c35d26b91dbf0288e3d0f7a87e3204a6a501f7590648896d2f
4
- data.tar.gz: 2a288731615d9042b0b60184c534bd00d3d64d1d8be1b8dbc2aecb782fd3332b
3
+ metadata.gz: 49e0eef0a10a444e0cc24c8188268d349037fb12054b3f3f73ab14ed54fae3d7
4
+ data.tar.gz: 9ee78189ec31bfb3dc9cea6fe5836f97a010097cc821819d05236a899d0654af
5
5
  SHA512:
6
- metadata.gz: 290d729343c88d2054082692cff9dc948332933439b11915bd507338dc28354fec936b7eded198c8a6bd18d9821ff32d1dccc146c71db7567ddc6037838c0e4b
7
- data.tar.gz: 24cd193a72188d43b71cc223d45cc943d710149d42b7ee0fb11eeab8fdf0a9e006ae021313d0881d91cebfbc510a08864721af42f4976e5cd0663ca1a6534f39
6
+ metadata.gz: d95050754e85ee004df08c4761f31f1bfc97e3efbcd3ea0ae2251f5a84eeff2978e16118411ddeced74a9c7d3fd731176488cbbac4ed2bdd840e55e4dd6172db
7
+ data.tar.gz: 52dcca52827fffca3d638c814eec359c4f2d397b025cc9ee99323bebbbf5436d3a7fcf569390f0c71eb9623c68a711b0e32ba38a53364873fe9d0de99b2f3f66
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.1.1
2
+
3
+ - Added training API
4
+ - Added many methods
5
+
1
6
  ## 0.1.0
2
7
 
3
8
  - First release
data/README.md CHANGED
@@ -1,10 +1,14 @@
1
1
  # LightGBM
2
2
 
3
- LightGBM for Ruby
3
+ [LightGBM](https://github.com/microsoft/LightGBM) for Ruby
4
+
5
+ :fire: Uses the C API for blazing performance
6
+
7
+ [![Build Status](https://travis-ci.org/ankane/lightgbm.svg?branch=master)](https://travis-ci.org/ankane/lightgbm)
4
8
 
5
9
  ## Installation
6
10
 
7
- First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html).
11
+ First, [install LightGBM](https://lightgbm.readthedocs.io/en/latest/Installation-Guide.html). On Mac, copy `lib_lightgbm.so` to `/usr/local/lib`.
8
12
 
9
13
  Add this line to your application’s Gemfile:
10
14
 
@@ -12,21 +16,65 @@ Add this line to your application’s Gemfile:
12
16
  gem 'lightgbm'
13
17
  ```
14
18
 
15
- Load a model
19
+ Train a model
16
20
 
17
21
  ```ruby
18
- booster = LightGBM::Booster.new(model_file: "model.txt")
22
+ params = {objective: "regression"}
23
+ train_set = LightGBM::Dataset.new(x_train, label: y_train)
24
+ booster = LightGBM.train(params, train_set)
19
25
  ```
20
26
 
21
27
  Predict
22
28
 
23
29
  ```ruby
24
- booster.predict([[1, 2], [3, 4]])
30
+ booster.predict(x_test)
31
+ ```
32
+
33
+ Save the model
34
+
35
+ ```ruby
36
+ booster.save_model("model.txt")
37
+ ```
38
+
39
+ Load a model from a file
40
+
41
+ ```ruby
42
+ booster = LightGBM::Booster.new(model_file: "model.txt")
43
+ ```
44
+
45
+ Get feature importance
46
+
47
+ ```ruby
48
+ booster.feature_importance
49
+ ```
50
+
51
+ ## Reference
52
+
53
+ ### Booster
54
+
55
+ ```ruby
56
+ booster = LightGBM::Booster.new(model_str: "tree...")
57
+ booster.to_json
58
+ booster.model_to_string
59
+ booster.current_iteration
60
+ ```
61
+
62
+ ### Dataset
63
+
64
+ ```ruby
65
+ dataset = LightGBM::Dataset.new(data, label: label, weight: weight, params: params)
66
+ dataset.num_data
67
+ dataset.num_feature
68
+
69
+ # note: only works with unquoted CSVs
70
+ dataset = LightGBM::Dataset.new("data.csv", params: {headers: true, label: "name:label"})
71
+ dataset.save_binary("train.bin")
72
+ dataset.dump_text("train.txt")
25
73
  ```
26
74
 
27
75
  ## Credits
28
76
 
29
- Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference.
77
+ Thanks to the [xgboost](https://github.com/PairOnAir/xgboost-ruby) gem for serving as an initial reference, and to Selva Prabhakaran for the [test datasets](https://github.com/selva86/datasets).
30
78
 
31
79
  ## History
32
80
 
data/lib/lightgbm.rb CHANGED
@@ -2,6 +2,23 @@
2
2
  require "ffi"
3
3
 
4
4
  # modules
5
+ require "lightgbm/utils"
5
6
  require "lightgbm/booster"
7
+ require "lightgbm/dataset"
6
8
  require "lightgbm/ffi"
7
9
  require "lightgbm/version"
10
+
11
+ module LightGBM
12
+ class Error < StandardError; end
13
+
14
+ def self.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [])
15
+ booster = Booster.new(params: params, train_set: train_set)
16
+ valid_sets.zip(valid_names) do |data, name|
17
+ booster.add_valid(data, name)
18
+ end
19
+ num_boost_round.times do
20
+ booster.update
21
+ end
22
+ booster
23
+ end
24
+ end
@@ -1,18 +1,30 @@
1
1
  module LightGBM
2
2
  class Booster
3
- def initialize(model_file:)
3
+ def initialize(params: nil, train_set: nil, model_file: nil, model_str: nil)
4
4
  @handle = ::FFI::MemoryPointer.new(:pointer)
5
- if model_file
5
+ if model_str
6
+ out_num_iterations = ::FFI::MemoryPointer.new(:int)
7
+ check_result FFI.LGBM_BoosterLoadModelFromString(model_str, out_num_iterations, @handle)
8
+ elsif model_file
6
9
  out_num_iterations = ::FFI::MemoryPointer.new(:int)
7
10
  check_result FFI.LGBM_BoosterCreateFromModelfile(model_file, out_num_iterations, @handle)
11
+ else
12
+ check_result FFI.LGBM_BoosterCreate(train_set.handle_pointer, params_str(params), @handle)
8
13
  end
9
- ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
14
+ # causes "Stack consistency error"
15
+ # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
10
16
  end
11
17
 
12
18
  def self.finalize(pointer)
13
19
  -> { FFI.LGBM_BoosterFree(pointer) }
14
20
  end
15
21
 
22
+ # TODO handle name
23
+ def add_valid(data, name)
24
+ check_result FFI.LGBM_BoosterAddValidData(handle_pointer, data.handle_pointer)
25
+ self # consistent with Python API
26
+ end
27
+
16
28
  def predict(input)
17
29
  raise TypeError unless input.is_a?(Array)
18
30
 
@@ -31,16 +43,86 @@ module LightGBM
31
43
  singular ? out : out.first
32
44
  end
33
45
 
34
- private
46
+ def save_model(filename)
47
+ check_result FFI.LGBM_BoosterSaveModel(handle_pointer, 0, 0, filename)
48
+ self # consistent with Python API
49
+ end
50
+
51
+ def update
52
+ finished = ::FFI::MemoryPointer.new(:int)
53
+ check_result FFI.LGBM_BoosterUpdateOneIter(handle_pointer, finished)
54
+ finished.read_int == 1
55
+ end
56
+
57
+ def feature_importance(iteration: nil, importance_type: "split")
58
+ iteration ||= best_iteration
59
+ importance_type =
60
+ case importance_type
61
+ when "split"
62
+ 0
63
+ when "gain"
64
+ 1
65
+ else
66
+ -1
67
+ end
68
+
69
+ num_features = self.num_features
70
+ out_result = ::FFI::MemoryPointer.new(:double, num_features)
71
+ check_result FFI.LGBM_BoosterFeatureImportance(handle_pointer, iteration, importance_type, out_result)
72
+ out_result.read_array_of_double(num_features)
73
+ end
74
+
75
+ def num_features
76
+ out = ::FFI::MemoryPointer.new(:int)
77
+ check_result FFI.LGBM_BoosterGetNumFeature(handle_pointer, out)
78
+ out.read_int
79
+ end
35
80
 
36
- def check_result(err)
37
- if err != 0
38
- raise FFI.LGBM_GetLastError
81
+ def current_iteration
82
+ out = ::FFI::MemoryPointer.new(:int)
83
+ check_result FFI::LGBM_BoosterGetCurrentIteration(handle_pointer, out)
84
+ out.read_int
85
+ end
86
+
87
+ # TODO fix
88
+ def best_iteration
89
+ -1
90
+ end
91
+
92
+ def model_to_string(num_iteration: nil, start_iteration: 0)
93
+ num_iteration ||= best_iteration
94
+ buffer_len = 1 << 20
95
+ out_len = ::FFI::MemoryPointer.new(:int64)
96
+ out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
97
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
98
+ actual_len = out_len.read_int64
99
+ if actual_len > buffer_len
100
+ out_str = ::FFI::MemoryPointer.new(:string, actual_len)
101
+ check_result FFI.LGBM_BoosterSaveModelToString(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
102
+ end
103
+ out_str.read_string
104
+ end
105
+
106
+ def to_json(num_iteration: nil, start_iteration: 0)
107
+ num_iteration ||= best_iteration
108
+ buffer_len = 1 << 20
109
+ out_len = ::FFI::MemoryPointer.new(:int64)
110
+ out_str = ::FFI::MemoryPointer.new(:string, buffer_len)
111
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, buffer_len, out_len, out_str)
112
+ actual_len = out_len.read_int64
113
+ if actual_len > buffer_len
114
+ out_str = ::FFI::MemoryPointer.new(:string, actual_len)
115
+ check_result FFI.LGBM_BoosterDumpModel(handle_pointer, start_iteration, num_iteration, actual_len, out_len, out_str)
39
116
  end
117
+ out_str.read_string
40
118
  end
41
119
 
120
+ private
121
+
42
122
  def handle_pointer
43
123
  @handle.read_pointer
44
124
  end
125
+
126
+ include Utils
45
127
  end
46
128
  end
@@ -0,0 +1,78 @@
1
+ module LightGBM
2
+ class Dataset
3
+ attr_reader :data, :params
4
+
5
+ def initialize(data, label: nil, weight: nil, params: nil)
6
+ @data = data
7
+
8
+ @handle = ::FFI::MemoryPointer.new(:pointer)
9
+ if data.is_a?(String)
10
+ check_result FFI.LGBM_DatasetCreateFromFile(data, params_str(params), nil, @handle)
11
+ else
12
+ c_data = ::FFI::MemoryPointer.new(:float, data.count * data.first.count)
13
+ c_data.put_array_of_float(0, data.flatten)
14
+ check_result FFI.LGBM_DatasetCreateFromMat(c_data, 0, data.count, data.first.count, 1, params_str(params), nil, @handle)
15
+ end
16
+ # causes "Stack consistency error"
17
+ # ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
18
+
19
+ set_field("label", label) if label
20
+ set_field("weight", weight) if weight
21
+ end
22
+
23
+ def label
24
+ field("label")
25
+ end
26
+
27
+ def weight
28
+ field("weight")
29
+ end
30
+
31
+ def num_data
32
+ out = ::FFI::MemoryPointer.new(:int)
33
+ check_result FFI.LGBM_DatasetGetNumData(handle_pointer, out)
34
+ out.read_int
35
+ end
36
+
37
+ def num_feature
38
+ out = ::FFI::MemoryPointer.new(:int)
39
+ check_result FFI.LGBM_DatasetGetNumFeature(handle_pointer, out)
40
+ out.read_int
41
+ end
42
+
43
+ def save_binary(filename)
44
+ check_result FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
45
+ end
46
+
47
+ def dump_text(filename)
48
+ check_result FFI.LGBM_DatasetDumpText(handle_pointer, filename)
49
+ end
50
+
51
+ def self.finalize(pointer)
52
+ -> { FFI.LGBM_DatasetFree(pointer) }
53
+ end
54
+
55
+ def handle_pointer
56
+ @handle.read_pointer
57
+ end
58
+
59
+ private
60
+
61
+ def field(field_name)
62
+ num_data = self.num_data
63
+ out_len = ::FFI::MemoryPointer.new(:int)
64
+ out_ptr = ::FFI::MemoryPointer.new(:float, num_data)
65
+ out_type = ::FFI::MemoryPointer.new(:int)
66
+ check_result FFI.LGBM_DatasetGetField(handle_pointer, field_name, out_len, out_ptr, out_type)
67
+ out_ptr.read_pointer.read_array_of_float(num_data)
68
+ end
69
+
70
+ def set_field(field_name, data)
71
+ c_data = ::FFI::MemoryPointer.new(:float, data.count)
72
+ c_data.put_array_of_float(0, data)
73
+ check_result FFI.LGBM_DatasetSetField(handle_pointer, field_name, c_data, data.count, 0)
74
+ end
75
+
76
+ include Utils
77
+ end
78
+ end
data/lib/lightgbm/ffi.rb CHANGED
@@ -4,10 +4,35 @@ module LightGBM
4
4
  ffi_lib ["lightgbm", "lib_lightgbm.so"]
5
5
 
6
6
  # https://github.com/microsoft/LightGBM/blob/master/include/LightGBM/c_api.h
7
+ # keep same order
8
+
9
+ # error
7
10
  attach_function :LGBM_GetLastError, %i[], :string
11
+
12
+ # dataset
13
+ attach_function :LGBM_DatasetCreateFromFile, %i[string string pointer pointer], :int
14
+ attach_function :LGBM_DatasetCreateFromMat, %i[pointer int int32 int32 int string pointer pointer], :int
15
+ attach_function :LGBM_DatasetFree, %i[pointer], :int
16
+ attach_function :LGBM_DatasetSaveBinary, %i[pointer string], :int
17
+ attach_function :LGBM_DatasetDumpText, %i[pointer string], :int
18
+ attach_function :LGBM_DatasetSetField, %i[pointer string pointer int int], :int
19
+ attach_function :LGBM_DatasetGetField, %i[pointer string pointer pointer pointer], :int
20
+ attach_function :LGBM_DatasetGetNumData, %i[pointer pointer], :int
21
+ attach_function :LGBM_DatasetGetNumFeature, %i[pointer pointer], :int
22
+
23
+ # booster
8
24
  attach_function :LGBM_BoosterCreate, %i[pointer string pointer], :int
9
25
  attach_function :LGBM_BoosterCreateFromModelfile, %i[string pointer pointer], :int
26
+ attach_function :LGBM_BoosterLoadModelFromString, %i[string pointer pointer], :int
10
27
  attach_function :LGBM_BoosterFree, %i[pointer], :int
28
+ attach_function :LGBM_BoosterAddValidData, %i[pointer pointer], :int
29
+ attach_function :LGBM_BoosterUpdateOneIter, %i[pointer pointer], :int
30
+ attach_function :LGBM_BoosterGetCurrentIteration, %i[pointer pointer], :int
31
+ attach_function :LGBM_BoosterGetNumFeature, %i[pointer pointer], :int
11
32
  attach_function :LGBM_BoosterPredictForMat, %i[pointer pointer int int32 int32 int int int string pointer pointer], :int
33
+ attach_function :LGBM_BoosterSaveModel, %i[pointer int int string], :int
34
+ attach_function :LGBM_BoosterSaveModelToString, %i[pointer int int int64 pointer pointer], :int
35
+ attach_function :LGBM_BoosterDumpModel, %i[pointer int int int64 pointer pointer], :int
36
+ attach_function :LGBM_BoosterFeatureImportance, %i[pointer int int pointer], :int
12
37
  end
13
38
  end
@@ -0,0 +1,19 @@
1
+ module LightGBM
2
+ module Utils
3
+ private
4
+
5
+ def check_result(err)
6
+ raise LightGBM::Error, FFI.LGBM_GetLastError if err != 0
7
+ end
8
+
9
+ # reject keys and values that contain whitespace to prevent injection
10
+ def params_str(params)
11
+ (params || {}).map { |k, v| [check_param(k.to_s), check_param(v.to_s)].join("=") }.join(" ")
12
+ end
13
+
14
+ def check_param(v)
15
+ raise ArgumentError, "Invalid parameter" if /[[:space:]]/.match(v)
16
+ v
17
+ end
18
+ end
19
+ end
@@ -1,3 +1,3 @@
1
1
  module LightGBM
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lightgbm
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -76,7 +76,9 @@ files:
76
76
  - README.md
77
77
  - lib/lightgbm.rb
78
78
  - lib/lightgbm/booster.rb
79
+ - lib/lightgbm/dataset.rb
79
80
  - lib/lightgbm/ffi.rb
81
+ - lib/lightgbm/utils.rb
80
82
  - lib/lightgbm/version.rb
81
83
  homepage: https://github.com/ankane/lightgbm
82
84
  licenses: