xgb 0.3.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e9ad2900e477bc8e8fa5aca236a49fb4d41d8ec28b3c975049b16023a89cfe60
4
- data.tar.gz: 3228781e80b95310a5c874b27fd69ab29ec875e79daf1bdbe99d09178c1ed2b2
3
+ metadata.gz: 73bd02bae172bccd30215402bc58a761f2f39c46002c6fee42d429e052470861
4
+ data.tar.gz: 2773ef6f7e0cb2cd2e71cf5c9329beb08aaca79806e1e359b448b78b684273c0
5
5
  SHA512:
6
- metadata.gz: 228b317e43933701e2b9d3bdd0a1fc9c8781e64e2ec494b63a8be3efa5487350b022b64bcaffb4088e7e24c5698c39f23864248a87277e63d07ed7bd17573a09
7
- data.tar.gz: f8cb92f5f10b20bb7342ee126a75cfb44fc85b0bd01e941b2031ad5b957be9c5c6060b22737ac04584f51eb0ed41668431c849a0417484358496606abf3690de
6
+ metadata.gz: 04e3534c0a6cb921016a39b9331fea803ff8a54c5b9c16ea982f7f9cbde8503f51b088e2a77148ca516c215c505e7081b89e026cc7d230a607bbc03fc8a6873d
7
+ data.tar.gz: 38252580d26db1407d7e843b8c88210d3f65641bc9b0359d3318d22fc94fef8be834fba69f30e7cb35c24d8c364fb3ab18a0dc67f0141c73b30593c3a0154dd4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,28 @@
1
+ ## 0.5.1 (2021-02-08)
2
+
3
+ - Fixed error with validation sets without early stopping
4
+
5
+ ## 0.5.0 (2020-12-12)
6
+
7
+ - Updated XGBoost to 1.3.0
8
+
9
+ ## 0.4.1 (2020-08-26)
10
+
11
+ - Updated XGBoost to 1.2.0
12
+
13
+ ## 0.4.0 (2020-05-17)
14
+
15
+ - Updated XGBoost to 1.1.0
16
+ - Changed default `learning_rate` and `max_depth` for Scikit-Learn API to match Python
17
+ - Added support for Rover
18
+ - Improved performance of Numo datasets
19
+ - Improved error message when OpenMP not found on Mac
20
+
21
+ ## 0.3.1 (2020-04-16)
22
+
23
+ - Added `feature_names` and `feature_types` to `DMatrix`
24
+ - Added feature names to `dump`
25
+
1
26
  ## 0.3.0 (2020-02-19)
2
27
 
3
28
  - Updated XGBoost to 1.0.0
data/NOTICE.txt CHANGED
@@ -1,4 +1,5 @@
1
- Copyright 2019-2020 Andrew Kane
1
+ Copyright XGBoost contributors
2
+ Copyright 2019-2021 Andrew Kane
2
3
 
3
4
  Licensed under the Apache License, Version 2.0 (the "License");
4
5
  you may not use this file except in compliance with the License.
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [XGBoost](https://github.com/dmlc/xgboost) - high performance gradient boosting - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/xgboost.svg?branch=master)](https://travis-ci.org/ankane/xgboost) [![Build status](https://ci.appveyor.com/api/projects/status/s8umwyuahvj68m6p/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/xgboost/branch/master)
5
+ [![Build Status](https://github.com/ankane/xgboost/workflows/build/badge.svg?branch=master)](https://github.com/ankane/xgboost/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -137,16 +137,22 @@ Data can be an array of arrays
137
137
  [[1, 2, 3], [4, 5, 6]]
138
138
  ```
139
139
 
140
- Or a Daru data frame
140
+ Or a Numo array
141
141
 
142
142
  ```ruby
143
- Daru::DataFrame.from_csv("houses.csv")
143
+ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
144
+ ```
145
+
146
+ Or a Rover data frame
147
+
148
+ ```ruby
149
+ Rover.read_csv("houses.csv")
144
150
  ```
145
151
 
146
- Or a Numo NArray
152
+ Or a Daru data frame
147
153
 
148
154
  ```ruby
149
- Numo::DFloat.new(3, 2).seq
155
+ Daru::DataFrame.from_csv("houses.csv")
150
156
  ```
151
157
 
152
158
  ## Helpful Resources
data/lib/xgboost.rb CHANGED
@@ -31,7 +31,8 @@ module XGBoost
31
31
  booster = Booster.new(params: params)
32
32
  num_feature = dtrain.num_col
33
33
  booster.set_param("num_feature", num_feature)
34
- booster.feature_names = num_feature.times.map { |i| "f#{i}" }
34
+ booster.feature_names = dtrain.feature_names
35
+ booster.feature_types = dtrain.feature_types
35
36
  evals ||= []
36
37
 
37
38
  if early_stopping_rounds
@@ -60,7 +61,7 @@ module XGBoost
60
61
  best_score = score
61
62
  best_iter = iteration
62
63
  best_message = message
63
- elsif iteration - best_iter >= early_stopping_rounds
64
+ elsif early_stopping_rounds && iteration - best_iter >= early_stopping_rounds
64
65
  booster.best_iteration = best_iter
65
66
  puts "Stopping. Best iteration:\n#{best_message}" if verbose_eval
66
67
  break
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Booster
3
- attr_accessor :best_iteration, :feature_names
3
+ attr_accessor :best_iteration, :feature_names, :feature_types
4
4
 
5
5
  def initialize(params: nil, model_file: nil)
6
6
  @handle = ::FFI::MemoryPointer.new(:pointer)
@@ -25,11 +25,8 @@ module XGBoost
25
25
  end
26
26
 
27
27
  def eval_set(evals, iteration)
28
- dmats = ::FFI::MemoryPointer.new(:pointer, evals.size)
29
- dmats.write_array_of_pointer(evals.map { |v| v[0].handle_pointer })
30
-
31
- evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
32
- evnames.write_array_of_pointer(evals.map { |v| ::FFI::MemoryPointer.from_string(v[1]) })
28
+ dmats = array_of_pointers(evals.map { |v| v[0].handle_pointer })
29
+ evnames = array_of_pointers(evals.map { |v| string_pointer(v[1]) })
33
30
 
34
31
  out_result = ::FFI::MemoryPointer.new(:pointer)
35
32
 
@@ -67,7 +64,13 @@ module XGBoost
67
64
  def dump(fmap: "", with_stats: false, dump_format: "text")
68
65
  out_len = ::FFI::MemoryPointer.new(:uint64)
69
66
  out_result = ::FFI::MemoryPointer.new(:pointer)
70
- check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
67
+
68
+ names = feature_names || []
69
+ fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
70
+ ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))
71
+
72
+ check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)
73
+
71
74
  out_result.read_pointer.get_array_of_string(0, read_uint64(out_len))
72
75
  end
73
76
 
@@ -155,7 +158,7 @@ module XGBoost
155
158
  end
156
159
 
157
160
  def [](key_name)
158
- key = ::FFI::MemoryPointer.from_string(key_name)
161
+ key = string_pointer(key_name)
159
162
  success = ::FFI::MemoryPointer.new(:int)
160
163
  out_result = ::FFI::MemoryPointer.new(:pointer)
161
164
 
@@ -165,8 +168,8 @@ module XGBoost
165
168
  end
166
169
 
167
170
  def []=(key_name, raw_value)
168
- key = ::FFI::MemoryPointer.from_string(key_name)
169
- value = raw_value.nil? ? nil : ::FFI::MemoryPointer.from_string(raw_value)
171
+ key = string_pointer(key_name)
172
+ value = raw_value.nil? ? nil : string_pointer(raw_value)
170
173
 
171
174
  check_result FFI.XGBoosterSetAttr(handle_pointer, key, value)
172
175
  end
@@ -188,6 +191,14 @@ module XGBoost
188
191
  @handle.read_pointer
189
192
  end
190
193
 
194
+ def array_of_pointers(values)
195
+ ::FFI::MemoryPointer.new(:pointer, values.size).write_array_of_pointer(values)
196
+ end
197
+
198
+ def string_pointer(value)
199
+ ::FFI::MemoryPointer.from_string(value.to_s)
200
+ end
201
+
191
202
  include Utils
192
203
  end
193
204
  end
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Classifier < Model
3
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
3
+ def initialize(n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
4
4
  super
5
5
  end
6
6
 
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class DMatrix
3
- attr_reader :data
3
+ attr_reader :data, :feature_names, :feature_types
4
4
 
5
5
  def initialize(data, label: nil, weight: nil, missing: Float::NAN)
6
6
  @data = data
@@ -15,21 +15,42 @@ module XGBoost
15
15
  elsif daru?(data)
16
16
  nrow, ncol = data.shape
17
17
  flat_data = data.map_rows(&:to_a).flatten
18
- elsif narray?(data)
18
+ @feature_names = data.each_vector.map(&:name)
19
+ @feature_types =
20
+ data.each_vector.map(&:db_type).map do |v|
21
+ case v
22
+ when "INTEGER"
23
+ "int"
24
+ when "DOUBLE"
25
+ "float"
26
+ else
27
+ raise Error, "Unknown feature type: #{v}"
28
+ end
29
+ end
30
+ elsif numo?(data)
19
31
  nrow, ncol = data.shape
20
- flat_data = data.flatten.to_a
32
+ elsif rover?(data)
33
+ nrow, ncol = data.shape
34
+ @feature_names = data.keys
35
+ data = data.to_numo
21
36
  else
22
37
  nrow = data.count
23
38
  ncol = data.first.count
24
39
  flat_data = data.flatten
25
40
  end
26
41
 
27
- handle_missing(flat_data, missing)
28
42
  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
29
- c_data.write_array_of_float(flat_data)
43
+ if numo?(data)
44
+ c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
45
+ else
46
+ handle_missing(flat_data, missing)
47
+ c_data.write_array_of_float(flat_data)
48
+ end
30
49
  check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
31
50
 
32
51
  ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
52
+
53
+ @feature_names ||= ncol.times.map { |i| "f#{i}" }
33
54
  end
34
55
 
35
56
  self.label = label if label
@@ -120,10 +141,14 @@ module XGBoost
120
141
  defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
121
142
  end
122
143
 
123
- def narray?(data)
144
+ def numo?(data)
124
145
  defined?(Numo::NArray) && data.is_a?(Numo::NArray)
125
146
  end
126
147
 
148
+ def rover?(data)
149
+ defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
150
+ end
151
+
127
152
  def handle_missing(data, missing)
128
153
  data.map! { |v| v.nil? ? missing : v }
129
154
  end
data/lib/xgboost/ffi.rb CHANGED
@@ -2,7 +2,15 @@ module XGBoost
2
2
  module FFI
3
3
  extend ::FFI::Library
4
4
 
5
- ffi_lib XGBoost.ffi_lib
5
+ begin
6
+ ffi_lib XGBoost.ffi_lib
7
+ rescue LoadError => e
8
+ if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
9
+ raise LoadError, "OpenMP not found. Run `brew install libomp`"
10
+ else
11
+ raise e
12
+ end
13
+ end
6
14
 
7
15
  # https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
8
16
  # keep same order
@@ -31,7 +39,7 @@ module XGBoost
31
39
  attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
32
40
  attach_function :XGBoosterLoadModel, %i[pointer string], :int
33
41
  attach_function :XGBoosterSaveModel, %i[pointer string], :int
34
- attach_function :XGBoosterDumpModelEx, %i[pointer string int string pointer pointer], :int
42
+ attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
35
43
  attach_function :XGBoosterGetAttr, %i[pointer pointer pointer pointer], :int
36
44
  attach_function :XGBoosterSetAttr, %i[pointer pointer pointer], :int
37
45
  attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Regressor < Model
3
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
3
+ def initialize(n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
4
4
  super
5
5
  end
6
6
 
@@ -1,3 +1,3 @@
1
1
  module XGBoost
2
- VERSION = "0.3.0"
2
+ VERSION = "0.5.1"
3
3
  end
Binary file
data/vendor/libxgboost.so CHANGED
Binary file
data/vendor/xgboost.dll CHANGED
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xgb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-20 00:00:00.000000000 Z
11
+ date: 2021-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -24,78 +24,8 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: minitest
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
60
- - !ruby/object:Gem::Version
61
- version: '5'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - ">="
67
- - !ruby/object:Gem::Version
68
- version: '5'
69
- - !ruby/object:Gem::Dependency
70
- name: daru
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - ">="
74
- - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - ">="
81
- - !ruby/object:Gem::Version
82
- version: '0'
83
- - !ruby/object:Gem::Dependency
84
- name: numo-narray
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '0'
97
- description:
98
- email: andrew@chartkick.com
27
+ description:
28
+ email: andrew@ankane.org
99
29
  executables: []
100
30
  extensions: []
101
31
  extra_rdoc_files: []
@@ -123,7 +53,7 @@ homepage: https://github.com/ankane/xgboost
123
53
  licenses:
124
54
  - Apache-2.0
125
55
  metadata: {}
126
- post_install_message:
56
+ post_install_message:
127
57
  rdoc_options: []
128
58
  require_paths:
129
59
  - lib
@@ -138,8 +68,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
138
68
  - !ruby/object:Gem::Version
139
69
  version: '0'
140
70
  requirements: []
141
- rubygems_version: 3.1.2
142
- signing_key:
71
+ rubygems_version: 3.2.3
72
+ signing_key:
143
73
  specification_version: 4
144
74
  summary: High performance gradient boosting for Ruby
145
75
  test_files: []