xgb 0.2.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 79a40c703992619e8f834ff11e8eff704b96bf5eb0af8a47b4d0ab410cb68fff
4
- data.tar.gz: a8924ce9e90ee78d7ad4c98ae659f83e68155dfb5c1e3fcbed4797053c266922
3
+ metadata.gz: 0afc5631e2925f19ef4da234aa20aca2247806af0823c7e7e7267424edd23973
4
+ data.tar.gz: 7a7a6b9e9860f4ebbf8ee5ddcff133f5614482b85fb93b0594864c18a0c1daf9
5
5
  SHA512:
6
- metadata.gz: 2401d7accb4a2709c0ee0a9fd76b451bff641775fbe16f884de7b3a1288c7e210b1e3ea1b7b76734f8ff29aa9eac59ab161bb06b92340c7e7744ddf58fb8a8ae
7
- data.tar.gz: a256ab14fa84e7b10f54b7f465ccf0b261c1e7854ae85e1715e13e8d591a4fa756d5a2892e4783e2549cf6d9f29ab061438035ba305f7ab23e86341d53effb4a
6
+ metadata.gz: e886ffe5ff3d34055ca5e071fbabf7306cdcb3456513c342300f1ad65211bde7322db163e81c1a3a92e122c58d9b241388e5bfba00c11bc89c4cc056707a8bcd
7
+ data.tar.gz: e422b88f36d1a23b429cc44d1c496707272d176884d29265057878bfd0e3010cf4348730746d99cfc1ca7e6f42a858c697d4233c39e186df10c2d61df2280097
@@ -1,3 +1,28 @@
1
+ ## 0.5.0 (2020-12-12)
2
+
3
+ - Updated XGBoost to 1.3.0
4
+
5
+ ## 0.4.1 (2020-08-26)
6
+
7
+ - Updated XGBoost to 1.2.0
8
+
9
+ ## 0.4.0 (2020-05-17)
10
+
11
+ - Updated XGBoost to 1.1.0
12
+ - Changed default `learning_rate` and `max_depth` for Scikit-Learn API to match Python
13
+ - Added support for Rover
14
+ - Improved performance of Numo datasets
15
+ - Improved error message when OpenMP not found on Mac
16
+
17
+ ## 0.3.1 (2020-04-16)
18
+
19
+ - Added `feature_names` and `feature_types` to `DMatrix`
20
+ - Added feature names to `dump`
21
+
22
+ ## 0.3.0 (2020-02-19)
23
+
24
+ - Updated XGBoost to 1.0.0
25
+
1
26
  ## 0.2.1 (2020-02-11)
2
27
 
3
28
  - Fixed `Could not find XGBoost` error on some Linux platforms
data/NOTICE.txt CHANGED
@@ -1,3 +1,4 @@
1
+ Copyright XGBoost contributors
1
2
  Copyright 2019-2020 Andrew Kane
2
3
 
3
4
  Licensed under the Apache License, Version 2.0 (the "License");
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [XGBoost](https://github.com/dmlc/xgboost) - high performance gradient boosting - for Ruby
4
4
 
5
- [![Build Status](https://travis-ci.org/ankane/xgboost.svg?branch=master)](https://travis-ci.org/ankane/xgboost) [![Build status](https://ci.appveyor.com/api/projects/status/s8umwyuahvj68m6p/branch/master?svg=true)](https://ci.appveyor.com/project/ankane/xgboost/branch/master)
5
+ [![Build Status](https://github.com/ankane/xgboost/workflows/build/badge.svg?branch=master)](https://github.com/ankane/xgboost/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -12,6 +12,12 @@ Add this line to your application’s Gemfile:
12
12
  gem 'xgb'
13
13
  ```
14
14
 
15
+ On Mac, also install OpenMP:
16
+
17
+ ```sh
18
+ brew install libomp
19
+ ```
20
+
15
21
  ## Learning API
16
22
 
17
23
  Prep your data
@@ -131,16 +137,22 @@ Data can be an array of arrays
131
137
  [[1, 2, 3], [4, 5, 6]]
132
138
  ```
133
139
 
134
- Or a Daru data frame
140
+ Or a Numo array
135
141
 
136
142
  ```ruby
137
- Daru::DataFrame.from_csv("houses.csv")
143
+ Numo::NArray.cast([[1, 2, 3], [4, 5, 6]])
138
144
  ```
139
145
 
140
- Or a Numo NArray
146
+ Or a Rover data frame
141
147
 
142
148
  ```ruby
143
- Numo::DFloat.new(3, 2).seq
149
+ Rover.read_csv("houses.csv")
150
+ ```
151
+
152
+ Or a Daru data frame
153
+
154
+ ```ruby
155
+ Daru::DataFrame.from_csv("houses.csv")
144
156
  ```
145
157
 
146
158
  ## Helpful Resources
@@ -31,7 +31,8 @@ module XGBoost
31
31
  booster = Booster.new(params: params)
32
32
  num_feature = dtrain.num_col
33
33
  booster.set_param("num_feature", num_feature)
34
- booster.feature_names = num_feature.times.map { |i| "f#{i}" }
34
+ booster.feature_names = dtrain.feature_names
35
+ booster.feature_types = dtrain.feature_types
35
36
  evals ||= []
36
37
 
37
38
  if early_stopping_rounds
@@ -156,6 +157,14 @@ module XGBoost
156
157
  eval_hist
157
158
  end
158
159
 
160
+ def lib_version
161
+ major = ::FFI::MemoryPointer.new(:int)
162
+ minor = ::FFI::MemoryPointer.new(:int)
163
+ patch = ::FFI::MemoryPointer.new(:int)
164
+ FFI.XGBoostVersion(major, minor, patch)
165
+ "#{major.read_int}.#{minor.read_int}.#{patch.read_int}"
166
+ end
167
+
159
168
  private
160
169
 
161
170
  def mean(arr)
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Booster
3
- attr_accessor :best_iteration, :feature_names
3
+ attr_accessor :best_iteration, :feature_names, :feature_types
4
4
 
5
5
  def initialize(params: nil, model_file: nil)
6
6
  @handle = ::FFI::MemoryPointer.new(:pointer)
@@ -25,11 +25,8 @@ module XGBoost
25
25
  end
26
26
 
27
27
  def eval_set(evals, iteration)
28
- dmats = ::FFI::MemoryPointer.new(:pointer, evals.size)
29
- dmats.write_array_of_pointer(evals.map { |v| v[0].handle_pointer })
30
-
31
- evnames = ::FFI::MemoryPointer.new(:pointer, evals.size)
32
- evnames.write_array_of_pointer(evals.map { |v| ::FFI::MemoryPointer.from_string(v[1]) })
28
+ dmats = array_of_pointers(evals.map { |v| v[0].handle_pointer })
29
+ evnames = array_of_pointers(evals.map { |v| string_pointer(v[1]) })
33
30
 
34
31
  out_result = ::FFI::MemoryPointer.new(:pointer)
35
32
 
@@ -52,7 +49,7 @@ module XGBoost
52
49
  ntree_limit ||= 0
53
50
  out_len = ::FFI::MemoryPointer.new(:uint64)
54
51
  out_result = ::FFI::MemoryPointer.new(:pointer)
55
- check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
52
+ check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, 0, out_len, out_result)
56
53
  out = out_result.read_pointer.read_array_of_float(read_uint64(out_len))
57
54
  num_class = out.size / data.num_row
58
55
  out = out.each_slice(num_class).to_a if num_class > 1
@@ -67,7 +64,13 @@ module XGBoost
67
64
  def dump(fmap: "", with_stats: false, dump_format: "text")
68
65
  out_len = ::FFI::MemoryPointer.new(:uint64)
69
66
  out_result = ::FFI::MemoryPointer.new(:pointer)
70
- check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
67
+
68
+ names = feature_names || []
69
+ fnames = array_of_pointers(names.map { |fname| string_pointer(fname) })
70
+ ftypes = array_of_pointers(feature_types || Array.new(names.size, string_pointer("float")))
71
+
72
+ check_result FFI.XGBoosterDumpModelExWithFeatures(handle_pointer, names.size, fnames, ftypes, with_stats ? 1 : 0, dump_format, out_len, out_result)
73
+
71
74
  out_result.read_pointer.get_array_of_string(0, read_uint64(out_len))
72
75
  end
73
76
 
@@ -155,7 +158,7 @@ module XGBoost
155
158
  end
156
159
 
157
160
  def [](key_name)
158
- key = ::FFI::MemoryPointer.from_string(key_name)
161
+ key = string_pointer(key_name)
159
162
  success = ::FFI::MemoryPointer.new(:int)
160
163
  out_result = ::FFI::MemoryPointer.new(:pointer)
161
164
 
@@ -165,8 +168,8 @@ module XGBoost
165
168
  end
166
169
 
167
170
  def []=(key_name, raw_value)
168
- key = ::FFI::MemoryPointer.from_string(key_name)
169
- value = raw_value.nil? ? nil : ::FFI::MemoryPointer.from_string(raw_value)
171
+ key = string_pointer(key_name)
172
+ value = raw_value.nil? ? nil : string_pointer(raw_value)
170
173
 
171
174
  check_result FFI.XGBoosterSetAttr(handle_pointer, key, value)
172
175
  end
@@ -188,6 +191,14 @@ module XGBoost
188
191
  @handle.read_pointer
189
192
  end
190
193
 
194
+ def array_of_pointers(values)
195
+ ::FFI::MemoryPointer.new(:pointer, values.size).write_array_of_pointer(values)
196
+ end
197
+
198
+ def string_pointer(value)
199
+ ::FFI::MemoryPointer.from_string(value.to_s)
200
+ end
201
+
191
202
  include Utils
192
203
  end
193
204
  end
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Classifier < Model
3
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
3
+ def initialize(n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
4
4
  super
5
5
  end
6
6
 
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class DMatrix
3
- attr_reader :data
3
+ attr_reader :data, :feature_names, :feature_types
4
4
 
5
5
  def initialize(data, label: nil, weight: nil, missing: Float::NAN)
6
6
  @data = data
@@ -15,21 +15,42 @@ module XGBoost
15
15
  elsif daru?(data)
16
16
  nrow, ncol = data.shape
17
17
  flat_data = data.map_rows(&:to_a).flatten
18
- elsif narray?(data)
18
+ @feature_names = data.each_vector.map(&:name)
19
+ @feature_types =
20
+ data.each_vector.map(&:db_type).map do |v|
21
+ case v
22
+ when "INTEGER"
23
+ "int"
24
+ when "DOUBLE"
25
+ "float"
26
+ else
27
+ raise Error, "Unknown feature type: #{v}"
28
+ end
29
+ end
30
+ elsif numo?(data)
19
31
  nrow, ncol = data.shape
20
- flat_data = data.flatten.to_a
32
+ elsif rover?(data)
33
+ nrow, ncol = data.shape
34
+ @feature_names = data.keys
35
+ data = data.to_numo
21
36
  else
22
37
  nrow = data.count
23
38
  ncol = data.first.count
24
39
  flat_data = data.flatten
25
40
  end
26
41
 
27
- handle_missing(flat_data, missing)
28
42
  c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
29
- c_data.write_array_of_float(flat_data)
43
+ if numo?(data)
44
+ c_data.write_bytes(data.cast_to(Numo::SFloat).to_string)
45
+ else
46
+ handle_missing(flat_data, missing)
47
+ c_data.write_array_of_float(flat_data)
48
+ end
30
49
  check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
31
50
 
32
51
  ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
52
+
53
+ @feature_names ||= ncol.times.map { |i| "f#{i}" }
33
54
  end
34
55
 
35
56
  self.label = label if label
@@ -60,7 +81,7 @@ module XGBoost
60
81
  def group=(group)
61
82
  c_data = ::FFI::MemoryPointer.new(:int, group.size)
62
83
  c_data.write_array_of_int(group)
63
- check_result FFI.XGDMatrixSetGroup(handle_pointer, c_data, group.size)
84
+ check_result FFI.XGDMatrixSetUIntInfo(handle_pointer, "group", c_data, group.size)
64
85
  end
65
86
 
66
87
  def num_row
@@ -120,10 +141,14 @@ module XGBoost
120
141
  defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
121
142
  end
122
143
 
123
- def narray?(data)
144
+ def numo?(data)
124
145
  defined?(Numo::NArray) && data.is_a?(Numo::NArray)
125
146
  end
126
147
 
148
+ def rover?(data)
149
+ defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
150
+ end
151
+
127
152
  def handle_missing(data, missing)
128
153
  data.map! { |v| v.nil? ? missing : v }
129
154
  end
@@ -2,17 +2,26 @@ module XGBoost
2
2
  module FFI
3
3
  extend ::FFI::Library
4
4
 
5
- ffi_lib XGBoost.ffi_lib
5
+ begin
6
+ ffi_lib XGBoost.ffi_lib
7
+ rescue LoadError => e
8
+ if e.message.include?("Library not loaded: /usr/local/opt/libomp/lib/libomp.dylib") && e.message.include?("Reason: image not found")
9
+ raise LoadError, "OpenMP not found. Run `brew install libomp`"
10
+ else
11
+ raise e
12
+ end
13
+ end
6
14
 
7
15
  # https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
8
16
  # keep same order
9
17
 
10
- # error
18
+ # general
19
+ attach_function :XGBoostVersion, %i[pointer pointer pointer], :void
11
20
  attach_function :XGBGetLastError, %i[], :string
12
21
 
13
22
  # dmatrix
14
23
  attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
15
- attach_function :XGDMatrixSetGroup, %i[pointer pointer uint64], :int
24
+ attach_function :XGDMatrixSetUIntInfo, %i[pointer string pointer uint64], :int
16
25
  attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
17
26
  attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
18
27
  attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
@@ -27,10 +36,10 @@ module XGBoost
27
36
  attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
28
37
  attach_function :XGBoosterFree, %i[pointer], :int
29
38
  attach_function :XGBoosterSetParam, %i[pointer string string], :int
30
- attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
39
+ attach_function :XGBoosterPredict, %i[pointer pointer int int int pointer pointer], :int
31
40
  attach_function :XGBoosterLoadModel, %i[pointer string], :int
32
41
  attach_function :XGBoosterSaveModel, %i[pointer string], :int
33
- attach_function :XGBoosterDumpModelEx, %i[pointer string int string pointer pointer], :int
42
+ attach_function :XGBoosterDumpModelExWithFeatures, %i[pointer int pointer pointer int string pointer pointer], :int
34
43
  attach_function :XGBoosterGetAttr, %i[pointer pointer pointer pointer], :int
35
44
  attach_function :XGBoosterSetAttr, %i[pointer pointer pointer], :int
36
45
  attach_function :XGBoosterGetAttrNames, %i[pointer pointer pointer], :int
@@ -2,12 +2,8 @@ module XGBoost
2
2
  class Model
3
3
  attr_reader :booster
4
4
 
5
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: nil, importance_type: "gain", **options)
6
- @params = {
7
- max_depth: max_depth,
8
- objective: objective,
9
- learning_rate: learning_rate
10
- }.merge(options)
5
+ def initialize(n_estimators: 100, importance_type: "gain", **options)
6
+ @params = options
11
7
  @n_estimators = n_estimators
12
8
  @importance_type = importance_type
13
9
  end
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Ranker < Model
3
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
3
+ def initialize(n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
4
4
  super
5
5
  end
6
6
 
@@ -1,6 +1,6 @@
1
1
  module XGBoost
2
2
  class Regressor < Model
3
- def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
3
+ def initialize(n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
4
4
  super
5
5
  end
6
6
 
@@ -1,3 +1,3 @@
1
1
  module XGBoost
2
- VERSION = "0.2.1"
2
+ VERSION = "0.5.0"
3
3
  end
Binary file
Binary file
Binary file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xgb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-12 00:00:00.000000000 Z
11
+ date: 2020-12-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -94,7 +94,21 @@ dependencies:
94
94
  - - ">="
95
95
  - !ruby/object:Gem::Version
96
96
  version: '0'
97
- description:
97
+ - !ruby/object:Gem::Dependency
98
+ name: rover-df
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description:
98
112
  email: andrew@chartkick.com
99
113
  executables: []
100
114
  extensions: []
@@ -123,7 +137,7 @@ homepage: https://github.com/ankane/xgboost
123
137
  licenses:
124
138
  - Apache-2.0
125
139
  metadata: {}
126
- post_install_message:
140
+ post_install_message:
127
141
  rdoc_options: []
128
142
  require_paths:
129
143
  - lib
@@ -138,8 +152,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
138
152
  - !ruby/object:Gem::Version
139
153
  version: '0'
140
154
  requirements: []
141
- rubygems_version: 3.1.2
142
- signing_key:
155
+ rubygems_version: 3.1.4
156
+ signing_key:
143
157
  specification_version: 4
144
158
  summary: High performance gradient boosting for Ruby
145
159
  test_files: []