RubyGems - xgb - Versions diffs - 0.1.1 → 0.1.2 - Mend

xgb 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 1bb50395d579da91b18754bc75e780cbb2e98fd7a48a17c34514230d1c4828d1
-  data.tar.gz: 3d2f9c5a72c63c2622a973805c9f2caa9bd4de7b5c67f8c4b5445fd9a71993c3
+  metadata.gz: 148980f8a4991f1f98cd1740188e763a3bd96c98bc69b13a9de9aa00132a12f1
+  data.tar.gz: 31d90a3a064d032a7d1f371c6928f11b103bfc6f9c92dd1697cc538ef33f15fa
 SHA512:
-  metadata.gz: f141b3ea0b6ceb8549198fd6ad8a07f6947201409478fc4829fe625da376e40d8028427a5aa34191b565aa275d27bb03e2082bb8fc489f6da6a2a09b3bbf2c2f
-  data.tar.gz: c393f4fdbe240ffc14b64f22f17d149ed393070fb0752f9ec49dd94bcfa88f446ea21bc5bf9a96bef7759c5c47033dd0480a4001d477b8c487cf5dcf8be19b81
+  metadata.gz: 28fb08d373af3a3b198822ddea958ca0ee433145dbca091583b25945f27a048a3ec4a4ec43d88dc3c1dde67de7f72b7fb84e7668cfbd173f01d56992017ba4e2
+  data.tar.gz: 5bfb07db7c6b65d0a08010ab59328af75a97e74e025fcd4eb15ca88b05afec3303beb1569c99d59932a5426a64addce1c7fda90562342efcf52fc2c72d9b362a

data/CHANGELOG.md CHANGED

@@ -1,3 +1,10 @@
+## 0.1.2
+- Friendlier message when XGBoost not found
+- Free memory when objects are destroyed
+- Added `Ranker`
+- Added early stopping to Scikit-Learn API
 ## 0.1.1
 - Added Scikit-Learn API

data/README.md CHANGED

@@ -18,7 +18,7 @@ gem 'xgb'
 ## Getting Started
-This library follows the [Core Data Structure, Learning and Scikit-Learn APIs](https://xgboost.readthedocs.io/en/latest/python/python_api.html) of the Python library. Some methods and options are missing at the moment. PRs welcome!
+This library follows the [Python API](https://xgboost.readthedocs.io/en/latest/python/python_api.html). Some methods and options are missing at the moment. PRs welcome!
 ## Learning API
@@ -33,7 +33,8 @@ booster = Xgb.train(params, dtrain)
 Predict
 ```ruby
-booster.predict(x_test)
+dtest = Xgb::DMatrix.new(x_test)
+booster.predict(dtest)
 ```
 Save the model to a file
@@ -110,6 +111,12 @@ Get the importance of features
 model.feature_importances
 ```
+Early stopping
+```ruby
+model.fit(x, y, eval_set: [[x_test, y_test]], early_stopping_rounds: 5)
+```
 ## Data
 Data can be an array of arrays

data/lib/xgb.rb CHANGED

@@ -5,16 +5,25 @@ require "ffi"
 require "xgb/utils"
 require "xgb/booster"
 require "xgb/dmatrix"
-require "xgb/ffi"
 require "xgb/version"
 # scikit-learn API
+require "xgb/model"
 require "xgb/classifier"
+require "xgb/ranker"
 require "xgb/regressor"
 module Xgb
   class Error < StandardError; end
+  class << self
+    attr_accessor :ffi_lib
+  end
+  self.ffi_lib = ["xgboost"]
+  # friendlier error message
+  autoload :FFI,"xgb/ffi"
   class << self
     def train(params, dtrain, num_boost_round: 10, evals: nil, early_stopping_rounds: nil, verbose_eval: true)
       booster = Booster.new(params: params)

data/lib/xgb/booster.rb CHANGED

@@ -5,6 +5,8 @@ module Xgb
     def initialize(params: nil, model_file: nil)
       @handle = ::FFI::MemoryPointer.new(:pointer)
       check_result FFI.XGBoosterCreate(nil, 0, @handle)
+      ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
       if model_file
         check_result FFI.XGBoosterLoadModel(handle_pointer, model_file)
       end
@@ -13,6 +15,11 @@ module Xgb
       set_param(params)
     end
+    def self.finalize(pointer)
+      # must use proc instead of stabby lambda
+      proc { FFI.XGBoosterFree(pointer) }
+    end
     def update(dtrain, iteration)
       check_result FFI.XGBoosterUpdateOneIter(handle_pointer, iteration, dtrain.handle_pointer)
     end
@@ -43,10 +50,10 @@ module Xgb
     def predict(data, ntree_limit: nil)
       ntree_limit ||= 0
-      out_len = ::FFI::MemoryPointer.new(:ulong)
+      out_len = ::FFI::MemoryPointer.new(:uint64)
       out_result = ::FFI::MemoryPointer.new(:pointer)
       check_result FFI.XGBoosterPredict(handle_pointer, data.handle_pointer, 0, ntree_limit, out_len, out_result)
-      out = out_result.read_pointer.read_array_of_float(out_len.read_ulong)
+      out = out_result.read_pointer.read_array_of_float(out_len.read_uint64)
       num_class = out.size / data.num_row
       out = out.each_slice(num_class).to_a if num_class > 1
       out
@@ -58,10 +65,10 @@ module Xgb
     # returns an array of strings
     def dump(fmap: "", with_stats: false, dump_format: "text")
-      out_len = ::FFI::MemoryPointer.new(:ulong)
+      out_len = ::FFI::MemoryPointer.new(:uint64)
       out_result = ::FFI::MemoryPointer.new(:pointer)
       check_result FFI.XGBoosterDumpModelEx(handle_pointer, fmap, with_stats ? 1 : 0, dump_format, out_len, out_result)
-      out_result.read_pointer.get_array_of_string(0, out_len.read_ulong)
+      out_result.read_pointer.get_array_of_string(0, out_len.read_uint64)
     end
     def dump_model(fout, fmap: "", with_stats: false, dump_format: "text")

data/lib/xgb/classifier.rb CHANGED

@@ -1,16 +1,10 @@
 module Xgb
-  class Classifier
-    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain")
-      @params = {
-        max_depth: max_depth,
-        objective: objective,
-        learning_rate: learning_rate
-      }
-      @n_estimators = n_estimators
-      @importance_type = importance_type
+  class Classifier < Model
+    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "binary:logistic", importance_type: "gain", **options)
+      super
     end
-    def fit(x, y)
+    def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
       n_classes = y.uniq.size
       params = @params.dup
@@ -20,18 +14,24 @@ module Xgb
       end
       dtrain = DMatrix.new(x, label: y)
-      @booster = Xgb.train(params, dtrain, num_boost_round: @n_estimators)
+      evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
+      @booster = Xgb.train(params, dtrain,
+        num_boost_round: @n_estimators,
+        early_stopping_rounds: early_stopping_rounds,
+        verbose_eval: verbose,
+        evals: evals
+      )
       nil
     end
     def predict(data)
-      dmat = DMatrix.new(data)
-      y_pred = @booster.predict(dmat)
+      y_pred = super(data)
       if y_pred.first.is_a?(Array)
         # multiple classes
         y_pred.map do |v|
-          v.map.with_index.max_by { |v2, i| v2 }.last
+          v.map.with_index.max_by { |v2, _| v2 }.last
         end
       else
         y_pred.map { |v| v > 0.5 ? 1 : 0 }
@@ -49,20 +49,5 @@ module Xgb
         y_pred.map { |v| [1 - v, v] }
       end
     end
-    def save_model(fname)
-      @booster.save_model(fname)
-    end
-    def load_model(fname)
-      @booster = Booster.new(params: @params, model_file: fname)
-    end
-    def feature_importances
-      score = @booster.score(importance_type: @importance_type)
-      scores = @booster.feature_names.map { |k| score[k] || 0.0 }
-      total = scores.sum.to_f
-      scores.map { |s| s / total }
-    end
   end
 end

data/lib/xgb/dmatrix.rb CHANGED

@@ -27,12 +27,19 @@ module Xgb
         c_data = ::FFI::MemoryPointer.new(:float, nrow * ncol)
         c_data.put_array_of_float(0, flat_data)
         check_result FFI.XGDMatrixCreateFromMat(c_data, nrow, ncol, missing, @handle)
+        ObjectSpace.define_finalizer(self, self.class.finalize(handle_pointer))
       end
       set_float_info("label", label) if label
       set_float_info("weight", weight) if weight
     end
+    def self.finalize(pointer)
+      # must use proc instead of stabby lambda
+      proc { FFI.XGDMatrixFree(pointer) }
+    end
     def label
       float_info("label")
     end
@@ -41,16 +48,22 @@ module Xgb
       float_info("weight")
     end
+    def group=(group)
+      c_data = ::FFI::MemoryPointer.new(:int, group.size)
+      c_data.put_array_of_int(0, group)
+      check_result FFI.XGDMatrixSetGroup(handle_pointer, c_data, group.size)
+    end
     def num_row
-      out = ::FFI::MemoryPointer.new(:ulong)
+      out = ::FFI::MemoryPointer.new(:uint64)
       check_result FFI.XGDMatrixNumRow(handle_pointer, out)
-      out.read_ulong
+      out.read_uint64
     end
     def num_col
-      out = ::FFI::MemoryPointer.new(:ulong)
+      out = ::FFI::MemoryPointer.new(:uint64)
       check_result FFI.XGDMatrixNumCol(handle_pointer, out)
-      out.read_ulong
+      out.read_uint64
     end
     def slice(rindex)
@@ -76,15 +89,7 @@ module Xgb
     private
     def set_float_info(field, data)
-      data =
-        if matrix?(data)
-          data.to_a[0]
-        elsif daru_vector?(data) || narray?(data)
-          data.to_a
-        else
-          data
-        end
+      data = data.to_a unless data.is_a?(Array)
       c_data = ::FFI::MemoryPointer.new(:float, data.size)
       c_data.put_array_of_float(0, data)
       check_result FFI.XGDMatrixSetFloatInfo(handle_pointer, field.to_s, c_data, data.size)
@@ -106,10 +111,6 @@ module Xgb
       defined?(Daru::DataFrame) && data.is_a?(Daru::DataFrame)
     end
-    def daru_vector?(data)
-      defined?(Daru::Vector) && data.is_a?(Daru::Vector)
-    end
     def narray?(data)
       defined?(Numo::NArray) && data.is_a?(Numo::NArray)
     end

data/lib/xgb/ffi.rb CHANGED

@@ -1,7 +1,13 @@
 module Xgb
   module FFI
     extend ::FFI::Library
-    ffi_lib ["xgboost"]
+    begin
+      ffi_lib Xgb.ffi_lib
+    rescue LoadError => e
+      raise e if ENV["XGB_DEBUG"]
+      raise LoadError, "Could not find XGBoost"
+    end
     # https://github.com/dmlc/xgboost/blob/master/include/xgboost/c_api.h
     # keep same order
@@ -10,18 +16,21 @@ module Xgb
     attach_function :XGBGetLastError, %i[], :string
     # dmatrix
-    attach_function :XGDMatrixCreateFromMat, %i[pointer ulong ulong float pointer], :int
+    attach_function :XGDMatrixCreateFromMat, %i[pointer uint64 uint64 float pointer], :int
+    attach_function :XGDMatrixSetGroup, %i[pointer pointer uint64], :int
     attach_function :XGDMatrixNumRow, %i[pointer pointer], :int
     attach_function :XGDMatrixNumCol, %i[pointer pointer], :int
-    attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer ulong pointer], :int
+    attach_function :XGDMatrixSliceDMatrix, %i[pointer pointer uint64 pointer], :int
+    attach_function :XGDMatrixFree, %i[pointer], :int
     attach_function :XGDMatrixSaveBinary, %i[pointer string int], :int
-    attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer ulong], :int
+    attach_function :XGDMatrixSetFloatInfo, %i[pointer string pointer uint64], :int
     attach_function :XGDMatrixGetFloatInfo, %i[pointer string pointer pointer], :int
     # booster
     attach_function :XGBoosterCreate, %i[pointer int pointer], :int
     attach_function :XGBoosterUpdateOneIter, %i[pointer int pointer], :int
-    attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer ulong pointer], :int
+    attach_function :XGBoosterEvalOneIter, %i[pointer int pointer pointer uint64 pointer], :int
+    attach_function :XGBoosterFree, %i[pointer], :int
     attach_function :XGBoosterSetParam, %i[pointer string string], :int
     attach_function :XGBoosterPredict, %i[pointer pointer int int pointer pointer], :int
     attach_function :XGBoosterLoadModel, %i[pointer string], :int

data/lib/xgb/model.rb ADDED

@@ -0,0 +1,35 @@
+module Xgb
+  class Model
+    attr_reader :booster
+    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: nil, importance_type: "gain", **options)
+      @params = {
+        max_depth: max_depth,
+        objective: objective,
+        learning_rate: learning_rate
+      }.merge(options)
+      @n_estimators = n_estimators
+      @importance_type = importance_type
+    end
+    def predict(data)
+      dmat = DMatrix.new(data)
+      @booster.predict(dmat)
+    end
+    def save_model(fname)
+      @booster.save_model(fname)
+    end
+    def load_model(fname)
+      @booster = Booster.new(params: @params, model_file: fname)
+    end
+    def feature_importances
+      score = @booster.score(importance_type: @importance_type)
+      scores = @booster.feature_names.map { |k| score[k] || 0.0 }
+      total = scores.sum.to_f
+      scores.map { |s| s / total }
+    end
+  end
+end

data/lib/xgb/ranker.rb ADDED

@@ -0,0 +1,14 @@
+module Xgb
+  class Ranker < Model
+    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "rank:pairwise", importance_type: "gain", **options)
+      super
+    end
+    def fit(x, y, group)
+      dtrain = DMatrix.new(x, label: y)
+      dtrain.group = group
+      @booster = Xgb.train(@params, dtrain, num_boost_round: @n_estimators)
+      nil
+    end
+  end
+end

data/lib/xgb/regressor.rb CHANGED

@@ -1,39 +1,20 @@
 module Xgb
-  class Regressor
-    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain")
-      @params = {
-        max_depth: max_depth,
-        objective: objective,
-        learning_rate: learning_rate
-      }
-      @n_estimators = n_estimators
-      @importance_type = importance_type
+  class Regressor < Model
+    def initialize(max_depth: 3, learning_rate: 0.1, n_estimators: 100, objective: "reg:squarederror", importance_type: "gain", **options)
+      super
     end
-    def fit(x, y)
+    def fit(x, y, eval_set: nil, early_stopping_rounds: nil, verbose: true)
       dtrain = DMatrix.new(x, label: y)
-      @booster = Xgb.train(@params, dtrain, num_boost_round: @n_estimators)
-      nil
-    end
+      evals = Array(eval_set).map.with_index { |v, i| [DMatrix.new(v[0], label: v[1]), "validation_#{i}"] }
-    def predict(data)
-      dmat = DMatrix.new(data)
-      @booster.predict(dmat)
-    end
-    def save_model(fname)
-      @booster.save_model(fname)
-    end
-    def load_model(fname)
-      @booster = Booster.new(params: @params, model_file: fname)
-    end
-    def feature_importances
-      score = @booster.score(importance_type: @importance_type)
-      scores = @booster.feature_names.map { |k| score[k] || 0.0 }
-      total = scores.sum.to_f
-      scores.map { |s| s / total }
+      @booster = Xgb.train(@params, dtrain,
+        num_boost_round: @n_estimators,
+        early_stopping_rounds: early_stopping_rounds,
+        verbose_eval: verbose,
+        evals: evals
+      )
+      nil
     end
   end
 end

data/lib/xgb/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Xgb
-  VERSION = "0.1.1"
+  VERSION = "0.1.2"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: xgb
 version: !ruby/object:Gem::Version
-  version: 0.1.1
+  version: 0.1.2
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-08-16 00:00:00.000000000 Z
+date: 2019-08-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi
@@ -107,6 +107,8 @@ files:
 - lib/xgb/classifier.rb
 - lib/xgb/dmatrix.rb
 - lib/xgb/ffi.rb
+- lib/xgb/model.rb
+- lib/xgb/ranker.rb
 - lib/xgb/regressor.rb
 - lib/xgb/utils.rb
 - lib/xgb/version.rb
@@ -129,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.4
+rubygems_version: 3.0.3
 signing_key:
 specification_version: 4
 summary: XGBoost - the high performance machine learning library - for Ruby