RubyGems - wandb - Versions diffs - 0.1.2 → 0.1.6 - Mend

wandb 0.1.2 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

checksums.yaml +4 -4
data/lib/wandb/version.rb +1 -1
data/lib/wandb/xgboost_callback.rb +80 -40
data/lib/wandb.rb +131 -7
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: fc087b1331eb3548a0c0826543a956c44ac2f301cf67de163a8b98cfbd425dd1
-  data.tar.gz: 7320addea0abaa3851ae9a1990c078f78fdb3f55d284ab7b7617cbb4e2203d92
+  metadata.gz: 310ede46ccba30aa00bfbb81fbb8ffdefd818528c7ae09523e7dca9629258892
+  data.tar.gz: 07643a463e807e99db1a4e874bd85b749f6d65b644f15e0423505ead63ec7749
 SHA512:
-  metadata.gz: 8225c6929a867bc6d7e3da95698fa4d71f7f87bf2e8b1004601aa9cb7c8214c327570531a292b5f5df5a85f608fb498944ef22093803f7e527c49f87af640f74
-  data.tar.gz: 22e9685e8b57ea486f19fc91d3253ebed100c708f40aa739155b7eb5ec6c664a3ef6108ead7c933cd5c6e0a3324cb91b1020f3765d7b44143d87ca5c80d72f4e
+  metadata.gz: 00e7a6fe5e888d931c1abbc6047207859a46b024c83a05a6e87feeea8d20576f2f3da1d6a5c851c9dd945e7bd564ecfc56b5c4707ed3b54284aea97ecb4dac46
+  data.tar.gz: d4b2c592d6a4a5e079024d48cb7a97834a883f1e5e12dbe1c856255d4beed84c90f54f629d818cad14513f4778b08657e6781c958ff977fe607d1a74c0a2deec

data/lib/wandb/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Wandb
-  VERSION = "0.1.2"
+  VERSION = "0.1.6"
 end

data/lib/wandb/xgboost_callback.rb CHANGED Viewed

@@ -1,26 +1,53 @@
+require "xgb"
+require "tempfile"
+require "fileutils"
 module Wandb
-  class XGBoostCallback
+  class XGBoostCallback < XGBoost::TrainingCallback
     MINIMIZE_METRICS = %w[rmse logloss error] # Add other metrics as needed
     MAXIMIZE_METRICS = %w[auc accuracy] # Add other metrics as needed
-    def initialize(log_model: false, log_feature_importance: true, importance_type: "gain", define_metric: true)
-      @log_model = log_model
-      @log_feature_importance = log_feature_importance
-      @importance_type = importance_type
-      @define_metric = define_metric
+    class Opts
+      attr_accessor :options
-      return if Wandb.current_run
+      def initialize(options = {})
+        @options = options
+      end
-      raise "You must call wandb.init() before WandbCallback()"
+      def default(key, default)
+        options.key?(key) ? options[key] : default
+      end
     end
-    def before_training(model:)
-      # Update Wandb config with model configuration
-      Wandb.current_run.config = model.params
-      Wandb.log(model.params)
+    attr_accessor :project_name, :api_key, :custom_loggers
+    def initialize(options = {})
+      options = Opts.new(options)
+      @log_model = options.default(:log_model, false)
+      @log_feature_importance = options.default(:log_feature_importance, true)
+      @importance_type = options.default(:importance_type, "gain")
+      @define_metric = options.default(:define_metric, true)
+      @api_key = options.default(:api_key, ENV["WANDB_API_KEY"])
+      @project_name = options.default(:project_name, nil)
+      @custom_loggers = options.default(:custom_loggers, [])
     end
-    def after_training(model:)
+    def before_training(model)
+      Wandb.login(api_key: api_key)
+      Wandb.init(project: project_name)
+      config = JSON.parse(model.save_config)
+      log_conf = {
+        learning_rate: config.dig("learner", "gradient_booster", "tree_train_param", "learning_rate").to_f,
+        max_depth: config.dig("learner", "gradient_booster", "tree_train_param", "max_depth").to_f,
+        n_estimators: model.num_boosted_rounds
+      }
+      Wandb.current_run.config = log_conf
+      Wandb.log(log_conf)
+      model
+    end
+    def after_training(model)
       # Log the model as an artifact
       log_model_as_artifact(model) if @log_model
@@ -28,54 +55,67 @@ module Wandb
       log_feature_importance(model) if @log_feature_importance
       # Log best score and best iteration
-      return unless model.best_score
+      unless model.best_score
+        finish
+        return model
+      end
       Wandb.log(
         "best_score" => model.best_score.to_f,
         "best_iteration" => model.best_iteration.to_i
       )
+      finish
+      model
     end
-    def before_iteration(model:, epoch:, evals:)
-      # noop
+    def finish
+      Wandb.finish
+      FileUtils.rm_rf(File.join(Dir.pwd, "wandb"))
     end
-    def after_iteration(model:, epoch:, evals:, res:)
-      res.each do |metric_name, value|
-        data, metric = metric_name.split("-", 2)
-        full_metric_name = "#{data}-#{metric}"
-        if @define_metric
-          define_metric(data, metric)
-          Wandb.log({ full_metric_name => value })
-        else
-          Wandb.log({ full_metric_name => value })
-        end
-      end
+    def before_iteration(_model, _epoch, _history)
+      false
+    end
-      Wandb.log({ "epoch" => epoch })
-      @define_metric = false
+    def after_iteration(model, epoch, history)
+      history.each do |split, metric_scores|
+        metric = metric_scores.keys.first
+        values = metric_scores.values.last
+        epoch_value = values[epoch]
+        define_metric(split, metric) if @define_metric && epoch == 0
+        full_metric_name = "#{split}-#{metric}"
+        Wandb.log({ full_metric_name => epoch_value })
+      end
+      @custom_loggers.each do |logger|
+        logger.call(model, epoch, history)
+      end
+      Wandb.log("epoch" => epoch)
+      false
     end
     private
     def log_model_as_artifact(model)
-      model_name = "#{Wandb.current_run.id}_model.json"
-      model_path = File.join(Wandb.current_run.dir, model_name)
-      model.save_model(model_path)
-      model_artifact = Wandb.Artifact(name: model_name, type: "model")
-      model_artifact.add_file(model_path)
-      Wandb.current_run.log_artifact(model_artifact)
+      Dir.mktmpdir("wandb_xgboost_model") do |tmp_dir|
+        model_name = "model.json"
+        model_path = File.join(tmp_dir, model_name)
+        model.save_model(model_path)
+        model_artifact = Wandb.artifact(name: model_name, type: "model")
+        model_artifact.add_file(model_path)
+        Wandb.current_run.log_artifact(model_artifact)
+      end
     end
     def log_feature_importance(model)
       fi = model.score(importance_type: @importance_type)
       fi_data = fi.map { |k, v| [k, v] }
-      table = Wandb.Table(data: fi_data, columns: %w[Feature Importance])
-      bar_plot = Wandb.plot.bar(table, "Feature", "Importance", title: "Feature Importance")
-      Wandb.log({ "Feature Importance" => bar_plot })
+      table = Wandb::Table.new(data: fi_data, columns: %w[Feature Importance])
+      bar_plot = Wandb::Plot.bar(table.table, "Feature", "Importance", title: "Feature Importance")
+      Wandb.log({ "Feature Importance" => bar_plot.__pyptr__ })
     end
     def define_metric(data, metric_name)

data/lib/wandb.rb CHANGED Viewed

@@ -16,10 +16,6 @@ module Wandb
       @wandb ||= PyCall.import_module("wandb")
     end
-    def Table(*args, **kwargs)
-      __pyptr__.Table.new(*args, **kwargs)
-    end
     def plot(*args, **kwargs)
       __pyptr__.plot(*args, **kwargs)
     end
@@ -30,11 +26,12 @@ module Wandb
     end
     # Expose wandb.Artifact
-    def Artifact(*args, **kwargs)
-      __pyptr__.Artifact.new(*args, **kwargs)
+    def artifact(*args, **kwargs)
+      py_artifact = __pyptr__.Artifact.new(*args, **kwargs)
+      Artifact.new(py_artifact)
     end
-    def Error
+    def error
       __pyptr__.Error
     end
@@ -72,6 +69,10 @@ module Wandb
     def api
       @api ||= Api.new(__pyptr__.Api.new)
     end
+    def plot
+      Plot
+    end
   end
   # Run class
@@ -80,6 +81,10 @@ module Wandb
       @run = run
     end
+    def run_id
+      @run.run_id
+    end
     def log(metrics = {})
       metrics.symbolize_keys!
       @run.log(metrics, {})
@@ -104,6 +109,53 @@ module Wandb
     def config=(new_config)
       @run.config.update(PyCall::Dict.new(new_config))
     end
+    def log_artifact(artifact)
+      @run.log_artifact(artifact.__pyptr__)
+    end
+  end
+  # Artifact class
+  class Artifact
+    def initialize(artifact)
+      @artifact = artifact
+    end
+    def __pyptr__
+      @artifact
+    end
+    def name
+      @artifact.name
+    end
+    def type
+      @artifact.type
+    end
+    def add_file(local_path, name = nil)
+      @artifact.add_file(local_path, name)
+    end
+    def add_dir(local_dir, name = nil)
+      @artifact.add_dir(local_dir, name)
+    end
+    def get_path(name)
+      @artifact.get_path(name)
+    end
+    def metadata
+      @artifact.metadata
+    end
+    def metadata=(new_metadata)
+      @artifact.metadata = new_metadata
+    end
+    def save
+      @artifact.save
+    end
   end
   # Api class
@@ -137,6 +189,78 @@ module Wandb
       @project.description
     end
   end
+  # Table class
+  class Table
+    attr_accessor :table, :data, :columns
+    def initialize(data: {}, columns: [])
+      @table = Wandb.__pyptr__.Table.new(data: data, columns: columns)
+      @data = data
+      @columns = columns
+    end
+    def __pyptr__
+      @table
+    end
+    def add_data(*args)
+      @table.add_data(*args)
+    end
+    def add_column(name, data)
+      @table.add_column(name, data)
+    end
+    def get_column(name)
+      @table.get_column(name)
+    end
+    def columns
+      @table.columns
+    end
+    def data
+      @table.data
+    end
+    def to_pandas
+      @table.get_dataframe
+    end
+  end
+  # Plot class
+  class Plot
+    class << self
+      def bar(table, x_key, y_key, title: nil)
+        py_plot = Wandb.__pyptr__.plot.bar(table.__pyptr__, x_key, y_key, title: title)
+        new(py_plot)
+      end
+      def line(table, x_key, y_key, title: nil)
+        py_plot = Wandb.__pyptr__.plot.line(table.__pyptr__, x_key, y_key, title: title)
+        new(py_plot)
+      end
+      def scatter(table, x_key, y_key, title: nil)
+        py_plot = Wandb.__pyptr__.plot.scatter(table.__pyptr__, x_key, y_key, title: title)
+        new(py_plot)
+      end
+      def histogram(table, value_key, title: nil)
+        py_plot = Wandb.__pyptr__.plot.histogram(table.__pyptr__, value_key, title: title)
+        new(py_plot)
+      end
+    end
+    def initialize(plot)
+      @plot = plot
+    end
+    def __pyptr__
+      @plot
+    end
+  end
 end
 require_relative "wandb/xgboost_callback"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: wandb
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.1.6
 platform: ruby
 authors:
 - Brett Shollenberger
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2024-10-11 00:00:00.000000000 Z
+date: 2024-10-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: pycall