easy_ml 0.2.0.pre.rc77 → 0.2.0.pre.rc81

This diff shows the content of publicly available package versions as released to their public registries, and is provided for informational purposes only.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/datasets_controller.rb +3 -3
  3. data/app/controllers/easy_ml/models_controller.rb +4 -3
  4. data/app/frontend/components/ModelForm.tsx +16 -0
  5. data/app/frontend/components/ScheduleModal.tsx +0 -2
  6. data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
  7. data/app/jobs/easy_ml/application_job.rb +1 -0
  8. data/app/jobs/easy_ml/batch_job.rb +47 -6
  9. data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
  10. data/app/jobs/easy_ml/reaper.rb +14 -10
  11. data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
  12. data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
  13. data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
  14. data/app/models/easy_ml/column/imputers/base.rb +1 -1
  15. data/app/models/easy_ml/column/imputers/imputer.rb +2 -0
  16. data/app/models/easy_ml/column/imputers/today.rb +1 -1
  17. data/app/models/easy_ml/column/selector.rb +0 -8
  18. data/app/models/easy_ml/column.rb +1 -1
  19. data/app/models/easy_ml/column_list.rb +2 -3
  20. data/app/models/easy_ml/dataset/learner/base.rb +2 -2
  21. data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
  22. data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
  23. data/app/models/easy_ml/dataset.rb +47 -38
  24. data/app/models/easy_ml/datasource.rb +0 -6
  25. data/app/models/easy_ml/feature.rb +33 -8
  26. data/app/models/easy_ml/model.rb +27 -4
  27. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +21 -5
  28. data/app/models/easy_ml/models/xgboost/evals_callback.rb +9 -5
  29. data/app/models/easy_ml/models/xgboost.rb +58 -36
  30. data/app/models/easy_ml/retraining_run.rb +1 -1
  31. data/app/serializers/easy_ml/model_serializer.rb +1 -0
  32. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +16 -3
  33. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +0 -17
  34. data/lib/easy_ml/core/tuner.rb +14 -5
  35. data/lib/easy_ml/data/dataset_manager/reader/base.rb +12 -0
  36. data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +8 -3
  37. data/lib/easy_ml/data/dataset_manager/reader/file.rb +5 -0
  38. data/lib/easy_ml/data/dataset_manager/reader.rb +7 -1
  39. data/lib/easy_ml/data/dataset_manager/writer/base.rb +26 -9
  40. data/lib/easy_ml/data/dataset_manager/writer.rb +5 -1
  41. data/lib/easy_ml/data/dataset_manager.rb +18 -4
  42. data/lib/easy_ml/data/embeddings/adapters.rb +56 -0
  43. data/lib/easy_ml/data/embeddings/compression.rb +0 -0
  44. data/lib/easy_ml/data/embeddings.rb +43 -0
  45. data/lib/easy_ml/data/polars_column.rb +19 -5
  46. data/lib/easy_ml/engine.rb +16 -14
  47. data/lib/easy_ml/feature_store.rb +19 -16
  48. data/lib/easy_ml/support/lockable.rb +1 -5
  49. data/lib/easy_ml/version.rb +1 -1
  50. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  51. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
  52. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
  53. metadata +9 -7
  54. data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
  55. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
data/app/models/easy_ml/dataset.rb
@@ -180,6 +180,8 @@ module EasyML
    EasyML::Reaper.kill(EasyML::RefreshDatasetJob, id)
    update(workflow_status: :ready)
    unlock!
+   features.update_all(needs_fit: true, workflow_status: "ready")
+   features.each(&:wipe)
  end

  def refresh_async
@@ -201,12 +203,6 @@ module EasyML
    @raw = initialize_split("raw")
  end

- def clipped
-   return @clipped if @clipped && @clipped.dataset
-
-   @clipped = initialize_split("clipped")
- end
-
  def processed
    return @processed if @processed && @processed.dataset

@@ -236,20 +232,20 @@ module EasyML
    cleanup
    refresh_datasource!
    split_data
-   process_data
+   fit
  end

  def prepare
    prepare_features
    refresh_datasource
    split_data
-   process_data
+   fit
  end

  def actually_refresh
    refreshing do
-     learn(delete: false) # After syncing datasource, learn new statistics + sync columns
-     process_data
+     fit
+     normalize_all
      fully_reload
      learn
      learn_statistics(type: :processed) # After processing data, we learn any new statistics
@@ -287,6 +283,7 @@ module EasyML

  def fit_features(async: false, features: self.features, force: false)
    features_to_compute = force ? features : features.needs_fit
+   puts "Features to compute.... #{features_to_compute}"
    return after_fit_features if features_to_compute.empty?

    features.first.fit(features: features_to_compute, async: async)
@@ -295,10 +292,12 @@ module EasyML
  measure_method_timing :fit_features

  def after_fit_features
+   puts "After fit features"
    unlock!
    reload
    return if failed?

+   puts "Actually refresh..."
    actually_refresh
  end

@@ -385,6 +384,8 @@ module EasyML

  def unlock!
    Support::Lockable.unlock!(lock_key)
+   features.each(&:unlock!)
+   true
  end

  def locked?
@@ -427,12 +428,6 @@ module EasyML
    (read_attribute(:statistics) || {}).with_indifferent_access
  end

- def process_data
-   learn(delete: false)
-   fit
-   normalize_all
- end
-
  def needs_learn?
    return true if columns_need_refresh?

@@ -483,13 +478,31 @@ module EasyML
    df = apply_missing_columns(df, inference: inference)
    df = columns.transform(df, inference: inference)
    df = apply_features(df, features)
-   df = columns.transform(df, inference: inference, computed: true)
+   df = columns.transform(df, inference: inference)
    df = apply_column_mask(df, inference: inference) unless all_columns
    df = drop_nulls(df) unless inference
    df, = processed.split_features_targets(df, true, target) if split_ys
    df
  end

+ # Massage out one-hot cats to their canonical name
+ #
+ # Takes: ["Sex_male", "Sex_female", "Embarked_c", "PassengerId"]
+ # Returns: ["Embarked", "Sex", "PassengerId"]
+ def regular_columns(col_list)
+   one_hot_cats = columns.allowed_categories.invert.reduce({}) do |h, (k, v)|
+     h.tap do
+       k.each do |k2|
+         h["#{v}_#{k2}"] = v
+       end
+     end
+   end
+
+   col_list.map do |col|
+     one_hot_cats.key?(col) ? one_hot_cats[col] : col
+   end.uniq.sort
+ end
+
  measure_method_timing :normalize

  def missing_required_fields(df)
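
To make the new helper concrete, here is a small sketch of the lookup regular_columns builds and how it collapses one-hot names (the allowed_categories hash below is hypothetical):

    # Suppose columns.allowed_categories returns:
    allowed = { "Sex" => ["male", "female"], "Embarked" => ["c"] }

    # invert + reduce yields a reverse lookup from one-hot name to source column:
    one_hot_cats = allowed.invert.reduce({}) do |h, (cats, col)|
      cats.each { |cat| h["#{col}_#{cat}"] = col }
      h
    end
    # => { "Sex_male" => "Sex", "Sex_female" => "Sex", "Embarked_c" => "Embarked" }

    ["Sex_male", "Sex_female", "Embarked_c", "PassengerId"]
      .map { |c| one_hot_cats.fetch(c, c) }.uniq.sort
    # => ["Embarked", "PassengerId", "Sex"]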
@@ -537,7 +550,6 @@ module EasyML

  def cleanup
    raw.cleanup
-   clipped.cleanup
    processed.cleanup
  end

@@ -705,6 +717,20 @@ module EasyML
    reload
  end

+ def list_nulls(input = nil, list_raw = false)
+   input = data(lazy: true) if input.nil?
+
+   case input
+   when Polars::DataFrame
+     input = input.lazy
+   when String, Symbol
+     input = input.to_sym
+     input = send(input).data(lazy: true)
+   end
+   col_list = EasyML::Data::DatasetManager.list_nulls(input)
+   list_raw ? col_list : regular_columns(col_list)
+ end
+
  private

  def apply_date_splitter_config
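
Usage sketch for the new list_nulls entry point (receiver and frames hypothetical; argument handling follows the method above):

    dataset.list_nulls                # scans data(lazy: true), returns canonical names
    dataset.list_nulls(:processed)    # a String/Symbol resolves a split via send
    dataset.list_nulls(df)            # a Polars::DataFrame is promoted to lazy first
    dataset.list_nulls(df, true)      # list_raw = true keeps raw one-hot column names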
@@ -730,10 +756,8 @@ module EasyML

  def initialize_splits
    @raw = nil
-   @clipped = nil
    @processed = nil
    raw
-   clipped
    processed
  end

@@ -778,11 +802,12 @@ module EasyML
    processed.cleanup

    SPLIT_ORDER.each do |segment|
-     df = clipped.read(segment)
+     df = raw.read(segment)
      learn_computed_columns(df) if segment == :train
      processed_df = normalize(df, all_columns: true)
      processed.save(segment, processed_df)
    end
+   features.select { |f| !f.fittable? }.each(&:after_transform)
    @normalized = true
  end

@@ -825,26 +850,10 @@ module EasyML
  end

  def fit
-   apply_clip
+   learn(delete: false)
    learn_statistics(type: :raw)
  end

- def apply_clip
-   clipped.cleanup
-
-   SPLIT_ORDER.each do |segment|
-     df = raw.send(segment, lazy: true, all_columns: true)
-     clipped.save(
-       segment,
-       columns.apply_clip(df) # Ensuring this returns a LazyFrame means we'll automatically use sink_parquet
-     )
-   end
- end
-
- measure_method_timing :apply_clip
-
- # log_method :fit, "Learning statistics", verbose: true
-
  def split_data!
    split_data(force: true)
  end
data/app/models/easy_ml/datasource.rb
@@ -22,7 +22,6 @@ module EasyML
  DATASOURCE_OPTIONS = {
    "s3" => "EasyML::Datasources::S3Datasource",
    "file" => "EasyML::Datasources::FileDatasource",
-   "polars" => "EasyML::Datasources::PolarsDatasource",
  }
  DATASOURCE_TYPES = [
    {
@@ -35,11 +34,6 @@ module EasyML
      label: "Local Files",
      description: "Connect to data stored in local files",
    },
-   {
-     value: "polars",
-     label: "Polars DataFrame",
-     description: "In-memory dataframe storage using Polars",
-   },
  ].freeze
  DATASOURCE_NAMES = DATASOURCE_OPTIONS.keys.freeze
  DATASOURCE_CONSTANTS = DATASOURCE_OPTIONS.values.map(&:constantize)
data/app/models/easy_ml/feature.rb
@@ -78,16 +78,24 @@ module EasyML
  scope :never_applied, -> { where(applied_at: nil) }
  scope :never_fit, -> do
    fittable = where(fit_at: nil)
-   fittable = fittable.select { |f| f.adapter.respond_to?(:fit) }
+   fittable = fittable.select(&:fittable?)
    where(id: fittable.map(&:id))
  end
  scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
- scope :ready_to_apply, -> { where(needs_fit: false).where.not(id: has_changes.map(&:id)) }
+ scope :ready_to_apply, -> do
+   base = where(needs_fit: false).where.not(id: has_changes.map(&:id))
+   doesnt_fit = where_no_fit
+   where(id: base.map(&:id).concat(doesnt_fit.map(&:id)))
+ end
+
+ scope :fittable, -> { all.select(&:fittable?) }
+ scope :where_no_fit, -> { all.reject(&:fittable?) }

  before_save :apply_defaults, if: :new_record?
  before_save :update_sha
  after_find :update_from_feature_class
  before_save :update_from_feature_class
+ before_destroy :wipe

  def feature_klass
    feature_class.constantize
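
The reworked ready_to_apply scope fixes an asymmetry: transform-only features (whose adapters define no fit method) previously had to satisfy needs_fit: false before they counted as ready. It now unions two id sets, roughly as follows (a sketch, not the literal SQL):

    base = EasyML::Feature.where(needs_fit: false)
                          .where.not(id: EasyML::Feature.has_changes.map(&:id))
    no_fit = EasyML::Feature.all.reject(&:fittable?)   # transform-only features
    EasyML::Feature.where(id: base.map(&:id) + no_fit.map(&:id))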
@@ -99,6 +107,10 @@ module EasyML
    feature_klass.present?
  end

+ def fittable?
+   adapter.respond_to?(:fit)
+ end
+
  def adapter
    @adapter ||= feature_klass.new
  end
@@ -197,7 +209,7 @@ module EasyML
  end

  EasyML::Data::Partition::Boundaries.new(
-   reader.data(lazy: true),
+   reader.data(lazy: true, all_columns: true),
    primary_key,
    batch_size
  ).to_a.map.with_index do |partition, idx|
@@ -207,18 +219,23 @@ module EasyML
      batch_end: partition[:partition_end],
      batch_number: feature_position,
      subbatch_number: idx,
-     parent_batch_id: Random.uuid,
    }
  end
  end

  def wipe
+   update(needs_fit: true) if fittable?
    feature_store.wipe
  end

  def fit(features: [self], async: false)
    ordered_features = features.sort_by(&:feature_position)
-   jobs = ordered_features.map(&:build_batches)
+   parent_batch_id = Random.uuid
+   jobs = ordered_features.select(&:fittable?).map do |feature|
+     feature.build_batches.map do |batch_args|
+       batch_args.merge(parent_batch_id: parent_batch_id)
+     end
+   end
    job_count = jobs.dup.flatten.size

    ordered_features.each(&:wipe)
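
Note the moved Random.uuid: every sub-batch produced by one fit call now shares a single parent_batch_id, instead of each feature minting its own, so batch bookkeeping can track the whole run. A hypothetical illustration of what that enables downstream (batch hashes and statuses invented):

    batches.group_by { |b| b[:parent_batch_id] }.each do |parent_id, group|
      done = group.count { |b| b[:status] == "done" }
      # one group per fit run, so completion fires once per run, not per feature
      puts "#{parent_id}: #{done}/#{group.size} sub-batches complete"
    end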
@@ -445,7 +462,7 @@ module EasyML
  def after_fit
    update_sha

-   feature_store.compact
+   feature_store.compact if fittable?
    updates = {
      fit_at: Time.current,
      needs_fit: false,
@@ -454,6 +471,14 @@ module EasyML
    update!(updates)
  end

+ def after_transform
+   feature_store.compact if !fittable?
+ end
+
+ def unlock!
+   feature_store.unlock!
+ end
+
  UNCONFIGURABLE_COLUMNS = %w(
    id
    dataset_id
@@ -508,14 +533,14 @@ module EasyML
    new_sha = compute_sha
    if new_sha != self.sha
      self.sha = new_sha
-     self.needs_fit = true
+     self.needs_fit = fittable?
    end
  end

  def update_from_feature_class
    if read_attribute(:batch_size) != config.dig(:batch_size)
      write_attribute(:batch_size, config.dig(:batch_size))
-     self.needs_fit = true
+     self.needs_fit = fittable?
    end

    if self.primary_key != config.dig(:primary_key)
data/app/models/easy_ml/model.rb
@@ -45,7 +45,7 @@ module EasyML
  MODEL_NAMES = MODEL_OPTIONS.keys.freeze
  MODEL_CONSTANTS = MODEL_OPTIONS.values.map(&:constantize)

- add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics
+ add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics, :weights_column
  MODEL_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
    add_configuration_attributes attribute
  end
@@ -182,12 +182,15 @@ module EasyML
  lock_model do
    run = pending_run
    run.wrap_training do
+     raise untrainable_error unless trainable?
+
      best_params = nil
      if run.should_tune?
        best_params = hyperparameter_search(&progress_block)
+     else
+       fit(&progress_block)
+       save
      end
-     fit(&progress_block)
-     save
      [self, best_params]
    end
    update(is_training: false)
@@ -258,7 +261,7 @@ module EasyML

  def formatted_version
    return nil unless version
-   Time.strptime(version, "%Y%m%d%H%M%S").strftime("%B %-d, %Y at %-l:%M %p")
+   UTC.parse(version).in_time_zone(EasyML::Configuration.timezone).strftime("%B %-d, %Y at %-l:%M %p")
  end

  def last_run_at
@@ -277,6 +280,22 @@ module EasyML
  alias_method :latest_version, :inference_version
  alias_method :deployed, :inference_version

+ def trainable?
+   adapter.trainable?
+ end
+
+ def untrainable_columns
+   adapter.untrainable_columns
+ end
+
+ def untrainable_error
+   %Q(
+     Cannot train dataset containing null values!
+     Apply preprocessing to the following columns:
+     #{untrainable_columns.join(", ")}
+   )
+ end
+
  def predict(xs)
    load_model!
    unless xs.is_a?(XGBoost::DMatrix)
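
Together with the train hunk above, these readers make null-free processed data a hard precondition for training. A sketch of the resulting behavior (receiver and column names hypothetical):

    model.trainable?           # => false while processed data still contains nulls
    model.untrainable_columns  # => ["Age", "Cabin"], columns still needing preprocessing
    # wrap_training then raises untrainable_error naming those columns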
@@ -375,6 +394,10 @@ module EasyML
    adapter.after_tuning
  end

+ def cleanup
+   adapter.cleanup
+ end
+
  def fit_in_batches(tuning: false, batch_size: nil, batch_overlap: nil, batch_key: nil, checkpoint_dir: Rails.root.join("tmp", "xgboost_checkpoints"), &progress_block)
    adapter.fit_in_batches(tuning: tuning, batch_size: batch_size, batch_overlap: batch_overlap, batch_key: batch_key, checkpoint_dir: checkpoint_dir, &progress_block)
  end
data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb
@@ -37,6 +37,20 @@ module EasyML
    max: 10,
    step: 0.1,
  },
+ scale_pos_weight: {
+   label: "Scale Pos Weight",
+   description: "Balance of positive and negative weights",
+   min: 0,
+   max: 200,
+   step: 1,
+ },
+ max_delta_step: {
+   label: "Max Delta Step",
+   description: "Maximum delta step",
+   min: 0,
+   max: 10,
+   step: 1,
+ },
  gamma: {
    label: "Gamma",
    description: "Minimum loss reduction required to make a further partition",
@@ -81,11 +95,13 @@ module EasyML
    label: "Histogram",
    description: "Fast histogram optimized approximate greedy algorithm",
  },
- {
-   value: "gpu_hist",
-   label: "GPU Histogram",
-   description: "GPU implementation of hist algorithm",
- },
+ # Only when compiled wih GPU support...
+ # How to make this not a default optoin
+ # {
+ #   value: "gpu_hist",
+ #   label: "GPU Histogram",
+ #   description: "GPU implementation of hist algorithm",
+ # },
  ],
  },
  )
data/app/models/easy_ml/models/xgboost/evals_callback.rb
@@ -36,7 +36,7 @@ module EasyML
    if tuner.present?
      [tuner.x_valid, tuner.y_valid]
    else
-     model.dataset.valid(split_ys: true)
+     model.dataset.valid(split_ys: true, lazy: true)
    end
  end

@@ -47,7 +47,8 @@ module EasyML
    if epoch % log_frequency == 0
      model.adapter.external_model = booster
      x_valid, y_valid = valid_dataset
-     @preprocessed ||= model.preprocess(x_valid)
+     x_valid = x_valid.select(model.dataset.col_order(inference: true))
+     @preprocessed ||= model.preprocess(x_valid, y_valid)
      y_pred = model.predict(@preprocessed)
      dataset = model.dataset.valid(all_columns: true)

@@ -102,7 +103,7 @@ module EasyML
    model.callbacks.detect { |cb| cb.class == Wandb::XGBoostCallback }
  end

- def track_cumulative_feature_importance(finish = true)
+ def track_cumulative_feature_importance
    return unless @feature_importances

    project_name = model.adapter.get_wandb_project
@@ -126,13 +127,16 @@ module EasyML
      "feature_importance" => bar_plot.__pyptr__,
    }
    Wandb.log(log_data)
-   model.adapter.delete_wandb_project if finish
-   Wandb.finish if finish
  end

  def after_tuning
    track_cumulative_feature_importance
  end
+
+ def cleanup
+   model.adapter.delete_wandb_project
+   Wandb.finish
+ end
  end
  end
  end
data/app/models/easy_ml/models/xgboost.rb
@@ -135,6 +135,12 @@ module EasyML
  end
  end

+ def cleanup
+   model.callbacks.each do |callback|
+     callback.cleanup if callback.respond_to?(:cleanup)
+   end
+ end
+
  def prepare_callbacks(tuner)
    set_wandb_project(tuner.project_name)

@@ -421,11 +427,11 @@ module EasyML
  def prepare_data
    if @d_train.nil?
      col_order = dataset.col_order
-     x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order)
+     x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
      preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
-     x_train, y_train = dataset.train(split_ys: true, select: col_order)
-     x_valid, y_valid = dataset.valid(split_ys: true, select: col_order)
-     x_test, y_test = dataset.test(split_ys: true, select: col_order)
+     x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
+     x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
+     x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
      @d_train = preprocess(x_train, y_train)
      @d_valid = preprocess(x_valid, y_valid)
      @d_test = preprocess(x_test, y_test)
@@ -434,21 +440,60 @@ module EasyML
    [@d_train, @d_valid, @d_test]
  end

+ def trainable?
+   untrainable_columns.empty?
+ end
+
+ def untrainable_columns
+   model.dataset.refresh if model.dataset.processed.nil?
+
+   model.dataset.list_nulls(
+     model.dataset.processed.data(lazy: true)
+   )
+ end
+
  def preprocess(xs, ys = nil)
    return xs if xs.is_a?(::XGBoost::DMatrix)
+   lazy = xs.is_a?(Polars::LazyFrame)
+   return xs if (lazy ? xs.limit(1).collect : xs).shape[0] == 0
+
+   weights_col = model.weights_column || nil
+
+   if weights_col == model.dataset.target
+     raise ArgumentError, "Weight column cannot be the target column"
+   end
+
+   # Extract feature columns (all columns except label and weight)
+   feature_cols = xs.columns
+   feature_cols -= [weights_col] if weights_col
+
+   # Get features, labels and weights
+   begin
+     features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
+   rescue => e
+     binding.pry
+   end
+   weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
+   weights = weights.flatten if weights
+   if ys.present?
+     ys = ys.is_a?(Array) ? Polars::Series.new(ys) : ys
+     labels = lazy ? ys.collect.to_numo.flatten : ys.to_numo.flatten
+   else
+     labels = nil
+   end
+
+   kwargs = {
+     label: labels,
+     weight: weights,
+   }.compact

-   orig_xs = xs.dup
-   column_names = xs.columns
-   xs = _preprocess(xs)
-   ys = ys.nil? ? nil : _preprocess(ys).flatten
-   kwargs = { label: ys }.compact
    begin
-     ::XGBoost::DMatrix.new(xs, **kwargs).tap do |dmat|
-       dmat.feature_names = column_names
+     ::XGBoost::DMatrix.new(features, **kwargs).tap do |dmatrix|
+       dmatrix.feature_names = feature_cols
      end
    rescue StandardError => e
-     problematic_columns = orig_xs.schema.select { |k, v| [Polars::Categorical, Polars::String].include?(v) }
-     problematic_xs = orig_xs.select(problematic_columns.keys)
+     problematic_columns = xs.schema.select { |k, v| [Polars::Categorical, Polars::String].include?(v) }
+     problematic_xs = lazy ? xs.lazy.select(problematic_columns.keys).collect : xs.select(problematic_columns.keys)
      raise %(
        Error building data for XGBoost.
        Apply preprocessing to columns
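
The rewritten preprocess now feeds XGBoost a weighted DMatrix: the configured weights_column is split out of the feature matrix and passed as per-row weights. A standalone sketch of that column split (frame and column names hypothetical; to_numo is the polars-df/numo bridge this code already uses):

    xs = Polars::DataFrame.new({
      "age" => [22.0, 38.0],
      "fare" => [7.25, 71.28],
      "sample_weight" => [1.0, 2.5],
    })
    weights_col = "sample_weight"
    feature_cols = xs.columns - [weights_col]          # => ["age", "fare"]
    features = xs.select(feature_cols).to_numo         # the matrix XGBoost trains on
    weights = xs.select(weights_col).to_numo.flatten   # per-row weights, not a feature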
@@ -501,29 +546,6 @@ module EasyML
    cb_container.after_iteration(@booster, current_iteration, d_train, evals)
  end

- def _preprocess(df)
-   return df if df.is_a?(Array)
-
-   df.to_a.map do |row|
-     row.values.map do |value|
-       case value
-       when Time
-         value.to_i # Convert Time to Unix timestamp
-       when Date
-         value.to_time.to_i # Convert Date to Unix timestamp
-       when String
-         value
-       when TrueClass, FalseClass
-         value ? 1.0 : 0.0 # Convert booleans to 1.0 and 0.0
-       when Integer
-         value
-       else
-         value.to_f # Ensure everything else is converted to a float
-       end
-     end
-   end
- end
-
  def initialize_model
    @xgboost_model = model_class.new(n_estimators: @hyperparameters.to_h.dig(:n_estimators))
    if block_given?
data/app/models/easy_ml/retraining_run.rb
@@ -150,7 +150,7 @@ module EasyML

  training_model.dataset.refresh
  evaluator = retraining_job.evaluator.symbolize_keys
- x_test, y_test = training_model.dataset.test(split_ys: true)
+ x_test, y_test = training_model.dataset.test(split_ys: true, all_columns: true)
  y_pred = training_model.predict(x_test)

  metric = evaluator[:metric].to_sym
data/app/serializers/easy_ml/model_serializer.rb
@@ -27,6 +27,7 @@ module EasyML
  :model_type,
  :task,
  :objective,
+ :weights_column,
  :metrics,
  :dataset_id,
  :status,
data/lib/easy_ml/core/tuner/adapters/base_adapter.rb
@@ -18,12 +18,22 @@ module EasyML
  end

  def defaults
-   {}
+   model.adapter.hyperparameters.class.hyperparameter_constants.transform_values do |constant|
+     values = constant.slice(:min, :max, :step, :options)
+     if values.key?(:options)
+       values[:options] = values[:options].map { |option| option[:value] }
+     end
+     values
+   end
  end

  def run_trial(trial)
    config = deep_merge_defaults(self.config.clone.deep_symbolize_keys)
-   suggest_parameters(trial, config)
+   # For first trial, re-use the original hyperparameters, so they
+   # serve as our starting point/imputers
+   unless trial == 1
+     suggest_parameters(trial, config)
+   end
    yield model
  end

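Tuner search spaces are now derived from the hyperparameter constants declared on the model class, which is what makes the hard-coded XGBoost defaults removed at the end of this diff redundant. A sketch of the reshaping with illustrative constants:

    constants = {
      learning_rate: { label: "Learning Rate", min: 0.001, max: 0.1, step: 0.001 },
      booster: { options: [{ value: "gbtree", label: "Gradient Boosted Tree" }] },
    }
    constants.transform_values do |constant|
      values = constant.slice(:min, :max, :step, :options)
      values[:options] = values[:options].map { |o| o[:value] } if values.key?(:options)
      values
    end
    # => { learning_rate: { min: 0.001, max: 0.1, step: 0.001 },
    #      booster: { options: ["gbtree"] } }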
@@ -57,8 +67,11 @@ module EasyML
    min = param_config[:min]
    max = param_config[:max]
    log = param_config[:log]
+   options = param_config[:options]

-   if log
+   if options
+     trial.suggest_categorical(param_name.to_s, options)
+   elsif log
      trial.suggest_loguniform(param_name.to_s, min, max)
    elsif max.is_a?(Integer) && min.is_a?(Integer)
      trial.suggest_int(param_name.to_s, min, max)
data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb
@@ -5,23 +5,6 @@ module EasyML
  class Tuner
  module Adapters
  class XGBoostAdapter < BaseAdapter
-   def defaults
-     {
-       learning_rate: {
-         min: 0.001,
-         max: 0.1,
-         log: true,
-       },
-       n_estimators: {
-         min: 100,
-         max: 1_000,
-       },
-       max_depth: {
-         min: 2,
-         max: 20,
-       },
-     }
-   end
  end
  end
  end