RubyGems - easy_ml - Versions diffs - 0.2.0.pre.rc84 → 0.2.0.pre.rc88 - Mend

easy_ml 0.2.0.pre.rc84 → 0.2.0.pre.rc88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

checksums.yaml +4 -4
data/app/controllers/easy_ml/datasets_controller.rb +19 -3
data/app/frontend/components/dataset/PreprocessingConfig.tsx +523 -150
data/app/frontend/types/dataset.ts +5 -2
data/app/models/easy_ml/column/imputers/base.rb +23 -2
data/app/models/easy_ml/column/imputers/embedding_encoder.rb +18 -0
data/app/models/easy_ml/column/imputers/imputer.rb +1 -0
data/app/models/easy_ml/column/imputers/most_frequent.rb +1 -1
data/app/models/easy_ml/column/imputers/one_hot_encoder.rb +1 -1
data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -1
data/app/models/easy_ml/column/imputers.rb +47 -41
data/app/models/easy_ml/column/selector.rb +2 -2
data/app/models/easy_ml/column.rb +260 -56
data/app/models/easy_ml/column_history.rb +6 -0
data/app/models/easy_ml/column_list.rb +30 -1
data/app/models/easy_ml/dataset/learner/lazy/embedding.rb +10 -0
data/app/models/easy_ml/dataset/learner/lazy/query.rb +2 -0
data/app/models/easy_ml/dataset/learner.rb +11 -0
data/app/models/easy_ml/dataset.rb +6 -19
data/app/models/easy_ml/lineage_history.rb +17 -0
data/app/models/easy_ml/model.rb +11 -1
data/app/models/easy_ml/models/xgboost.rb +37 -7
data/app/models/easy_ml/pca_model.rb +21 -0
data/app/models/easy_ml/prediction.rb +2 -1
data/app/serializers/easy_ml/column_serializer.rb +13 -1
data/config/initializers/inflections.rb +1 -0
data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +6 -8
data/lib/easy_ml/data/dataset_manager/writer/base.rb +15 -2
data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +0 -1
data/lib/easy_ml/data/dataset_manager/writer.rb +2 -0
data/lib/easy_ml/data/embeddings/compressor.rb +179 -0
data/lib/easy_ml/data/embeddings/embedder.rb +226 -0
data/lib/easy_ml/data/embeddings.rb +61 -0
data/lib/easy_ml/data/polars_column.rb +3 -0
data/lib/easy_ml/data/polars_reader.rb +54 -23
data/lib/easy_ml/data/polars_schema.rb +28 -2
data/lib/easy_ml/data/splits/file_split.rb +7 -2
data/lib/easy_ml/data.rb +1 -0
data/lib/easy_ml/embedding_store.rb +92 -0
data/lib/easy_ml/engine.rb +4 -2
data/lib/easy_ml/predict.rb +42 -20
data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +5 -0
data/lib/easy_ml/railtie/templates/migration/add_is_primary_key_to_easy_ml_columns.rb.tt +9 -0
data/lib/easy_ml/railtie/templates/migration/add_metadata_to_easy_ml_predictions.rb.tt +6 -0
data/lib/easy_ml/railtie/templates/migration/add_pca_model_id_to_easy_ml_columns.rb.tt +9 -0
data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_dataset_histories.rb.tt +13 -0
data/lib/easy_ml/railtie/templates/migration/create_easy_ml_pca_models.rb.tt +14 -0
data/lib/easy_ml/version.rb +1 -1
data/lib/easy_ml.rb +1 -0
data/public/easy_ml/assets/.vite/manifest.json +2 -2
data/public/easy_ml/assets/assets/Application-DfPoyRr8.css +1 -0
data/public/easy_ml/assets/assets/entrypoints/Application.tsx-KENNRQpC.js +533 -0
data/public/easy_ml/assets/assets/entrypoints/Application.tsx-KENNRQpC.js.map +1 -0
metadata +59 -6
data/lib/tasks/profile.rake +0 -40
data/public/easy_ml/assets/assets/Application-nnn_XLuL.css +0 -1
data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +0 -522
data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js.map +0 -1

data/lib/easy_ml/data/embeddings.rb ADDED Viewed

@@ -0,0 +1,61 @@
+module EasyML
+  module Data
+    class Embeddings
+      require_relative "embeddings/compressor"
+      require_relative "embeddings/embedder"
+      attr_reader :df, :column, :model, :adapter, :compression,
+                  :embeddings, :compressed_embeddings, :config,
+                  :llm, :output_column, :preset, :dimensions
+      def initialize(options = {})
+        @df = options[:df]
+        @column = options[:column]
+        @output_column = options[:output_column]
+        @llm = options[:llm] || "openai"
+        @config = options[:config] || {}
+        @preset = options.dig(:preset)
+        @dimensions = options.dig(:dimensions)
+        @pca_model = options.dig(:pca_model)
+      end
+      def create
+        embed
+        compress(embeddings)
+      end
+      def embed
+        @embeddings ||= adapter.embed(df, column, output_column)
+      end
+      def compress(embeddings, fit: false)
+        @compressed_embeddings ||= compressor.compress(embeddings, column, output_column, fit: fit)
+      end
+      def pca_model
+        return @pca_model if @pca_model.present?
+        return @compressor.pca_model if @compressor
+        nil
+      end
+      private
+      def adapter
+        @adapter ||= EasyML::Data::Embeddings::Embedder.new(llm, config)
+      end
+      def compressor_args
+        {
+          preset: preset,
+          dimensions: dimensions,
+          pca_model: pca_model,
+        }.compact
+      end
+      def compressor
+        @compressor ||= EasyML::Data::Embeddings::Compressor.new(compressor_args)
+      end
+    end
+  end
+end

data/lib/easy_ml/data/polars_column.rb CHANGED Viewed

@@ -13,6 +13,7 @@ module EasyML
         text: Polars::String,
         categorical: Polars::Categorical,
         null: Polars::Null,
+        array: Polars::List,
       }
       POLARS_MAP = {
         Polars::Float64 => :float,
@@ -25,6 +26,8 @@ module EasyML
         Polars::String => :string,
         Polars::Categorical => :categorical,
         Polars::Null => :null,
+        Polars::List => :array,
+        Polars::Array => :array,
       }.stringify_keys
       include EasyML::Timing

data/lib/easy_ml/data/polars_reader.rb CHANGED Viewed

@@ -88,32 +88,34 @@ module EasyML
       end
       def query(files = nil, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false,
-                             batch_size: nil, batch_start: nil, batch_key: nil, lazy: false, &block)
+                             batch_size: nil, batch_start: nil, batch_key: nil, lazy: false, cast: nil, &block)
         files ||= self.files
         PolarsReader.query(files, drop_cols: drop_cols, filter: filter, limit: limit,
                                   select: select, unique: unique, sort: sort, descending: descending,
-                                  batch_size: batch_size, batch_start: batch_start, batch_key: batch_key, lazy: lazy, &block)
+                                  batch_size: batch_size, batch_start: batch_start, batch_key: batch_key, lazy: lazy, cast: cast, &block)
       end
       def self.query(files, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false,
-                            batch_size: nil, batch_start: nil, batch_key: nil, lazy: false, &block)
+                            batch_size: nil, batch_start: nil, batch_key: nil, lazy: false, cast: nil, &block)
         unless batch_size.present?
           result = query_files(files, drop_cols: drop_cols, filter: filter, limit: limit, select: select,
-                                      unique: unique, sort: sort, descending: descending)
+                                      unique: unique, sort: sort, descending: descending, cast: cast)
           return lazy ? result : result.collect
         end
-        return batch_enumerator(files, drop_cols: drop_cols, filter: filter, limit: limit, select: select, unique: unique, sort: sort, descending: descending,
-                                       batch_size: batch_size, batch_start: batch_start, batch_key: batch_key) unless block_given?
+        unless block_given?
+          return batch_enumerator(files, drop_cols: drop_cols, filter: filter, limit: limit, select: select, unique: unique, sort: sort, descending: descending,
+                                         batch_size: batch_size, batch_start: batch_start, batch_key: batch_key, cast: cast)
+        end
         process_batches(files, drop_cols: drop_cols, filter: filter, limit: limit, select: select, unique: unique, sort: sort, descending: descending,
-                               batch_size: batch_size, batch_start: batch_start, batch_key: batch_key, &block)
+                               batch_size: batch_size, batch_start: batch_start, batch_key: batch_key, cast: cast, &block)
       end
       private
       def self.batch_enumerator(files, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false,
-                                       batch_size: nil, batch_start: nil, batch_key: nil, &block)
+                                       batch_size: nil, batch_start: nil, batch_key: nil, cast: nil, &block)
         Enumerator.new do |yielder|
           process_batches(files, drop_cols: drop_cols, filter: filter, limit: limit, select: select, unique: unique, sort: sort, descending: descending,
                                  batch_size: batch_size, batch_start: batch_start, batch_key: batch_key) do |batch|
@@ -123,27 +125,32 @@ module EasyML
       end
       def self.process_batches(files, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false,
-                                      batch_size: nil, batch_start: nil, batch_key: nil, &block)
+                                      batch_size: nil, batch_start: nil, batch_key: nil, cast: nil, &block)
         batch_key ||= identify_primary_key(files, select: select)
         raise "When using batch_size, sort must match primary key (#{batch_key})" if sort.present? && batch_key != sort
         sort = batch_key
-        batch_start = query_files(files, sort: sort, descending: descending, select: batch_key, limit: 1).collect[batch_key].to_a.last unless batch_start
-        final_value = query_files(files, sort: sort, descending: !descending, select: batch_key, limit: 1).collect[batch_key].to_a.last
+        batch_start ||= query_files(files, sort: sort, descending: descending, select: batch_key, cast: cast,
+                                           limit: 1).collect[batch_key].to_a.last
+        final_value = query_files(files, sort: sort, descending: !descending, select: batch_key, cast: cast,
+                                         limit: 1).collect[batch_key].to_a.last
         is_first_batch = true
         current_start = batch_start
         while current_start < final_value
           filter = is_first_batch ? Polars.col(sort) >= current_start : Polars.col(sort) > current_start
-          batch = query_files(files, drop_cols: drop_cols, filter: filter, limit: batch_size, select: select, unique: unique, sort: sort, descending: descending)
+          batch = query_files(files, drop_cols: drop_cols, filter: filter, limit: batch_size, select: select,
+                                     unique: unique, sort: sort, descending: descending, cast: cast)
           yield batch
-          current_start = query_files(files, sort: sort, descending: descending, limit: batch_size, filter: filter).sort(sort, reverse: !descending).limit(1).select(batch_key).collect[batch_key].to_a.last
+          current_start = query_files(files, sort: sort, descending: descending, limit: batch_size, filter: filter, cast: cast).sort(
+            sort, reverse: !descending,
+          ).limit(1).select(batch_key).collect[batch_key].to_a.last
           is_first_batch = false
         end
       end
-      def self.query_files(files, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false)
+      def self.query_files(files, drop_cols: [], filter: nil, limit: nil, select: nil, unique: nil, sort: nil, descending: false, cast: nil)
         lazy_frames = to_lazy_frames(files)
         combined_lazy_df = Polars.concat(lazy_frames)
@@ -160,6 +167,32 @@ module EasyML
         drop_cols &= combined_lazy_df.columns
         combined_lazy_df = combined_lazy_df.drop(drop_cols) unless drop_cols.empty?
+        if cast && cast.keys.any?
+          schema = combined_lazy_df.schema
+          in_schema = schema.keys & cast.keys
+          cast = cast.select do |col, dtype|
+            in_schema.include?(col) && dtype != schema[col]
+          end
+          combined_lazy_df = combined_lazy_df.with_columns(
+            cast.map do |col, dtype|
+              Polars.col(col).cast(dtype).alias(col)
+            end
+          )
+        end
+        str_types = [Polars::Utf8, Polars::String, Polars::Categorical]
+        str_keys = combined_lazy_df.schema.select { |k, v| v.class.in?(str_types) }
+        # Cast empty strings to null
+        str_keys.each do |k, v|
+          combined_lazy_df = combined_lazy_df.with_columns(
+            Polars.when(
+              Polars.col(k).eq("")
+            ).then(nil)
+              .otherwise(Polars.col(k))
+              .alias(k)
+          )
+        end
         # Collect the DataFrame (execute the lazy operations)
         combined_lazy_df = combined_lazy_df.limit(limit) if limit
         combined_lazy_df
@@ -184,16 +217,12 @@ module EasyML
         if primary_keys.count > 1
           key = primary_keys.detect { |key| key.underscore.split("_").any? { |k| k.match?(/id/) } }
-          if key
-            primary_keys = [key]
-          end
+          primary_keys = [key] if key
         end
-        if primary_keys.count != 1
-          raise "Unable to determine primary key for dataset"
-        end
+        raise "Unable to determine primary key for dataset" if primary_keys.count != 1
-        return primary_keys.first
+        primary_keys.first
       end
       def self.lazy_schema(files)
@@ -249,7 +278,7 @@ module EasyML
         date_cols = (filtered[:dtypes] || {}).select { |k, v| v.class == Polars::Datetime }.keys
         filtered[:dtypes] = (filtered[:dtypes] || {}).reject { |k, v| v.class == Polars::Datetime }.compact.to_h
         filtered = filtered.select { |k, _| supported_params.include?(k) }
-        return filtered, date_cols
+        [filtered, date_cols]
       end
       def csv_files
@@ -261,7 +290,9 @@ module EasyML
       end
       def columns_to_dtypes(columns)
-        columns.reduce({}) { |h, c| h[c.name] = c.polars_type; h }
+        columns.each_with_object({}) do |c, h|
+          h[c.name] = c.polars_type
+        end
       end
       def cast(df, columns = [])

data/lib/easy_ml/data/polars_schema.rb CHANGED Viewed

@@ -22,9 +22,9 @@ module EasyML
         schema.reduce({}) do |h, (key, type_info)|
           h.tap do
             polars_type = PolarsColumn.sym_to_polars(type_info[:type].to_sym)
-            params = type_info[:params]&.transform_keys(&:to_sym) || {}
+            params = deserialize_params(type_info[:params])
-            h[key] = polars_type.new(**params)
+            h[key] = initialize_polars_type(polars_type, params)
           end
         end
       end
@@ -38,6 +38,28 @@ module EasyML
       private
+      def self.initialize_polars_type(polars_type, params)
+        case polars_type.name
+        when "Polars::List"
+          polars_type.new(params[:inner])
+        else
+          polars_type.new(**params)
+        end
+      end
+      def self.deserialize_params(params)
+        params.reduce({}) do |h, (k, param)|
+          h.tap do
+            case k.to_sym
+            when :inner
+              h[:inner] = PolarsColumn.sym_to_polars(param.to_sym)
+            else
+              h[k] = param
+            end
+          end
+        end
+      end
       def self.dtype_params(dtype)
         case dtype
         when Polars::Categorical
@@ -47,6 +69,10 @@ module EasyML
             time_unit: dtype.time_unit,
             time_zone: dtype.time_zone,
           }
+        when Polars::List, Polars::Array
+          {
+            inner: PolarsColumn.polars_to_sym(dtype.inner),
+          }
         else
           {}
         end

data/lib/easy_ml/data/splits/file_split.rb CHANGED Viewed

@@ -22,7 +22,7 @@ module EasyML
         end
         def processed?
-          dir.match?(%r{processed$})
+          dir.match?(/processed$/)
         end
         def raw?
@@ -121,8 +121,12 @@ module EasyML
         end
         def read(segment, split_ys: false, target: nil, drop_cols: [], filter: nil, limit: nil, select: nil,
-                          unique: nil, sort: nil, descending: false, batch_size: nil, batch_start: nil, batch_key: nil, lazy: false, &block)
+                          unique: nil, sort: nil, descending: false, batch_size: nil, batch_start: nil,
+                          batch_key: nil, lazy: false, cast: true, &block)
           files = files_for_segment(segment)
+          if cast == true
+            cast = dataset.columns.cast(processed? ? :processed : :raw)
+          end
           return split_ys ? [nil, nil] : nil if files.empty?
           query_params = {
@@ -136,6 +140,7 @@ module EasyML
             batch_size: batch_size,
             batch_start: batch_start,
             batch_key: batch_key,
+            cast: cast,
             lazy: lazy,
           }.compact

data/lib/easy_ml/data.rb CHANGED Viewed

@@ -3,6 +3,7 @@ module EasyML
     require_relative "data/utils"
     require_relative "data/polars_reader"
     require_relative "data/polars_in_memory"
+    require_relative "data/embeddings"
     require_relative "data/synced_directory"
     require_relative "data/splits"
     require_relative "data/polars_column"

data/lib/easy_ml/embedding_store.rb ADDED Viewed

@@ -0,0 +1,92 @@
+module EasyML
+  class EmbeddingStore
+    attr_reader :column, :dataset, :datasource, :full_store, :compressed_store
+    def initialize(column)
+      @column = column
+      @dataset = column&.dataset
+      @datasource = dataset&.datasource
+      @full_store = EasyML::Data::DatasetManager.new(defaults.merge!(root_dir: embedding_dir(compressed: false)))
+      @compressed_store = EasyML::Data::DatasetManager.new(defaults.merge!(root_dir: embedding_dir(compressed: true)))
+    end
+    def cp(old_version, new_version)
+      false
+    end
+    def wipe
+      full_store.wipe
+      compressed_store.wipe
+    end
+    def files
+      full_store.files + compressed_store.files
+    end
+    def empty?(compressed: false)
+      if compressed
+        compressed_store.empty?
+      else
+        full_store.empty?
+      end
+    end
+    def compact
+      full_store.compact
+      compressed_store.compact
+    end
+    def store(df, compressed: false)
+      df = df.select(column.name, column.embedding_column).filter(Polars.col(column.embedding_column).is_not_null)
+      if compressed
+        compressed_store.store(df)
+      else
+        full_store.store(df)
+      end
+    end
+    def query(**kwargs)
+      compressed = kwargs.delete(:compressed) || false
+      if compressed
+        compressed_store.query(**kwargs).filter(Polars.col(column.embedding_column).is_not_null)
+      else
+        full_store.query(**kwargs).filter(Polars.col(column.embedding_column).is_not_null)
+      end
+    end
+    private
+    def defaults
+      datasource_config = column&.dataset&.datasource&.configuration
+      if datasource_config
+        options = {
+          filenames: "embedding",
+          append_only: true,
+          primary_key: column.name,
+          s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
+          s3_prefix: s3_prefix,
+          polars_args: datasource_config.dig("polars_args"),
+        }.compact
+      else
+        {}
+      end
+    end
+    def embedding_dir(compressed: false)
+      File.join(
+        Rails.root,
+        "easy_ml/datasets",
+        column&.dataset&.name&.parameterize&.gsub("-", "_"),
+        "embeddings",
+        compressed ? "compressed" : "full",
+        column&.name&.parameterize&.gsub("-", "_")
+      )
+    end
+    def s3_prefix
+      File.join("datasets", embedding_dir.split("datasets").last)
+    end
+  end
+end

data/lib/easy_ml/engine.rb CHANGED Viewed

@@ -3,14 +3,12 @@ require "awesome_print"
 require "rails/all"
 require "inertia_rails"
 require "jsonapi/serializer"
-require "numo/narray"
 require "numpy"
 require "parallel"
 require "polars-df"
 require "pycall"
 require "optuna"
 require "wandb"
-require "xgb"
 require "rails/engine"
 require "activerecord-import"
 require "historiographer"
@@ -19,6 +17,10 @@ require "rake"
 require "resque/tasks"
 require "zhong"
 require "dotenv"
+require "langchainrb"
+require "numo/narray"
+require "xgb"
+require "rumale"
 module EasyML
   class Engine < Rails::Engine

data/lib/easy_ml/predict.rb CHANGED Viewed

@@ -19,29 +19,23 @@ module EasyML
     def self.predict(model_name, df, serialize: false)
       df = normalize_input(df)
-      raw_input = df.to_hashes
-      df = instance.normalize(model_name, df)
-      normalized_input = df.to_hashes
-      preds = instance.predict(model_name, df)
-      current_version = instance.get_model(model_name)
+      output = make_predictions(model_name, df) do |model, normalized_df|
+        model.predict(normalized_df)
+      end
-      output = preds.zip(raw_input, normalized_input).map do |pred, raw, norm|
-        EasyML::Prediction.create!(
-          model: current_version.model,
-          model_history: current_version,
-          prediction_type: current_version.model.task,
-          prediction_value: pred,
-          raw_input: raw,
-          normalized_input: norm,
-        )
+      if serialize
+        EasyML::PredictionSerializer.new(output).serializable_hash
+      else
+        output
       end
+    end
-      output = if output.is_a?(Array) && output.count == 1
-          output.first
-        else
-          output
-        end
+    def self.predict_proba(model_name, df, serialize: false)
+      df = normalize_input(df)
+      output = make_predictions(model_name, df) do |model, normalized_df|
+        probas = model.predict_proba(normalized_df)
+        probas.map { |proba_array| proba_array.map { |p| p.round(4) } }
+      end
       if serialize
         EasyML::PredictionSerializer.new(output).serializable_hash
@@ -58,6 +52,10 @@ module EasyML
       get_model(model_name).predict(df)
     end
+    def predict_proba(model_name, df)
+      get_model(model_name).predict_proba(df)
+    end
     def self.validate_input(model_name, df)
       df = normalize_input(df)
       instance.get_model(model_name).dataset.validate_input(df)
@@ -82,6 +80,30 @@ module EasyML
     private
+    def self.make_predictions(model_name, df)
+      raw_input = df.to_hashes
+      normalized_df = instance.normalize(model_name, df)
+      normalized_input = normalized_df.to_hashes
+      current_version = instance.get_model(model_name)
+      predictions = yield(current_version, normalized_df)
+      proba = predictions.is_a?(Array) ? predictions : nil
+      output = predictions.zip(raw_input, normalized_input).map do |pred, raw, norm|
+        EasyML::Prediction.create!(
+          model: current_version.model,
+          model_history: current_version,
+          prediction_type: current_version.model.task,
+          prediction_value: pred,
+          raw_input: raw,
+          normalized_input: norm,
+          metadata: proba ? { probabilities: pred } : {},
+        )
+      end
+      output.count == 1 ? output.first : output
+    end
     def load_model(model_name)
       current_model = EasyML::Model.find_by!(slug: model_name).inference_version

data/lib/easy_ml/railtie/generators/migration/migration_generator.rb CHANGED Viewed

@@ -54,6 +54,11 @@ module EasyML
             add_raw_schema_to_datasets
             remove_evaluator_from_retraining_jobs
             add_unique_constraint_to_easy_ml_model_names
+            add_is_primary_key_to_easy_ml_columns
+            create_easy_ml_pca_models
+            add_pca_model_id_to_easy_ml_columns
+            add_workflow_status_to_easy_ml_dataset_histories
+            add_metadata_to_easy_ml_predictions
           ].freeze
           # Specify the next migration number

data/lib/easy_ml/railtie/templates/migration/add_is_primary_key_to_easy_ml_columns.rb.tt ADDED Viewed

@@ -0,0 +1,9 @@
+class AddIsPrimaryKeyToEasyMLColumns < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
+  def change
+    add_column :easy_ml_columns, :is_primary_key, :boolean
+    add_index :easy_ml_columns, :is_primary_key
+    add_column :easy_ml_column_histories, :is_primary_key, :boolean
+    add_index :easy_ml_column_histories, :is_primary_key
+  end
+end

data/lib/easy_ml/railtie/templates/migration/add_metadata_to_easy_ml_predictions.rb.tt ADDED Viewed

@@ -0,0 +1,6 @@
+# Adds a non-null jsonb metadata column (default {}) to easy_ml_predictions
+# and indexes it with GIN.
+class AddMetadataToEasyMLPredictions < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
+  def change
+    add_column :easy_ml_predictions, :metadata, :jsonb, default: {}, null: false
+    # NOTE(review): jsonb and GIN indexes presumably require PostgreSQL - confirm.
+    add_index :easy_ml_predictions, :metadata, using: :gin
+  end
+end
+end

data/lib/easy_ml/railtie/templates/migration/add_pca_model_id_to_easy_ml_columns.rb.tt ADDED Viewed

@@ -0,0 +1,9 @@
+class AddPCAModelIdToEasyMLColumns < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
+  def change
+    add_column :easy_ml_columns, :pca_model_id, :integer
+    add_index :easy_ml_columns, :pca_model_id
+    add_column :easy_ml_column_histories, :pca_model_id, :integer
+    add_index :easy_ml_column_histories, :pca_model_id
+  end
+end

data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_dataset_histories.rb.tt ADDED Viewed

@@ -0,0 +1,13 @@
+class AddWorkflowStatusToEasyMLDatasetHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
+  def change
+    unless column_exists?(:easy_ml_dataset_histories, :workflow_status)
+      add_column :easy_ml_dataset_histories, :workflow_status, :string
+      add_index :easy_ml_dataset_histories, :workflow_status
+    end
+    unless column_exists?(:easy_ml_feature_histories, :workflow_status)
+      add_column :easy_ml_feature_histories, :workflow_status, :string
+      add_index :easy_ml_feature_histories, :workflow_status
+    end
+  end
+end

data/lib/easy_ml/railtie/templates/migration/create_easy_ml_pca_models.rb.tt ADDED Viewed

@@ -0,0 +1,14 @@
+class CreateEasyMLPCAModels < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
+  def change
+    unless table_exists?(:easy_ml_pca_models)
+      create_table :easy_ml_pca_models do |t|
+        t.binary :model, null: false
+        t.datetime :fit_at
+        t.timestamps
+        t.index :created_at
+        t.index :fit_at
+      end
+    end
+  end
+end

data/lib/easy_ml/version.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module EasyML
-  VERSION = "0.2.0-rc84"
+  VERSION = "0.2.0-rc88"
   module Version
   end

data/lib/easy_ml.rb CHANGED Viewed

@@ -25,6 +25,7 @@ module EasyML
   require_relative "easy_ml/evaluators"
   require_relative "easy_ml/features"
   require_relative "easy_ml/feature_store"
+  require_relative "easy_ml/embedding_store"
   require_relative "easy_ml/core"
   require_relative "easy_ml/predict"
   require_relative "easy_ml/pending_migrations"

data/public/easy_ml/assets/.vite/manifest.json CHANGED Viewed

@@ -1,11 +1,11 @@
 {
   "entrypoints/Application.tsx": {
-    "file": "assets/entrypoints/Application.tsx-Bbf3mD_b.js",
+    "file": "assets/entrypoints/Application.tsx-KENNRQpC.js",
     "name": "entrypoints/Application.tsx",
     "src": "entrypoints/Application.tsx",
     "isEntry": true,
     "css": [
-      "assets/Application-nnn_XLuL.css"
+      "assets/Application-DfPoyRr8.css"
     ]
   }
 }