RubyGems - easy_ml - Versions diffs - 0.2.0.pre.rc65 → 0.2.0.pre.rc69 - Mend

easy_ml 0.2.0.pre.rc65 → 0.2.0.pre.rc69

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/app/frontend/components/dataset/PreprocessingConfig.tsx +4 -4
data/app/jobs/easy_ml/compute_feature_job.rb +11 -28
data/app/models/easy_ml/column.rb +10 -2
data/app/models/easy_ml/dataset.rb +8 -3
data/app/models/easy_ml/datasource.rb +2 -0
data/app/models/easy_ml/feature.rb +29 -6
data/lib/easy_ml/feature_store.rb +3 -3
data/lib/easy_ml/version.rb +1 -1
data/public/easy_ml/assets/.vite/manifest.json +1 -1
data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-v1q2Ux1T.js → Application.tsx-CibZcrBc.js} +36 -36
data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-v1q2Ux1T.js.map → Application.tsx-CibZcrBc.js.map} +1 -1
metadata +3 -4
data/app/jobs/easy_ml/finalize_feature_job.rb +0 -13

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: d6993d639004ee88981816cf11422f458d2fa5caa121e760d075c7a73ae70195
-  data.tar.gz: 0e60804c7d59f8c3402be88b6b6ae5e24a7c9542875cbc29f606bbd500227b1f
+  metadata.gz: b4c3878d6cb51daa13de6d41b0480ce8d5f5288266e68866d9e2532de3d372b5
+  data.tar.gz: 62b963738afea40ffa9c00624164c8293b4b6994cd3c21e67162c8e61715c0de
 SHA512:
-  metadata.gz: b11150da87d6dafb5d0a71f0c9b8391012a388ba51eb17544dca044d8616b2d9898fa65ef37e7c8cb22f627669e648b23edc37f6582f5aa40d4619228c57ed02
-  data.tar.gz: 7fd29a43e9a2a15b3388e2c592fe5772d15e394103d7c0651fbe0404abd7bd8637e8ff32f3c38cac4220ce815ab49f369c5767d00dab0cd5ed060a924d3fa8bb
+  metadata.gz: e8a6d8ee4af5fbac1f45b79e5edee2e8f9f4e3807616d05ab98ea9bc3ae42f08d546fb1a7e14b22338df8961716cf54f54300eb33d0a049582e64b7f54ea8d86
+  data.tar.gz: 61b7efd2451e0189f9f8ebb7149ca7707723bdf6cfa28e232b88fe0b286b86d6607fbbc4f222aaf97a7d175ef0e947041b9dd61ec2ef4bcec8e922561dce6c27

data/app/frontend/components/dataset/PreprocessingConfig.tsx CHANGED Viewed

@@ -264,18 +264,18 @@ export function PreprocessingConfig({
   const renderStrategySpecificInfo = (type: 'training' | 'inference') => {
     const strategy = type === 'training' ? training : inference;
     let content;
-    if (strategy.method === 'most_frequent' && column.statistics?.raw.most_frequent_value) {
+    if (strategy.method === 'most_frequent' && column.statistics?.raw.most_frequent_value !== undefined) {
       content = `Most Frequent Value: ${column.statistics.raw.most_frequent_value}`
     } else if (strategy.method === 'ffill') {
       const lastValue = column.statistics?.raw.last_value;
-      if (lastValue !== undefined && lastValue !== null) {
+      if (lastValue !== undefined) {
         content = `Forward Fill using Last Value: ${lastValue}`;
       } else {
         content = 'Set date column & apply preprocessing to see last value';
       }
-    } else if (strategy.method === 'median' && column.statistics?.raw?.median !== undefined && column.statistics?.raw?.median !== null) {
+    } else if (strategy.method === 'median' && column.statistics?.raw?.median !== undefined) {
       content = `Median: ${column.statistics.raw.median}`
-    } else if (strategy.method === 'mean' && column.statistics?.raw?.mean !== undefined && column.statistics?.raw?.mean !== null) {
+    } else if (strategy.method === 'mean' && column.statistics?.raw?.mean !== undefined) {
       content = `Mean: ${column.statistics.raw.mean}`
     } else {
       return null;

data/app/jobs/easy_ml/compute_feature_job.rb CHANGED Viewed

@@ -5,40 +5,23 @@ module EasyML
     @queue = :easy_ml
     def self.perform(batch_id, batch_args = {})
-      begin
-        # This is very, very, very, very, very important
-        # if you don't dup the batch_args, resque-batched-job will
-        # fail in some non-obvious ways, because it will try to
-        # decode to match the original batch args EXACTLY.
-        #
-        # This will waste your time so please just don't remove this .dup!!!
-        #
-        # https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
-        batch_args = batch_args.dup
-        run_one_batch(batch_id, batch_args)
-      rescue => e
-        EasyML::Feature.transaction do
-          return if dataset.reload.workflow_status == :failed
-          feature.update(workflow_status: :failed)
-          dataset.update(workflow_status: :failed)
-          build_error_with_context(dataset, e, batch_id, feature)
-        end
-      end
+      # This is very, very, very, very, very important
+      # if you don't dup the batch_args, resque-batched-job will
+      # fail in some non-obvious ways, because it will try to
+      # decode to match the original batch args EXACTLY.
+      #
+      # This will waste your time so please just don't remove this .dup!!!
+      #
+      # https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
+      batch_args = batch_args.dup
+      puts "Running batch #{batch_id} with args #{batch_args}"
+      run_one_batch(batch_id, batch_args)
     end
     def self.run_one_batch(batch_id, batch_args)
       EasyML::Feature.fit_one_batch(batch_id, batch_args)
     end
-    def self.build_error_with_context(dataset, error, batch_id, feature)
-      error = EasyML::Event.handle_error(dataset, error)
-      batch = feature.build_batch(batch_id: batch_id)
-      # Convert any dataframes in the context to serialized form
-      error.create_context(context: batch)
-    end
     def self.after_batch_hook(batch_id, *args)
       batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
       feature_ids = batch_args.pluck(:feature_id).uniq

data/app/models/easy_ml/column.rb CHANGED Viewed

@@ -43,6 +43,7 @@ module EasyML
     before_save :set_defaults
     before_save :set_feature_lineage
     before_save :set_polars_datatype
+    after_find :ensure_feature_exists
     # Scopes
     scope :visible, -> { where(hidden: false) }
@@ -108,6 +109,13 @@ module EasyML
           }
     scope :is_learning, -> { where(is_learning: true) }
+    def ensure_feature_exists
+      if feature && !feature.has_code?
+        feature.destroy
+        update(feature_id: nil)
+      end
+    end
     def display_attributes
       attributes.except(:statistics)
     end
@@ -282,9 +290,9 @@ module EasyML
       end
     end
-    alias_method :feature, :computing_feature
     def set_feature_lineage
+      return if dataset.nil?
       if dataset.features.computed_column_names.include?(name)
         if computed_by.nil?
           assign_attributes(

data/app/models/easy_ml/dataset.rb CHANGED Viewed

@@ -112,7 +112,7 @@ module EasyML
     end
     def schema
-      read_attribute(:schema) || datasource.schema
+      read_attribute(:schema) || datasource.schema || datasource.after_sync.schema
     end
     def processed_schema
@@ -186,9 +186,12 @@ module EasyML
     def actually_refresh
       refreshing do
+        puts "actually_refresh"
         learn(delete: false) # After syncing datasource, learn new statistics + sync columns
         process_data
+        puts "process_data"
         fully_reload
+        puts "Learning..."
         learn
         learn_statistics(type: :processed) # After processing data, we learn any new statistics
         now = UTC.now
@@ -208,7 +211,9 @@ module EasyML
       return refresh_async if async
       refreshing do
+        puts "prepare.."
         prepare
+        puts "fit features..."
         fit_features(async: async)
       end
     end
@@ -219,13 +224,13 @@ module EasyML
     def fit_features(async: false, features: self.features, force: false)
       features_to_compute = force ? features : features.needs_fit
-      return if features_to_compute.empty?
+      return after_fit_features if features_to_compute.empty?
       features.first.fit(features: features_to_compute, async: async)
     end
     def after_fit_features
-      puts "AFTER FIT FEATURES!"
+      puts "after fit features..."
       unlock!
       reload
       return if failed?

data/app/models/easy_ml/datasource.rb CHANGED Viewed

@@ -122,10 +122,12 @@ module EasyML
       self.refreshed_at = Time.now
       self.sha = adapter.sha
       save
+      self.schema
     end
     def refresh
       unless adapter.needs_refresh?
+        after_sync if schema.nil?
         update(sha: adapter.sha) if sha.nil?
         update!(is_syncing: false)
         return

data/app/models/easy_ml/feature.rb CHANGED Viewed

@@ -82,7 +82,7 @@ module EasyML
             where(id: fittable.map(&:id))
           end
     scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
-    scope :ready_to_apply, -> { where.not(id: needs_fit.map(&:id)) }
+    scope :ready_to_apply, -> { where(needs_fit: false).where.not(id: has_changes.map(&:id)) }
     before_save :apply_defaults, if: :new_record?
     before_save :update_sha
@@ -95,6 +95,10 @@ module EasyML
       raise InvalidFeatureError, "Invalid feature class: #{feature_class}"
     end
+    def has_code?
+      feature_klass.present?
+    end
     def adapter
       @adapter ||= feature_klass.new
     end
@@ -250,18 +254,31 @@ module EasyML
       dataset = feature.dataset
       # Check if any feature has failed before proceeding
-      if dataset.features.any? { |f| f.workflow_status == "failed" }
-        return
-      end
+      return if dataset.features.any? { |f| f.workflow_status == "failed" }
       feature.update(workflow_status: :analyzing) if feature.workflow_status == :ready
       begin
         feature.fit_batch(batch_args.merge!(batch_id: batch_id))
       rescue => e
-        EasyML::Feature.fit_feature_failed(dataset, e)
+        EasyML::Feature.transaction do
+          return if dataset.reload.workflow_status == :failed
+          feature.update(workflow_status: :failed)
+          dataset.update(workflow_status: :failed)
+          build_error_with_context(dataset, e, batch_id, feature)
+        end
         raise e
       end
     end
+    def self.build_error_with_context(dataset, error, batch_id, feature)
+      error = EasyML::Event.handle_error(dataset, error)
+      batch = feature.build_batch(batch_id: batch_id)
+      # Convert any dataframes in the context to serialized form
+      error.create_context(context: batch)
+    end
     def self.fit_feature_failed(dataset, e)
       dataset.update(workflow_status: :failed)
       EasyML::Event.handle_error(dataset, e)
@@ -447,6 +464,8 @@ module EasyML
     end
     def after_fit
+      update_sha
       updates = {
         fit_at: Time.current,
         needs_fit: false,
@@ -508,7 +527,11 @@ module EasyML
     end
     def feature_klass
-      @feature_klass ||= EasyML::Features::Registry.find(feature_class.to_s).dig(:feature_class).constantize
+      begin
+        @feature_klass ||= EasyML::Features::Registry.find(feature_class.to_s).dig(:feature_class).constantize
+      rescue => e
+        nil
+      end
     end
     def config

data/lib/easy_ml/feature_store.rb CHANGED Viewed

@@ -15,10 +15,10 @@ module EasyML
       max_key = df[primary_key].max
       batch_size = feature.batch_size || 10_000
-      # Try to parse as integers if they're strings
       begin
-        min_key = min_key.to_i if min_key.is_a?(String)
-        max_key = max_key.to_i if max_key.is_a?(String)
+        # We are intentionally not using to_i, so it will raise an error for keys like "A1"
+        min_key = Integer(min_key) if min_key.is_a?(String)
+        max_key = Integer(max_key) if max_key.is_a?(String)
       rescue ArgumentError
         return store_without_partitioning(df)
       end

data/lib/easy_ml/version.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 # frozen_string_literal: true
 module EasyML
-  VERSION = "0.2.0-rc65"
+  VERSION = "0.2.0-rc69"
   module Version
   end

data/public/easy_ml/assets/.vite/manifest.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "entrypoints/Application.tsx": {
-    "file": "assets/entrypoints/Application.tsx-v1q2Ux1T.js",
+    "file": "assets/entrypoints/Application.tsx-CibZcrBc.js",
     "name": "entrypoints/Application.tsx",
     "src": "entrypoints/Application.tsx",
     "isEntry": true,