easy_ml 0.2.0.pre.rc65 → 0.2.0.pre.rc68

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d6993d639004ee88981816cf11422f458d2fa5caa121e760d075c7a73ae70195
4
- data.tar.gz: 0e60804c7d59f8c3402be88b6b6ae5e24a7c9542875cbc29f606bbd500227b1f
3
+ metadata.gz: 2404bc2b6613d95627dc89265e76351f648d9b45e65f5935a71a0e2334d08d2c
4
+ data.tar.gz: 072f1d58d6e5d7864e3cdb365701c6fb6e635bc20e3e9ef415fe52772671ce76
5
5
  SHA512:
6
- metadata.gz: b11150da87d6dafb5d0a71f0c9b8391012a388ba51eb17544dca044d8616b2d9898fa65ef37e7c8cb22f627669e648b23edc37f6582f5aa40d4619228c57ed02
7
- data.tar.gz: 7fd29a43e9a2a15b3388e2c592fe5772d15e394103d7c0651fbe0404abd7bd8637e8ff32f3c38cac4220ce815ab49f369c5767d00dab0cd5ed060a924d3fa8bb
6
+ metadata.gz: d4e8ab597db1470630e555448fb057af07fab38ff5125fd7ce548db5ba71ca7abac26d1761fb892459890b5c8bce169cd9e165d80b4b91ec6a806dda1a36cbe5
7
+ data.tar.gz: 71cab456d196c15e03b16e2677cb3b64e08527b2b4acc20ac8ee6c0f0ceea671b74df0d61c174a914cfa85f7e476ad77f264ae78e6b604ed8f50ca2756d84956
@@ -5,40 +5,22 @@ module EasyML
5
5
  @queue = :easy_ml
6
6
 
7
7
  def self.perform(batch_id, batch_args = {})
8
- begin
9
- # This is very, very, very, very, very important
10
- # if you don't dup the batch_args, resque-batched-job will
11
- # fail in some non-obvious ways, because it will try to
12
- # decode to match the original batch args EXACTLY.
13
- #
14
- # This will waste your time so please just don't remove this .dup!!!
15
- #
16
- # https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
17
- batch_args = batch_args.dup
18
- run_one_batch(batch_id, batch_args)
19
- rescue => e
20
- EasyML::Feature.transaction do
21
- return if dataset.reload.workflow_status == :failed
22
-
23
- feature.update(workflow_status: :failed)
24
- dataset.update(workflow_status: :failed)
25
- build_error_with_context(dataset, e, batch_id, feature)
26
- end
27
- end
8
+ # This is very, very, very, very, very important
9
+ # if you don't dup the batch_args, resque-batched-job will
10
+ # fail in some non-obvious ways, because it will try to
11
+ # decode to match the original batch args EXACTLY.
12
+ #
13
+ # This will waste your time so please just don't remove this .dup!!!
14
+ #
15
+ # https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
16
+ batch_args = batch_args.dup
17
+ run_one_batch(batch_id, batch_args)
28
18
  end
29
19
 
30
20
  def self.run_one_batch(batch_id, batch_args)
31
21
  EasyML::Feature.fit_one_batch(batch_id, batch_args)
32
22
  end
33
23
 
34
- def self.build_error_with_context(dataset, error, batch_id, feature)
35
- error = EasyML::Event.handle_error(dataset, error)
36
- batch = feature.build_batch(batch_id: batch_id)
37
-
38
- # Convert any dataframes in the context to serialized form
39
- error.create_context(context: batch)
40
- end
41
-
42
24
  def self.after_batch_hook(batch_id, *args)
43
25
  batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
44
26
  feature_ids = batch_args.pluck(:feature_id).uniq
@@ -43,6 +43,7 @@ module EasyML
43
43
  before_save :set_defaults
44
44
  before_save :set_feature_lineage
45
45
  before_save :set_polars_datatype
46
+ after_find :ensure_feature_exists
46
47
 
47
48
  # Scopes
48
49
  scope :visible, -> { where(hidden: false) }
@@ -108,6 +109,13 @@ module EasyML
108
109
  }
109
110
  scope :is_learning, -> { where(is_learning: true) }
110
111
 
112
+ def ensure_feature_exists
113
+ if feature && !feature.has_code?
114
+ feature.destroy
115
+ update(feature_id: nil)
116
+ end
117
+ end
118
+
111
119
  def display_attributes
112
120
  attributes.except(:statistics)
113
121
  end
@@ -285,6 +293,8 @@ module EasyML
285
293
  alias_method :feature, :computing_feature
286
294
 
287
295
  def set_feature_lineage
296
+ return if dataset.nil?
297
+
288
298
  if dataset.features.computed_column_names.include?(name)
289
299
  if computed_by.nil?
290
300
  assign_attributes(
@@ -219,13 +219,12 @@ module EasyML
219
219
 
220
220
  def fit_features(async: false, features: self.features, force: false)
221
221
  features_to_compute = force ? features : features.needs_fit
222
- return if features_to_compute.empty?
222
+ return after_fit_features if features_to_compute.empty?
223
223
 
224
224
  features.first.fit(features: features_to_compute, async: async)
225
225
  end
226
226
 
227
227
  def after_fit_features
228
- puts "AFTER FIT FEATURES!"
229
228
  unlock!
230
229
  reload
231
230
  return if failed?
@@ -82,7 +82,7 @@ module EasyML
82
82
  where(id: fittable.map(&:id))
83
83
  end
84
84
  scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
85
- scope :ready_to_apply, -> { where.not(id: needs_fit.map(&:id)) }
85
+ scope :ready_to_apply, -> { where(needs_fit: false).where.not(id: has_changes.map(&:id)) }
86
86
 
87
87
  before_save :apply_defaults, if: :new_record?
88
88
  before_save :update_sha
@@ -95,6 +95,10 @@ module EasyML
95
95
  raise InvalidFeatureError, "Invalid feature class: #{feature_class}"
96
96
  end
97
97
 
98
+ def has_code?
99
+ feature_klass.present?
100
+ end
101
+
98
102
  def adapter
99
103
  @adapter ||= feature_klass.new
100
104
  end
@@ -250,18 +254,31 @@ module EasyML
250
254
  dataset = feature.dataset
251
255
 
252
256
  # Check if any feature has failed before proceeding
253
- if dataset.features.any? { |f| f.workflow_status == "failed" }
254
- return
255
- end
257
+ return if dataset.features.any? { |f| f.workflow_status == "failed" }
258
+
256
259
  feature.update(workflow_status: :analyzing) if feature.workflow_status == :ready
257
260
  begin
258
261
  feature.fit_batch(batch_args.merge!(batch_id: batch_id))
259
262
  rescue => e
260
- EasyML::Feature.fit_feature_failed(dataset, e)
263
+ EasyML::Feature.transaction do
264
+ return if dataset.reload.workflow_status == :failed
265
+
266
+ feature.update(workflow_status: :failed)
267
+ dataset.update(workflow_status: :failed)
268
+ build_error_with_context(dataset, e, batch_id, feature)
269
+ end
261
270
  raise e
262
271
  end
263
272
  end
264
273
 
274
+ def self.build_error_with_context(dataset, error, batch_id, feature)
275
+ error = EasyML::Event.handle_error(dataset, error)
276
+ batch = feature.build_batch(batch_id: batch_id)
277
+
278
+ # Convert any dataframes in the context to serialized form
279
+ error.create_context(context: batch)
280
+ end
281
+
265
282
  def self.fit_feature_failed(dataset, e)
266
283
  dataset.update(workflow_status: :failed)
267
284
  EasyML::Event.handle_error(dataset, e)
@@ -447,6 +464,8 @@ module EasyML
447
464
  end
448
465
 
449
466
  def after_fit
467
+ update_sha
468
+
450
469
  updates = {
451
470
  fit_at: Time.current,
452
471
  needs_fit: false,
@@ -508,7 +527,11 @@ module EasyML
508
527
  end
509
528
 
510
529
  def feature_klass
511
- @feature_klass ||= EasyML::Features::Registry.find(feature_class.to_s).dig(:feature_class).constantize
530
+ begin
531
+ @feature_klass ||= EasyML::Features::Registry.find(feature_class.to_s).dig(:feature_class).constantize
532
+ rescue => e
533
+ nil
534
+ end
512
535
  end
513
536
 
514
537
  def config
@@ -15,10 +15,10 @@ module EasyML
15
15
  max_key = df[primary_key].max
16
16
  batch_size = feature.batch_size || 10_000
17
17
 
18
- # Try to parse as integers if they're strings
19
18
  begin
20
- min_key = min_key.to_i if min_key.is_a?(String)
21
- max_key = max_key.to_i if max_key.is_a?(String)
19
+ # We are intentionally not using to_i, so it will raise an error for keys like "A1"
20
+ min_key = Integer(min_key) if min_key.is_a?(String)
21
+ max_key = Integer(max_key) if max_key.is_a?(String)
22
22
  rescue ArgumentError
23
23
  return store_without_partitioning(df)
24
24
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc65"
4
+ VERSION = "0.2.0-rc68"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc65
4
+ version: 0.2.0.pre.rc68
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
@@ -553,7 +553,6 @@ files:
553
553
  - app/jobs/easy_ml/clean_job.rb
554
554
  - app/jobs/easy_ml/compute_feature_job.rb
555
555
  - app/jobs/easy_ml/deploy_job.rb
556
- - app/jobs/easy_ml/finalize_feature_job.rb
557
556
  - app/jobs/easy_ml/refresh_dataset_job.rb
558
557
  - app/jobs/easy_ml/schedule_retraining_job.rb
559
558
  - app/jobs/easy_ml/sync_datasource_job.rb
@@ -1,13 +0,0 @@
1
- module EasyML
2
- class FinalizeFeatureJob < ApplicationJob
3
- queue_as :features
4
-
5
- def perform(feature_id)
6
- feature = EasyML::Feature.find(feature_id)
7
- feature.update!(
8
- applied_at: Time.current,
9
- needs_fit: false,
10
- )
11
- end
12
- end
13
- end