easy_ml 0.2.0.pre.rc65 → 0.2.0.pre.rc68
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/jobs/easy_ml/compute_feature_job.rb +10 -28
- data/app/models/easy_ml/column.rb +10 -0
- data/app/models/easy_ml/dataset.rb +1 -2
- data/app/models/easy_ml/feature.rb +29 -6
- data/lib/easy_ml/feature_store.rb +3 -3
- data/lib/easy_ml/version.rb +1 -1
- metadata +1 -2
- data/app/jobs/easy_ml/finalize_feature_job.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2404bc2b6613d95627dc89265e76351f648d9b45e65f5935a71a0e2334d08d2c
|
4
|
+
data.tar.gz: 072f1d58d6e5d7864e3cdb365701c6fb6e635bc20e3e9ef415fe52772671ce76
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d4e8ab597db1470630e555448fb057af07fab38ff5125fd7ce548db5ba71ca7abac26d1761fb892459890b5c8bce169cd9e165d80b4b91ec6a806dda1a36cbe5
|
7
|
+
data.tar.gz: 71cab456d196c15e03b16e2677cb3b64e08527b2b4acc20ac8ee6c0f0ceea671b74df0d61c174a914cfa85f7e476ad77f264ae78e6b604ed8f50ca2756d84956
|
@@ -5,40 +5,22 @@ module EasyML
|
|
5
5
|
@queue = :easy_ml
|
6
6
|
|
7
7
|
def self.perform(batch_id, batch_args = {})
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
run_one_batch(batch_id, batch_args)
|
19
|
-
rescue => e
|
20
|
-
EasyML::Feature.transaction do
|
21
|
-
return if dataset.reload.workflow_status == :failed
|
22
|
-
|
23
|
-
feature.update(workflow_status: :failed)
|
24
|
-
dataset.update(workflow_status: :failed)
|
25
|
-
build_error_with_context(dataset, e, batch_id, feature)
|
26
|
-
end
|
27
|
-
end
|
8
|
+
# This is very, very, very, very, very important
|
9
|
+
# if you don't dup the batch_args, resque-batched-job will
|
10
|
+
# fail in some non-obvious ways, because it will try to
|
11
|
+
# decode to match the original batch args EXACTLY.
|
12
|
+
#
|
13
|
+
# This will waste your time so please just don't remove this .dup!!!
|
14
|
+
#
|
15
|
+
# https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
|
16
|
+
batch_args = batch_args.dup
|
17
|
+
run_one_batch(batch_id, batch_args)
|
28
18
|
end
|
29
19
|
|
30
20
|
def self.run_one_batch(batch_id, batch_args)
|
31
21
|
EasyML::Feature.fit_one_batch(batch_id, batch_args)
|
32
22
|
end
|
33
23
|
|
34
|
-
def self.build_error_with_context(dataset, error, batch_id, feature)
|
35
|
-
error = EasyML::Event.handle_error(dataset, error)
|
36
|
-
batch = feature.build_batch(batch_id: batch_id)
|
37
|
-
|
38
|
-
# Convert any dataframes in the context to serialized form
|
39
|
-
error.create_context(context: batch)
|
40
|
-
end
|
41
|
-
|
42
24
|
def self.after_batch_hook(batch_id, *args)
|
43
25
|
batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
|
44
26
|
feature_ids = batch_args.pluck(:feature_id).uniq
|
@@ -43,6 +43,7 @@ module EasyML
|
|
43
43
|
before_save :set_defaults
|
44
44
|
before_save :set_feature_lineage
|
45
45
|
before_save :set_polars_datatype
|
46
|
+
after_find :ensure_feature_exists
|
46
47
|
|
47
48
|
# Scopes
|
48
49
|
scope :visible, -> { where(hidden: false) }
|
@@ -108,6 +109,13 @@ module EasyML
|
|
108
109
|
}
|
109
110
|
scope :is_learning, -> { where(is_learning: true) }
|
110
111
|
|
112
|
+
def ensure_feature_exists
|
113
|
+
if feature && !feature.has_code?
|
114
|
+
feature.destroy
|
115
|
+
update(feature_id: nil)
|
116
|
+
end
|
117
|
+
end
|
118
|
+
|
111
119
|
def display_attributes
|
112
120
|
attributes.except(:statistics)
|
113
121
|
end
|
@@ -285,6 +293,8 @@ module EasyML
|
|
285
293
|
alias_method :feature, :computing_feature
|
286
294
|
|
287
295
|
def set_feature_lineage
|
296
|
+
return if dataset.nil?
|
297
|
+
|
288
298
|
if dataset.features.computed_column_names.include?(name)
|
289
299
|
if computed_by.nil?
|
290
300
|
assign_attributes(
|
@@ -219,13 +219,12 @@ module EasyML
|
|
219
219
|
|
220
220
|
def fit_features(async: false, features: self.features, force: false)
|
221
221
|
features_to_compute = force ? features : features.needs_fit
|
222
|
-
return if features_to_compute.empty?
|
222
|
+
return after_fit_features if features_to_compute.empty?
|
223
223
|
|
224
224
|
features.first.fit(features: features_to_compute, async: async)
|
225
225
|
end
|
226
226
|
|
227
227
|
def after_fit_features
|
228
|
-
puts "AFTER FIT FEATURES!"
|
229
228
|
unlock!
|
230
229
|
reload
|
231
230
|
return if failed?
|
@@ -82,7 +82,7 @@ module EasyML
|
|
82
82
|
where(id: fittable.map(&:id))
|
83
83
|
end
|
84
84
|
scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
|
85
|
-
scope :ready_to_apply, -> { where.not(id:
|
85
|
+
scope :ready_to_apply, -> { where(needs_fit: false).where.not(id: has_changes.map(&:id)) }
|
86
86
|
|
87
87
|
before_save :apply_defaults, if: :new_record?
|
88
88
|
before_save :update_sha
|
@@ -95,6 +95,10 @@ module EasyML
|
|
95
95
|
raise InvalidFeatureError, "Invalid feature class: #{feature_class}"
|
96
96
|
end
|
97
97
|
|
98
|
+
def has_code?
|
99
|
+
feature_klass.present?
|
100
|
+
end
|
101
|
+
|
98
102
|
def adapter
|
99
103
|
@adapter ||= feature_klass.new
|
100
104
|
end
|
@@ -250,18 +254,31 @@ module EasyML
|
|
250
254
|
dataset = feature.dataset
|
251
255
|
|
252
256
|
# Check if any feature has failed before proceeding
|
253
|
-
if dataset.features.any? { |f| f.workflow_status == "failed" }
|
254
|
-
|
255
|
-
end
|
257
|
+
return if dataset.features.any? { |f| f.workflow_status == "failed" }
|
258
|
+
|
256
259
|
feature.update(workflow_status: :analyzing) if feature.workflow_status == :ready
|
257
260
|
begin
|
258
261
|
feature.fit_batch(batch_args.merge!(batch_id: batch_id))
|
259
262
|
rescue => e
|
260
|
-
EasyML::Feature.
|
263
|
+
EasyML::Feature.transaction do
|
264
|
+
return if dataset.reload.workflow_status == :failed
|
265
|
+
|
266
|
+
feature.update(workflow_status: :failed)
|
267
|
+
dataset.update(workflow_status: :failed)
|
268
|
+
build_error_with_context(dataset, e, batch_id, feature)
|
269
|
+
end
|
261
270
|
raise e
|
262
271
|
end
|
263
272
|
end
|
264
273
|
|
274
|
+
def self.build_error_with_context(dataset, error, batch_id, feature)
|
275
|
+
error = EasyML::Event.handle_error(dataset, error)
|
276
|
+
batch = feature.build_batch(batch_id: batch_id)
|
277
|
+
|
278
|
+
# Convert any dataframes in the context to serialized form
|
279
|
+
error.create_context(context: batch)
|
280
|
+
end
|
281
|
+
|
265
282
|
def self.fit_feature_failed(dataset, e)
|
266
283
|
dataset.update(workflow_status: :failed)
|
267
284
|
EasyML::Event.handle_error(dataset, e)
|
@@ -447,6 +464,8 @@ module EasyML
|
|
447
464
|
end
|
448
465
|
|
449
466
|
def after_fit
|
467
|
+
update_sha
|
468
|
+
|
450
469
|
updates = {
|
451
470
|
fit_at: Time.current,
|
452
471
|
needs_fit: false,
|
@@ -508,7 +527,11 @@ module EasyML
|
|
508
527
|
end
|
509
528
|
|
510
529
|
def feature_klass
|
511
|
-
|
530
|
+
begin
|
531
|
+
@feature_klass ||= EasyML::Features::Registry.find(feature_class.to_s).dig(:feature_class).constantize
|
532
|
+
rescue => e
|
533
|
+
nil
|
534
|
+
end
|
512
535
|
end
|
513
536
|
|
514
537
|
def config
|
@@ -15,10 +15,10 @@ module EasyML
|
|
15
15
|
max_key = df[primary_key].max
|
16
16
|
batch_size = feature.batch_size || 10_000
|
17
17
|
|
18
|
-
# Try to parse as integers if they're strings
|
19
18
|
begin
|
20
|
-
|
21
|
-
|
19
|
+
# We are intentionally not using to_i, so it will raise an error for keys like "A1"
|
20
|
+
min_key = Integer(min_key) if min_key.is_a?(String)
|
21
|
+
max_key = Integer(max_key) if max_key.is_a?(String)
|
22
22
|
rescue ArgumentError
|
23
23
|
return store_without_partitioning(df)
|
24
24
|
end
|
data/lib/easy_ml/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc65
|
4
|
+
version: 0.2.0.pre.rc68
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
@@ -553,7 +553,6 @@ files:
|
|
553
553
|
- app/jobs/easy_ml/clean_job.rb
|
554
554
|
- app/jobs/easy_ml/compute_feature_job.rb
|
555
555
|
- app/jobs/easy_ml/deploy_job.rb
|
556
|
-
- app/jobs/easy_ml/finalize_feature_job.rb
|
557
556
|
- app/jobs/easy_ml/refresh_dataset_job.rb
|
558
557
|
- app/jobs/easy_ml/schedule_retraining_job.rb
|
559
558
|
- app/jobs/easy_ml/sync_datasource_job.rb
|