easy_ml 0.2.0.pre.rc81 → 0.2.0.pre.rc83
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/easy_ml/feature.rb +6 -1
- data/app/models/easy_ml/model.rb +2 -2
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +1 -1
- data/app/models/easy_ml/models/xgboost.rb +6 -10
- data/lib/easy_ml/core/tuner.rb +2 -2
- data/lib/easy_ml/data/embeddings/compression.rb +1 -0
- data/lib/easy_ml/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2d5a13f7e4d772c20491b6c8c1ffd52674e0e29972ead72409904118fdc33035
|
4
|
+
data.tar.gz: 56f66ab6a77dbadb6512e92fe1cf4651419ad46c7b2be42c12ea219e70f7991b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 53fdbb0198863035ba8ac69b36936ce20a5d7a3fe057f13f8cb95e67e437f31ff5606e8f976a71f9d7772a788ce986c6e0afb61c5a76ef2eac5163e736144d9d
|
7
|
+
data.tar.gz: cbfc250d20481840fe45342df010e9f2a1adb8fe10d963a068d127d784e6b20e72c4ae7b99d8ece5b71c7e32a1ace81bb6d71c10f1a3ad019b7ec987ef891dd0
|
data/app/models/easy_ml/feature.rb
CHANGED
@@ -34,6 +34,7 @@ module EasyML
|
|
34
34
|
def compute_sha(feature_class)
|
35
35
|
require "digest"
|
36
36
|
path = feature_class.constantize.instance_method(:transform).source_location.first
|
37
|
+
return nil unless File.exist?(path)
|
37
38
|
current_mtime = File.mtime(path)
|
38
39
|
cache_key = "feature_sha/#{path}"
|
39
40
|
|
@@ -81,7 +82,10 @@ module EasyML
|
|
81
82
|
fittable = fittable.select(&:fittable?)
|
82
83
|
where(id: fittable.map(&:id))
|
83
84
|
end
|
84
|
-
scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
|
85
|
+
scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit).or(datasource_was_refreshed) }
|
86
|
+
scope :datasource_was_refreshed, -> do
|
87
|
+
where(id: all.select(&:datasource_was_refreshed?).map(&:id))
|
88
|
+
end
|
85
89
|
scope :ready_to_apply, -> do
|
86
90
|
base = where(needs_fit: false).where.not(id: has_changes.map(&:id))
|
87
91
|
doesnt_fit = where_no_fit
|
@@ -144,6 +148,7 @@ module EasyML
|
|
144
148
|
end
|
145
149
|
|
146
150
|
def datasource_was_refreshed?
|
151
|
+
return false unless fittable?
|
147
152
|
return true if fit_at.nil?
|
148
153
|
return false if dataset.datasource.refreshed_at.nil?
|
149
154
|
|
data/app/models/easy_ml/model.rb
CHANGED
@@ -624,8 +624,8 @@ module EasyML
|
|
624
624
|
private
|
625
625
|
|
626
626
|
def default_evaluation_inputs
|
627
|
-
x_true, y_true = dataset.test(split_ys: true)
|
628
|
-
ds = dataset.test(all_columns: true)
|
627
|
+
x_true, y_true = dataset.processed.test(split_ys: true, all_columns: true)
|
628
|
+
ds = dataset.processed.test(all_columns: true)
|
629
629
|
y_pred = predict(x_true)
|
630
630
|
{
|
631
631
|
x_true: x_true,
|
data/app/models/easy_ml/models/xgboost/evals_callback.rb
CHANGED
@@ -50,7 +50,7 @@ module EasyML
|
|
50
50
|
x_valid = x_valid.select(model.dataset.col_order(inference: true))
|
51
51
|
@preprocessed ||= model.preprocess(x_valid, y_valid)
|
52
52
|
y_pred = model.predict(@preprocessed)
|
53
|
-
dataset = model.dataset.valid(all_columns: true)
|
53
|
+
dataset = model.dataset.processed.valid(all_columns: true)
|
54
54
|
|
55
55
|
metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
|
56
56
|
Wandb.log(metrics)
|
data/app/models/easy_ml/models/xgboost.rb
CHANGED
@@ -427,11 +427,11 @@ module EasyML
|
|
427
427
|
def prepare_data
|
428
428
|
if @d_train.nil?
|
429
429
|
col_order = dataset.col_order
|
430
|
-
x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
|
430
|
+
x_sample, y_sample = dataset.processed.train(split_ys: true, limit: 5, select: col_order, lazy: true)
|
431
431
|
preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
|
432
|
-
x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
|
433
|
-
x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
|
434
|
-
x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
|
432
|
+
x_train, y_train = dataset.processed.train(split_ys: true, select: col_order, lazy: true)
|
433
|
+
x_valid, y_valid = dataset.processed.valid(split_ys: true, select: col_order, lazy: true)
|
434
|
+
x_test, y_test = dataset.processed.test(split_ys: true, select: col_order, lazy: true)
|
435
435
|
@d_train = preprocess(x_train, y_train)
|
436
436
|
@d_valid = preprocess(x_valid, y_valid)
|
437
437
|
@d_test = preprocess(x_test, y_test)
|
@@ -457,7 +457,7 @@ module EasyML
|
|
457
457
|
lazy = xs.is_a?(Polars::LazyFrame)
|
458
458
|
return xs if (lazy ? xs.limit(1).collect : xs).shape[0] == 0
|
459
459
|
|
460
|
-
weights_col = model.weights_column || nil
|
460
|
+
weights_col = (model.weights_column.nil? || model.weights_column.blank?) ? nil : model.weights_column
|
461
461
|
|
462
462
|
if weights_col == model.dataset.target
|
463
463
|
raise ArgumentError, "Weight column cannot be the target column"
|
@@ -468,11 +468,7 @@ module EasyML
|
|
468
468
|
feature_cols -= [weights_col] if weights_col
|
469
469
|
|
470
470
|
# Get features, labels and weights
|
471
|
-
|
472
|
-
features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
|
473
|
-
rescue => e
|
474
|
-
binding.pry
|
475
|
-
end
|
471
|
+
features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
|
476
472
|
weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
|
477
473
|
weights = weights.flatten if weights
|
478
474
|
if ys.present?
|
data/lib/easy_ml/core/tuner.rb
CHANGED
@@ -73,13 +73,13 @@ module EasyML
|
|
73
73
|
model.task = task
|
74
74
|
|
75
75
|
model.dataset.refresh if model.dataset.needs_refresh?
|
76
|
-
x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
|
76
|
+
x_valid, y_valid = model.dataset.processed.valid(split_ys: true, all_columns: true)
|
77
77
|
x_normalized = model.dataset.normalize(x_valid, inference: true)
|
78
78
|
x_normalized = model.preprocess(x_normalized)
|
79
79
|
self.x_valid = x_valid
|
80
80
|
self.y_valid = y_valid
|
81
81
|
self.x_normalized = x_normalized
|
82
|
-
self.dataset = model.dataset.valid(all_columns: true)
|
82
|
+
self.dataset = model.dataset.processed.valid(all_columns: true)
|
83
83
|
adapter.tune_started_at = tune_started_at
|
84
84
|
adapter.x_valid = x_valid
|
85
85
|
adapter.y_valid = y_valid
|
data/lib/easy_ml/data/embeddings/compression.rb
CHANGED
@@ -0,0 +1 @@
|
|
1
|
+
|
data/lib/easy_ml/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc81
|
4
|
+
version: 0.2.0.pre.rc83
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-02-
|
11
|
+
date: 2025-02-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|