easy_ml 0.2.0.pre.rc81 → 0.2.0.pre.rc83

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 169873e9ea5e1b00f7a4e499a2aeffc377615757cdd4b5fe6d70c8c454b9d426
4
- data.tar.gz: fc1d4509606f011bd3adbdf367e767a3e9dfc4fdbb6b5cd91bb413f72da364b2
3
+ metadata.gz: 2d5a13f7e4d772c20491b6c8c1ffd52674e0e29972ead72409904118fdc33035
4
+ data.tar.gz: 56f66ab6a77dbadb6512e92fe1cf4651419ad46c7b2be42c12ea219e70f7991b
5
5
  SHA512:
6
- metadata.gz: f4ea106a66d3185f612e481b607cef21f5453a00ef6eb12558e53f43aa1d68b8f20d1e950c1840d97180ba6271db9b9b42c8a2d480c22ca8c1e33d1b768590d2
7
- data.tar.gz: 24885cdecd46d612be8b8ce7d4bd9889bdf60420bc82c01993cfe0168e454ebdaaa70899f5cf9cc879ff89a895fd00b1761d01c945dafffc784f7bc1e4a2fb8e
6
+ metadata.gz: 53fdbb0198863035ba8ac69b36936ce20a5d7a3fe057f13f8cb95e67e437f31ff5606e8f976a71f9d7772a788ce986c6e0afb61c5a76ef2eac5163e736144d9d
7
+ data.tar.gz: cbfc250d20481840fe45342df010e9f2a1adb8fe10d963a068d127d784e6b20e72c4ae7b99d8ece5b71c7e32a1ace81bb6d71c10f1a3ad019b7ec987ef891dd0
@@ -34,6 +34,7 @@ module EasyML
34
34
  def compute_sha(feature_class)
35
35
  require "digest"
36
36
  path = feature_class.constantize.instance_method(:transform).source_location.first
37
+ return nil unless File.exist?(path)
37
38
  current_mtime = File.mtime(path)
38
39
  cache_key = "feature_sha/#{path}"
39
40
 
@@ -81,7 +82,10 @@ module EasyML
81
82
  fittable = fittable.select(&:fittable?)
82
83
  where(id: fittable.map(&:id))
83
84
  end
84
- scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
85
+ scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit).or(datasource_was_refreshed) }
86
+ scope :datasource_was_refreshed, -> do
87
+ where(id: all.select(&:datasource_was_refreshed?).map(&:id))
88
+ end
85
89
  scope :ready_to_apply, -> do
86
90
  base = where(needs_fit: false).where.not(id: has_changes.map(&:id))
87
91
  doesnt_fit = where_no_fit
@@ -144,6 +148,7 @@ module EasyML
144
148
  end
145
149
 
146
150
  def datasource_was_refreshed?
151
+ return false unless fittable?
147
152
  return true if fit_at.nil?
148
153
  return false if dataset.datasource.refreshed_at.nil?
149
154
 
@@ -624,8 +624,8 @@ module EasyML
624
624
  private
625
625
 
626
626
  def default_evaluation_inputs
627
- x_true, y_true = dataset.test(split_ys: true)
628
- ds = dataset.test(all_columns: true)
627
+ x_true, y_true = dataset.processed.test(split_ys: true, all_columns: true)
628
+ ds = dataset.processed.test(all_columns: true)
629
629
  y_pred = predict(x_true)
630
630
  {
631
631
  x_true: x_true,
@@ -50,7 +50,7 @@ module EasyML
50
50
  x_valid = x_valid.select(model.dataset.col_order(inference: true))
51
51
  @preprocessed ||= model.preprocess(x_valid, y_valid)
52
52
  y_pred = model.predict(@preprocessed)
53
- dataset = model.dataset.valid(all_columns: true)
53
+ dataset = model.dataset.processed.valid(all_columns: true)
54
54
 
55
55
  metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
56
56
  Wandb.log(metrics)
@@ -427,11 +427,11 @@ module EasyML
427
427
  def prepare_data
428
428
  if @d_train.nil?
429
429
  col_order = dataset.col_order
430
- x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
430
+ x_sample, y_sample = dataset.processed.train(split_ys: true, limit: 5, select: col_order, lazy: true)
431
431
  preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
432
- x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
433
- x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
434
- x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
432
+ x_train, y_train = dataset.processed.train(split_ys: true, select: col_order, lazy: true)
433
+ x_valid, y_valid = dataset.processed.valid(split_ys: true, select: col_order, lazy: true)
434
+ x_test, y_test = dataset.processed.test(split_ys: true, select: col_order, lazy: true)
435
435
  @d_train = preprocess(x_train, y_train)
436
436
  @d_valid = preprocess(x_valid, y_valid)
437
437
  @d_test = preprocess(x_test, y_test)
@@ -457,7 +457,7 @@ module EasyML
457
457
  lazy = xs.is_a?(Polars::LazyFrame)
458
458
  return xs if (lazy ? xs.limit(1).collect : xs).shape[0] == 0
459
459
 
460
- weights_col = model.weights_column || nil
460
+ weights_col = (model.weights_column.nil? || model.weights_column.blank?) ? nil : model.weights_column
461
461
 
462
462
  if weights_col == model.dataset.target
463
463
  raise ArgumentError, "Weight column cannot be the target column"
@@ -468,11 +468,7 @@ module EasyML
468
468
  feature_cols -= [weights_col] if weights_col
469
469
 
470
470
  # Get features, labels and weights
471
- begin
472
- features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
473
- rescue => e
474
- binding.pry
475
- end
471
+ features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
476
472
  weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
477
473
  weights = weights.flatten if weights
478
474
  if ys.present?
@@ -73,13 +73,13 @@ module EasyML
73
73
  model.task = task
74
74
 
75
75
  model.dataset.refresh if model.dataset.needs_refresh?
76
- x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
76
+ x_valid, y_valid = model.dataset.processed.valid(split_ys: true, all_columns: true)
77
77
  x_normalized = model.dataset.normalize(x_valid, inference: true)
78
78
  x_normalized = model.preprocess(x_normalized)
79
79
  self.x_valid = x_valid
80
80
  self.y_valid = y_valid
81
81
  self.x_normalized = x_normalized
82
- self.dataset = model.dataset.valid(all_columns: true)
82
+ self.dataset = model.dataset.processed.valid(all_columns: true)
83
83
  adapter.tune_started_at = tune_started_at
84
84
  adapter.x_valid = x_valid
85
85
  adapter.y_valid = y_valid
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc81"
4
+ VERSION = "0.2.0-rc83"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc81
4
+ version: 0.2.0.pre.rc83
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord