easy_ml 0.2.0.pre.rc81 → 0.2.0.pre.rc82

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 169873e9ea5e1b00f7a4e499a2aeffc377615757cdd4b5fe6d70c8c454b9d426
4
- data.tar.gz: fc1d4509606f011bd3adbdf367e767a3e9dfc4fdbb6b5cd91bb413f72da364b2
3
+ metadata.gz: ce245d6900c4c5c001c0de9982894ccf6b41faef31e8c958dc540ef05fe426e4
4
+ data.tar.gz: a120f14076a9ff83ca6afb8b0bd651b9ea9ed0185d42f23b82f5de6b2a4de831
5
5
  SHA512:
6
- metadata.gz: f4ea106a66d3185f612e481b607cef21f5453a00ef6eb12558e53f43aa1d68b8f20d1e950c1840d97180ba6271db9b9b42c8a2d480c22ca8c1e33d1b768590d2
7
- data.tar.gz: 24885cdecd46d612be8b8ce7d4bd9889bdf60420bc82c01993cfe0168e454ebdaaa70899f5cf9cc879ff89a895fd00b1761d01c945dafffc784f7bc1e4a2fb8e
6
+ metadata.gz: db2b7292bf07b5122a7949a111c56a25f8848496eaadfc94f101a271054fd4c21d401b4c941f69d7579c8cdc9887e68c4105f11720dba13aa7fd0fcdafb79b81
7
+ data.tar.gz: f5dc8ffee52fb67b6cda8e4462e540900d528dd9c15ff49faa8ca45f21f9e4968129b996a5018a91801aae431a65b5274d64e69045e5feeeec6f12f09900cda3
@@ -10,13 +10,13 @@ module EasyML
10
10
 
11
11
  @last_activity = Time.current
12
12
  setup_signal_traps
13
- @monitor_thread = start_monitor_thread
13
+ # @monitor_thread = start_monitor_thread
14
14
 
15
15
  @model.actually_train do |iteration_info|
16
16
  @last_activity = Time.current
17
17
  end
18
18
  ensure
19
- @monitor_thread&.exit
19
+ # @monitor_thread&.exit
20
20
  @model.unlock!
21
21
  end
22
22
 
@@ -81,7 +81,10 @@ module EasyML
81
81
  fittable = fittable.select(&:fittable?)
82
82
  where(id: fittable.map(&:id))
83
83
  end
84
- scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit) }
84
+ scope :needs_fit, -> { has_changes.or(never_applied).or(never_fit).or(datasource_was_refreshed) }
85
+ scope :datasource_was_refreshed, -> do
86
+ where(id: all.select(&:datasource_was_refreshed?).map(&:id))
87
+ end
85
88
  scope :ready_to_apply, -> do
86
89
  base = where(needs_fit: false).where.not(id: has_changes.map(&:id))
87
90
  doesnt_fit = where_no_fit
@@ -144,6 +147,7 @@ module EasyML
144
147
  end
145
148
 
146
149
  def datasource_was_refreshed?
150
+ return false unless fittable?
147
151
  return true if fit_at.nil?
148
152
  return false if dataset.datasource.refreshed_at.nil?
149
153
 
@@ -624,8 +624,8 @@ module EasyML
624
624
  private
625
625
 
626
626
  def default_evaluation_inputs
627
- x_true, y_true = dataset.test(split_ys: true)
628
- ds = dataset.test(all_columns: true)
627
+ x_true, y_true = dataset.processed.test(split_ys: true, all_columns: true)
628
+ ds = dataset.processed.test(all_columns: true)
629
629
  y_pred = predict(x_true)
630
630
  {
631
631
  x_true: x_true,
@@ -50,7 +50,7 @@ module EasyML
50
50
  x_valid = x_valid.select(model.dataset.col_order(inference: true))
51
51
  @preprocessed ||= model.preprocess(x_valid, y_valid)
52
52
  y_pred = model.predict(@preprocessed)
53
- dataset = model.dataset.valid(all_columns: true)
53
+ dataset = model.dataset.processed.valid(all_columns: true)
54
54
 
55
55
  metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
56
56
  Wandb.log(metrics)
@@ -427,11 +427,11 @@ module EasyML
427
427
  def prepare_data
428
428
  if @d_train.nil?
429
429
  col_order = dataset.col_order
430
- x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
430
+ x_sample, y_sample = dataset.processed.train(split_ys: true, limit: 5, select: col_order, lazy: true)
431
431
  preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
432
- x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
433
- x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
434
- x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
432
+ x_train, y_train = dataset.processed.train(split_ys: true, select: col_order, lazy: true)
433
+ x_valid, y_valid = dataset.processed.valid(split_ys: true, select: col_order, lazy: true)
434
+ x_test, y_test = dataset.processed.test(split_ys: true, select: col_order, lazy: true)
435
435
  @d_train = preprocess(x_train, y_train)
436
436
  @d_valid = preprocess(x_valid, y_valid)
437
437
  @d_test = preprocess(x_test, y_test)
@@ -457,7 +457,7 @@ module EasyML
457
457
  lazy = xs.is_a?(Polars::LazyFrame)
458
458
  return xs if (lazy ? xs.limit(1).collect : xs).shape[0] == 0
459
459
 
460
- weights_col = model.weights_column || nil
460
+ weights_col = (model.weights_column.nil? || model.weights_column.blank?) ? nil : model.weights_column
461
461
 
462
462
  if weights_col == model.dataset.target
463
463
  raise ArgumentError, "Weight column cannot be the target column"
@@ -468,11 +468,7 @@ module EasyML
468
468
  feature_cols -= [weights_col] if weights_col
469
469
 
470
470
  # Get features, labels and weights
471
- begin
472
- features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
473
- rescue => e
474
- binding.pry
475
- end
471
+ features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
476
472
  weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
477
473
  weights = weights.flatten if weights
478
474
  if ys.present?
@@ -73,13 +73,13 @@ module EasyML
73
73
  model.task = task
74
74
 
75
75
  model.dataset.refresh if model.dataset.needs_refresh?
76
- x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
76
+ x_valid, y_valid = model.dataset.processed.valid(split_ys: true, all_columns: true)
77
77
  x_normalized = model.dataset.normalize(x_valid, inference: true)
78
78
  x_normalized = model.preprocess(x_normalized)
79
79
  self.x_valid = x_valid
80
80
  self.y_valid = y_valid
81
81
  self.x_normalized = x_normalized
82
- self.dataset = model.dataset.valid(all_columns: true)
82
+ self.dataset = model.dataset.processed.valid(all_columns: true)
83
83
  adapter.tune_started_at = tune_started_at
84
84
  adapter.x_valid = x_valid
85
85
  adapter.y_valid = y_valid
@@ -95,7 +95,7 @@ module EasyML
95
95
  keylist = unique_id_key(subdir: "keylist")
96
96
 
97
97
  acquire_lock(keylist) do |suo|
98
- suo.client.sadd(keylist, key)
98
+ suo.client.sadd?(keylist, key)
99
99
  end
100
100
  end
101
101
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc81"
4
+ VERSION = "0.2.0-rc82"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc81
4
+ version: 0.2.0.pre.rc82
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-02-19 00:00:00.000000000 Z
11
+ date: 2025-02-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord