easy_ml 0.2.0.pre.rc77 → 0.2.0.pre.rc78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/models_controller.rb +3 -2
  3. data/app/frontend/components/ModelForm.tsx +16 -0
  4. data/app/frontend/components/ScheduleModal.tsx +0 -2
  5. data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
  6. data/app/jobs/easy_ml/application_job.rb +1 -0
  7. data/app/jobs/easy_ml/batch_job.rb +47 -6
  8. data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
  9. data/app/jobs/easy_ml/reaper.rb +14 -10
  10. data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
  11. data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
  12. data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
  13. data/app/models/easy_ml/column/imputers/base.rb +1 -1
  14. data/app/models/easy_ml/column/imputers/today.rb +1 -1
  15. data/app/models/easy_ml/column/selector.rb +0 -8
  16. data/app/models/easy_ml/column.rb +1 -1
  17. data/app/models/easy_ml/dataset/learner/base.rb +2 -2
  18. data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
  19. data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
  20. data/app/models/easy_ml/dataset.rb +25 -27
  21. data/app/models/easy_ml/datasource.rb +0 -6
  22. data/app/models/easy_ml/feature.rb +12 -3
  23. data/app/models/easy_ml/model.rb +20 -2
  24. data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
  25. data/app/models/easy_ml/models/xgboost.rb +52 -36
  26. data/app/models/easy_ml/retraining_run.rb +1 -1
  27. data/app/serializers/easy_ml/model_serializer.rb +1 -0
  28. data/lib/easy_ml/core/tuner.rb +7 -4
  29. data/lib/easy_ml/data/dataset_manager/writer/base.rb +26 -9
  30. data/lib/easy_ml/data/dataset_manager/writer.rb +5 -1
  31. data/lib/easy_ml/data/dataset_manager.rb +8 -2
  32. data/lib/easy_ml/data/polars_column.rb +19 -5
  33. data/lib/easy_ml/engine.rb +16 -14
  34. data/lib/easy_ml/feature_store.rb +19 -16
  35. data/lib/easy_ml/support/lockable.rb +1 -5
  36. data/lib/easy_ml/version.rb +1 -1
  37. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  38. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
  39. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
  40. metadata +6 -7
  41. data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
  42. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
@@ -45,7 +45,7 @@ module EasyML
45
45
  MODEL_NAMES = MODEL_OPTIONS.keys.freeze
46
46
  MODEL_CONSTANTS = MODEL_OPTIONS.values.map(&:constantize)
47
47
 
48
- add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics
48
+ add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics, :weights_column
49
49
  MODEL_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
50
50
  add_configuration_attributes attribute
51
51
  end
@@ -179,6 +179,8 @@ module EasyML
179
179
  end
180
180
 
181
181
  def actually_train(&progress_block)
182
+ raise untrainable_error unless trainable?
183
+
182
184
  lock_model do
183
185
  run = pending_run
184
186
  run.wrap_training do
@@ -258,7 +260,7 @@ module EasyML
258
260
 
259
261
  def formatted_version
260
262
  return nil unless version
261
- Time.strptime(version, "%Y%m%d%H%M%S").strftime("%B %-d, %Y at %-l:%M %p")
263
+ UTC.parse(version).in_time_zone(EasyML::Configuration.timezone).strftime("%B %-d, %Y at %-l:%M %p")
262
264
  end
263
265
 
264
266
  def last_run_at
@@ -277,6 +279,22 @@ module EasyML
277
279
  alias_method :latest_version, :inference_version
278
280
  alias_method :deployed, :inference_version
279
281
 
282
+ def trainable?
283
+ adapter.trainable?
284
+ end
285
+
286
+ def untrainable_columns
287
+ adapter.untrainable_columns
288
+ end
289
+
290
+ def untrainable_error
291
+ %Q(
292
+ Cannot train dataset containing null values!
293
+ Apply preprocessing to the following columns:
294
+ #{untrainable_columns.join(", ")}
295
+ )
296
+ end
297
+
280
298
  def predict(xs)
281
299
  load_model!
282
300
  unless xs.is_a?(XGBoost::DMatrix)
@@ -36,7 +36,7 @@ module EasyML
36
36
  if tuner.present?
37
37
  [tuner.x_valid, tuner.y_valid]
38
38
  else
39
- model.dataset.valid(split_ys: true)
39
+ model.dataset.valid(split_ys: true, lazy: true)
40
40
  end
41
41
  end
42
42
 
@@ -47,7 +47,8 @@ module EasyML
47
47
  if epoch % log_frequency == 0
48
48
  model.adapter.external_model = booster
49
49
  x_valid, y_valid = valid_dataset
50
- @preprocessed ||= model.preprocess(x_valid)
50
+ x_valid = x_valid.select(model.dataset.col_order(inference: true))
51
+ @preprocessed ||= model.preprocess(x_valid, y_valid)
51
52
  y_pred = model.predict(@preprocessed)
52
53
  dataset = model.dataset.valid(all_columns: true)
53
54
 
@@ -421,11 +421,11 @@ module EasyML
421
421
  def prepare_data
422
422
  if @d_train.nil?
423
423
  col_order = dataset.col_order
424
- x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order)
424
+ x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
425
425
  preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
426
- x_train, y_train = dataset.train(split_ys: true, select: col_order)
427
- x_valid, y_valid = dataset.valid(split_ys: true, select: col_order)
428
- x_test, y_test = dataset.test(split_ys: true, select: col_order)
426
+ x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
427
+ x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
428
+ x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
429
429
  @d_train = preprocess(x_train, y_train)
430
430
  @d_valid = preprocess(x_valid, y_valid)
431
431
  @d_test = preprocess(x_test, y_test)
@@ -434,21 +434,60 @@ module EasyML
434
434
  [@d_train, @d_valid, @d_test]
435
435
  end
436
436
 
437
+ def trainable?
438
+ untrainable_columns.empty?
439
+ end
440
+
441
+ def untrainable_columns
442
+ df = model.dataset.processed.data(lazy: true)
443
+
444
+ columns = df.columns
445
+ selects = columns.map do |col|
446
+ Polars.col(col).null_count.alias(col)
447
+ end
448
+ null_info = df.select(selects).collect
449
+ null_info.to_hashes.first.compact
450
+ col_list = null_info.to_hashes.first.transform_values { |v| v > 0 ? v : nil }.compact.keys
451
+
452
+ model.dataset.regular_columns(col_list)
453
+ end
454
+
437
455
  def preprocess(xs, ys = nil)
438
456
  return xs if xs.is_a?(::XGBoost::DMatrix)
457
+ weights_col = model.weights_column || nil
458
+
459
+ if weights_col == model.dataset.target
460
+ raise ArgumentError, "Weight column cannot be the target column"
461
+ end
462
+
463
+ # Extract feature columns (all columns except label and weight)
464
+ feature_cols = xs.columns
465
+ feature_cols -= [weights_col] if weights_col
466
+ lazy = xs.is_a?(Polars::LazyFrame)
467
+
468
+ # Get features, labels and weights
469
+ features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
470
+ weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
471
+ weights = weights.flatten if weights
472
+ if ys.present?
473
+ ys = ys.is_a?(Array) ? Polars::Series.new(ys) : ys
474
+ labels = lazy ? ys.collect.to_numo.flatten : ys.to_numo.flatten
475
+ else
476
+ labels = nil
477
+ end
478
+
479
+ kwargs = {
480
+ label: labels,
481
+ weight: weights,
482
+ }.compact
439
483
 
440
- orig_xs = xs.dup
441
- column_names = xs.columns
442
- xs = _preprocess(xs)
443
- ys = ys.nil? ? nil : _preprocess(ys).flatten
444
- kwargs = { label: ys }.compact
445
484
  begin
446
- ::XGBoost::DMatrix.new(xs, **kwargs).tap do |dmat|
447
- dmat.feature_names = column_names
485
+ ::XGBoost::DMatrix.new(features, **kwargs).tap do |dmatrix|
486
+ dmatrix.feature_names = feature_cols
448
487
  end
449
488
  rescue StandardError => e
450
- problematic_columns = orig_xs.schema.select { |k, v| [Polars::Categorical, Polars::String].include?(v) }
451
- problematic_xs = orig_xs.select(problematic_columns.keys)
489
+ problematic_columns = xs.schema.select { |k, v| [Polars::Categorical, Polars::String].include?(v) }
490
+ problematic_xs = lazy ? xs.lazy.select(problematic_columns.keys).collect : xs.select(problematic_columns.keys)
452
491
  raise %(
453
492
  Error building data for XGBoost.
454
493
  Apply preprocessing to columns
@@ -501,29 +540,6 @@ module EasyML
501
540
  cb_container.after_iteration(@booster, current_iteration, d_train, evals)
502
541
  end
503
542
 
504
- def _preprocess(df)
505
- return df if df.is_a?(Array)
506
-
507
- df.to_a.map do |row|
508
- row.values.map do |value|
509
- case value
510
- when Time
511
- value.to_i # Convert Time to Unix timestamp
512
- when Date
513
- value.to_time.to_i # Convert Date to Unix timestamp
514
- when String
515
- value
516
- when TrueClass, FalseClass
517
- value ? 1.0 : 0.0 # Convert booleans to 1.0 and 0.0
518
- when Integer
519
- value
520
- else
521
- value.to_f # Ensure everything else is converted to a float
522
- end
523
- end
524
- end
525
- end
526
-
527
543
  def initialize_model
528
544
  @xgboost_model = model_class.new(n_estimators: @hyperparameters.to_h.dig(:n_estimators))
529
545
  if block_given?
@@ -150,7 +150,7 @@ module EasyML
150
150
 
151
151
  training_model.dataset.refresh
152
152
  evaluator = retraining_job.evaluator.symbolize_keys
153
- x_test, y_test = training_model.dataset.test(split_ys: true)
153
+ x_test, y_test = training_model.dataset.test(split_ys: true, all_columns: true)
154
154
  y_pred = training_model.predict(x_test)
155
155
 
156
156
  metric = evaluator[:metric].to_sym
@@ -27,6 +27,7 @@ module EasyML
27
27
  :model_type,
28
28
  :task,
29
29
  :objective,
30
+ :weights_column,
30
31
  :metrics,
31
32
  :dataset_id,
32
33
  :status,
@@ -8,7 +8,7 @@ module EasyML
8
8
  :metrics, :objective, :n_trials, :direction, :evaluator,
9
9
  :study, :results, :adapter, :tune_started_at, :x_valid, :y_valid,
10
10
  :project_name, :job, :current_run, :trial_enumerator, :progress_block,
11
- :tuner_job, :dataset
11
+ :tuner_job, :dataset, :x_normalized
12
12
 
13
13
  def initialize(options = {})
14
14
  @model = options[:model]
@@ -73,9 +73,12 @@ module EasyML
73
73
  model.task = task
74
74
 
75
75
  model.dataset.refresh if model.dataset.needs_refresh?
76
- x_valid, y_valid = model.dataset.valid(split_ys: true, select: model.dataset.col_order)
76
+ x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
77
+ x_normalized = model.dataset.normalize(x_valid, inference: true)
78
+ x_normalized = model.preprocess(x_normalized)
77
79
  self.x_valid = x_valid
78
80
  self.y_valid = y_valid
81
+ self.x_normalized = x_normalized
79
82
  self.dataset = model.dataset.valid(all_columns: true)
80
83
  adapter.tune_started_at = tune_started_at
81
84
  adapter.x_valid = x_valid
@@ -99,7 +102,7 @@ module EasyML
99
102
  @study.tell(@current_trial, result)
100
103
  rescue StandardError => e
101
104
  puts EasyML::Event.easy_ml_context(e.backtrace)
102
- @tuner_run.update!(status: :failed, hyperparameters: {})
105
+ @tuner_run.update!(status: :failed, hyperparameters: model.hyperparameters.to_h)
103
106
  puts "Optuna failed with: #{e.message}"
104
107
  raise e
105
108
  end
@@ -138,7 +141,7 @@ module EasyML
138
141
  end
139
142
  end
140
143
 
141
- y_pred = model.predict(x_valid)
144
+ y_pred = model.predict(x_normalized)
142
145
  model.metrics = metrics
143
146
  metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
144
147
  metric = metrics.symbolize_keys.dig(model.evaluator[:metric].to_sym)
@@ -36,6 +36,10 @@ module EasyML
36
36
  clear_unique_id
37
37
  end
38
38
 
39
+ def unlock!
40
+ clear_all_keys
41
+ end
42
+
39
43
  private
40
44
 
41
45
  def files
@@ -50,6 +54,10 @@ module EasyML
50
54
  safe_write(df, unique_path(subdir: subdir))
51
55
  end
52
56
 
57
+ def acquire_lock(key, &block)
58
+ Support::Lockable.with_lock("#{key}:lock", wait_timeout: 2, &block)
59
+ end
60
+
53
61
  def unique_path(subdir: nil)
54
62
  filename = [filenames, unique_id(subdir: subdir), "parquet"].compact.join(".")
55
63
 
@@ -63,15 +71,18 @@ module EasyML
63
71
  end
64
72
 
65
73
  def clear_all_keys
66
- keys = list_keys
67
- Support::Lockable.with_lock(keys, wait_timeout: 2) do |suo|
68
- suo.client.del(keys)
74
+ list_keys.each { |key| unlock_file(key) }
75
+ end
76
+
77
+ def unlock_file(key)
78
+ acquire_lock(key) do |suo|
79
+ suo.client.del(key)
69
80
  end
70
81
  end
71
82
 
72
83
  def clear_unique_id(subdir: nil)
73
84
  key = unique_id_key(subdir: subdir)
74
- Support::Lockable.with_lock(key, wait_timeout: 2) do |suo|
85
+ acquire_lock(key) do |suo|
75
86
  suo.client.del(key)
76
87
  end
77
88
  end
@@ -83,7 +94,7 @@ module EasyML
83
94
  def add_key(key)
84
95
  keylist = unique_id_key(subdir: "keylist")
85
96
 
86
- Support::Lockable.with_lock(keylist, wait_timeout: 2) do |suo|
97
+ acquire_lock(keylist) do |suo|
87
98
  suo.client.sadd(keylist, key)
88
99
  end
89
100
  end
@@ -91,14 +102,20 @@ module EasyML
91
102
  def list_keys
92
103
  keylist = unique_id_key(subdir: "keylist")
93
104
 
94
- Support::Lockable.with_lock(keylist, wait_timeout: 2) do |suo|
95
- suo.client.smembers(keylist)
105
+ acquire_lock(keylist) do |suo|
106
+ if suo.client.type(keylist) == "set"
107
+ suo.client.smembers(keylist)
108
+ else
109
+ suo.client.del(keylist)
110
+ []
111
+ end
96
112
  end
97
113
  end
98
114
 
99
115
  def key_exists?(key)
100
116
  keylist = unique_id_key(subdir: "keylist")
101
- Support::Lockable.with_lock(keylist, wait_timeout: 2) do |suo|
117
+
118
+ acquire_lock(keylist) do |suo|
102
119
  suo.client.sismember(keylist, key)
103
120
  end
104
121
  end
@@ -107,7 +124,7 @@ module EasyML
107
124
  key = unique_id_key(subdir: subdir)
108
125
  add_key(key)
109
126
 
110
- Support::Lockable.with_lock(key, wait_timeout: 2) do |suo|
127
+ acquire_lock(key) do |suo|
111
128
  redis = suo.client
112
129
 
113
130
  seq = (redis.get(key) || "0").to_i
@@ -15,7 +15,7 @@ module EasyML
15
15
  ]
16
16
 
17
17
  attr_accessor :filenames, :root_dir, :partition,
18
- :append_only, :primary_key, :options
18
+ :primary_key, :options, :append_only, :named
19
19
 
20
20
  def initialize(options)
21
21
  @root_dir = options.dig(:root_dir)
@@ -27,6 +27,10 @@ module EasyML
27
27
  @options = options
28
28
  end
29
29
 
30
+ def unlock!
31
+ adapter_class.new(options).unlock!
32
+ end
33
+
30
34
  def store(df, *args)
31
35
  adapter_class.new(options.merge!(df: df)).store(*args)
32
36
  end
@@ -67,6 +67,8 @@ module EasyML
67
67
  Reader.sha(root_dir)
68
68
  end
69
69
 
70
+ # Transform CSV files into Parquet files, of all the same datatype.
71
+ # Learn datatypes of columns and store schema.
70
72
  def normalize
71
73
  Normalizer.normalize(root_dir)
72
74
  end
@@ -75,14 +77,18 @@ module EasyML
75
77
  query
76
78
  end
77
79
 
78
- def store(df, *args)
79
- writer.store(df, *args)
80
+ def unlock!
81
+ writer.unlock!
80
82
  end
81
83
 
82
84
  def compact
83
85
  writer.compact
84
86
  end
85
87
 
88
+ def store(df, *args)
89
+ writer.store(df, *args)
90
+ end
91
+
86
92
  def cp(from, to)
87
93
  writer.cp(from, to)
88
94
  end
@@ -124,11 +124,25 @@ module EasyML
124
124
  # @param series [Polars::Series] The string series to analyze
125
125
  # @return [Symbol] One of :datetime, :text, or :categorical
126
126
  def determine_string_type(series)
127
- if EasyML::Data::DateConverter.maybe_convert_date(Polars::DataFrame.new({ temp: series }),
128
- :temp)[:temp].dtype.is_a?(Polars::Datetime)
129
- :datetime
130
- else
131
- categorical_or_text?(series)
127
+ # Try to parse as numeric first
128
+ begin
129
+ # Try integer first
130
+ series.cast(Polars::Int64)
131
+ return :numeric
132
+ rescue StandardError
133
+ begin
134
+ # Try float if integer fails
135
+ series.cast(Polars::Float64)
136
+ return :numeric
137
+ rescue StandardError
138
+ # If not numeric, check for datetime or categorical
139
+ if EasyML::Data::DateConverter.maybe_convert_date(Polars::DataFrame.new({ temp: series }),
140
+ :temp)[:temp].dtype.is_a?(Polars::Datetime)
141
+ :datetime
142
+ else
143
+ categorical_or_text?(series)
144
+ end
145
+ end
132
146
  end
133
147
  end
134
148
 
@@ -55,7 +55,7 @@ module EasyML
55
55
  Polars.enable_string_cache
56
56
  end
57
57
 
58
- if %w[db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
58
+ if %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
59
59
  config.eager_load_paths = config.eager_load_paths.without(config.eager_load_paths.map(&:to_s).grep(/easy_ml/).map { |p| Pathname.new(p) })
60
60
  else
61
61
  config.after_initialize do
@@ -77,6 +77,21 @@ module EasyML
77
77
  end
78
78
  end
79
79
 
80
+ unless %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
81
+ initializer "easy_ml.configure_secrets" do
82
+ EasyML::Configuration.configure do |config|
83
+ raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
84
+ raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
85
+
86
+ config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
87
+ config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
88
+ config.s3_region = ENV["S3_REGION"] ? ENV["S3_REGION"] : "us-east-1"
89
+ config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
90
+ config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
91
+ end
92
+ end
93
+ end
94
+
80
95
  initializer "easy_ml.check_pending_migrations" do
81
96
  if defined?(Rails::Server)
82
97
  config.after_initialize do
@@ -96,19 +111,6 @@ module EasyML
96
111
  end
97
112
  end
98
113
 
99
- initializer "easy_ml.configure_secrets" do
100
- EasyML::Configuration.configure do |config|
101
- raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
102
- raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
103
-
104
- config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
105
- config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
106
- config.s3_region = ENV["S3_REGION"] if ENV["S3_REGION"]
107
- config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
108
- config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
109
- end
110
- end
111
-
112
114
  initializer "easy_ml.setup_generators" do |app|
113
115
  generators_path = EasyML::Engine.root.join("lib/easy_ml/railtie/generators")
114
116
  generators_dirs = Dir[File.join(generators_path, "**", "*.rb")]
@@ -5,19 +5,22 @@ module EasyML
5
5
  def initialize(feature)
6
6
  @feature = feature
7
7
 
8
- datasource_config = feature.dataset.datasource.configuration || {}
9
-
10
- options = {
11
- root_dir: feature_dir,
12
- filenames: "feature",
13
- append_only: false,
14
- primary_key: feature.primary_key&.first,
15
- partition_size: batch_size,
16
- s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
17
- s3_prefix: s3_prefix,
18
- polars_args: datasource_config.dig("polars_args"),
19
- }.compact
20
- super(options)
8
+ datasource_config = feature&.dataset&.datasource&.configuration
9
+ if datasource_config
10
+ options = {
11
+ root_dir: feature_dir,
12
+ filenames: "feature",
13
+ append_only: false,
14
+ primary_key: feature.primary_key&.first,
15
+ partition_size: batch_size,
16
+ s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
17
+ s3_prefix: s3_prefix,
18
+ polars_args: datasource_config.dig("polars_args"),
19
+ }.compact
20
+ super(options)
21
+ else
22
+ super({ root_dir: "" })
23
+ end
21
24
  end
22
25
 
23
26
  def cp(old_version, new_version)
@@ -30,7 +33,7 @@ module EasyML
30
33
  files_to_cp = Dir.glob(Pathname.new(old_dir).join("**/*")).select { |f| File.file?(f) }
31
34
 
32
35
  files_to_cp.each do |file|
33
- target_file = file.gsub(old_version.to_s, new_version.to_s)
36
+ target_file = file.gsub(old_dir, new_dir)
34
37
  FileUtils.mkdir_p(File.dirname(target_file))
35
38
  FileUtils.cp(file, target_file)
36
39
  end
@@ -46,9 +49,9 @@ module EasyML
46
49
  File.join(
47
50
  Rails.root,
48
51
  "easy_ml/datasets",
49
- feature.dataset.name.parameterize.gsub("-", "_"),
52
+ feature&.dataset&.name&.parameterize&.gsub("-", "_"),
50
53
  "features",
51
- feature.name.parameterize.gsub("-", "_"),
54
+ feature&.name&.parameterize&.gsub("-", "_"),
52
55
  version.to_s
53
56
  )
54
57
  end
@@ -22,7 +22,7 @@ module EasyML
22
22
  end
23
23
 
24
24
  def self.lock_client(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
25
- Suo::Client::Redis.new(prefixed_key(key), {
25
+ Suo::Client::Redis.new(key, {
26
26
  acquisition_timeout: wait_timeout,
27
27
  stale_lock_expiry: stale_timeout,
28
28
  resources: resources,
@@ -30,10 +30,6 @@ module EasyML
30
30
  })
31
31
  end
32
32
 
33
- def self.prefixed_key(key)
34
- "easy_ml:#{key}"
35
- end
36
-
37
33
  # Execute a block with a Redis lock
38
34
  def self.with_lock(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
39
35
  lock_key = nil
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc77"
4
+ VERSION = "0.2.0-rc78"
5
5
 
6
6
  module Version
7
7
  end
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "entrypoints/Application.tsx": {
3
- "file": "assets/entrypoints/Application.tsx-B1qLZuyu.js",
3
+ "file": "assets/entrypoints/Application.tsx-Bbf3mD_b.js",
4
4
  "name": "entrypoints/Application.tsx",
5
5
  "src": "entrypoints/Application.tsx",
6
6
  "isEntry": true,