easy_ml 0.2.0.pre.rc77 → 0.2.0.pre.rc78
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/easy_ml/models_controller.rb +3 -2
- data/app/frontend/components/ModelForm.tsx +16 -0
- data/app/frontend/components/ScheduleModal.tsx +0 -2
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
- data/app/jobs/easy_ml/application_job.rb +1 -0
- data/app/jobs/easy_ml/batch_job.rb +47 -6
- data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
- data/app/jobs/easy_ml/reaper.rb +14 -10
- data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
- data/app/models/easy_ml/column/imputers/base.rb +1 -1
- data/app/models/easy_ml/column/imputers/today.rb +1 -1
- data/app/models/easy_ml/column/selector.rb +0 -8
- data/app/models/easy_ml/column.rb +1 -1
- data/app/models/easy_ml/dataset/learner/base.rb +2 -2
- data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
- data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
- data/app/models/easy_ml/dataset.rb +25 -27
- data/app/models/easy_ml/datasource.rb +0 -6
- data/app/models/easy_ml/feature.rb +12 -3
- data/app/models/easy_ml/model.rb +20 -2
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
- data/app/models/easy_ml/models/xgboost.rb +52 -36
- data/app/models/easy_ml/retraining_run.rb +1 -1
- data/app/serializers/easy_ml/model_serializer.rb +1 -0
- data/lib/easy_ml/core/tuner.rb +7 -4
- data/lib/easy_ml/data/dataset_manager/writer/base.rb +26 -9
- data/lib/easy_ml/data/dataset_manager/writer.rb +5 -1
- data/lib/easy_ml/data/dataset_manager.rb +8 -2
- data/lib/easy_ml/data/polars_column.rb +19 -5
- data/lib/easy_ml/engine.rb +16 -14
- data/lib/easy_ml/feature_store.rb +19 -16
- data/lib/easy_ml/support/lockable.rb +1 -5
- data/lib/easy_ml/version.rb +1 -1
- data/public/easy_ml/assets/.vite/manifest.json +1 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
- metadata +6 -7
- data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
data/app/models/easy_ml/model.rb
CHANGED
@@ -45,7 +45,7 @@ module EasyML
|
|
45
45
|
MODEL_NAMES = MODEL_OPTIONS.keys.freeze
|
46
46
|
MODEL_CONSTANTS = MODEL_OPTIONS.values.map(&:constantize)
|
47
47
|
|
48
|
-
add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics
|
48
|
+
add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics, :weights_column
|
49
49
|
MODEL_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
|
50
50
|
add_configuration_attributes attribute
|
51
51
|
end
|
@@ -179,6 +179,8 @@ module EasyML
|
|
179
179
|
end
|
180
180
|
|
181
181
|
def actually_train(&progress_block)
|
182
|
+
raise untrainable_error unless trainable?
|
183
|
+
|
182
184
|
lock_model do
|
183
185
|
run = pending_run
|
184
186
|
run.wrap_training do
|
@@ -258,7 +260,7 @@ module EasyML
|
|
258
260
|
|
259
261
|
def formatted_version
|
260
262
|
return nil unless version
|
261
|
-
|
263
|
+
UTC.parse(version).in_time_zone(EasyML::Configuration.timezone).strftime("%B %-d, %Y at %-l:%M %p")
|
262
264
|
end
|
263
265
|
|
264
266
|
def last_run_at
|
@@ -277,6 +279,22 @@ module EasyML
|
|
277
279
|
alias_method :latest_version, :inference_version
|
278
280
|
alias_method :deployed, :inference_version
|
279
281
|
|
282
|
+
def trainable?
|
283
|
+
adapter.trainable?
|
284
|
+
end
|
285
|
+
|
286
|
+
def untrainable_columns
|
287
|
+
adapter.untrainable_columns
|
288
|
+
end
|
289
|
+
|
290
|
+
def untrainable_error
|
291
|
+
%Q(
|
292
|
+
Cannot train dataset containing null values!
|
293
|
+
Apply preprocessing to the following columns:
|
294
|
+
#{untrainable_columns.join(", ")}
|
295
|
+
)
|
296
|
+
end
|
297
|
+
|
280
298
|
def predict(xs)
|
281
299
|
load_model!
|
282
300
|
unless xs.is_a?(XGBoost::DMatrix)
|
@@ -36,7 +36,7 @@ module EasyML
|
|
36
36
|
if tuner.present?
|
37
37
|
[tuner.x_valid, tuner.y_valid]
|
38
38
|
else
|
39
|
-
model.dataset.valid(split_ys: true)
|
39
|
+
model.dataset.valid(split_ys: true, lazy: true)
|
40
40
|
end
|
41
41
|
end
|
42
42
|
|
@@ -47,7 +47,8 @@ module EasyML
|
|
47
47
|
if epoch % log_frequency == 0
|
48
48
|
model.adapter.external_model = booster
|
49
49
|
x_valid, y_valid = valid_dataset
|
50
|
-
|
50
|
+
x_valid = x_valid.select(model.dataset.col_order(inference: true))
|
51
|
+
@preprocessed ||= model.preprocess(x_valid, y_valid)
|
51
52
|
y_pred = model.predict(@preprocessed)
|
52
53
|
dataset = model.dataset.valid(all_columns: true)
|
53
54
|
|
@@ -421,11 +421,11 @@ module EasyML
|
|
421
421
|
def prepare_data
|
422
422
|
if @d_train.nil?
|
423
423
|
col_order = dataset.col_order
|
424
|
-
x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order)
|
424
|
+
x_sample, y_sample = dataset.train(split_ys: true, limit: 5, select: col_order, lazy: true)
|
425
425
|
preprocess(x_sample, y_sample) # Ensure we fail fast if the dataset is misconfigured
|
426
|
-
x_train, y_train = dataset.train(split_ys: true, select: col_order)
|
427
|
-
x_valid, y_valid = dataset.valid(split_ys: true, select: col_order)
|
428
|
-
x_test, y_test = dataset.test(split_ys: true, select: col_order)
|
426
|
+
x_train, y_train = dataset.train(split_ys: true, select: col_order, lazy: true)
|
427
|
+
x_valid, y_valid = dataset.valid(split_ys: true, select: col_order, lazy: true)
|
428
|
+
x_test, y_test = dataset.test(split_ys: true, select: col_order, lazy: true)
|
429
429
|
@d_train = preprocess(x_train, y_train)
|
430
430
|
@d_valid = preprocess(x_valid, y_valid)
|
431
431
|
@d_test = preprocess(x_test, y_test)
|
@@ -434,21 +434,60 @@ module EasyML
|
|
434
434
|
[@d_train, @d_valid, @d_test]
|
435
435
|
end
|
436
436
|
|
437
|
+
def trainable?
|
438
|
+
untrainable_columns.empty?
|
439
|
+
end
|
440
|
+
|
441
|
+
def untrainable_columns
|
442
|
+
df = model.dataset.processed.data(lazy: true)
|
443
|
+
|
444
|
+
columns = df.columns
|
445
|
+
selects = columns.map do |col|
|
446
|
+
Polars.col(col).null_count.alias(col)
|
447
|
+
end
|
448
|
+
null_info = df.select(selects).collect
|
449
|
+
null_info.to_hashes.first.compact
|
450
|
+
col_list = null_info.to_hashes.first.transform_values { |v| v > 0 ? v : nil }.compact.keys
|
451
|
+
|
452
|
+
model.dataset.regular_columns(col_list)
|
453
|
+
end
|
454
|
+
|
437
455
|
def preprocess(xs, ys = nil)
|
438
456
|
return xs if xs.is_a?(::XGBoost::DMatrix)
|
457
|
+
weights_col = model.weights_column || nil
|
458
|
+
|
459
|
+
if weights_col == model.dataset.target
|
460
|
+
raise ArgumentError, "Weight column cannot be the target column"
|
461
|
+
end
|
462
|
+
|
463
|
+
# Extract feature columns (all columns except label and weight)
|
464
|
+
feature_cols = xs.columns
|
465
|
+
feature_cols -= [weights_col] if weights_col
|
466
|
+
lazy = xs.is_a?(Polars::LazyFrame)
|
467
|
+
|
468
|
+
# Get features, labels and weights
|
469
|
+
features = lazy ? xs.select(feature_cols).collect.to_numo : xs.select(feature_cols).to_numo
|
470
|
+
weights = weights_col ? (lazy ? xs.select(weights_col).collect.to_numo : xs.select(weights_col).to_numo) : nil
|
471
|
+
weights = weights.flatten if weights
|
472
|
+
if ys.present?
|
473
|
+
ys = ys.is_a?(Array) ? Polars::Series.new(ys) : ys
|
474
|
+
labels = lazy ? ys.collect.to_numo.flatten : ys.to_numo.flatten
|
475
|
+
else
|
476
|
+
labels = nil
|
477
|
+
end
|
478
|
+
|
479
|
+
kwargs = {
|
480
|
+
label: labels,
|
481
|
+
weight: weights,
|
482
|
+
}.compact
|
439
483
|
|
440
|
-
orig_xs = xs.dup
|
441
|
-
column_names = xs.columns
|
442
|
-
xs = _preprocess(xs)
|
443
|
-
ys = ys.nil? ? nil : _preprocess(ys).flatten
|
444
|
-
kwargs = { label: ys }.compact
|
445
484
|
begin
|
446
|
-
::XGBoost::DMatrix.new(
|
447
|
-
|
485
|
+
::XGBoost::DMatrix.new(features, **kwargs).tap do |dmatrix|
|
486
|
+
dmatrix.feature_names = feature_cols
|
448
487
|
end
|
449
488
|
rescue StandardError => e
|
450
|
-
problematic_columns =
|
451
|
-
problematic_xs =
|
489
|
+
problematic_columns = xs.schema.select { |k, v| [Polars::Categorical, Polars::String].include?(v) }
|
490
|
+
problematic_xs = lazy ? xs.lazy.select(problematic_columns.keys).collect : xs.select(problematic_columns.keys)
|
452
491
|
raise %(
|
453
492
|
Error building data for XGBoost.
|
454
493
|
Apply preprocessing to columns
|
@@ -501,29 +540,6 @@ module EasyML
|
|
501
540
|
cb_container.after_iteration(@booster, current_iteration, d_train, evals)
|
502
541
|
end
|
503
542
|
|
504
|
-
def _preprocess(df)
|
505
|
-
return df if df.is_a?(Array)
|
506
|
-
|
507
|
-
df.to_a.map do |row|
|
508
|
-
row.values.map do |value|
|
509
|
-
case value
|
510
|
-
when Time
|
511
|
-
value.to_i # Convert Time to Unix timestamp
|
512
|
-
when Date
|
513
|
-
value.to_time.to_i # Convert Date to Unix timestamp
|
514
|
-
when String
|
515
|
-
value
|
516
|
-
when TrueClass, FalseClass
|
517
|
-
value ? 1.0 : 0.0 # Convert booleans to 1.0 and 0.0
|
518
|
-
when Integer
|
519
|
-
value
|
520
|
-
else
|
521
|
-
value.to_f # Ensure everything else is converted to a float
|
522
|
-
end
|
523
|
-
end
|
524
|
-
end
|
525
|
-
end
|
526
|
-
|
527
543
|
def initialize_model
|
528
544
|
@xgboost_model = model_class.new(n_estimators: @hyperparameters.to_h.dig(:n_estimators))
|
529
545
|
if block_given?
|
@@ -150,7 +150,7 @@ module EasyML
|
|
150
150
|
|
151
151
|
training_model.dataset.refresh
|
152
152
|
evaluator = retraining_job.evaluator.symbolize_keys
|
153
|
-
x_test, y_test = training_model.dataset.test(split_ys: true)
|
153
|
+
x_test, y_test = training_model.dataset.test(split_ys: true, all_columns: true)
|
154
154
|
y_pred = training_model.predict(x_test)
|
155
155
|
|
156
156
|
metric = evaluator[:metric].to_sym
|
data/lib/easy_ml/core/tuner.rb
CHANGED
@@ -8,7 +8,7 @@ module EasyML
|
|
8
8
|
:metrics, :objective, :n_trials, :direction, :evaluator,
|
9
9
|
:study, :results, :adapter, :tune_started_at, :x_valid, :y_valid,
|
10
10
|
:project_name, :job, :current_run, :trial_enumerator, :progress_block,
|
11
|
-
:tuner_job, :dataset
|
11
|
+
:tuner_job, :dataset, :x_normalized
|
12
12
|
|
13
13
|
def initialize(options = {})
|
14
14
|
@model = options[:model]
|
@@ -73,9 +73,12 @@ module EasyML
|
|
73
73
|
model.task = task
|
74
74
|
|
75
75
|
model.dataset.refresh if model.dataset.needs_refresh?
|
76
|
-
x_valid, y_valid = model.dataset.valid(split_ys: true,
|
76
|
+
x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
|
77
|
+
x_normalized = model.dataset.normalize(x_valid, inference: true)
|
78
|
+
x_normalized = model.preprocess(x_normalized)
|
77
79
|
self.x_valid = x_valid
|
78
80
|
self.y_valid = y_valid
|
81
|
+
self.x_normalized = x_normalized
|
79
82
|
self.dataset = model.dataset.valid(all_columns: true)
|
80
83
|
adapter.tune_started_at = tune_started_at
|
81
84
|
adapter.x_valid = x_valid
|
@@ -99,7 +102,7 @@ module EasyML
|
|
99
102
|
@study.tell(@current_trial, result)
|
100
103
|
rescue StandardError => e
|
101
104
|
puts EasyML::Event.easy_ml_context(e.backtrace)
|
102
|
-
@tuner_run.update!(status: :failed, hyperparameters:
|
105
|
+
@tuner_run.update!(status: :failed, hyperparameters: model.hyperparameters.to_h)
|
103
106
|
puts "Optuna failed with: #{e.message}"
|
104
107
|
raise e
|
105
108
|
end
|
@@ -138,7 +141,7 @@ module EasyML
|
|
138
141
|
end
|
139
142
|
end
|
140
143
|
|
141
|
-
y_pred = model.predict(
|
144
|
+
y_pred = model.predict(x_normalized)
|
142
145
|
model.metrics = metrics
|
143
146
|
metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
|
144
147
|
metric = metrics.symbolize_keys.dig(model.evaluator[:metric].to_sym)
|
@@ -36,6 +36,10 @@ module EasyML
|
|
36
36
|
clear_unique_id
|
37
37
|
end
|
38
38
|
|
39
|
+
def unlock!
|
40
|
+
clear_all_keys
|
41
|
+
end
|
42
|
+
|
39
43
|
private
|
40
44
|
|
41
45
|
def files
|
@@ -50,6 +54,10 @@ module EasyML
|
|
50
54
|
safe_write(df, unique_path(subdir: subdir))
|
51
55
|
end
|
52
56
|
|
57
|
+
def acquire_lock(key, &block)
|
58
|
+
Support::Lockable.with_lock("#{key}:lock", wait_timeout: 2, &block)
|
59
|
+
end
|
60
|
+
|
53
61
|
def unique_path(subdir: nil)
|
54
62
|
filename = [filenames, unique_id(subdir: subdir), "parquet"].compact.join(".")
|
55
63
|
|
@@ -63,15 +71,18 @@ module EasyML
|
|
63
71
|
end
|
64
72
|
|
65
73
|
def clear_all_keys
|
66
|
-
|
67
|
-
|
68
|
-
|
74
|
+
list_keys.each { |key| unlock_file(key) }
|
75
|
+
end
|
76
|
+
|
77
|
+
def unlock_file(key)
|
78
|
+
acquire_lock(key) do |suo|
|
79
|
+
suo.client.del(key)
|
69
80
|
end
|
70
81
|
end
|
71
82
|
|
72
83
|
def clear_unique_id(subdir: nil)
|
73
84
|
key = unique_id_key(subdir: subdir)
|
74
|
-
|
85
|
+
acquire_lock(key) do |suo|
|
75
86
|
suo.client.del(key)
|
76
87
|
end
|
77
88
|
end
|
@@ -83,7 +94,7 @@ module EasyML
|
|
83
94
|
def add_key(key)
|
84
95
|
keylist = unique_id_key(subdir: "keylist")
|
85
96
|
|
86
|
-
|
97
|
+
acquire_lock(keylist) do |suo|
|
87
98
|
suo.client.sadd(keylist, key)
|
88
99
|
end
|
89
100
|
end
|
@@ -91,14 +102,20 @@ module EasyML
|
|
91
102
|
def list_keys
|
92
103
|
keylist = unique_id_key(subdir: "keylist")
|
93
104
|
|
94
|
-
|
95
|
-
suo.client.
|
105
|
+
acquire_lock(keylist) do |suo|
|
106
|
+
if suo.client.type(keylist) == "set"
|
107
|
+
suo.client.smembers(keylist)
|
108
|
+
else
|
109
|
+
suo.client.del(keylist)
|
110
|
+
[]
|
111
|
+
end
|
96
112
|
end
|
97
113
|
end
|
98
114
|
|
99
115
|
def key_exists?(key)
|
100
116
|
keylist = unique_id_key(subdir: "keylist")
|
101
|
-
|
117
|
+
|
118
|
+
acquire_lock(keylist) do |suo|
|
102
119
|
suo.client.sismember(keylist, key)
|
103
120
|
end
|
104
121
|
end
|
@@ -107,7 +124,7 @@ module EasyML
|
|
107
124
|
key = unique_id_key(subdir: subdir)
|
108
125
|
add_key(key)
|
109
126
|
|
110
|
-
|
127
|
+
acquire_lock(key) do |suo|
|
111
128
|
redis = suo.client
|
112
129
|
|
113
130
|
seq = (redis.get(key) || "0").to_i
|
@@ -15,7 +15,7 @@ module EasyML
|
|
15
15
|
]
|
16
16
|
|
17
17
|
attr_accessor :filenames, :root_dir, :partition,
|
18
|
-
:
|
18
|
+
:primary_key, :options, :append_only, :named
|
19
19
|
|
20
20
|
def initialize(options)
|
21
21
|
@root_dir = options.dig(:root_dir)
|
@@ -27,6 +27,10 @@ module EasyML
|
|
27
27
|
@options = options
|
28
28
|
end
|
29
29
|
|
30
|
+
def unlock!
|
31
|
+
adapter_class.new(options).unlock!
|
32
|
+
end
|
33
|
+
|
30
34
|
def store(df, *args)
|
31
35
|
adapter_class.new(options.merge!(df: df)).store(*args)
|
32
36
|
end
|
@@ -67,6 +67,8 @@ module EasyML
|
|
67
67
|
Reader.sha(root_dir)
|
68
68
|
end
|
69
69
|
|
70
|
+
# Transform CSV files into Parquet files, of all the same datatype.
|
71
|
+
# Learn datatypes of columns and store schema.
|
70
72
|
def normalize
|
71
73
|
Normalizer.normalize(root_dir)
|
72
74
|
end
|
@@ -75,14 +77,18 @@ module EasyML
|
|
75
77
|
query
|
76
78
|
end
|
77
79
|
|
78
|
-
def
|
79
|
-
writer.
|
80
|
+
def unlock!
|
81
|
+
writer.unlock!
|
80
82
|
end
|
81
83
|
|
82
84
|
def compact
|
83
85
|
writer.compact
|
84
86
|
end
|
85
87
|
|
88
|
+
def store(df, *args)
|
89
|
+
writer.store(df, *args)
|
90
|
+
end
|
91
|
+
|
86
92
|
def cp(from, to)
|
87
93
|
writer.cp(from, to)
|
88
94
|
end
|
@@ -124,11 +124,25 @@ module EasyML
|
|
124
124
|
# @param series [Polars::Series] The string series to analyze
|
125
125
|
# @return [Symbol] One of :datetime, :text, or :categorical
|
126
126
|
def determine_string_type(series)
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
127
|
+
# Try to parse as numeric first
|
128
|
+
begin
|
129
|
+
# Try integer first
|
130
|
+
series.cast(Polars::Int64)
|
131
|
+
return :numeric
|
132
|
+
rescue StandardError
|
133
|
+
begin
|
134
|
+
# Try float if integer fails
|
135
|
+
series.cast(Polars::Float64)
|
136
|
+
return :numeric
|
137
|
+
rescue StandardError
|
138
|
+
# If not numeric, check for datetime or categorical
|
139
|
+
if EasyML::Data::DateConverter.maybe_convert_date(Polars::DataFrame.new({ temp: series }),
|
140
|
+
:temp)[:temp].dtype.is_a?(Polars::Datetime)
|
141
|
+
:datetime
|
142
|
+
else
|
143
|
+
categorical_or_text?(series)
|
144
|
+
end
|
145
|
+
end
|
132
146
|
end
|
133
147
|
end
|
134
148
|
|
data/lib/easy_ml/engine.rb
CHANGED
@@ -55,7 +55,7 @@ module EasyML
|
|
55
55
|
Polars.enable_string_cache
|
56
56
|
end
|
57
57
|
|
58
|
-
if %w[db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
58
|
+
if %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
59
59
|
config.eager_load_paths = config.eager_load_paths.without(config.eager_load_paths.map(&:to_s).grep(/easy_ml/).map { |p| Pathname.new(p) })
|
60
60
|
else
|
61
61
|
config.after_initialize do
|
@@ -77,6 +77,21 @@ module EasyML
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
|
+
unless %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
81
|
+
initializer "easy_ml.configure_secrets" do
|
82
|
+
EasyML::Configuration.configure do |config|
|
83
|
+
raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
|
84
|
+
raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
|
85
|
+
|
86
|
+
config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
|
87
|
+
config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
|
88
|
+
config.s3_region = ENV["S3_REGION"] ? ENV["S3_REGION"] : "us-east-1"
|
89
|
+
config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
|
90
|
+
config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
80
95
|
initializer "easy_ml.check_pending_migrations" do
|
81
96
|
if defined?(Rails::Server)
|
82
97
|
config.after_initialize do
|
@@ -96,19 +111,6 @@ module EasyML
|
|
96
111
|
end
|
97
112
|
end
|
98
113
|
|
99
|
-
initializer "easy_ml.configure_secrets" do
|
100
|
-
EasyML::Configuration.configure do |config|
|
101
|
-
raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
|
102
|
-
raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
|
103
|
-
|
104
|
-
config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
|
105
|
-
config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
|
106
|
-
config.s3_region = ENV["S3_REGION"] if ENV["S3_REGION"]
|
107
|
-
config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
|
108
|
-
config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
114
|
initializer "easy_ml.setup_generators" do |app|
|
113
115
|
generators_path = EasyML::Engine.root.join("lib/easy_ml/railtie/generators")
|
114
116
|
generators_dirs = Dir[File.join(generators_path, "**", "*.rb")]
|
@@ -5,19 +5,22 @@ module EasyML
|
|
5
5
|
def initialize(feature)
|
6
6
|
@feature = feature
|
7
7
|
|
8
|
-
datasource_config = feature
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
8
|
+
datasource_config = feature&.dataset&.datasource&.configuration
|
9
|
+
if datasource_config
|
10
|
+
options = {
|
11
|
+
root_dir: feature_dir,
|
12
|
+
filenames: "feature",
|
13
|
+
append_only: false,
|
14
|
+
primary_key: feature.primary_key&.first,
|
15
|
+
partition_size: batch_size,
|
16
|
+
s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
|
17
|
+
s3_prefix: s3_prefix,
|
18
|
+
polars_args: datasource_config.dig("polars_args"),
|
19
|
+
}.compact
|
20
|
+
super(options)
|
21
|
+
else
|
22
|
+
super({ root_dir: "" })
|
23
|
+
end
|
21
24
|
end
|
22
25
|
|
23
26
|
def cp(old_version, new_version)
|
@@ -30,7 +33,7 @@ module EasyML
|
|
30
33
|
files_to_cp = Dir.glob(Pathname.new(old_dir).join("**/*")).select { |f| File.file?(f) }
|
31
34
|
|
32
35
|
files_to_cp.each do |file|
|
33
|
-
target_file = file.gsub(
|
36
|
+
target_file = file.gsub(old_dir, new_dir)
|
34
37
|
FileUtils.mkdir_p(File.dirname(target_file))
|
35
38
|
FileUtils.cp(file, target_file)
|
36
39
|
end
|
@@ -46,9 +49,9 @@ module EasyML
|
|
46
49
|
File.join(
|
47
50
|
Rails.root,
|
48
51
|
"easy_ml/datasets",
|
49
|
-
feature
|
52
|
+
feature&.dataset&.name&.parameterize&.gsub("-", "_"),
|
50
53
|
"features",
|
51
|
-
feature
|
54
|
+
feature&.name&.parameterize&.gsub("-", "_"),
|
52
55
|
version.to_s
|
53
56
|
)
|
54
57
|
end
|
@@ -22,7 +22,7 @@ module EasyML
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def self.lock_client(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
|
25
|
-
Suo::Client::Redis.new(
|
25
|
+
Suo::Client::Redis.new(key, {
|
26
26
|
acquisition_timeout: wait_timeout,
|
27
27
|
stale_lock_expiry: stale_timeout,
|
28
28
|
resources: resources,
|
@@ -30,10 +30,6 @@ module EasyML
|
|
30
30
|
})
|
31
31
|
end
|
32
32
|
|
33
|
-
def self.prefixed_key(key)
|
34
|
-
"easy_ml:#{key}"
|
35
|
-
end
|
36
|
-
|
37
33
|
# Execute a block with a Redis lock
|
38
34
|
def self.with_lock(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
|
39
35
|
lock_key = nil
|
data/lib/easy_ml/version.rb
CHANGED