easy_ml 0.2.0.pre.rc77 → 0.2.0.pre.rc81
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/easy_ml/datasets_controller.rb +3 -3
- data/app/controllers/easy_ml/models_controller.rb +4 -3
- data/app/frontend/components/ModelForm.tsx +16 -0
- data/app/frontend/components/ScheduleModal.tsx +0 -2
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
- data/app/jobs/easy_ml/application_job.rb +1 -0
- data/app/jobs/easy_ml/batch_job.rb +47 -6
- data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
- data/app/jobs/easy_ml/reaper.rb +14 -10
- data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
- data/app/models/easy_ml/column/imputers/base.rb +1 -1
- data/app/models/easy_ml/column/imputers/imputer.rb +2 -0
- data/app/models/easy_ml/column/imputers/today.rb +1 -1
- data/app/models/easy_ml/column/selector.rb +0 -8
- data/app/models/easy_ml/column.rb +1 -1
- data/app/models/easy_ml/column_list.rb +2 -3
- data/app/models/easy_ml/dataset/learner/base.rb +2 -2
- data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
- data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
- data/app/models/easy_ml/dataset.rb +47 -38
- data/app/models/easy_ml/datasource.rb +0 -6
- data/app/models/easy_ml/feature.rb +33 -8
- data/app/models/easy_ml/model.rb +27 -4
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +21 -5
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +9 -5
- data/app/models/easy_ml/models/xgboost.rb +58 -36
- data/app/models/easy_ml/retraining_run.rb +1 -1
- data/app/serializers/easy_ml/model_serializer.rb +1 -0
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +16 -3
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +0 -17
- data/lib/easy_ml/core/tuner.rb +14 -5
- data/lib/easy_ml/data/dataset_manager/reader/base.rb +12 -0
- data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +8 -3
- data/lib/easy_ml/data/dataset_manager/reader/file.rb +5 -0
- data/lib/easy_ml/data/dataset_manager/reader.rb +7 -1
- data/lib/easy_ml/data/dataset_manager/writer/base.rb +26 -9
- data/lib/easy_ml/data/dataset_manager/writer.rb +5 -1
- data/lib/easy_ml/data/dataset_manager.rb +18 -4
- data/lib/easy_ml/data/embeddings/adapters.rb +56 -0
- data/lib/easy_ml/data/embeddings/compression.rb +0 -0
- data/lib/easy_ml/data/embeddings.rb +43 -0
- data/lib/easy_ml/data/polars_column.rb +19 -5
- data/lib/easy_ml/engine.rb +16 -14
- data/lib/easy_ml/feature_store.rb +19 -16
- data/lib/easy_ml/support/lockable.rb +1 -5
- data/lib/easy_ml/version.rb +1 -1
- data/public/easy_ml/assets/.vite/manifest.json +1 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
- metadata +9 -7
- data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
data/lib/easy_ml/core/tuner.rb
CHANGED
@@ -8,7 +8,7 @@ module EasyML
|
|
8
8
|
:metrics, :objective, :n_trials, :direction, :evaluator,
|
9
9
|
:study, :results, :adapter, :tune_started_at, :x_valid, :y_valid,
|
10
10
|
:project_name, :job, :current_run, :trial_enumerator, :progress_block,
|
11
|
-
:tuner_job, :dataset
|
11
|
+
:tuner_job, :dataset, :x_normalized
|
12
12
|
|
13
13
|
def initialize(options = {})
|
14
14
|
@model = options[:model]
|
@@ -73,9 +73,12 @@ module EasyML
|
|
73
73
|
model.task = task
|
74
74
|
|
75
75
|
model.dataset.refresh if model.dataset.needs_refresh?
|
76
|
-
x_valid, y_valid = model.dataset.valid(split_ys: true,
|
76
|
+
x_valid, y_valid = model.dataset.valid(split_ys: true, all_columns: true)
|
77
|
+
x_normalized = model.dataset.normalize(x_valid, inference: true)
|
78
|
+
x_normalized = model.preprocess(x_normalized)
|
77
79
|
self.x_valid = x_valid
|
78
80
|
self.y_valid = y_valid
|
81
|
+
self.x_normalized = x_normalized
|
79
82
|
self.dataset = model.dataset.valid(all_columns: true)
|
80
83
|
adapter.tune_started_at = tune_started_at
|
81
84
|
adapter.x_valid = x_valid
|
@@ -99,13 +102,12 @@ module EasyML
|
|
99
102
|
@study.tell(@current_trial, result)
|
100
103
|
rescue StandardError => e
|
101
104
|
puts EasyML::Event.easy_ml_context(e.backtrace)
|
102
|
-
@tuner_run.update!(status: :failed, hyperparameters:
|
105
|
+
@tuner_run.update!(status: :failed, hyperparameters: model.hyperparameters.to_h)
|
103
106
|
puts "Optuna failed with: #{e.message}"
|
104
107
|
raise e
|
105
108
|
end
|
106
109
|
end
|
107
110
|
|
108
|
-
model.after_tuning
|
109
111
|
return nil if tuner_job.tuner_runs.all?(&:failed?)
|
110
112
|
|
111
113
|
best_run = tuner_job.best_run
|
@@ -115,6 +117,13 @@ module EasyML
|
|
115
117
|
status: :success,
|
116
118
|
completed_at: Time.current,
|
117
119
|
)
|
120
|
+
model.after_tuning
|
121
|
+
if best_run&.hyperparameters.present?
|
122
|
+
model.hyperparameters = best_run.hyperparameters
|
123
|
+
model.fit
|
124
|
+
model.save
|
125
|
+
end
|
126
|
+
model.cleanup
|
118
127
|
|
119
128
|
best_run&.hyperparameters
|
120
129
|
rescue StandardError => e
|
@@ -138,7 +147,7 @@ module EasyML
|
|
138
147
|
end
|
139
148
|
end
|
140
149
|
|
141
|
-
y_pred = model.predict(
|
150
|
+
y_pred = model.predict(x_normalized)
|
142
151
|
model.metrics = metrics
|
143
152
|
metrics = model.evaluate(y_pred: y_pred, y_true: y_valid, x_true: x_valid, dataset: dataset)
|
144
153
|
metric = metrics.symbolize_keys.dig(model.evaluator[:metric].to_sym)
|
@@ -35,6 +35,18 @@ module EasyML
|
|
35
35
|
|
36
36
|
private
|
37
37
|
|
38
|
+
def list_df_nulls(df)
|
39
|
+
df = df.lazy
|
40
|
+
|
41
|
+
columns = df.columns
|
42
|
+
selects = columns.map do |col|
|
43
|
+
Polars.col(col).null_count.alias(col)
|
44
|
+
end
|
45
|
+
null_info = df.select(selects).collect
|
46
|
+
null_info.to_hashes.first.compact
|
47
|
+
null_info.to_hashes.first.transform_values { |v| v > 0 ? v : nil }.compact.keys
|
48
|
+
end
|
49
|
+
|
38
50
|
def apply_defaults(kwargs)
|
39
51
|
options = kwargs.dup
|
40
52
|
|
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
module EasyML
|
3
2
|
module Data
|
4
3
|
class DatasetManager
|
@@ -8,11 +7,17 @@ module EasyML
|
|
8
7
|
return query_dataframes(lazy_frames, schema)
|
9
8
|
end
|
10
9
|
|
10
|
+
def list_nulls
|
11
|
+
df = lazy_frames
|
12
|
+
list_df_nulls(df)
|
13
|
+
end
|
14
|
+
|
11
15
|
def schema
|
12
16
|
input.schema
|
13
17
|
end
|
14
18
|
|
15
|
-
|
19
|
+
private
|
20
|
+
|
16
21
|
def lazy_frames
|
17
22
|
input.lazy
|
18
23
|
end
|
@@ -20,4 +25,4 @@ module EasyML
|
|
20
25
|
end
|
21
26
|
end
|
22
27
|
end
|
23
|
-
end
|
28
|
+
end
|
@@ -17,12 +17,18 @@ module EasyML
|
|
17
17
|
).query
|
18
18
|
end
|
19
19
|
|
20
|
-
def self.schema(input, **kwargs, &block)
|
20
|
+
def self.schema(input = nil, **kwargs, &block)
|
21
21
|
adapter(input).new(
|
22
22
|
kwargs.merge!(input: input), &block
|
23
23
|
).schema
|
24
24
|
end
|
25
25
|
|
26
|
+
def self.list_nulls(input = nil, **kwargs, &block)
|
27
|
+
adapter(input).new(
|
28
|
+
kwargs.merge!(input: input), &block
|
29
|
+
).list_nulls
|
30
|
+
end
|
31
|
+
|
26
32
|
def self.files(dir)
|
27
33
|
Dir.glob(::File.join(dir, "**/*.{parquet}"))
|
28
34
|
end
|
@@ -36,6 +36,10 @@ module EasyML
|
|
36
36
|
clear_unique_id
|
37
37
|
end
|
38
38
|
|
39
|
+
def unlock!
|
40
|
+
clear_all_keys
|
41
|
+
end
|
42
|
+
|
39
43
|
private
|
40
44
|
|
41
45
|
def files
|
@@ -50,6 +54,10 @@ module EasyML
|
|
50
54
|
safe_write(df, unique_path(subdir: subdir))
|
51
55
|
end
|
52
56
|
|
57
|
+
def acquire_lock(key, &block)
|
58
|
+
Support::Lockable.with_lock("#{key}:lock", wait_timeout: 2, &block)
|
59
|
+
end
|
60
|
+
|
53
61
|
def unique_path(subdir: nil)
|
54
62
|
filename = [filenames, unique_id(subdir: subdir), "parquet"].compact.join(".")
|
55
63
|
|
@@ -63,15 +71,18 @@ module EasyML
|
|
63
71
|
end
|
64
72
|
|
65
73
|
def clear_all_keys
|
66
|
-
|
67
|
-
|
68
|
-
|
74
|
+
list_keys.each { |key| unlock_file(key) }
|
75
|
+
end
|
76
|
+
|
77
|
+
def unlock_file(key)
|
78
|
+
acquire_lock(key) do |suo|
|
79
|
+
suo.client.del(key)
|
69
80
|
end
|
70
81
|
end
|
71
82
|
|
72
83
|
def clear_unique_id(subdir: nil)
|
73
84
|
key = unique_id_key(subdir: subdir)
|
74
|
-
|
85
|
+
acquire_lock(key) do |suo|
|
75
86
|
suo.client.del(key)
|
76
87
|
end
|
77
88
|
end
|
@@ -83,7 +94,7 @@ module EasyML
|
|
83
94
|
def add_key(key)
|
84
95
|
keylist = unique_id_key(subdir: "keylist")
|
85
96
|
|
86
|
-
|
97
|
+
acquire_lock(keylist) do |suo|
|
87
98
|
suo.client.sadd(keylist, key)
|
88
99
|
end
|
89
100
|
end
|
@@ -91,14 +102,20 @@ module EasyML
|
|
91
102
|
def list_keys
|
92
103
|
keylist = unique_id_key(subdir: "keylist")
|
93
104
|
|
94
|
-
|
95
|
-
suo.client.
|
105
|
+
acquire_lock(keylist) do |suo|
|
106
|
+
if suo.client.type(keylist) == "set"
|
107
|
+
suo.client.smembers(keylist)
|
108
|
+
else
|
109
|
+
suo.client.del(keylist)
|
110
|
+
[]
|
111
|
+
end
|
96
112
|
end
|
97
113
|
end
|
98
114
|
|
99
115
|
def key_exists?(key)
|
100
116
|
keylist = unique_id_key(subdir: "keylist")
|
101
|
-
|
117
|
+
|
118
|
+
acquire_lock(keylist) do |suo|
|
102
119
|
suo.client.sismember(keylist, key)
|
103
120
|
end
|
104
121
|
end
|
@@ -107,7 +124,7 @@ module EasyML
|
|
107
124
|
key = unique_id_key(subdir: subdir)
|
108
125
|
add_key(key)
|
109
126
|
|
110
|
-
|
127
|
+
acquire_lock(key) do |suo|
|
111
128
|
redis = suo.client
|
112
129
|
|
113
130
|
seq = (redis.get(key) || "0").to_i
|
@@ -15,7 +15,7 @@ module EasyML
|
|
15
15
|
]
|
16
16
|
|
17
17
|
attr_accessor :filenames, :root_dir, :partition,
|
18
|
-
:
|
18
|
+
:primary_key, :options, :append_only, :named
|
19
19
|
|
20
20
|
def initialize(options)
|
21
21
|
@root_dir = options.dig(:root_dir)
|
@@ -27,6 +27,10 @@ module EasyML
|
|
27
27
|
@options = options
|
28
28
|
end
|
29
29
|
|
30
|
+
def unlock!
|
31
|
+
adapter_class.new(options).unlock!
|
32
|
+
end
|
33
|
+
|
30
34
|
def store(df, *args)
|
31
35
|
adapter_class.new(options.merge!(df: df)).store(*args)
|
32
36
|
end
|
@@ -44,13 +44,21 @@ module EasyML
|
|
44
44
|
Reader.schema(input, **kwargs, &block)
|
45
45
|
end
|
46
46
|
|
47
|
+
def list_nulls(input = nil, **kwargs, &block)
|
48
|
+
Reader.list_nulls(input, **kwargs, &block)
|
49
|
+
end
|
50
|
+
|
47
51
|
def num_rows
|
48
52
|
Reader.num_rows
|
49
53
|
end
|
50
54
|
end
|
51
55
|
|
52
|
-
def
|
53
|
-
Reader.
|
56
|
+
def list_nulls(input = nil, **kwargs, &block)
|
57
|
+
Reader.list_nulls(input, **kwargs, &block)
|
58
|
+
end
|
59
|
+
|
60
|
+
def num_rows(input = nil, **kwargs, &block)
|
61
|
+
Reader.num_rows(input, **kwargs, &block)
|
54
62
|
end
|
55
63
|
|
56
64
|
def query(input = nil, **kwargs, &block)
|
@@ -67,6 +75,8 @@ module EasyML
|
|
67
75
|
Reader.sha(root_dir)
|
68
76
|
end
|
69
77
|
|
78
|
+
# Transform CSV files into Parquet files, of all the same datatype.
|
79
|
+
# Learn datatypes of columns and store schema.
|
70
80
|
def normalize
|
71
81
|
Normalizer.normalize(root_dir)
|
72
82
|
end
|
@@ -75,14 +85,18 @@ module EasyML
|
|
75
85
|
query
|
76
86
|
end
|
77
87
|
|
78
|
-
def
|
79
|
-
writer.
|
88
|
+
def unlock!
|
89
|
+
writer.unlock!
|
80
90
|
end
|
81
91
|
|
82
92
|
def compact
|
83
93
|
writer.compact
|
84
94
|
end
|
85
95
|
|
96
|
+
def store(df, *args)
|
97
|
+
writer.store(df, *args)
|
98
|
+
end
|
99
|
+
|
86
100
|
def cp(from, to)
|
87
101
|
writer.cp(from, to)
|
88
102
|
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module EasyML
|
2
|
+
module Data
|
3
|
+
class Embeddings
|
4
|
+
class Adapters
|
5
|
+
attr_accessor :model, :config
|
6
|
+
|
7
|
+
ADAPTERS = {
|
8
|
+
anthropic: Langchain::LLM::Anthropic,
|
9
|
+
gemini: Langchain::LLM::GoogleGemini,
|
10
|
+
openai: Langchain::LLM::OpenAI,
|
11
|
+
ollama: Langchain::LLM::Ollama,
|
12
|
+
}
|
13
|
+
|
14
|
+
DEFAULTS = {
|
15
|
+
api_key: {
|
16
|
+
anthropic: ENV["ANTHROPIC_API_KEY"],
|
17
|
+
gemini: ENV["GEMINI_API_KEY"],
|
18
|
+
openai: ENV["OPENAI_API_KEY"],
|
19
|
+
ollama: ENV["OLLAMA_API_KEY"],
|
20
|
+
},
|
21
|
+
}
|
22
|
+
|
23
|
+
def initialize(model, config = {})
|
24
|
+
@model = model.to_sym
|
25
|
+
@config = config.symbolize_keys
|
26
|
+
apply_defaults
|
27
|
+
end
|
28
|
+
|
29
|
+
def embed(df, col)
|
30
|
+
pick
|
31
|
+
texts = df[col].to_a
|
32
|
+
df = df.with_column(
|
33
|
+
embeddings: adapter.embed(text: texts),
|
34
|
+
)
|
35
|
+
end
|
36
|
+
|
37
|
+
private
|
38
|
+
|
39
|
+
def pick
|
40
|
+
@adapter ||= ADAPTERS[@model].new(config)
|
41
|
+
self
|
42
|
+
end
|
43
|
+
|
44
|
+
def apply_defaults
|
45
|
+
@config = @config.deep_symbolize_keys
|
46
|
+
|
47
|
+
DEFAULTS.each do |k, v|
|
48
|
+
unless @config.key?(k)
|
49
|
+
@config[k] = v[@model]
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
File without changes
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module EasyML
|
2
|
+
module Data
|
3
|
+
class Embeddings
|
4
|
+
COMPRESSION_DEFAULT = {
|
5
|
+
present: :balanced,
|
6
|
+
}
|
7
|
+
|
8
|
+
attr_reader :df, :column, :model, :adapter, :compression,
|
9
|
+
:embeddings, :compressed_embeddings
|
10
|
+
|
11
|
+
def initialize(options = {})
|
12
|
+
@df = options[:df]
|
13
|
+
@column = options[:column]
|
14
|
+
@model = options[:model]
|
15
|
+
@config = options[:config] || {}
|
16
|
+
@compression = options[:compression] || COMPRESSION_DEFAULT
|
17
|
+
end
|
18
|
+
|
19
|
+
def create
|
20
|
+
embed
|
21
|
+
compress
|
22
|
+
end
|
23
|
+
|
24
|
+
def embed
|
25
|
+
@embeddings ||= adapter.embed(df, column)
|
26
|
+
end
|
27
|
+
|
28
|
+
def compress
|
29
|
+
@compressed_embeddings ||= compression_adapter.compress(embeddings)
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def adapter
|
35
|
+
@adapter ||= EasyML::Data::Embeddings::Adapters.new(model, config)
|
36
|
+
end
|
37
|
+
|
38
|
+
def compression_adapter
|
39
|
+
@compression_adapter ||= EasyML::Data::Embeddings::Compression.new(compression)
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -124,11 +124,25 @@ module EasyML
|
|
124
124
|
# @param series [Polars::Series] The string series to analyze
|
125
125
|
# @return [Symbol] One of :datetime, :text, or :categorical
|
126
126
|
def determine_string_type(series)
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
127
|
+
# Try to parse as numeric first
|
128
|
+
begin
|
129
|
+
# Try integer first
|
130
|
+
series.cast(Polars::Int64)
|
131
|
+
return :numeric
|
132
|
+
rescue StandardError
|
133
|
+
begin
|
134
|
+
# Try float if integer fails
|
135
|
+
series.cast(Polars::Float64)
|
136
|
+
return :numeric
|
137
|
+
rescue StandardError
|
138
|
+
# If not numeric, check for datetime or categorical
|
139
|
+
if EasyML::Data::DateConverter.maybe_convert_date(Polars::DataFrame.new({ temp: series }),
|
140
|
+
:temp)[:temp].dtype.is_a?(Polars::Datetime)
|
141
|
+
:datetime
|
142
|
+
else
|
143
|
+
categorical_or_text?(series)
|
144
|
+
end
|
145
|
+
end
|
132
146
|
end
|
133
147
|
end
|
134
148
|
|
data/lib/easy_ml/engine.rb
CHANGED
@@ -55,7 +55,7 @@ module EasyML
|
|
55
55
|
Polars.enable_string_cache
|
56
56
|
end
|
57
57
|
|
58
|
-
if %w[db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
58
|
+
if %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
59
59
|
config.eager_load_paths = config.eager_load_paths.without(config.eager_load_paths.map(&:to_s).grep(/easy_ml/).map { |p| Pathname.new(p) })
|
60
60
|
else
|
61
61
|
config.after_initialize do
|
@@ -77,6 +77,21 @@ module EasyML
|
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
80
|
+
unless %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
|
81
|
+
initializer "easy_ml.configure_secrets" do
|
82
|
+
EasyML::Configuration.configure do |config|
|
83
|
+
raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
|
84
|
+
raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
|
85
|
+
|
86
|
+
config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
|
87
|
+
config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
|
88
|
+
config.s3_region = ENV["S3_REGION"] ? ENV["S3_REGION"] : "us-east-1"
|
89
|
+
config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
|
90
|
+
config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
80
95
|
initializer "easy_ml.check_pending_migrations" do
|
81
96
|
if defined?(Rails::Server)
|
82
97
|
config.after_initialize do
|
@@ -96,19 +111,6 @@ module EasyML
|
|
96
111
|
end
|
97
112
|
end
|
98
113
|
|
99
|
-
initializer "easy_ml.configure_secrets" do
|
100
|
-
EasyML::Configuration.configure do |config|
|
101
|
-
raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
|
102
|
-
raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
|
103
|
-
|
104
|
-
config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
|
105
|
-
config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
|
106
|
-
config.s3_region = ENV["S3_REGION"] if ENV["S3_REGION"]
|
107
|
-
config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
|
108
|
-
config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
|
109
|
-
end
|
110
|
-
end
|
111
|
-
|
112
114
|
initializer "easy_ml.setup_generators" do |app|
|
113
115
|
generators_path = EasyML::Engine.root.join("lib/easy_ml/railtie/generators")
|
114
116
|
generators_dirs = Dir[File.join(generators_path, "**", "*.rb")]
|
@@ -5,19 +5,22 @@ module EasyML
|
|
5
5
|
def initialize(feature)
|
6
6
|
@feature = feature
|
7
7
|
|
8
|
-
datasource_config = feature
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
8
|
+
datasource_config = feature&.dataset&.datasource&.configuration
|
9
|
+
if datasource_config
|
10
|
+
options = {
|
11
|
+
root_dir: feature_dir,
|
12
|
+
filenames: "feature",
|
13
|
+
append_only: false,
|
14
|
+
primary_key: feature.primary_key&.first,
|
15
|
+
partition_size: batch_size,
|
16
|
+
s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
|
17
|
+
s3_prefix: s3_prefix,
|
18
|
+
polars_args: datasource_config.dig("polars_args"),
|
19
|
+
}.compact
|
20
|
+
super(options)
|
21
|
+
else
|
22
|
+
super({ root_dir: "" })
|
23
|
+
end
|
21
24
|
end
|
22
25
|
|
23
26
|
def cp(old_version, new_version)
|
@@ -30,7 +33,7 @@ module EasyML
|
|
30
33
|
files_to_cp = Dir.glob(Pathname.new(old_dir).join("**/*")).select { |f| File.file?(f) }
|
31
34
|
|
32
35
|
files_to_cp.each do |file|
|
33
|
-
target_file = file.gsub(
|
36
|
+
target_file = file.gsub(old_dir, new_dir)
|
34
37
|
FileUtils.mkdir_p(File.dirname(target_file))
|
35
38
|
FileUtils.cp(file, target_file)
|
36
39
|
end
|
@@ -46,9 +49,9 @@ module EasyML
|
|
46
49
|
File.join(
|
47
50
|
Rails.root,
|
48
51
|
"easy_ml/datasets",
|
49
|
-
feature
|
52
|
+
feature&.dataset&.name&.parameterize&.gsub("-", "_"),
|
50
53
|
"features",
|
51
|
-
feature
|
54
|
+
feature&.name&.parameterize&.gsub("-", "_"),
|
52
55
|
version.to_s
|
53
56
|
)
|
54
57
|
end
|
@@ -22,7 +22,7 @@ module EasyML
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def self.lock_client(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
|
25
|
-
Suo::Client::Redis.new(
|
25
|
+
Suo::Client::Redis.new(key, {
|
26
26
|
acquisition_timeout: wait_timeout,
|
27
27
|
stale_lock_expiry: stale_timeout,
|
28
28
|
resources: resources,
|
@@ -30,10 +30,6 @@ module EasyML
|
|
30
30
|
})
|
31
31
|
end
|
32
32
|
|
33
|
-
def self.prefixed_key(key)
|
34
|
-
"easy_ml:#{key}"
|
35
|
-
end
|
36
|
-
|
37
33
|
# Execute a block with a Redis lock
|
38
34
|
def self.with_lock(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
|
39
35
|
lock_key = nil
|
data/lib/easy_ml/version.rb
CHANGED