easy_ml 0.2.0.pre.rc71 → 0.2.0.pre.rc75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/easy_ml/datasets_controller.rb +33 -0
- data/app/controllers/easy_ml/datasources_controller.rb +7 -0
- data/app/controllers/easy_ml/models_controller.rb +46 -0
- data/app/frontend/components/DatasetCard.tsx +212 -0
- data/app/frontend/components/ModelCard.tsx +114 -29
- data/app/frontend/components/StackTrace.tsx +13 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +10 -7
- data/app/frontend/components/datasets/UploadDatasetButton.tsx +51 -0
- data/app/frontend/components/models/DownloadModelModal.tsx +90 -0
- data/app/frontend/components/models/UploadModelModal.tsx +212 -0
- data/app/frontend/components/models/index.ts +2 -0
- data/app/frontend/pages/DatasetsPage.tsx +36 -130
- data/app/frontend/pages/DatasourcesPage.tsx +22 -2
- data/app/frontend/pages/ModelsPage.tsx +37 -11
- data/app/frontend/types/dataset.ts +1 -2
- data/app/frontend/types.ts +1 -1
- data/app/jobs/easy_ml/reaper.rb +55 -0
- data/app/jobs/easy_ml/training_job.rb +1 -1
- data/app/models/easy_ml/column/imputers/base.rb +4 -0
- data/app/models/easy_ml/column/imputers/clip.rb +5 -3
- data/app/models/easy_ml/column/imputers/imputer.rb +11 -13
- data/app/models/easy_ml/column/imputers/mean.rb +7 -3
- data/app/models/easy_ml/column/imputers/null_imputer.rb +3 -0
- data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +5 -1
- data/app/models/easy_ml/column/imputers.rb +3 -1
- data/app/models/easy_ml/column/lineage/base.rb +5 -1
- data/app/models/easy_ml/column/lineage/computed_by_feature.rb +1 -1
- data/app/models/easy_ml/column/lineage/preprocessed.rb +1 -1
- data/app/models/easy_ml/column/lineage/raw_dataset.rb +1 -1
- data/app/models/easy_ml/column/selector.rb +4 -0
- data/app/models/easy_ml/column.rb +79 -63
- data/app/models/easy_ml/column_history.rb +28 -28
- data/app/models/easy_ml/column_list/imputer.rb +23 -0
- data/app/models/easy_ml/column_list.rb +39 -26
- data/app/models/easy_ml/dataset/learner/base.rb +34 -0
- data/app/models/easy_ml/dataset/learner/eager/boolean.rb +10 -0
- data/app/models/easy_ml/dataset/learner/eager/categorical.rb +51 -0
- data/app/models/easy_ml/dataset/learner/eager/query.rb +37 -0
- data/app/models/easy_ml/dataset/learner/eager.rb +43 -0
- data/app/models/easy_ml/dataset/learner/lazy/boolean.rb +13 -0
- data/app/models/easy_ml/dataset/learner/lazy/categorical.rb +10 -0
- data/app/models/easy_ml/dataset/learner/lazy/datetime.rb +19 -0
- data/app/models/easy_ml/dataset/learner/lazy/null.rb +17 -0
- data/app/models/easy_ml/dataset/learner/lazy/numeric.rb +19 -0
- data/app/models/easy_ml/dataset/learner/lazy/query.rb +69 -0
- data/app/models/easy_ml/dataset/learner/lazy/string.rb +19 -0
- data/app/models/easy_ml/dataset/learner/lazy.rb +51 -0
- data/app/models/easy_ml/dataset/learner/query.rb +25 -0
- data/app/models/easy_ml/dataset/learner.rb +100 -0
- data/app/models/easy_ml/dataset.rb +150 -36
- data/app/models/easy_ml/dataset_history.rb +1 -0
- data/app/models/easy_ml/datasource.rb +9 -0
- data/app/models/easy_ml/event.rb +5 -7
- data/app/models/easy_ml/export/column.rb +27 -0
- data/app/models/easy_ml/export/dataset.rb +37 -0
- data/app/models/easy_ml/export/datasource.rb +12 -0
- data/app/models/easy_ml/export/feature.rb +24 -0
- data/app/models/easy_ml/export/model.rb +40 -0
- data/app/models/easy_ml/export/retraining_job.rb +20 -0
- data/app/models/easy_ml/export/splitter.rb +14 -0
- data/app/models/easy_ml/feature.rb +21 -0
- data/app/models/easy_ml/import/column.rb +35 -0
- data/app/models/easy_ml/import/dataset.rb +148 -0
- data/app/models/easy_ml/import/feature.rb +36 -0
- data/app/models/easy_ml/import/model.rb +136 -0
- data/app/models/easy_ml/import/retraining_job.rb +29 -0
- data/app/models/easy_ml/import/splitter.rb +34 -0
- data/app/models/easy_ml/lineage.rb +44 -0
- data/app/models/easy_ml/model.rb +101 -37
- data/app/models/easy_ml/model_file.rb +6 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +7 -7
- data/app/models/easy_ml/models/xgboost.rb +33 -9
- data/app/models/easy_ml/retraining_job.rb +8 -1
- data/app/models/easy_ml/retraining_run.rb +7 -5
- data/app/models/easy_ml/splitter.rb +8 -0
- data/app/models/lineage_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +7 -1
- data/app/serializers/easy_ml/dataset_serializer.rb +2 -1
- data/app/serializers/easy_ml/lineage_serializer.rb +9 -0
- data/config/routes.rb +14 -1
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +3 -3
- data/lib/easy_ml/core/tuner.rb +13 -12
- data/lib/easy_ml/data/polars_column.rb +149 -100
- data/lib/easy_ml/data/polars_reader.rb +8 -5
- data/lib/easy_ml/data/polars_schema.rb +56 -0
- data/lib/easy_ml/data/splits/file_split.rb +20 -2
- data/lib/easy_ml/data/splits/split.rb +10 -1
- data/lib/easy_ml/data.rb +1 -0
- data/lib/easy_ml/deep_compact.rb +19 -0
- data/lib/easy_ml/engine.rb +1 -0
- data/lib/easy_ml/feature_store.rb +2 -6
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +6 -0
- data/lib/easy_ml/railtie/templates/migration/add_extra_metadata_to_columns.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/add_raw_schema_to_datasets.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_easy_ml_model_names.rb.tt +8 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_lineages.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/remove_evaluator_from_retraining_jobs.rb.tt +7 -0
- data/lib/easy_ml/railtie/templates/migration/update_preprocessing_steps_to_jsonb.rb.tt +18 -0
- data/lib/easy_ml/timing.rb +34 -0
- data/lib/easy_ml/version.rb +1 -1
- data/lib/easy_ml.rb +2 -0
- data/public/easy_ml/assets/.vite/manifest.json +2 -2
- data/public/easy_ml/assets/assets/Application-Q7L6ioxr.css +1 -0
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Rrzo4ecT.js +522 -0
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Rrzo4ecT.js.map +1 -0
- metadata +53 -12
- data/app/models/easy_ml/column/learners/base.rb +0 -103
- data/app/models/easy_ml/column/learners/boolean.rb +0 -11
- data/app/models/easy_ml/column/learners/categorical.rb +0 -51
- data/app/models/easy_ml/column/learners/datetime.rb +0 -19
- data/app/models/easy_ml/column/learners/null.rb +0 -22
- data/app/models/easy_ml/column/learners/numeric.rb +0 -33
- data/app/models/easy_ml/column/learners/string.rb +0 -15
- data/public/easy_ml/assets/assets/Application-BbFobaXt.css +0 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CibZcrBc.js +0 -489
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CibZcrBc.js.map +0 -1
@@ -0,0 +1,148 @@
|
|
1
|
+
module EasyML
  module Import
    # Creates or updates an EasyML::Dataset, together with its datasource,
    # splitter, columns and features, from an exported configuration.
    class Dataset
      # Keys allowed in a dataset configuration: every EasyML::Dataset column
      # except the export's UNCONFIGURABLE_COLUMNS, plus the nested sections
      # (:columns, :features, :splitter, :datasource). Memoized on the class.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Dataset.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Dataset::UNCONFIGURABLE_COLUMNS.map(&:to_sym) +
                            [:columns, :features, :splitter, :datasource]
      end

      # Imports a dataset from a configuration.
      #
      # @param json_config [String, Hash] JSON text or a hash with a top-level "dataset" entry
      # @param action [Symbol, nil] :create or :update; :create is downgraded to
      #   :update when a dataset with the configured name already exists
      # @param dataset [EasyML::Dataset, nil] target dataset, required for :update
      # @return [EasyML::Dataset] the created or updated dataset
      # @raise [ArgumentError] on a missing/invalid action, a missing target
      #   dataset, or a configuration without a "dataset" section
      def self.from_config(json_config, action: nil, dataset: nil)
        raise ArgumentError, "Target dataset must be specified" if action == :update && dataset.nil?

        config = json_config.is_a?(String) ? JSON.parse(json_config) : json_config
        raise ArgumentError, "Dataset configuration must be a hash or a JSON object" unless config.is_a?(Hash)

        # Work on a private copy: the nested sections are removed with #delete
        # below, which would otherwise strip them from the caller's hash.
        config = config.deep_dup.with_indifferent_access
        dataset_config = config["dataset"]
        raise ArgumentError, "Configuration is missing the \"dataset\" section" unless dataset_config.is_a?(Hash)

        # Extract configs for related models
        datasource_config = dataset_config.delete("datasource")
        splitter_config = dataset_config.delete("splitter")
        columns_config = dataset_config.delete("columns") || []
        features_config = dataset_config.delete("features") || []

        if action == :create
          # Importing over an existing name updates that dataset instead.
          dataset = EasyML::Dataset.find_by(name: dataset_config["name"])
          action = dataset.present? ? :update : :create
        end
        raise ArgumentError, "Action must be specified" unless action.present?

        case action
        when :create
          create_dataset(dataset_config, datasource_config, splitter_config, columns_config, features_config)
        when :update
          update_dataset(dataset, dataset_config, columns_config, features_config)
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # NOTE: the original file placed a bare `private` here. It never applied
      # to the `def self.` methods below (and `validate` is called from
      # EasyML::Import::Model), so these methods stay public as they
      # effectively always were.

      # Creates the datasource (found or created by name), the dataset, and
      # then its splitter, columns and features.
      #
      # @return [EasyML::Dataset]
      # @raise [ArgumentError] when no datasource configuration is given
      def self.create_dataset(dataset_config, datasource_config, splitter_config, columns_config, features_config)
        unless datasource_config.is_a?(Hash)
          raise ArgumentError, "Datasource configuration is required to create a dataset"
        end

        # Create new datasource
        datasource = EasyML::Datasource.find_or_create_by(name: datasource_config["name"]) do |ds|
          ds.assign_attributes(datasource_config)
        end
        datasource.update!(datasource_config)

        # Create new dataset
        dataset = EasyML::Dataset.create!(dataset_config.merge(datasource: datasource))

        # Create splitter if config exists
        EasyML::Splitter.from_config(splitter_config, dataset) if splitter_config.present?

        columns_config.each do |column_config|
          EasyML::Column.from_config(column_config, dataset, action: :create)
        end

        features_config.each do |feature_config|
          EasyML::Feature.from_config(feature_config, dataset, action: :create)
        end

        dataset
      end

      # Updates an existing dataset in place. The dataset's name and datasource
      # are preserved; a refresh is queued when any existing column's
      # drop_if_null setting changes.
      #
      # @return [EasyML::Dataset]
      def self.update_dataset(dataset, dataset_config, columns_config, features_config)
        # Update dataset attributes except name (preserve original name)
        dataset.update!(dataset_config.except("name", "datasource"))

        needs_refresh = false

        columns_config.each do |column_config|
          existing_column = dataset.columns.find_by(name: column_config["name"])

          if existing_column
            new_drop_if_null = column_config["drop_if_null"]
            # Only an explicit, different drop_if_null value triggers a refresh
            needs_refresh ||= !new_drop_if_null.nil? && existing_column.drop_if_null != new_drop_if_null
          end

          EasyML::Column.from_config(column_config, dataset, action: :update)
        end

        # Update or create features
        features_config.each do |feature_config|
          EasyML::Feature.from_config(feature_config, dataset, action: :update)
        end

        dataset.refresh_async if needs_refresh

        dataset
      end

      # Validates a dataset configuration and its nested sections. Works for
      # string-keyed, symbol-keyed and indifferent-access hashes alike.
      #
      # @param dataset_config [Hash]
      # @return [Hash] the same configuration object
      # @raise [ArgumentError] on unknown keys or malformed nested sections
      def self.validate(dataset_config)
        extra_keys = dataset_config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid dataset keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        splitter = fetch_section(dataset_config, :splitter)
        EasyML::Import::Splitter.validate(splitter) if splitter.present?

        columns = fetch_section(dataset_config, :columns)
        if columns.present?
          raise ArgumentError, "Columns configuration must be an array" unless columns.is_a?(Array)

          columns.each_with_index do |col_config, idx|
            unless col_config.is_a?(Hash)
              raise ArgumentError, "Each column configuration must be a hash, at index #{idx}"
            end
            EasyML::Import::Column.validate(col_config, idx)
          end
        end

        features = fetch_section(dataset_config, :features)
        if features.present?
          raise ArgumentError, "Features configuration must be an array" unless features.is_a?(Array)

          features.each_with_index do |feat_config, idx|
            unless feat_config.is_a?(Hash)
              raise ArgumentError, "Each feature configuration must be a hash, at index #{idx}"
            end
            EasyML::Import::Feature.validate(feat_config, idx)
          end
        end

        dataset_config
      end

      # Reads a nested section whether the hash uses string or symbol keys.
      def self.fetch_section(config, key)
        config.key?(key.to_s) ? config[key.to_s] : config[key.to_sym]
      end
      private_class_method :fetch_section
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module EasyML
  module Import
    # Creates or updates a dataset's features from exported configuration.
    class Feature
      # Keys allowed in a feature configuration: every EasyML::Feature column
      # except the export's UNCONFIGURABLE_COLUMNS. Memoized on the class.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Feature.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Feature::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
      end

      # Applies one feature configuration to a dataset.
      #
      # @param config [Hash] feature attributes; "name" identifies the feature
      # @param dataset [#features] owner whose features collection responds to
      #   find_by, and create!
      # @param action [Symbol] :create or :update
      # @return the created feature, or the existing feature after update
      # @raise [ArgumentError] when action is neither :create nor :update
      def self.from_config(config, dataset, action: :create)
        case action
        when :create
          dataset.features.create!(config)
        when :update
          # The lookup is only needed here; :create and invalid actions never
          # used its result, so they no longer pay for the query.
          existing_feature = dataset.features.find_by(name: config["name"])

          # Features can be added during update, unlike columns
          return dataset.features.create!(config) unless existing_feature

          existing_feature.update!(config)
          existing_feature
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # Rejects feature configurations containing keys outside permitted_keys.
      #
      # @param config [Hash]
      # @param idx [Integer] position of the feature, used in the error message
      # @return [Hash] the same configuration
      # @raise [ArgumentError] when unknown keys are present
      def self.validate(config, idx)
        extra_keys = config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid keys in feature config at index #{idx}: #{extra_keys.join(", ")}" unless extra_keys.empty?

        config
      end
    end
  end
end
|
@@ -0,0 +1,136 @@
|
|
1
|
+
module EasyML
  module Import
    # Creates or updates an EasyML::Model (optionally with its dataset,
    # retraining job and weights) from an exported configuration.
    class Model
      # Keys allowed in the "model" section: EasyML::Model columns except the
      # export's UNCONFIGURABLE_COLUMNS, plus :weights, the model's
      # configuration attributes and the nested sections. Memoized on the class.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Model.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Model::UNCONFIGURABLE_COLUMNS.map(&:to_sym) +
                            [:weights] +
                            EasyML::Model.configuration_attributes.map(&:to_sym) +
                            [:dataset, :splitter, :retraining_job]
      end

      # Imports a model from a configuration.
      #
      # @param json_config [String, Hash] JSON text or hash with a single top-level "model" entry
      # @param action [Symbol] :create or :update
      # @param model [EasyML::Model, nil] target model, required for :update
      # @param include_dataset [Boolean] whether the nested dataset section is imported too
      # @param dataset [EasyML::Dataset, nil] dataset to attach when the config's dataset is not imported
      # @return [EasyML::Model]
      # @raise [ArgumentError] on missing/invalid arguments or an invalid configuration
      def self.from_config(json_config, action: nil, model: nil, include_dataset: true, dataset: nil)
        raise ArgumentError, "Action must be specified" unless action.present?
        raise ArgumentError, "Target model must be specified" if action == :update && model.nil?
        raise ArgumentError, "Dataset must be specified when creating a model" if action == :create && !include_dataset && dataset.nil?

        config = json_config.is_a?(String) ? JSON.parse(json_config) : json_config
        config = config.deep_dup.with_indifferent_access

        # Validate the configuration
        validate(config)
        model_config = config["model"]

        # Config variables would skip custom setters, so better to manually merge
        configuration = model_config.delete("configuration")
        model_config.merge!(configuration) if configuration.present?

        case action
        when :create
          create_model(model_config, include_dataset: include_dataset, dataset: dataset)
        when :update
          update_model(model, model_config, include_dataset: include_dataset)
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # NOTE: the original file placed a bare `private` here, which does not
      # affect `def self.` methods; the pre-existing methods below therefore
      # remain public, as they effectively always were.

      # Builds and saves a new model. When the configured name is already
      # taken, a "(Revision N)" suffix is appended instead of failing.
      #
      # @return [EasyML::Model]
      def self.create_model(model_config, include_dataset:, dataset:)
        # Handle dataset if included
        model_dataset = if include_dataset && model_config["dataset"].present?
            dataset_config = { "dataset" => model_config.delete("dataset") }
            EasyML::Import::Dataset.from_config(dataset_config, action: :create)
          else
            dataset
          end

        model = EasyML::Model.new(model_config.except("weights", "dataset", "retraining_job"))
        model.dataset = model_dataset

        model_name = model_config["name"]
        model.name = generate_unique_name(model_name) if EasyML::Model.exists?(name: model_name)
        model.save!

        apply_retraining_job(model, model_config["retraining_job"])
        apply_weights(model, model_config["weights"])

        model
      end

      # Updates an existing model in place; its name is preserved.
      #
      # @return [EasyML::Model]
      def self.update_model(model, model_config, include_dataset:)
        # Update dataset if included
        if include_dataset && model_config["dataset"].present?
          dataset_config = { "dataset" => model_config.delete("dataset") }
          EasyML::Import::Dataset.from_config(dataset_config, action: :update, dataset: model.dataset)
        end

        # Update model attributes except name (preserve original name)
        model.update!(model_config.except("name", "weights", "dataset", "retraining_job"))

        apply_retraining_job(model, model_config["retraining_job"])
        apply_weights(model, model_config["weights"])

        model
      end

      # Checks the root keys, the model keys, and delegates nested sections to
      # their own importers. Operates on a copy of the input.
      #
      # @param json_config [String, Hash]
      # @return [Hash] the validated (indifferent-access) copy
      # @raise [ArgumentError] on unknown keys or a missing "model" section
      def self.validate(json_config)
        config = json_config.is_a?(String) ? JSON.parse(json_config) : json_config
        config = config.deep_dup.with_indifferent_access

        # Validate root keys: must have only "model"
        extra_keys = config.keys.map(&:to_sym) - [:model]
        raise ArgumentError, "Invalid root keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        model_config = config[:model]
        # Without this guard a missing section surfaced as NoMethodError on nil.
        raise ArgumentError, "Configuration is missing the \"model\" section" unless model_config.is_a?(Hash)

        # Validate that model_config does not contain keys that are unconfigurable
        extra_keys = model_config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid model keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        # Delegate nested validations to individual importers
        if model_config["dataset"].present?
          model_config["dataset"] = EasyML::Import::Dataset.validate(model_config["dataset"])
        end

        if model_config["retraining_job"].present?
          model_config["retraining_job"] = EasyML::Import::RetrainingJob.validate(model_config["retraining_job"])
        end

        config
      end

      # Returns "<base_name> (Revision N)" where N is one greater than the
      # highest revision already stored for that base name (1 when none exist).
      #
      # @param base_name [String]
      # @return [String]
      def self.generate_unique_name(base_name)
        # Escape LIKE wildcards so a "%" or "_" in the name matches literally.
        pattern = "#{EasyML::Model.sanitize_sql_like(base_name)} (Revision %)"

        revision = EasyML::Model.where("name LIKE ?", pattern)
                                .pluck(:name)
                                # Anchor at the end: the base name may itself contain "(Revision N)".
                                .map { |name| name[/\(Revision (\d+)\)\z/, 1] }
                                .compact
                                .map(&:to_i)
                                .max || 0

        "#{base_name} (Revision #{revision + 1})"
      end

      # Applies a retraining-job section to the model, if one was supplied.
      def self.apply_retraining_job(model, retraining_job_config)
        return unless retraining_job_config.present?

        model.retraining_job = EasyML::RetrainingJob.from_config(retraining_job_config, model)
        model.save!
        model.reload
      end
      private_class_method :apply_retraining_job

      # Stores imported weights on the model and runs its import step.
      def self.apply_weights(model, weights)
        return unless weights.present?

        model.update!(weights: weights)
        model.import
      end
      private_class_method :apply_weights
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module EasyML
  module Import
    # Applies an exported retraining-job configuration to a model's
    # retraining job and validates such configurations.
    class RetrainingJob
      class << self
        # Column names of EasyML::RetrainingJob minus the export's
        # UNCONFIGURABLE_COLUMNS, as symbols. Memoized on the class.
        def permitted_keys
          @permitted_keys ||= begin
            column_names = EasyML::RetrainingJob.columns.map(&:name).map(&:to_sym)
            locked_names = EasyML::Export::RetrainingJob::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
            column_names - locked_names
          end
        end

        # Updates the job returned by model.get_retraining_job with the given
        # attributes and returns that job.
        def from_config(config, model)
          model.get_retraining_job.tap { |job| job.update!(config) }
        end

        # Returns nil for a blank config, the config itself when valid, and
        # raises ArgumentError for a non-hash or for unknown keys.
        def validate(config)
          return nil unless config.present?
          raise ArgumentError, "Retraining job configuration must be a hash" unless config.is_a?(Hash)

          unknown = config.keys.map(&:to_sym) - permitted_keys
          return config if unknown.empty?

          raise ArgumentError, "Invalid retraining job keys: #{unknown.join(", ")}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module EasyML
  module Import
    # Creates or updates a dataset's splitter from exported configuration and
    # validates splitter configurations.
    class Splitter
      class << self
        # Column names of EasyML::Splitter minus the export's
        # UNCONFIGURABLE_COLUMNS, as symbols. Memoized on the class.
        def permitted_keys
          @permitted_keys ||= begin
            column_names = EasyML::Splitter.columns.map(&:name).map(&:to_sym)
            locked_names = EasyML::Export::Splitter::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
            column_names - locked_names
          end
        end

        # Returns nil for a blank config. Otherwise updates the dataset's
        # current splitter, or creates one when the dataset has none, and
        # returns the splitter.
        def from_config(config, dataset)
          return nil unless config.present?
          return dataset.create_splitter!(config) unless dataset.splitter.present?

          dataset.splitter.update!(config)
          dataset.splitter
        end

        # Returns nil for a blank config, the config itself when valid, and
        # raises ArgumentError for a non-hash or for unknown keys.
        def validate(config)
          return nil unless config.present?
          raise ArgumentError, "Splitter configuration must be a hash" unless config.is_a?(Hash)

          unknown = config.keys.map(&:to_sym) - permitted_keys
          return config if unknown.empty?

          raise ArgumentError, "Invalid splitter keys: #{unknown.join(", ")}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_lineages
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# column_id :bigint not null
|
7
|
+
# key :string not null
|
8
|
+
# description :string
|
9
|
+
# occurred_at :datetime
|
10
|
+
# created_at :datetime not null
|
11
|
+
# updated_at :datetime not null
|
12
|
+
#
|
13
|
+
module EasyML
  # One lineage entry (key, description, occurred_at) recorded for a column.
  class Lineage < ActiveRecord::Base
    belongs_to :column

    class << self
      # Builds the lineage records for a column without saving them.
      #
      # Entries computed by EasyML::Column::Lineage that have no stored row
      # become new (unsaved) records; stored rows whose key is still present
      # get their occurred_at/description reassigned. Stored rows whose key is
      # no longer computed are left untouched and omitted from the result
      # (previously they raised NoMethodError on nil).
      #
      # @param column [EasyML::Column]
      # @return [Array<EasyML::Lineage>] new records first, then updated existing ones
      def learn(column)
        # Local variable instead of a class-level @lineage, which was shared
        # between calls.
        learned = EasyML::Column::Lineage.new(column).lineage

        # Load the stored rows once rather than issuing one exists? query per
        # learned entry.
        existing_by_key = where(column_id: column.id).each_with_object({}) do |record, by_key|
          by_key[record.key.to_s] = record
        end

        missing_lineage = []
        updated_lineage = []

        learned.each do |entry|
          record = existing_by_key[entry[:key].to_s]

          if record
            record.assign_attributes(
              occurred_at: entry[:occurred_at],
              description: entry[:description],
            )
            # Collect the record itself; assign_attributes' return value is
            # not the record, so mapping over it lost the updated rows.
            updated_lineage << record
          else
            missing_lineage << new(
              column_id: column.id,
              key: entry[:key],
              occurred_at: entry[:occurred_at],
              description: entry[:description],
            )
          end
        end

        missing_lineage.concat(updated_lineage)
      end
    end
  end
end
|
data/app/models/easy_ml/model.rb
CHANGED
@@ -45,7 +45,7 @@ module EasyML
|
|
45
45
|
MODEL_NAMES = MODEL_OPTIONS.keys.freeze
|
46
46
|
MODEL_CONSTANTS = MODEL_OPTIONS.values.map(&:constantize)
|
47
47
|
|
48
|
-
add_configuration_attributes :task, :objective, :hyperparameters, :
|
48
|
+
add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics
|
49
49
|
MODEL_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
|
50
50
|
add_configuration_attributes attribute
|
51
51
|
end
|
@@ -53,10 +53,10 @@ module EasyML
|
|
53
53
|
belongs_to :dataset
|
54
54
|
belongs_to :model_file, class_name: "EasyML::ModelFile", foreign_key: "model_file_id", optional: true
|
55
55
|
|
56
|
-
has_one :retraining_job, class_name: "EasyML::RetrainingJob"
|
56
|
+
has_one :retraining_job, class_name: "EasyML::RetrainingJob", dependent: :destroy
|
57
57
|
accepts_nested_attributes_for :retraining_job
|
58
|
-
has_many :retraining_runs, class_name: "EasyML::RetrainingRun"
|
59
|
-
has_many :deploys, class_name: "EasyML::Deploy"
|
58
|
+
has_many :retraining_runs, class_name: "EasyML::RetrainingRun", dependent: :destroy
|
59
|
+
has_many :deploys, class_name: "EasyML::Deploy", dependent: :destroy
|
60
60
|
|
61
61
|
scope :deployed, -> { EasyML::ModelHistory.deployed }
|
62
62
|
|
@@ -110,6 +110,13 @@ module EasyML
|
|
110
110
|
is_training == true
|
111
111
|
end
|
112
112
|
|
113
|
+
# Stops an in-progress training: kills the training job through
# EasyML::Reaper, resets the model to not-training/ready, marks the most
# recent retraining run as aborted, and releases the lock via unlock!.
def abort!
  EasyML::Reaper.kill(EasyML::TrainingJob, id)
  update(is_training: false, status: :ready)
  # There may be no run yet; without the safe navigation this raised on nil
  # and skipped unlock!, leaving the model locked.
  get_retraining_job.retraining_runs.last&.update(status: :aborted)
  unlock!
end
|
119
|
+
|
113
120
|
def train(async: true)
|
114
121
|
pending_run # Ensure we update the pending job before enqueuing in background so UI updates properly
|
115
122
|
update(is_training: true)
|
@@ -120,26 +127,41 @@ module EasyML
|
|
120
127
|
end
|
121
128
|
end
|
122
129
|
|
130
|
+
def trained?
|
131
|
+
retraining_runs.where(status: :success).exists?
|
132
|
+
end
|
133
|
+
|
134
|
+
def deployed?
|
135
|
+
inference_version.present?
|
136
|
+
end
|
137
|
+
|
138
|
+
# Hands the given weights to the adapter for this model's file, then saves
# the model file. Raises ArgumentError when the model has no model_type.
def weights=(weights)
  if !model_type.present?
    raise ArgumentError, "Cannot set weights on model without type"
  end

  adapter.set_weights(get_model_file, weights)
  save_model_file
end
|
145
|
+
|
146
|
+
def weights
|
147
|
+
adapter.weights(get_model_file)
|
148
|
+
end
|
149
|
+
|
123
150
|
def get_retraining_job
|
124
|
-
if retraining_job
|
125
|
-
self.evaluator = retraining_job.evaluator
|
126
|
-
evaluator = self.evaluator.symbolize_keys
|
127
|
-
else
|
128
|
-
default_eval = Core::ModelEvaluator.default_evaluator(task)
|
129
|
-
self.evaluator = default_eval
|
130
|
-
evaluator = default_eval
|
131
|
-
end
|
151
|
+
return retraining_job if retraining_job.present?
|
132
152
|
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
153
|
+
evaluator = Core::ModelEvaluator.default_evaluator(task).symbolize_keys
|
154
|
+
|
155
|
+
method = persisted? ? :create_retraining_job : :build_retraining_job
|
156
|
+
|
157
|
+
send(method,
|
158
|
+
model: self,
|
159
|
+
active: false,
|
160
|
+
metric: evaluator[:metric],
|
161
|
+
direction: evaluator[:direction],
|
162
|
+
threshold: evaluator[:threshold],
|
163
|
+
frequency: "month",
|
164
|
+
at: { hour: 0, day_of_month: 1 })
|
143
165
|
end
|
144
166
|
|
145
167
|
def pending_run
|
@@ -147,6 +169,15 @@ module EasyML
|
|
147
169
|
job.retraining_runs.find_or_create_by(status: "pending", model: self)
|
148
170
|
end
|
149
171
|
|
172
|
+
# Inside the model lock, wraps the pending run's training step around a block
# that yields this model and its hyperparameters as a hash.
def import
  lock_model do
    pending_run.wrap_training { [self, hyperparameters.to_h] }
  end
end
|
180
|
+
|
150
181
|
def actually_train(&progress_block)
|
151
182
|
lock_model do
|
152
183
|
run = pending_run
|
@@ -186,6 +217,20 @@ module EasyML
|
|
186
217
|
"training:#{self.name}:#{self.id}"
|
187
218
|
end
|
188
219
|
|
220
|
+
def hyperparameters=(hyperparameters)
|
221
|
+
return unless model_type.present?
|
222
|
+
|
223
|
+
@hypers = adapter.build_hyperparameters(hyperparameters)
|
224
|
+
end
|
225
|
+
|
226
|
+
def hyperparameters
|
227
|
+
@hypers ||= adapter.build_hyperparameters(@hyperparameters)
|
228
|
+
end
|
229
|
+
|
230
|
+
def callbacks
|
231
|
+
@cbs ||= adapter.build_callbacks(@callbacks)
|
232
|
+
end
|
233
|
+
|
189
234
|
def hyperparameter_search(&progress_block)
|
190
235
|
tuner = retraining_job.tuner_config.symbolize_keys
|
191
236
|
extra_params = {
|
@@ -232,16 +277,11 @@ module EasyML
|
|
232
277
|
alias_method :latest_version, :inference_version
|
233
278
|
alias_method :deployed, :inference_version
|
234
279
|
|
235
|
-
def hyperparameters
|
236
|
-
@hypers ||= adapter.build_hyperparameters(@hyperparameters)
|
237
|
-
end
|
238
|
-
|
239
|
-
def callbacks
|
240
|
-
@cbs ||= adapter.build_callbacks(@callbacks)
|
241
|
-
end
|
242
|
-
|
243
280
|
def predict(xs)
|
244
281
|
load_model!
|
282
|
+
unless xs.is_a?(XGBoost::DMatrix)
|
283
|
+
xs = dataset.normalize(xs, inference: true)
|
284
|
+
end
|
245
285
|
adapter.predict(xs)
|
246
286
|
end
|
247
287
|
|
@@ -309,7 +349,7 @@ module EasyML
|
|
309
349
|
def fit(tuning: false, x_train: nil, y_train: nil, x_valid: nil, y_valid: nil, &progress_block)
|
310
350
|
return fit_in_batches(**batch_args.merge!(tuning: tuning), &progress_block) if fit_in_batches?
|
311
351
|
|
312
|
-
dataset.refresh
|
352
|
+
dataset.refresh if dataset.reload.needs_refresh?
|
313
353
|
adapter.fit(tuning: tuning, x_train: x_train, y_train: y_train, x_valid: x_valid, y_valid: y_valid, &progress_block)
|
314
354
|
end
|
315
355
|
|
@@ -354,6 +394,10 @@ module EasyML
|
|
354
394
|
dataset.decode_labels(ys, col: col)
|
355
395
|
end
|
356
396
|
|
397
|
+
def evaluator
|
398
|
+
get_retraining_job&.evaluator || default_evaluator
|
399
|
+
end
|
400
|
+
|
357
401
|
def evaluate(y_pred: nil, y_true: nil, x_true: nil, evaluator: nil, dataset: nil)
|
358
402
|
evaluator ||= self.evaluator
|
359
403
|
if y_pred.nil?
|
@@ -366,10 +410,6 @@ module EasyML
|
|
366
410
|
EasyML::Core::ModelEvaluator.evaluate(model: self, y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset, evaluator: evaluator)
|
367
411
|
end
|
368
412
|
|
369
|
-
def evaluator
|
370
|
-
instance_variable_get(:@evaluator) || default_evaluator
|
371
|
-
end
|
372
|
-
|
373
413
|
def default_evaluator
|
374
414
|
return nil unless task.present?
|
375
415
|
|
@@ -381,7 +421,7 @@ module EasyML
|
|
381
421
|
end
|
382
422
|
|
383
423
|
def evals
|
384
|
-
last_run&.metrics || {}
|
424
|
+
(last_run&.metrics || {}).with_indifferent_access
|
385
425
|
end
|
386
426
|
|
387
427
|
def metric_accessor(metric)
|
@@ -536,6 +576,28 @@ module EasyML
|
|
536
576
|
end
|
537
577
|
end
|
538
578
|
|
579
|
+
UNCONFIGURABLE_COLUMNS = %w(
|
580
|
+
id
|
581
|
+
dataset_id
|
582
|
+
model_file_id
|
583
|
+
root_dir
|
584
|
+
file
|
585
|
+
sha
|
586
|
+
last_trained_at
|
587
|
+
is_training
|
588
|
+
created_at
|
589
|
+
updated_at
|
590
|
+
slug
|
591
|
+
)
|
592
|
+
|
593
|
+
def to_config(include_dataset: false)
|
594
|
+
EasyML::Export::Model.to_config(self, include_dataset: include_dataset)
|
595
|
+
end
|
596
|
+
|
597
|
+
def self.from_config(json_config, action: nil, model: nil, include_dataset: true, dataset: nil)
|
598
|
+
EasyML::Import::Model.from_config(json_config, action: action, model: model, include_dataset: include_dataset, dataset: dataset)
|
599
|
+
end
|
600
|
+
|
539
601
|
private
|
540
602
|
|
541
603
|
def default_evaluation_inputs
|
@@ -615,6 +677,8 @@ module EasyML
|
|
615
677
|
end
|
616
678
|
|
617
679
|
def validate_metrics_allowed
|
680
|
+
set_defaults if metrics.nil? || metrics.empty?
|
681
|
+
|
618
682
|
unknown_metrics = metrics.select { |metric| allowed_metrics.exclude?(metric) }
|
619
683
|
return unless unknown_metrics.any?
|
620
684
|
|
@@ -624,7 +688,7 @@ module EasyML
|
|
624
688
|
|
625
689
|
def set_slug
|
626
690
|
if slug.nil? && name.present?
|
627
|
-
self.slug = name.gsub(/\s/, "_").downcase
|
691
|
+
self.slug = name.gsub(/\s/, "_").gsub(/[^a-zA-Z0-9_]/, "").downcase
|
628
692
|
end
|
629
693
|
end
|
630
694
|
end
|