easy_ml 0.1.4 → 0.2.0.pre.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -5
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,278 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_retraining_jobs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# model_id :bigint
|
7
|
+
# frequency :string not null
|
8
|
+
# at :json not null
|
9
|
+
# evaluator :json
|
10
|
+
# tuning_enabled :boolean default(FALSE)
|
11
|
+
# tuner_config :json
|
12
|
+
# tuning_frequency :string
|
13
|
+
# last_tuning_at :datetime
|
14
|
+
# active :boolean default(TRUE)
|
15
|
+
# status :string default("pending")
|
16
|
+
# last_run_at :datetime
|
17
|
+
# metric :string not null
|
18
|
+
# direction :string not null
|
19
|
+
# threshold :float not null
|
20
|
+
# auto_deploy :boolean default(FALSE)
|
21
|
+
# batch_mode :boolean
|
22
|
+
# batch_size :integer
|
23
|
+
# batch_overlap :integer
|
24
|
+
# batch_key :string
|
25
|
+
# created_at :datetime not null
|
26
|
+
# updated_at :datetime not null
|
27
|
+
#
|
28
|
+
module EasyML
|
29
|
+
class RetrainingJob < ActiveRecord::Base
|
30
|
+
self.table_name = "easy_ml_retraining_jobs"
|
31
|
+
|
32
|
+
has_many :retraining_runs, class_name: "EasyML::RetrainingRun", dependent: :destroy
|
33
|
+
has_many :tuner_jobs, through: :retraining_runs
|
34
|
+
|
35
|
+
belongs_to :model, class_name: "EasyML::Model", inverse_of: :retraining_job
|
36
|
+
validates :model, presence: true,
|
37
|
+
uniqueness: { message: "already has a retraining job" }
|
38
|
+
|
39
|
+
VALID_FREQUENCIES = %w[day week month always].freeze
|
40
|
+
FREQUENCY_TYPES = [
|
41
|
+
{
|
42
|
+
value: "day",
|
43
|
+
label: "Daily",
|
44
|
+
description: "Run once every day",
|
45
|
+
},
|
46
|
+
{
|
47
|
+
value: "week",
|
48
|
+
label: "Weekly",
|
49
|
+
description: "Run once every week",
|
50
|
+
},
|
51
|
+
{
|
52
|
+
value: "month",
|
53
|
+
label: "Monthly",
|
54
|
+
description: "Run once every month",
|
55
|
+
},
|
56
|
+
].freeze
|
57
|
+
validates :frequency, presence: true, inclusion: { in: VALID_FREQUENCIES }
|
58
|
+
validates :metric, presence: true
|
59
|
+
validate :validate_metrics_allowed
|
60
|
+
validates :status, presence: true
|
61
|
+
validates :at, presence: true
|
62
|
+
validates :threshold, presence: true
|
63
|
+
validates :tuning_frequency, inclusion: {
|
64
|
+
in: VALID_FREQUENCIES,
|
65
|
+
allow_nil: true,
|
66
|
+
}
|
67
|
+
validate :evaluator_must_be_valid
|
68
|
+
validate :validate_at_format
|
69
|
+
after_initialize :set_direction, unless: :persisted?
|
70
|
+
|
71
|
+
scope :active, -> { joins(:model).where(active: true) }
|
72
|
+
|
73
|
+
# Returns the active retraining jobs whose `should_run?` is currently true.
#
# The `active` scope is evaluated and filtered in Ruby (not SQL), so the
# result is an Array rather than a relation.
def self.current
  active.select(&:should_run?)
end
|
78
|
+
|
79
|
+
# Exposes the selectable frequency options under the :frequency key.
#
# NOTE(review): defining `constants` on the class replaces Ruby's built-in
# `Module#constants` for this class; callers expecting the list of constant
# names will get this Hash instead.
def self.constants
  { frequency: FREQUENCY_TYPES }
end
|
84
|
+
|
85
|
+
# Returns the stored tuner configuration with string keys, always carrying
# the job's metric under "objective".
#
# A new Hash is built on every call. The previous implementation used
# `merge!`, which wrote an :objective symbol key into the stored attribute
# hash each time the reader was called.
#
# @return [Hash{String => Object}]
def tuner_config
  stored = read_attribute(:tuner_config) || {}
  stored.transform_keys(&:to_s).merge("objective" => metric)
end
|
88
|
+
|
89
|
+
# Human-readable label for the job's frequency, or "Manually" when the job
# is inactive.
#
# Returns nil (instead of raising NoMethodError) when the frequency has no
# entry in FREQUENCY_TYPES, e.g. "always".
#
# NOTE(review): a second `formatted_frequency` is defined further down in
# this class; being defined later, it replaces this one when the class loads.
def formatted_frequency
  return "Manually" unless active

  FREQUENCY_TYPES.find { |type| type[:value] == frequency }&.dig(:label)
end
|
96
|
+
|
97
|
+
# Whether this job is due to run now.
#
# A job that has never run is always due. Otherwise the job is due when it
# has not already run in the current day/week/month and the current time
# matches the schedule stored in `at` ("hour", plus "day_of_week" or
# "day_of_month" where relevant).
#
# Schedule values are coerced with `to_i`, matching `validate_at_format`,
# so values stored as strings (e.g. "3") still match the Integer clock
# fields; previously such a job never ran after its first run.
#
# NOTE(review): "always" is a valid frequency but falls through to false
# here once the job has run; "day_of_month" values beyond the length of a
# short month never match in that month. Confirm both are intended.
def should_run?
  return true if last_run_at.nil?

  now = Time.current
  last_run_on = last_run_at.to_date

  case frequency
  when "day"
    return false if last_run_on == now.to_date

    now.hour == at["hour"].to_i
  when "week"
    return false if last_run_on >= now.beginning_of_week

    now.wday == at["day_of_week"].to_i && now.hour == at["hour"].to_i
  when "month"
    return false if last_run_on >= now.beginning_of_month

    now.day == at["day_of_month"].to_i && now.hour == at["hour"].to_i
  else
    false
  end
end
|
117
|
+
|
118
|
+
# Whether hyperparameter tuning should happen on the next run.
#
# Tuning must be enabled and have a frequency. If it has never happened it
# is due immediately; otherwise it is due when the last tuning precedes the
# current period and the clock matches the scheduled hour (weekly tuning
# additionally requires wday 0, monthly requires day 1).
#
# The scheduled hour is coerced with `to_i` so string values match, and the
# method now returns false rather than nil for an unrecognised frequency.
#
# @return [Boolean]
def should_tune?
  return false unless tuning_enabled
  return false unless tuning_frequency.present?
  return true if last_tuning_at.nil?

  now = Time.current

  case tuning_frequency
  when "always"
    true
  when "hour"
    last_tuning_at < now.beginning_of_hour
  when "day"
    now.hour == at["hour"].to_i && last_tuning_at < now.beginning_of_day
  when "week"
    now.hour == at["hour"].to_i && now.wday == 0 && last_tuning_at < now.beginning_of_week
  when "month"
    now.hour == at["hour"].to_i && now.day == 1 && last_tuning_at < now.beginning_of_month
  else
    false
  end
end
|
139
|
+
|
140
|
+
# Assigns the metric and then refreshes `direction` via `set_direction`, so
# the two stay in step whenever the metric changes.
def metric=(metric)
  self[:metric] = metric
  set_direction
end
|
144
|
+
|
145
|
+
# Builds the evaluator configuration from this job's own columns.
#
# The threshold is reported as :max when direction is "maximize" and as
# :min when it is "minimize"; nil-valued entries are dropped.
#
# @return [Hash{Symbol => Object}]
def evaluator
  bound_key = { "maximize" => :max, "minimize" => :min }[direction]

  config = { metric: metric }
  config[bound_key] = threshold if bound_key
  config[:direction] = direction
  config.compact
end
|
153
|
+
|
154
|
+
# Human-readable label for the job's frequency ("Monthly", "Weekly",
# "Daily"); nil for any other value.
#
# This is the definition in effect: it appears after an earlier
# `formatted_frequency` in the same class and therefore replaces it.
# A nil frequency now yields nil instead of raising NoMethodError.
def formatted_frequency
  {
    month: "Monthly",
    week: "Weekly",
    day: "Daily",
  }[frequency&.to_sym]
end
|
161
|
+
|
162
|
+
private
|
163
|
+
|
164
|
+
# Instantiates the evaluator registered for `metric`.
#
# Returns nil when no metric is set, or when
# EasyML::Core::ModelEvaluator.get does not return a class for it. The
# latter used to raise NoMethodError from inside `metric=`, before
# `validate_metrics_allowed` had a chance to report the unknown metric.
def metric_class
  return nil unless metric

  EasyML::Core::ModelEvaluator.get(metric)&.new
end
|
169
|
+
|
170
|
+
# Copies the evaluator's `direction` into the :direction attribute.
#
# Does nothing when no evaluator can be resolved for the current metric.
# The evaluator is resolved once and reused, rather than being looked up
# and instantiated a second time just to read its direction.
def set_direction
  evaluator_instance = metric_class
  return unless evaluator_instance.present?

  write_attribute(:direction, evaluator_instance.direction)
end
|
175
|
+
|
176
|
+
# Validates and normalises the `at` schedule hash for the job's frequency.
#
# Steps:
#   1. `at` must be a Hash.
#   2. Nothing further is checked if the frequency itself is invalid.
#   3. Keys required by the frequency but missing from `at` are filled with
#      defaults (hour 0, day_of_week 0 = Sunday, day_of_month 1).
#   4. Keys that do not apply to the frequency are removed.
#   5. Remaining values are range-checked after `to_i`.
#
# Frequencies without a schedule table entry ("always") use an empty key
# list. Previously the `case` produced nil for them and `nil - [...]`
# raised NoMethodError during validation.
def validate_at_format
  return errors.add(:at, "must be a hash") unless at.is_a?(Hash)
  return if VALID_FREQUENCIES.exclude?(frequency.to_s)

  # The same list serves as both the required and the allowed keys.
  schedule_keys = {
    "day" => ["hour"],
    "week" => ["hour", "day_of_week"],
    "month" => ["hour", "day_of_month"],
  }.fetch(frequency.to_s, [])

  defaults = {
    "hour" => 0,
    "day_of_week" => 0, # Sunday
    "day_of_month" => 1,
  }

  (schedule_keys - at.keys.map(&:to_s)).each do |key|
    at[key] = defaults[key]
  end

  return if at.blank?

  self.at = at.select { |key, _value| schedule_keys.include?(key.to_s) }.to_h

  if at["hour"].present?
    errors.add(:at, "hour must be between 0 and 23") unless (0..23).include?(at["hour"].to_i)
  end

  if at["day_of_week"].present?
    errors.add(:at, "day_of_week must be between 0 and 6") unless (0..6).include?(at["day_of_week"].to_i)
  end

  if at["day_of_month"].present?
    errors.add(:at, "day_of_month must be between 1 and 31") unless (1..31).include?(at["day_of_month"].to_i)
  end
end
|
225
|
+
|
226
|
+
# Start of the current period for the job's frequency: the beginning of the
# current hour, day, week or month. Returns nil for any other frequency.
def current_period_start
  boundary = {
    "hour" => :beginning_of_hour,
    "day" => :beginning_of_day,
    "week" => :beginning_of_week,
    "month" => :beginning_of_month,
  }[frequency]

  boundary && Time.current.public_send(boundary)
end
|
239
|
+
|
240
|
+
# Validates the hash returned by `evaluator`.
#
# Adds an error on :evaluator when:
#   * the metric, or both of min/max, are missing;
#   * min or max is present but not Numeric;
#   * EasyML::Core::ModelEvaluator.get returns nothing for the metric;
#   * the resolved evaluator does not respond to `evaluate`.
#
# A nil or blank evaluator is skipped.
def evaluator_must_be_valid
  raw_config = evaluator
  return if raw_config.nil? || raw_config.blank?

  config = raw_config.symbolize_keys
  lower = config[:min]
  upper = config[:max]

  unless config[:metric].present? && (lower.present? || upper.present?)
    errors.add(:evaluator, "must specify metric and either min or max value")
    return
  end

  errors.add(:evaluator, "min value must be numeric") if lower.present? && !lower.is_a?(Numeric)
  errors.add(:evaluator, "max value must be numeric") if upper.present? && !upper.is_a?(Numeric)

  evaluator_class = EasyML::Core::ModelEvaluator.get(config[:metric].to_sym)
  unless evaluator_class.present?
    allowed_metrics = EasyML::Core::ModelEvaluator.metrics
    errors.add(:evaluator, "contains invalid metric. Allowed metrics are #{allowed_metrics}")
    return
  end

  return if evaluator_class.new.respond_to?(:evaluate)

  errors.add(:evaluator, "evaluator must implement evaluate method")
end
|
268
|
+
|
269
|
+
# Adds an error when `metric` is set but is not one of
# EasyML::Core::ModelEvaluator.metrics.
#
# NOTE(review): the error is recorded under :metrics, although the
# attribute being checked is `metric` — confirm the key is intentional.
def validate_metrics_allowed
  return unless metric
  return if EasyML::Core::ModelEvaluator.metrics.include?(metric.to_sym)

  errors.add(:metrics,
             "don't know how to handle metric #{metric}, use EasyML::Core::ModelEvaluator.register(:name, Evaluator, :regression|:classification)")
end
|
277
|
+
end
|
278
|
+
end
|
@@ -0,0 +1,184 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_retraining_runs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# model_id :bigint
|
7
|
+
# model_history_id :bigint
|
8
|
+
# model_file_id :bigint
|
9
|
+
# retraining_job_id :bigint not null
|
10
|
+
# tuner_job_id :bigint
|
11
|
+
# status :string default("pending")
|
12
|
+
# metric_value :float
|
13
|
+
# threshold :float
|
14
|
+
# trigger :string default("manual")
|
15
|
+
# threshold_direction :string
|
16
|
+
# started_at :datetime
|
17
|
+
# completed_at :datetime
|
18
|
+
# error_message :text
|
19
|
+
# metadata :jsonb
|
20
|
+
# metrics :jsonb
|
21
|
+
# best_params :jsonb
|
22
|
+
# wandb_url :string
|
23
|
+
# snapshot_id :string
|
24
|
+
# deployable :boolean
|
25
|
+
# is_deploying :boolean
|
26
|
+
# deployed :boolean
|
27
|
+
# deploy_id :bigint
|
28
|
+
# created_at :datetime not null
|
29
|
+
# updated_at :datetime not null
|
30
|
+
#
|
31
|
+
module EasyML
|
32
|
+
class RetrainingRun < ActiveRecord::Base
|
33
|
+
self.table_name = "easy_ml_retraining_runs"
|
34
|
+
|
35
|
+
belongs_to :retraining_job
|
36
|
+
belongs_to :model, class_name: "EasyML::Model"
|
37
|
+
belongs_to :model_file, class_name: "EasyML::ModelFile", optional: true
|
38
|
+
has_many :events, as: :eventable, class_name: "EasyML::Event", dependent: :destroy
|
39
|
+
|
40
|
+
validates :status, presence: true, inclusion: { in: %w[pending running success failed deployed] }
|
41
|
+
|
42
|
+
scope :running, -> { where(status: "running") }
|
43
|
+
|
44
|
+
# Creates an EasyML::Deploy record for this run and its model, then starts
# the deploy.
#
# @param async [Boolean] passed straight through to `Deploy#deploy`
# @return whatever `Deploy#deploy` returns
def deploy(async: true)
  EasyML::Deploy
    .create!(model: model, retraining_run: self)
    .deploy(async: async)
end
|
52
|
+
|
53
|
+
# Runs the training block for a pending run and records the outcome.
#
# The block must return `[training_model, best_params]`. The method:
#   * marks the run as running and emits a "started" event;
#   * evaluates the trained model via `metric_results`;
#   * saves the model file when the resulting status is "success";
#   * stores results, metrics, tuner details and any failure reason;
#   * updates the retraining job's last_run_at (and last_tuning_at when the
#     block returned best_params);
#   * deploys when the run is deployable and the job has auto_deploy set.
#
# Any StandardError is recorded on the run, which is then marked "failed".
#
# @return [Boolean] true on completion; false if the run was not pending
#   or an error was rescued
def wrap_training(&block)
  return false unless pending?

  begin
    EasyML::Event.create_event(self, "started")
    update!(status: "running", started_at: Time.current)

    training_model, best_params = yield

    tuner = nil
    if best_params.present?
      # The most recent tuner job for this model is taken to be the one
      # that produced best_params.
      tuner = EasyML::TunerJob.where(model: training_model).order(id: :desc).first
    end

    results = metric_results(training_model)
    # "Model has not changed" does not count as a failure here.
    failed_reasons = training_model.cannot_deploy_reasons - ["Model has not changed"]

    # A run that merely misses the deploy threshold still counts as a
    # success; otherwise any remaining reason marks it failed.
    status = (results[:deployable] != false && failed_reasons.any?) ? "failed" : "success"

    training_model.save_model_file if status == "success"

    update!(
      results.merge!(
        status: status,
        completed_at: failed_reasons.none? ? Time.current : nil,
        error_message: failed_reasons.any? ? failed_reasons.first : nil,
        model: training_model,
        metrics: training_model.evaluate,
        best_params: best_params,
        tuner_job_id: tuner&.id,
        metadata: tuner&.metadata,
        wandb_url: tuner&.wandb_url,
        model_file_id: status == "success" ? training_model.model_file_id : nil,
      )
    )

    if failed_reasons.any?
      EasyML::Event.handle_error(self, failed_reasons.first)
    else
      EasyML::Event.create_event(self, status)
    end

    job_updates = { last_run_at: Time.current }
    job_updates[:last_tuning_at] = Time.current if best_params.present?
    retraining_job.update!(job_updates)

    reload
    if deployable? && retraining_job.auto_deploy
      training_model.save_model_file
      training_model.reload
      auto_deploy = EasyML::Deploy.create!(
        retraining_run: self,
        model: training_model,
        model_file: training_model.model_file,
        trigger: trigger,
      )
      auto_deploy.deploy
    end

    true
  rescue => e
    EasyML::Event.handle_error(self, e)
    update!(
      status: "failed",
      completed_at: Time.current,
      error_message: e.message,
    )
    false
  end
end
|
121
|
+
|
122
|
+
# Status predicates: each is true when `status` equals the named state.

# True while the run is waiting to start.
def pending?
  "pending" == status
end

# True once the run has been deployed.
def deployed?
  "deployed" == status
end

# True when the run finished successfully.
def success?
  "success" == status
end

# True when the run failed.
def failed?
  "failed" == status
end

# True while the run is in progress.
def running?
  "running" == status
end
|
141
|
+
|
142
|
+
# Whether this run should tune hyperparameters: the retraining job must
# have a tuner configuration and must itself report that tuning is due.
def should_tune?
  job = retraining_job
  return false unless job.tuner_config.present?

  job.should_tune?
end
|
145
|
+
|
146
|
+
private
|
147
|
+
|
148
|
+
# Evaluates the trained model on the test split and compares the job's
# metric with its threshold.
#
# Always returns a Hash, because `wrap_training` reads
# `results[:deployable]` and merges further attributes into the result.
# When the job has no evaluator the Hash carries only :deployable; the
# previous bare boolean return would have raised NoMethodError in the
# caller.
#
# @param training_model the freshly trained model; must respond to
#   `deployable?`, `dataset` and `predict`
# @return [Hash] :deployable, plus :metric_value, :threshold and
#   :threshold_direction when an evaluator is configured
def metric_results(training_model)
  return { deployable: training_model.deployable? } unless retraining_job.evaluator.present?

  training_model.dataset.refresh
  evaluator = retraining_job.evaluator.symbolize_keys
  x_true, y_true = training_model.dataset.test(split_ys: true)
  y_pred = training_model.predict(x_true)

  metric = evaluator[:metric].to_sym
  metrics = EasyML::Core::ModelEvaluator.evaluate(
    model: training_model,
    y_pred: y_pred,
    y_true: y_true,
    evaluator: evaluator,
  )
  metric_value = metrics[metric]

  # A :min bound means lower is better; otherwise the :max bound applies
  # and higher is better.
  if evaluator[:min].present?
    threshold = evaluator[:min]
    threshold_direction = "minimize"
    deployable = metric_value < threshold
  else
    threshold = evaluator[:max]
    threshold_direction = "maximize"
    deployable = metric_value > threshold
  end

  {
    metric_value: metric_value,
    threshold: threshold,
    threshold_direction: threshold_direction,
    deployable: deployable,
  }
end
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_settings
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# configuration :json
|
7
|
+
# created_at :datetime not null
|
8
|
+
# updated_at :datetime not null
|
9
|
+
#
|
10
|
+
require_relative "concerns/configurable"
|
11
|
+
|
12
|
+
module EasyML
  # Application-wide EasyML settings, stored in the easy_ml_settings table.
  #
  # The individual settings are declared through
  # `add_configuration_attributes` from EasyML::Concerns::Configurable.
  # NOTE(review): presumably these are persisted inside the `configuration`
  # JSON column — confirm against the concern.
  class Settings < ActiveRecord::Base
    self.table_name = "easy_ml_settings"
    include EasyML::Concerns::Configurable

    add_configuration_attributes :storage,
                                 :s3_access_key_id, :s3_secret_access_key,
                                 :s3_bucket, :s3_region, :s3_prefix, :timezone,
                                 :wandb_api_key

    # Storage is optional, but when given must be one of the known backends.
    validates :storage, inclusion: { in: %w[file s3] }, if: -> { storage.present? }

    # Timezone choices exposed through `Settings.constants`. Frozen so the
    # shared list cannot be modified at runtime.
    TIMEZONES = [
      { value: "America/New_York", label: "Eastern Time" },
      { value: "America/Chicago", label: "Central Time" },
      { value: "America/Denver", label: "Mountain Time" },
      { value: "America/Los_Angeles", label: "Pacific Time" },
    ].freeze

    # Returns the option lists for this model, keyed by name.
    #
    # NOTE(review): this replaces Ruby's built-in `Module#constants` for
    # this class.
    def self.constants
      {
        TIMEZONES: TIMEZONES,
      }
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_splitters
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# splitter_type :string not null
|
7
|
+
# configuration :json
|
8
|
+
# dataset_id :bigint not null
|
9
|
+
# created_at :datetime not null
|
10
|
+
# updated_at :datetime not null
|
11
|
+
#
|
12
|
+
module EasyML
  # ActiveRecord model stored in the "easy_ml_splitters" table. A splitter
  # belongs to a dataset and records which splitting strategy to use
  # (+splitter_type+). The actual work is done by an adapter class looked up
  # in SPLITTER_OPTIONS; this model copies its configuration attributes onto
  # that adapter and forwards +split+ / +splits+ to it.
  class Splitter < ActiveRecord::Base
    self.table_name = "easy_ml_splitters"
    include Historiographer::Silent
    historiographer_mode :snapshot_only

    include EasyML::Concerns::Configurable

    # Maps every accepted +splitter_type+ value to the name of the adapter
    # class that implements it. Class names are kept as strings and
    # constantized on use.
    SPLITTER_OPTIONS = {
      "date" => "EasyML::Splitters::DateSplitter",
      "random" => "EasyML::Splitters::RandomSplitter",
      "predefined" => "EasyML::Splitters::PredefinedSplitter",
    }.freeze

    # Value / label / description triples describing each splitter type.
    # Exposed to callers through +Splitter.constants+.
    SPLITTER_TYPES = [
      {
        value: "date",
        label: "Date Splitter",
        description: "Split dataset based on date ranges for training, validation, and testing",
      },
      {
        value: "random",
        label: "Random Splitter",
        description: "Randomly split dataset into training, validation, and testing sets with configurable ratios",
      },
      {
        value: "predefined",
        label: "Predefined Splitter",
        description: "Split dataset using predefined file assignments for training, validation, and testing sets",
      },
    ].freeze

    belongs_to :dataset, class_name: "EasyML::Dataset"
    has_many :events, as: :eventable, class_name: "EasyML::Event", dependent: :destroy

    validates :splitter_type, presence: true
    validates :splitter_type, inclusion: { in: SPLITTER_OPTIONS.keys }

    SPLITTER_NAMES = SPLITTER_OPTIONS.keys.freeze
    SPLITTER_CONSTANTS = SPLITTER_OPTIONS.values.map(&:constantize).freeze

    # Register every configuration attribute declared by any adapter class on
    # this model, so the model can hold the settings of whichever adapter is
    # selected.
    SPLITTER_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
      add_configuration_attributes attribute
    end

    # Returns a hash with the splitter type descriptors and, per splitter
    # type, the adapter's +default_config+.
    #
    # NOTE(review): defining +self.constants+ shadows Ruby's built-in
    # +Module#constants+ for this class, so reflection that calls
    # +EasyML::Splitter.constants+ gets this hash instead of a list of
    # constant names. The name is kept because callers depend on it.
    #
    # @return [Hash] with keys +:SPLITTER_TYPES+ and +:DEFAULT_CONFIGS+
    def self.constants
      {
        SPLITTER_TYPES: SPLITTER_TYPES,
        DEFAULT_CONFIGS: SPLITTER_OPTIONS.transform_values { |klass| klass.constantize.default_config },
      }
    end

    # Forwards to the adapter's +split+ with the same argument and block.
    #
    # @param df the object to split; passed to the adapter unchanged
    # @return whatever the adapter's +split+ returns
    def split(df, &block)
      adapter.split(df, &block)
    end

    # Forwards to the adapter's +splits+.
    def splits
      adapter.splits
    end

    private

    # Builds (once per model instance) the adapter for +splitter_type+ and
    # copies each of the adapter's configuration attributes from this model
    # onto it.
    #
    # @raise [RuntimeError] when +splitter_type+ has no entry in SPLITTER_OPTIONS
    def adapter
      @adapter ||= begin
        adapter_name = SPLITTER_OPTIONS[splitter_type]
        raise "Don't know how to use splitter #{splitter_type}!" unless adapter_name.present?

        adapter_class = adapter_name.constantize
        attrs = adapter_class.configuration_attributes
        adapter_class.new(self).tap do |instance|
          attrs.each do |attr|
            instance.send("#{attr}=", send(attr))
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module EasyML
  module Splitters
    # Shared parent of the splitter adapters. Holds the splitter object it was
    # built from, exposes that splitter's dataset, and provides a batch-wise
    # +split+ whose per-batch work is done by +split_df+.
    class BaseSplitter
      include ActiveModel::Validations
      include EasyML::Concerns::Configurable

      attr_reader :splitter

      delegate :dataset, to: :splitter

      # @param splitter the object this adapter works for; must respond to
      #   +dataset+ for the delegation above to work
      def initialize(splitter)
        @splitter = splitter
      end

      # Walks +datasource+ batch by batch, runs +split_df+ on each batch and,
      # when a block is given, yields the result of every batch to it.
      #
      # @param datasource an object responding to +in_batches+
      # @return whatever +datasource.in_batches+ returns
      def split(datasource, &block)
        datasource.in_batches do |batch|
          partitions = split_df(batch)
          yield partitions if block_given?
          partitions
        end
      end

      # Default per-batch split: hands the batch back unchanged.
      #
      # @param df a single batch
      # @return the same +df+
      def split_df(df)
        df
      end
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_splitters
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# splitter_type :string not null
|
7
|
+
# configuration :json
|
8
|
+
# dataset_id :bigint not null
|
9
|
+
# created_at :datetime not null
|
10
|
+
# updated_at :datetime not null
|
11
|
+
#
|
12
|
+
require_relative "base_splitter"
|
13
|
+
|
14
|
+
module EasyML
  module Splitters
    # Splitter adapter that partitions a data frame into train / validation /
    # test sets by comparing +date_col+ against two cut-off dates. The
    # cut-offs are derived by stepping back +months_test+ and then
    # +months_valid+ months from a reference date.
    class DateSplitter < BaseSplitter
      validates :date_col, presence: true
      validates :months_test, presence: true, numericality: { greater_than: 0 }
      validates :months_valid, presence: true, numericality: { greater_than: 0 }

      attr_accessor :today, :date_col, :months_test, :months_valid

      add_configuration_attributes :today, :date_col, :months_test, :months_valid

      # Configuration a new date splitter starts with.
      #
      # @return [Hash]
      def self.default_config
        { date_col: "", months_test: 2, months_valid: 2 }
      end

      # Splits one data frame into three.
      #
      # * test  - rows dated on or after the test cut-off, plus rows whose
      #           date is null
      # * valid - rows dated before the test cut-off and on or after the
      #           validation cut-off
      # * train - rows dated before the validation cut-off
      #
      # @param df the data frame to split
      # @return [Array] +[train_df, valid_df, test_df]+
      # @raise [RuntimeError] when +date_col+ is blank, or when the column is
      #   not a Polars::Datetime after the conversion attempt
      def split_df(df)
        raise "Split by date requires argument: date_col" if date_col.blank?

        df = EasyML::Data::DateConverter.maybe_convert_date(df, date_col)

        column_dtype = df[date_col].dtype
        unless column_dtype.is_a?(Polars::Datetime)
          raise "Date splitter cannot split on non-date col #{date_col}, dtype is #{column_dtype}"
        end

        valid_cutoff, test_cutoff = splits

        on_or_after_test = df.filter(Polars.col(date_col) >= test_cutoff)
        undated = df.filter(Polars.col(date_col).is_null)
        held_out = Polars.concat([on_or_after_test, undated])

        before_test = df.filter(Polars.col(date_col) < test_cutoff)
        validation = before_test.filter(Polars.col(date_col) >= valid_cutoff)
        training = before_test.filter(Polars.col(date_col) < valid_cutoff)

        [training, validation, held_out]
      end

      # @param n number of months
      # @return [ActiveSupport::Duration] a duration of +n+ months
      def months(n)
        ActiveSupport::Duration.months(n)
      end

      # Computes the two cut-off dates. The reference date is the latest
      # +date_col+ value found in the datasource, or +today+ when there is
      # none.
      #
      # @return [Array] +[validation_date_start, test_date_start]+
      def splits
        anchor = to_datetime(datasource_end) || today
        test_cutoff = anchor.advance(months: -months_test).beginning_of_day
        valid_cutoff = test_cutoff.advance(months: -months_valid).beginning_of_day
        [valid_cutoff, test_cutoff]
      end

      # Latest +date_col+ value in the dataset's datasource: the first value
      # of a one-row query sorted by +date_col+ descending. A non-nil result
      # is cached on the instance.
      def datasource_end
        @datasource_end ||= begin
          newest = dataset.datasource.query(sort: date_col, descending: true, limit: 1, select: date_col)
          newest[date_col]&.to_a&.first
        end
      end

      # Normalises a date-like value: strings are parsed with +UTC.parse+,
      # nil becomes +default+, anything else is returned as-is.
      # NOTE(review): +UTC+ is a constant defined outside this file.
      def to_datetime(field, default: nil)
        return default if field.nil?
        return UTC.parse(field) if field.is_a?(String)

        field
      end

      # Reader for the configured +today+ value (replaces the one generated
      # by attr_accessor); falls back to +UTC.today+ when nothing was set.
      def today
        to_datetime(@today, default: UTC.today)
      end
    end
  end
end
|