easy_ml 0.1.4 → 0.2.0.pre.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -5
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,133 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_model_files
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# filename :string not null
|
7
|
+
# path :string not null
|
8
|
+
# configuration :json
|
9
|
+
# model_type :string
|
10
|
+
# model_id :bigint
|
11
|
+
# created_at :datetime not null
|
12
|
+
# updated_at :datetime not null
|
13
|
+
#
|
14
|
+
module EasyML
  # ActiveRecord model (table `easy_ml_model_files`) describing one stored model
  # artifact file.
  #
  # The local location is derived from the `root_dir` configuration attribute and
  # `filename`; upload/download are delegated to EasyML::Support::SyncedFile, built
  # from the s3_* configuration attributes.
  class ModelFile < ActiveRecord::Base
    self.table_name = "easy_ml_model_files"
    include Historiographer::Silent
    historiographer_mode :snapshot_only

    # Keep the configuration column (it carries the S3 credentials) out of filtered output.
    self.filter_attributes += [:configuration]

    validates :filename, presence: true
    belongs_to :model, class_name: "EasyML::Model"

    include EasyML::Concerns::Configurable
    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key, :root_dir

    # Builds a SyncedFile for this record from its filename, S3 settings and root_dir.
    def synced_file
      EasyML::Support::SyncedFile.new(
        filename: filename,
        s3_bucket: s3_bucket,
        s3_prefix: s3_prefix,
        s3_region: s3_region,
        s3_access_key_id: s3_access_key_id,
        s3_secret_access_key: s3_secret_access_key,
        root_dir: root_dir,
      )
    end

    # Same answer as #fit?.
    def exist?
      fit?
    end

    # @return [Boolean] true when root_dir and full_path are set and the file exists on local disk
    def fit?
      return false if root_dir.nil?
      return false if full_path.nil?

      File.exist?(full_path)
    end

    # @return [String] contents of the local file at full_path
    def read
      File.read(full_path)
    end

    # Uploads the file at +path+ through synced_file, then records its location via #set_path.
    def upload(path)
      synced_file.upload(path)
      set_path(path)
    end

    # Records +path+ on this record. When the file is not already under full_dir it is
    # copied there first. Sets `filename` to the basename and `path` to the containing
    # directory relative to Rails.root.
    def set_path(path)
      path = get_full_path(path)
      basename = Pathname.new(path).basename.to_s
      unless path.start_with?(full_dir)
        new_path = File.join(full_dir, basename).to_s
        FileUtils.mkdir_p(Pathname.new(new_path).dirname.to_s)
        FileUtils.cp(path, new_path)
        path = new_path
      end
      self.filename = basename
      self.path = get_relative_path(path)
    end

    # Resolves +path+ against Rails.root unless it already contains the Rails root.
    #
    # Always returns a String: #set_path calls String#start_with? on the result, which
    # a Pathname does not provide. The Rails root is matched literally rather than as a
    # regular expression, so metacharacters in the directory name cannot skew the check.
    #
    # @param path [String, Pathname]
    # @return [String]
    def get_full_path(path)
      path = path.to_s
      return path if path.include?(Rails.root.to_s)

      Rails.root.join(path).to_s
    end

    # Returns the directory portion of +path+ relative to Rails.root
    # (the final path segment is dropped, empty segments are removed).
    def get_relative_path(path)
      path = path.to_s
      path = path.to_s.split(Rails.root.to_s).last
      path.to_s.split("/")[0..-2].reject(&:empty?).join("/")
    end

    # Downloads the file through synced_file when it is not present locally.
    #
    # @return [String, nil] full_path, or nil when full_path cannot be determined
    def download
      return unless full_path.present?

      synced_file.download(full_path) unless File.exist?(full_path)
      full_path
    end

    # @return [String] SHA256 hex digest of the local file
    def sha
      Digest::SHA256.file(full_path).hexdigest
    end

    # Absolute path of +filename+ (defaults to this record's filename) inside relative_dir.
    #
    # @return [String, nil] nil when there is no filename or relative_dir
    def full_path(filename = nil)
      filename = self.filename if filename.nil?
      return nil if filename.nil?
      return nil if relative_dir.nil?

      Rails.root.join(relative_dir, filename).to_s
    end

    # root_dir with the Rails root and one leading slash removed.
    #
    # The previous implementation ended in `gsub!`, which returns nil whenever nothing
    # is substituted, so a root_dir that was already relative (no leading slash) was
    # reported as nil. Non-mutating calls are used instead. An empty result is still
    # reported as nil so #full_path and #fit? keep treating it as "not configured".
    #
    # @return [String, nil]
    def relative_dir
      return nil if root_dir.nil?

      stripped = root_dir.to_s.gsub(Rails.root.to_s, "").sub(%r{\A/}, "")
      stripped.empty? ? nil : stripped
    end

    # Absolute form of relative_dir under Rails.root.
    def full_dir
      Rails.root.join(relative_dir).to_s
    end

    # Parent directory of full_dir.
    def model_root
      File.expand_path("..", full_dir)
    end

    # Runs FileRotate cleanup on model_root with an empty keep-list.
    # NOTE(review): presumably removes every allow-listed file — confirm in FileRotate.
    def cleanup!
      [model_root].each do |dir|
        EasyML::Support::FileRotate.new(dir, []).cleanup(extension_allowlist)
      end
    end

    # Runs FileRotate cleanup on model_root, passing +files_to_keep+ as the keep-list.
    def cleanup(files_to_keep)
      [model_root].each do |dir|
        EasyML::Support::FileRotate.new(dir, files_to_keep).cleanup(extension_allowlist)
      end
    end

    # File extensions handed to FileRotate#cleanup.
    def extension_allowlist
      %w[bin model json]
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_model_file_histories
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# model_file_id :integer not null
|
7
|
+
# filename :string not null
|
8
|
+
# path :string not null
|
9
|
+
# configuration :json
|
10
|
+
# model_type :string
|
11
|
+
# model_id :integer
|
12
|
+
# created_at :datetime not null
|
13
|
+
# updated_at :datetime not null
|
14
|
+
# history_started_at :datetime not null
|
15
|
+
# history_ended_at :datetime
|
16
|
+
# history_user_id :integer
|
17
|
+
# snapshot_id :string
|
18
|
+
#
|
19
|
+
module EasyML
  # History record for EasyML::ModelFile rows, stored in
  # `easy_ml_model_file_histories`. All behaviour comes from Historiographer::History.
  class ModelFileHistory < ActiveRecord::Base
    include Historiographer::History

    self.table_name = "easy_ml_model_file_histories"
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_model_histories
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# model_id :integer not null
|
7
|
+
# name :string not null
|
8
|
+
# model_type :string
|
9
|
+
# status :string
|
10
|
+
# dataset_id :integer
|
11
|
+
# model_file_id :integer
|
12
|
+
# configuration :json
|
13
|
+
# version :string not null
|
14
|
+
# root_dir :string
|
15
|
+
# file :json
|
16
|
+
# sha :string
|
17
|
+
# last_trained_at :datetime
|
18
|
+
# is_training :boolean
|
19
|
+
# created_at :datetime not null
|
20
|
+
# updated_at :datetime not null
|
21
|
+
# history_started_at :datetime not null
|
22
|
+
# history_ended_at :datetime
|
23
|
+
# history_user_id :integer
|
24
|
+
# snapshot_id :string
|
25
|
+
#
|
26
|
+
module EasyML
  # History record for EasyML::Model rows (table `easy_ml_model_histories`).
  # A history row is either the version referenced by the latest deploy of its
  # model (:inference) or not (:retired); it can never be trained.
  class ModelHistory < ActiveRecord::Base
    self.table_name = "easy_ml_model_histories"
    include Historiographer::History

    # History rows referenced as `model_version` by the deploys in EasyML::Deploy.latest.
    scope :deployed, lambda {
      latest_deploys = EasyML::Deploy.latest.includes(:model_version)
      deployed_versions = latest_deploys.map(&:model_version).compact
      where(id: deployed_versions.map(&:id))
    }

    # @return [Symbol] :inference when this version is deployed, otherwise :retired (memoized)
    def status
      @status ||= (is_deployed? ? :inference : :retired)
    end

    # @return [Boolean] whether the latest deploy for this row's model_id points at this row
    def is_deployed?
      deploys_for_model = EasyML::Deploy.latest.where(model_id: model_id)
      deployed_version_id = deploys_for_model&.first&.model_version&.id
      deployed_version_id == id
    end

    # History rows cannot be trained.
    #
    # @raise [RuntimeError] always
    def fit
      raise "Cannot train inference model"
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module EasyML
  module Models
    # Abstract adapter wrapped around a model object. It delegates task, dataset and
    # hyperparameters to that object and declares the interface concrete model
    # implementations have to provide; every interface method raises
    # NotImplementedError until overridden.
    class BaseModel
      include ActiveModel::Validations
      include EasyML::Concerns::Configurable
      include EasyML::Support::FileSupport

      attr_reader :model

      add_configuration_attributes :hyperparameters

      # @param model the object task, dataset and hyperparameters are delegated to
      def initialize(model)
        @model = model
      end

      delegate :task, :dataset, :hyperparameters, to: :model

      # --- Interface subclasses must implement ---------------------------------

      def predict(_xs)
        must_implement!(:predict)
      end

      def fit(_x_train = nil)
        must_implement!(:fit)
      end

      def model_changed?
        must_implement!(:model_changed?)
      end

      def feature_importances
        must_implement!(:feature_importances)
      end

      def save_model_file(path)
        must_implement!(:save_model_file)
      end

      def load_model_file(path)
        must_implement!(:load_model_file)
      end

      def loaded?
        must_implement!(:loaded?)
      end

      protected

      def validate_objective
        must_implement!(:validate_objective)
      end

      def validate_hyperparameters
        must_implement!(:validate_hyperparameters)
      end

      private

      # Raises the shared "must implement" error for the named interface method.
      def must_implement!(method_name)
        raise NotImplementedError, "#{self.class} must implement ##{method_name}"
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module EasyML
  module Models
    module Hyperparameters
      # Base hyperparameter container: a handful of generic training settings plus the
      # UI metadata (label / description / min / max / step) shared by the boosters.
      class Base
        attr_accessor :learning_rate, :max_iterations, :batch_size,
                      :regularization, :early_stopping_rounds

        # @param options [Hash] symbol-keyed (or indifferent-access) values; missing or
        #   nil entries fall back to the defaults below
        def initialize(options = {})
          @learning_rate = options[:learning_rate] || 0.01
          @max_iterations = options[:max_iterations] || 100
          @batch_size = options[:batch_size] || 32
          @regularization = options[:regularization] || 0.0001
          @early_stopping_rounds = options[:early_stopping_rounds]
        end

        # Metadata for the parameters shared by tree-based boosters.
        def self.common_tree_params
          {
            learning_rate: {
              label: "Learning Rate",
              description: "Step size shrinkage used to prevent overfitting",
              min: 0.001,
              max: 0.3,
              step: 0.001,
            },
            max_depth: {
              label: "Maximum Tree Depth",
              description: "Maximum depth of a tree",
              min: 3,
              max: 10,
              step: 1,
            },
            n_estimators: {
              label: "Number of Trees",
              description: "Number of boosting rounds",
              min: 100,
              max: 1000,
              step: 1,
            },
            early_stopping_rounds: {
              label: "Early Stopping Rounds",
              description: "Number of rounds to check for early stopping",
              min: 1,
              max: 5,
              step: 1,
            },
          }
        end

        # Metadata for the regularization parameters.
        def self.common_regularization_params
          {
            lambda: {
              label: "L2 Regularization",
              description: "L2 regularization term on weights",
              min: 0,
              max: 10,
              step: 0.1,
            },
            alpha: {
              label: "L1 Regularization",
              description: "L1 regularization term on weights",
              min: 0,
              max: 1,
              step: 0.1,
            },
            early_stopping_rounds: {
              label: "Early Stopping Rounds",
              description: "Number of rounds to check for early stopping",
              min: 1,
              max: 5,
              step: 1,
            },
          }
        end

        # @return [ActiveSupport::HashWithIndifferentAccess] every instance variable, keyed by name
        def to_h
          raw_attributes.with_indifferent_access
        end

        # Returns a new object of the same class with +other+'s values layered on top
        # of this one's.
        #
        # Fixes two defects of the previous version:
        # * it tested `other.is_a?(Hyperparameters)` — the namespace module, which no
        #   instance ever is — so passing another hyperparameters object raised TypeError;
        # * it splatted an indifferent-access hash (`**merged_hash`), which reaches
        #   #initialize with String keys, so every `options[:sym]` lookup missed and the
        #   result silently fell back to defaults.
        #
        # @param other [Base, Hash, nil]
        # @return [Base] self when +other+ is nil, otherwise a new instance
        def merge(other)
          return self if other.nil?

          overrides = other.is_a?(Base) ? other.raw_attributes : other.to_h
          overrides = overrides.transform_keys { |key| key.respond_to?(:to_sym) ? key.to_sym : key }
          self.class.new(raw_attributes.merge(overrides))
        end

        # Hash-style read; nil for names this object does not respond to.
        def [](key)
          public_send(key) if respond_to?(key)
        end

        # Hash-style write; silently ignored when there is no matching writer.
        def []=(key, value)
          public_send("#{key}=", value) if respond_to?("#{key}=")
        end

        protected

        # Plain symbol-keyed snapshot of the instance variables.
        def raw_attributes
          instance_variables.each_with_object({}) do |var, hash|
            hash[var.to_s.delete("@").to_sym] = instance_variable_get(var)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the DART booster: the XGBoost settings plus the
        # dropout-specific ones declared here.
        class Dart < XGBoost
          SAMPLE_TYPES = %w[uniform weighted].freeze
          NORMALIZE_TYPES = %w[tree forest].freeze

          # Value used when an option is missing or nil.
          DART_DEFAULTS = {
            rate_drop: 0.0,
            skip_drop: 0.0,
            sample_type: "uniform",
            normalize_type: "tree",
            subsample: 1.0,
            colsample_bytree: 1.0,
          }.freeze

          attr_accessor :rate_drop, :skip_drop, :sample_type, :normalize_type,
                        :subsample, :colsample_bytree

          # @param options [Hash] symbol-keyed settings
          # @raise [ArgumentError] when sample_type or normalize_type is not recognised
          def initialize(options = {})
            super
            DART_DEFAULTS.each do |name, fallback|
              instance_variable_set("@#{name}", options[name] || fallback)
            end
            validate!
          end

          # Checks sample_type first, then normalize_type.
          def validate!
            raise ArgumentError, "Invalid sample_type: #{@sample_type}" unless SAMPLE_TYPES.include?(@sample_type)
            raise ArgumentError, "Invalid normalize_type: #{@normalize_type}" unless NORMALIZE_TYPES.include?(@normalize_type)
          end

          # UI metadata: shared tree and regularization params, the DART-specific
          # entries, then GBTree's entries merged last (DART is tree-based).
          def self.hyperparameter_constants
            dropout_params = {
              rate_drop: {
                label: "Dropout Rate",
                description: "Dropout rate (a fraction of previous trees to drop)",
                min: 0,
                max: 1,
                step: 0.1,
              },
              skip_drop: {
                label: "Skip Dropout",
                description: "Probability of skipping the dropout procedure during iteration",
                min: 0,
                max: 1,
                step: 0.1,
              },
              sample_type: {
                label: "Sample Type",
                options: [
                  {
                    value: "uniform",
                    label: "Uniform",
                    description: "Dropped trees are selected uniformly",
                  },
                  {
                    value: "weighted",
                    label: "Weighted",
                    description: "Dropped trees are selected in proportion to weight",
                  },
                ],
              },
              normalize_type: {
                label: "Normalize Type",
                options: [
                  {
                    value: "tree",
                    label: "Tree",
                    description: "New trees have the same weight of dropped trees divided by k",
                  },
                  {
                    value: "forest",
                    label: "Forest",
                    description: "New trees have the same weight of sum of dropped trees",
                  },
                ],
              },
            }

            shared_params = Base.common_tree_params.merge(Base.common_regularization_params)
            shared_params.merge(dropout_params).merge(GBTree.hyperparameter_constants)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the linear booster: the XGBoost settings plus the
        # coordinate-descent updater and feature selector.
        class GBLinear < XGBoost
          VALID_UPDATERS = %w[shotgun coord_descent].freeze
          # "random" was missing from the accepted list even though
          # hyperparameter_constants offers it as a choice, so picking it raised.
          VALID_FEATURE_SELECTORS = %w[cyclic shuffle random greedy thrifty].freeze

          attr_accessor :updater, :feature_selector, :lambda, :alpha

          # @param options [Hash] symbol-keyed settings; missing or nil entries use the defaults below
          # @raise [ArgumentError] when updater or feature_selector is not recognised
          def initialize(options = {})
            super
            @updater = options[:updater] || "shotgun"
            @feature_selector = options[:feature_selector] || "cyclic"
            @lambda = options[:lambda] || 1.0
            @alpha = options[:alpha] || 0.0
            validate!
          end

          # Checks updater first, then feature_selector.
          def validate!
            unless VALID_UPDATERS.include?(@updater)
              raise ArgumentError, "Invalid updater: #{@updater}"
            end
            unless VALID_FEATURE_SELECTORS.include?(@feature_selector)
              raise ArgumentError, "Invalid feature_selector: #{@feature_selector}"
            end
          end

          # UI metadata for the linear booster.
          def self.hyperparameter_constants
            # GBLinear only uses learning_rate from tree params
            { learning_rate: Base.common_tree_params[:learning_rate] }
              .merge(Base.common_regularization_params)
              .merge(
                feature_selector: {
                  label: "Feature Selector",
                  options: [
                    {
                      value: "cyclic",
                      label: "Cyclic",
                      description: "Update features in a cyclic order"
                    },
                    {
                      value: "shuffle",
                      label: "Shuffle",
                      description: "Update features in a random order"
                    },
                    {
                      value: "random",
                      label: "Random",
                      description: "Randomly select features to update"
                    },
                    {
                      value: "greedy",
                      label: "Greedy",
                      description: "Select features with the highest gradient magnitude"
                    },
                    {
                      value: "thrifty",
                      label: "Thrifty",
                      description: "Thrifty, approximated greedy algorithm"
                    }
                  ]
                },
                updater: {
                  label: "Updater",
                  options: [
                    {
                      value: "shotgun",
                      label: "Shotgun",
                      description: "Parallel coordinate descent algorithm"
                    },
                    {
                      value: "coord_descent",
                      label: "Coordinate Descent",
                      description: "Ordinary coordinate descent algorithm"
                    }
                  ]
                }
              )
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the gradient-boosted tree booster: the XGBoost
        # settings plus the tree-growth and sampling ones declared here.
        class GBTree < XGBoost
          TREE_METHODS = %w[auto exact approx hist gpu_hist].freeze

          # Value used when an option is missing or nil.
          TREE_DEFAULTS = {
            max_depth: 6,
            min_child_weight: 1,
            max_delta_step: 0,
            subsample: 1.0,
            colsample_bytree: 1.0,
            colsample_bylevel: 1.0,
            colsample_bynode: 1.0,
            tree_method: "auto",
            gamma: 0.0,
            scale_pos_weight: 1.0,
          }.freeze

          attr_accessor :max_depth, :min_child_weight, :max_delta_step, :subsample,
                        :colsample_bytree, :colsample_bylevel, :colsample_bynode,
                        :tree_method, :gamma, :scale_pos_weight

          # @param options [Hash] symbol-keyed settings
          # @raise [ArgumentError] when tree_method is not recognised
          def initialize(options = {})
            super
            TREE_DEFAULTS.each do |name, fallback|
              instance_variable_set("@#{name}", options[name] || fallback)
            end
            validate!
          end

          # Only tree_method is checked.
          def validate!
            return if TREE_METHODS.include?(@tree_method)

            raise ArgumentError, "Invalid tree_method: #{@tree_method}"
          end

          # UI metadata: the shared tree and regularization params plus the
          # tree-specific entries below.
          def self.hyperparameter_constants
            tree_method_choices = [
              {
                value: "auto",
                label: "Auto",
                description: "Use heuristic to choose the fastest method",
              },
              {
                value: "exact",
                label: "Exact",
                description: "Exact greedy algorithm",
              },
              {
                value: "approx",
                label: "Approximate",
                description: "Approximate greedy algorithm using sketching and histogram",
              },
              {
                value: "hist",
                label: "Histogram",
                description: "Fast histogram optimized approximate greedy algorithm",
              },
              {
                value: "gpu_hist",
                label: "GPU Histogram",
                description: "GPU implementation of hist algorithm",
              },
            ]

            tree_params = {
              min_child_weight: {
                label: "Minimum Child Weight",
                description: "Minimum sum of instance weight needed in a child",
                min: 0,
                max: 10,
                step: 0.1,
              },
              gamma: {
                label: "Gamma",
                description: "Minimum loss reduction required to make a further partition",
                min: 0,
                max: 10,
                step: 0.1,
              },
              subsample: {
                label: "Subsample Ratio",
                description: "Subsample ratio of the training instances",
                min: 0.1,
                max: 1,
                step: 0.1,
              },
              colsample_bytree: {
                label: "Column Sample by Tree",
                description: "Subsample ratio of columns when constructing each tree",
                min: 0.1,
                max: 1,
                step: 0.1,
              },
              tree_method: {
                label: "Tree Construction Method",
                options: tree_method_choices,
              },
            }

            shared_params = Base.common_tree_params.merge(Base.common_regularization_params)
            shared_params.merge(tree_params)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require_relative "base"
|
2
|
+
|
3
|
+
module EasyML
  module Models
    module Hyperparameters
      # Hyperparameters common to every XGBoost booster, plus the UI metadata that
      # lets a form pick a booster and then show that booster's own parameters.
      class XGBoost < Base
        attr_accessor :learning_rate, :max_depth, :n_estimators, :booster,
                      :objective, :lambda, :alpha

        VALID_OBJECTIVES = %w[binary:logistic binary:hinge multi:softmax multi:softprob reg:squarederror reg:logistic].freeze
        VALID_BOOSTERS = %w[gbtree gblinear dart].freeze

        # Value used when an option is missing or nil.
        XGBOOST_DEFAULTS = {
          learning_rate: 0.1,
          max_depth: 6,
          n_estimators: 100,
          booster: "gbtree",
          objective: "reg:squarederror",
          lambda: 1.0,
          alpha: 0.0,
        }.freeze

        # @param options [Hash] symbol-keyed settings
        # @raise [ArgumentError] when built directly as XGBoost with an unknown objective or booster
        def initialize(options = {})
          super
          XGBOOST_DEFAULTS.each do |name, fallback|
            instance_variable_set("@#{name}", options[name] || fallback)
          end
          # Validation here runs only for this exact class; the check is skipped
          # whenever the instance belongs to a subclass.
          validate! if self.class.name == "EasyML::Models::Hyperparameters::XGBoost"
        end

        # Checks objective first, then booster.
        def validate!
          raise ArgumentError, "Invalid objective. Must be one of: #{VALID_OBJECTIVES.join(", ")}" unless VALID_OBJECTIVES.include?(@objective)
          raise ArgumentError, "Invalid booster. Must be one of: #{VALID_BOOSTERS.join(", ")}" unless VALID_BOOSTERS.include?(@booster)
        end

        # UI metadata: the booster choices and, keyed by booster, each booster's
        # own hyperparameter metadata.
        def self.hyperparameter_constants
          booster_choices = [
            {
              value: "gbtree",
              label: "Gradient Boosted Tree",
              description: "Traditional Gradient Boosting Decision Tree",
            },
            {
              value: "gblinear",
              label: "Gradient Boosted Linear",
              description: "Generalized Linear Model with gradient boosting",
            },
            {
              value: "dart",
              label: "DART",
              description: "Dropouts meet Multiple Additive Regression Trees",
            },
          ]

          {
            booster: {
              label: "XGBoost Booster",
              options: booster_choices,
            },
            hyperparameters: {
              depends_on: "booster",
              gbtree: GBTree.hyperparameter_constants,
              gblinear: GBLinear.hyperparameter_constants,
              dart: Dart.hyperparameter_constants,
            },
          }
        end
      end
    end
  end
end
|
68
|
+
|
69
|
+
require_relative "xgboost/gbtree"
|
70
|
+
require_relative "xgboost/gblinear"
|
71
|
+
require_relative "xgboost/dart"
|