easy_ml 0.1.3 → 0.2.0.pre.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -4
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
module EasyML
|
2
|
+
module Splitters
|
3
|
+
class PredefinedSplitter < BaseSplitter
|
4
|
+
validates :train_files, :test_files, :valid_files, presence: true
|
5
|
+
validate :files_must_be_unique
|
6
|
+
validate :at_least_one_file_specified
|
7
|
+
|
8
|
+
add_configuration_attributes :train_files, :test_files, :valid_files
|
9
|
+
|
10
|
+
# Default configuration: one CSV file per split, named after the split.
#
# @return [Hash{Symbol => Array<String>}] a fresh hash on every call
def self.default_config
  { train_files: %w[train.csv], test_files: %w[test.csv], valid_files: %w[valid.csv] }
end
|
17
|
+
|
18
|
+
# Validates this splitter, matches the datasource's files against the
# configured train/valid/test file names and yields the reader's query result
# for each group.
#
# @param datasource [#all_files] object listing every available file
# @yield [Array] three-element array, ordered train, valid, test
def split(datasource, &block)
  validate!

  train_paths, valid_paths, test_paths = match_files(datasource.all_files)
  frames = [train_paths, valid_paths, test_paths].map { |paths| reader.query(paths) }

  yield frames
end
|
26
|
+
|
27
|
+
# Buckets +files+ by the split whose configured file names they match.
#
# @param files [Array<String>] candidate file paths
# @return [Array<Array<String>>] [train, valid, test] (note: valid before test)
def match_files(files)
  [train_files, valid_files, test_files].map do |wanted|
    candidates = files.select { |file| match_file(file, wanted) }
    select_preferred_files(candidates)
  end
end
|
34
|
+
|
35
|
+
# Collapses files that share a base name (ignoring a trailing ".parquet" and
# then one further extension) down to a single representative each.
#
# @param files [Array<String>] file paths
# @return [Array<String>] one path per base name: the ".parquet" file when the
#   group has one, otherwise the first file of the group
def select_preferred_files(files)
  by_stem = files.group_by do |path|
    File.basename(path).gsub(/\.parquet$/, "").gsub(/\.[^.]+$/, "")
  end

  by_stem.values.map do |candidates|
    candidates.find { |path| path.end_with?(".parquet") } || candidates.first
  end
end
|
45
|
+
|
46
|
+
# Tells whether +file+ corresponds to one of the configured names in +type+.
# Both sides are compared without extensions: the file loses a trailing
# ".parquet" and then one more extension, each configured name loses its
# last extension.
#
# @param file [String] path of a candidate file
# @param type [Array<String>] configured file names for one split
# @return [Boolean]
def match_file(file, type)
  stem = File.basename(file).gsub(/\.parquet$/, "").gsub(/\.[^.]+$/, "")
  configured_stems = type.map { |configured| configured.gsub(/\.[^.]+$/, "") }

  configured_stems.include?(stem)
end
|
53
|
+
|
54
|
+
private
|
55
|
+
|
56
|
+
# Memoized EasyML::Data::PolarsReader used to query the matched files.
def reader
  return @reader if @reader

  @reader = EasyML::Data::PolarsReader.new
end
|
59
|
+
|
60
|
+
# Validation: the same file name may not be assigned to more than one split
# (or listed twice within a split).
#
# Each list is wrapped with Array() because every validator runs even when
# the presence validation has already failed; a missing (nil) list must be
# treated as empty here instead of raising NoMethodError on `nil + ...`.
def files_must_be_unique
  all_files = [train_files, test_files, valid_files].flat_map { |list| Array(list) }
  return if all_files.uniq.length == all_files.length

  errors.add(:base, "Files must be unique across splits")
end
|
66
|
+
|
67
|
+
# Validation: at least one of the three file lists must contain a file.
#
# Lists are wrapped with Array() so a missing (nil) list counts as empty
# rather than raising NoMethodError on `nil.empty?`; the presence validation
# reports the missing attribute itself.
def at_least_one_file_specified
  return unless [train_files, test_files, valid_files].all? { |list| Array(list).empty? }

  errors.add(:base, "At least one file must be specified")
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_splitters
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# splitter_type :string not null
|
7
|
+
# configuration :json
|
8
|
+
# dataset_id :bigint not null
|
9
|
+
# created_at :datetime not null
|
10
|
+
# updated_at :datetime not null
|
11
|
+
#
|
12
|
+
require_relative "base_splitter"
|
13
|
+
|
14
|
+
module EasyML
|
15
|
+
module Splitters
|
16
|
+
class RandomSplitter < BaseSplitter
|
17
|
+
validates :train_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
|
18
|
+
validates :valid_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
|
19
|
+
validates :test_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
|
20
|
+
validate :ratios_sum_to_one
|
21
|
+
|
22
|
+
attr_accessor :train_ratio, :valid_ratio, :test_ratio, :seed
|
23
|
+
|
24
|
+
add_configuration_attributes :train_ratio, :valid_ratio, :test_ratio, :seed
|
25
|
+
|
26
|
+
# No configuration defaults are declared here; split_df fills in missing
# ratios through set_defaults.
#
# @return [Hash] an empty hash
def self.default_config
  {}
end
|
29
|
+
|
30
|
+
# Randomly partitions +df+ into train, validation and test frames.
#
# Row positions are shuffled (with Random.new(seed) when +seed+ is set, so a
# given seed reproduces the same partition). The first floor(n * train_ratio)
# shuffled positions go to train, the next floor(n * valid_ratio) to
# validation and every remaining position to test, so +test_ratio+ is not
# read here. Rows are selected by filtering, so each result keeps the
# frame's original row order.
#
# @param df [Object] presumably a Polars::DataFrame; must respond to
#   #height, #columns, #with_columns and the returned frame to #filter
# @return [Array] [train_df, valid_df, test_df]
def split_df(df)
  set_defaults

  rng = seed ? Random.new(seed.to_i) : Random.new
  n_rows = df.height
  shuffled = (0...n_rows).to_a.shuffle(random: rng)

  train_size = (n_rows * train_ratio).floor
  valid_size = (n_rows * valid_ratio).floor
  partitions = [
    shuffled[0...train_size],
    shuffled[train_size...(train_size + valid_size)],
    shuffled[(train_size + valid_size)..],
  ]

  # The row-position column is temporary. Pick a name the frame does not
  # already use: adding and then dropping a column literally named "index"
  # would overwrite and delete a real column of that name.
  index_col = "index"
  index_col = "_#{index_col}" while df.columns.include?(index_col)

  indexed = df.with_columns([Polars.arange(0, n_rows).alias(index_col)])

  partitions.map do |positions|
    indexed.filter(Polars.col(index_col).is_in(positions)).drop(index_col)
  end
end
|
63
|
+
|
64
|
+
private
|
65
|
+
|
66
|
+
# Fills in a 60/20/20 train/valid/test split for any ratio that is unset.
def set_defaults
  train_ratio || (self.train_ratio = 0.6)
  valid_ratio || (self.valid_ratio = 0.2)
  test_ratio || (self.test_ratio = 0.2)
end
|
71
|
+
|
72
|
+
# Validation: the three split ratios must add up to 1.0 (within 1e-10).
# Skipped while any ratio is unset; the presence validations report that.
def ratios_sum_to_one
  ratios = [train_ratio, valid_ratio, test_ratio]
  return unless ratios.all?

  # Ratios may arrive as numeric strings, which the numericality validations
  # accept. Coerce first so `+` adds numbers instead of concatenating strings
  # (which would then raise on `String - Float`).
  numbers = ratios.map { |ratio| Float(ratio, exception: false) }
  # Non-numeric values are already reported by the numericality validations.
  return unless numbers.all?

  sum = numbers.inject(:+)
  return if (sum - 1.0).abs < 1e-10 # small epsilon for float comparison

  errors.add(:base, "Split ratios must sum to 1.0 (got #{sum})")
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_tuner_jobs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# config :json not null
|
7
|
+
# best_tuner_run_id :bigint
|
8
|
+
# model_id :bigint not null
|
9
|
+
# status :string
|
10
|
+
# direction :string default("minimize")
|
11
|
+
# started_at :datetime
|
12
|
+
# completed_at :datetime
|
13
|
+
# metadata :jsonb
|
14
|
+
# wandb_url :string
|
15
|
+
# created_at :datetime not null
|
16
|
+
# updated_at :datetime not null
|
17
|
+
#
|
18
|
+
module EasyML
|
19
|
+
class TunerJob < ActiveRecord::Base
|
20
|
+
self.table_name = "easy_ml_tuner_jobs"
|
21
|
+
|
22
|
+
belongs_to :model
|
23
|
+
belongs_to :best_tuner_run, class_name: "EasyML::TunerRun", optional: true
|
24
|
+
has_many :tuner_runs, dependent: :destroy
|
25
|
+
|
26
|
+
validates :config, presence: true
|
27
|
+
validates :direction, inclusion: { in: %w[minimize maximize] }
|
28
|
+
|
29
|
+
enum status: {
|
30
|
+
pending: "pending",
|
31
|
+
running: "running",
|
32
|
+
success: "success",
|
33
|
+
failed: "failed",
|
34
|
+
}
|
35
|
+
|
36
|
+
# Returns the tuner run with the best objective value for this job's
# direction (lowest for "minimize", highest otherwise), or nil when no run
# has a value yet.
#
# Runs whose +value+ is NULL are excluded: the column is nullable and
# PostgreSQL sorts NULLs first under DESC, so without the filter a
# "maximize" job could return a run that never produced a value.
#
# @return [EasyML::TunerRun, nil]
def best_run
  tuner_runs.where.not(value: nil).order(value: direction_order).first
end
|
41
|
+
|
42
|
+
# Maps every key of EasyML::Model::MODEL_OPTIONS to the hyperparameter
# constants exposed by the class that the option names.
#
# NOTE(review): as a class method named +constants+ this replaces Ruby's
# built-in Module#constants for this class — confirm nothing relies on the
# reflective version.
#
# @return [Hash] option key => result of <class>.hyperparameter_constants
def self.constants
  EasyML::Model::MODEL_OPTIONS.each_with_object({}) do |(key, class_name), by_model|
    by_model[key] = class_name.constantize.hyperparameter_constants
  end
end
|
49
|
+
|
50
|
+
private
|
51
|
+
|
52
|
+
# Sort direction matching the optimisation goal: ascending when minimising,
# descending for anything else.
#
# @return [Symbol] :asc or :desc
def direction_order
  return :asc if direction == "minimize"

  :desc
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_tuner_runs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# tuner_job_id :bigint not null
|
7
|
+
# hyperparameters :json not null
|
8
|
+
# value :float
|
9
|
+
# trial_number :integer
|
10
|
+
# status :string
|
11
|
+
# wandb_url :string
|
12
|
+
# created_at :datetime not null
|
13
|
+
# updated_at :datetime not null
|
14
|
+
#
|
15
|
+
module EasyML
  # A single trial of a tuner job, stored in easy_ml_tuner_runs. Requires its
  # hyperparameters and a trial number that is unique within the owning job.
  class TunerRun < ActiveRecord::Base
    self.table_name = "easy_ml_tuner_runs"

    belongs_to :tuner_job

    validates :hyperparameters, presence: true
    validates :trial_number, presence: true, uniqueness: { scope: :tuner_job_id }

    # String-backed enum: every state is stored under its own name.
    enum status: %w[pending running success failed].to_h { |state| [state.to_sym, state] }
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_columns
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# dataset_id :bigint not null
|
7
|
+
# name :string not null
|
8
|
+
# description :string
|
9
|
+
# datatype :string
|
10
|
+
# polars_datatype :string
|
11
|
+
# is_target :boolean
|
12
|
+
# hidden :boolean default(FALSE)
|
13
|
+
# drop_if_null :boolean default(FALSE)
|
14
|
+
# preprocessing_steps :json
|
15
|
+
# sample_values :json
|
16
|
+
# statistics :json
|
17
|
+
# created_at :datetime not null
|
18
|
+
# updated_at :datetime not null
|
19
|
+
#
|
20
|
+
module EasyML
  # JSON:API serializer exposing a dataset column's fields as plain attributes.
  class ColumnSerializer
    include JSONAPI::Serializer

    attributes(*%i[
      id name description dataset_id datatype polars_datatype preprocessing_steps
      hidden drop_if_null sample_values statistics is_target
    ])
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require_relative "./column_serializer"
|
2
|
+
|
3
|
+
# == Schema Information
|
4
|
+
#
|
5
|
+
# Table name: easy_ml_datasets
|
6
|
+
#
|
7
|
+
# id :bigint not null, primary key
|
8
|
+
# name :string not null
|
9
|
+
# description :string
|
10
|
+
# dataset_type :string
|
11
|
+
# status :string
|
12
|
+
# version :string
|
13
|
+
# datasource_id :bigint
|
14
|
+
# root_dir :string
|
15
|
+
# configuration :json
|
16
|
+
# num_rows :bigint
|
17
|
+
# workflow_status :string
|
18
|
+
# statistics :json
|
19
|
+
# preprocessor_statistics :json
|
20
|
+
# schema :json
|
21
|
+
# refreshed_at :datetime
|
22
|
+
# created_at :datetime not null
|
23
|
+
# updated_at :datetime not null
|
24
|
+
#
|
25
|
+
module EasyML
  # JSON:API serializer for a dataset: scalar attributes plus its splitter,
  # serialized columns and features, a small data sample and failure details.
  class DatasetSerializer
    include JSONAPI::Serializer

    attributes :id, :name, :description, :target, :num_rows, :status,
               :datasource_id, :preprocessing_steps, :workflow_status, :statistics

    attribute :splitter do |dataset|
      dataset.splitter
    end

    # Columns in id order, each reduced to its serialized attribute hash.
    attribute :columns do |dataset|
      dataset.columns.order(:id).map do |column|
        ColumnSerializer.new(column).serializable_hash.dig(:data, :attributes)
      end
    end

    # Up to 10 rows across all columns; nil while the dataset is analyzing.
    attribute :sample_data do |dataset|
      # workflow_status is a nullable column, so compare it as a string:
      # calling to_sym on nil raises NoMethodError.
      next nil if dataset.workflow_status.to_s == "analyzing"

      dataset.data(limit: 10, all_columns: true)&.to_hashes
    end

    # Reports the datasource's last_updated_at, not the dataset's own timestamp.
    attribute :updated_at do |dataset|
      dataset.datasource&.last_updated_at
    end

    attribute :features do |dataset|
      dataset.features.ordered.map do |feature|
        FeatureSerializer.new(feature).serializable_hash.dig(:data, :attributes)
      end
    end

    attribute :needs_refresh do |dataset|
      dataset.needs_refresh?
    end

    # Stacktrace of the newest event, only when the dataset has failed and
    # that newest event is itself a failure; nil otherwise.
    attribute :stacktrace do |object|
      next nil if !object.failed? || object.events.empty?

      last_event = object.events.order(id: :desc).limit(1).last
      last_event&.stacktrace if last_event&.status == "failed"
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_datasources
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# name :string not null
|
7
|
+
# datasource_type :string
|
8
|
+
# root_dir :string
|
9
|
+
# configuration :json
|
10
|
+
# created_at :datetime not null
|
11
|
+
# updated_at :datetime not null
|
12
|
+
#
|
13
|
+
require "jsonapi/serializer"
|
14
|
+
|
15
|
+
module EasyML
  # JSON:API serializer for a datasource, including sync state derived from
  # its newest event.
  class DatasourceSerializer
    include JSONAPI::Serializer

    set_type :datasource # Optional type for JSON:API

    attributes(*%i[id name datasource_type s3_bucket s3_prefix s3_region schema columns available_files])

    # "Syncing..." during a sync, "Not Synced" before the first one, otherwise
    # the last update time in the configured time zone.
    attribute :last_synced_at do |datasource|
      next "Syncing..." if datasource.is_syncing

      if datasource.last_updated_at
        datasource.last_updated_at.in_time_zone(EasyML::Configuration.timezone)
      else
        "Not Synced"
      end
    end

    attribute :created_at do |datasource|
      datasource.created_at.in_time_zone(EasyML::Configuration.timezone).iso8601
    end

    attribute :updated_at do |datasource|
      datasource.updated_at.in_time_zone(EasyML::Configuration.timezone).iso8601
    end

    attribute :is_synced do |datasource|
      datasource.last_updated_at.present?
    end

    attribute :is_syncing do |datasource|
      datasource.is_syncing
    end

    # nil while syncing; otherwise whether the newest event has status "failed".
    attribute :sync_failed do |datasource|
      next nil if datasource.is_syncing

      newest_event = datasource.events.order(id: :desc).limit(1).last
      newest_event&.status == "failed"
    end

    # Stacktrace of the newest event when it failed; nil while syncing or
    # when the newest event is not a failure.
    attribute :stacktrace do |datasource|
      next nil if datasource.is_syncing

      newest_event = datasource.events.order(id: :desc).limit(1).last
      newest_event&.stacktrace if newest_event&.status == "failed"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_features
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# dataset_id :bigint not null
|
7
|
+
# name :string
|
8
|
+
# feature_class :string not null
|
9
|
+
# feature_method :string not null
|
10
|
+
# feature_position :integer
|
11
|
+
# applied_at :datetime
|
12
|
+
# created_at :datetime not null
|
13
|
+
# updated_at :datetime not null
|
14
|
+
#
|
15
|
+
require "jsonapi/serializer"
|
16
|
+
|
17
|
+
module EasyML
  # JSON:API serializer for a dataset feature.
  class FeatureSerializer
    include JSONAPI::Serializer

    attributes(*%i[id feature_class feature_position name])

    # Description looked up in the feature registry by the feature's name;
    # nil when the registry has no entry.
    attribute :description do |feature|
      registry_entry = EasyML::Features::Registry.find(feature.name) || {}
      registry_entry.dig(:description)
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_models
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# name :string not null
|
7
|
+
# model_type :string
|
8
|
+
# status :string
|
9
|
+
# dataset_id :bigint
|
10
|
+
# model_file_id :bigint
|
11
|
+
# configuration :json
|
12
|
+
# version :string not null
|
13
|
+
# root_dir :string
|
14
|
+
# file :json
|
15
|
+
# sha :string
|
16
|
+
# created_at :datetime not null
|
17
|
+
# updated_at :datetime not null
|
18
|
+
#
|
19
|
+
require "jsonapi/serializer"
|
20
|
+
|
21
|
+
module EasyML
  # JSON:API serializer for a model: scalar attributes plus nested payloads
  # for its last run, a page of retraining runs, its dataset and its
  # retraining job.
  class ModelSerializer
    include JSONAPI::Serializer

    attributes(*%i[
      id name model_type task objective metrics dataset_id status
      deployment_status configuration created_at updated_at last_run_at
    ])

    attribute :is_training do |model|
      model.training?
    end

    attribute :last_run do |model|
      serialized = RetrainingRunSerializer.new(model.last_run).serializable_hash
      serialized.dig(:data, :attributes)
    end

    attribute :metrics_url do |model|
      model.last_run&.wandb_url
    end

    # One page of retraining runs, newest first. params[:limit] (default 20)
    # and params[:offset] (default 0) select the page; next/prev offsets are
    # plain arithmetic and are not clamped to the available range.
    attribute :retraining_runs do |model, params|
      limit = params[:limit] || 20
      offset = params[:offset] || 0

      page = model.retraining_runs.order(created_at: :desc).offset(offset).limit(limit)
      serialized_runs = RetrainingRunSerializer.new(page).serializable_hash[:data]

      {
        runs: serialized_runs.map { |run| run[:attributes] },
        total_count: model.retraining_runs.count,
        limit: limit,
        offset: offset,
        next_offset: offset + limit,
        prev_offset: offset - limit,
      }
    end

    attribute :version do |model|
      model.formatted_version
    end

    attribute :formatted_model_type do |model|
      model.formatted_model_type
    end

    # Frequency label of the retraining job, or nil when there is no job.
    attribute :formatted_frequency do |model|
      if model.retraining_job.present?
        model.retraining_job.formatted_frequency
      end
    end

    attribute :dataset do |model|
      serialized = DatasetSerializer.new(model.dataset).serializable_hash
      serialized.dig(:data, :attributes)
    end

    attribute :retraining_job do |model|
      serialized = RetrainingJobSerializer.new(model.retraining_job).serializable_hash
      serialized.dig(:data, :attributes)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "jsonapi/serializer"
|
2
|
+
|
3
|
+
module EasyML
  # JSON:API serializer exposing a retraining job's schedule, evaluation
  # threshold, tuning and batching settings as plain attributes.
  class RetrainingJobSerializer
    include JSONAPI::Serializer

    attributes(*%i[
      id active frequency formatted_frequency tuning_frequency at
      metric threshold tuner_config
      batch_mode batch_size batch_overlap batch_key
      tuning_enabled
    ])
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "jsonapi/serializer"
|
2
|
+
|
3
|
+
module EasyML
  # JSON:API serializer for a retraining run: outcome attributes, timestamps
  # in the configured time zone and, for failures, the stacktrace.
  class RetrainingRunSerializer
    include JSONAPI::Serializer

    attributes(*%i[
      id deployable metrics metric_value threshold threshold_direction
      status error_message is_deploying deployed
    ])

    attribute :metrics_url do |run|
      run.wandb_url
    end

    attribute :started_at do |run|
      run.started_at&.in_time_zone(EasyML::Configuration.timezone)
    end

    attribute :completed_at do |run|
      run.completed_at&.in_time_zone(EasyML::Configuration.timezone)
    end

    # Stacktrace of the newest event when it failed; nil while the run is
    # still running or when the newest event is not a failure.
    attribute :stacktrace do |run|
      next nil if run.status.to_s == "running"

      newest_event = run.events.order(id: :desc).limit(1).last
      newest_event&.stacktrace if newest_event&.status.to_s == "failed"
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>EasyML</title>
|
5
|
+
<%= csrf_meta_tags %>
|
6
|
+
<%= csp_meta_tag %>
|
7
|
+
|
8
|
+
<%= vite_client_tag %>
|
9
|
+
<%= vite_react_refresh_tag %>
|
10
|
+
<%= vite_typescript_tag 'Application.tsx' %>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<%= yield %>
|
14
|
+
</body>
|
15
|
+
</html>
|
data/config/routes.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Routes for the EasyML engine UI. The models index is the landing page.
EasyML::Engine.routes.draw do
  root to: "models#index"

  resources :models, as: :easy_ml_models do
    member do
      post :train
      get :retraining_runs, to: "retraining_runs#index"
    end
    resources :deploys, only: %i[create]
    get "new", on: :collection, as: "new"
  end

  resources :retraining_runs, only: %i[show]

  # Datasources
  resources :datasources, as: :easy_ml_datasources do
    post :sync, on: :member
  end

  # Datasets
  resources :datasets, as: :easy_ml_datasets do
    post :refresh, on: :member
  end

  # Transformations
  resources :transformations, only: %i[index new edit], as: :easy_ml_transformations

  # Settings
  resources :settings, only: %i[index] do
    collection do
      patch :update
    end
  end

  # Columns
  resources :columns, only: %i[update], as: :easy_ml_columns
end
|
data/config/spring.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Run Spring against the application under ./spec/internal
# (presumably the internal app used by the specs — confirm).
Spring.application_root = "./spec/internal"
|