easy_ml 0.1.4 → 0.2.0.pre.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -5
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,74 @@
|
|
1
|
+
module EasyML
  module Splitters
    # Splitter that assigns whole datasource files to the train/valid/test
    # splits according to the file names configured in +train_files+,
    # +valid_files+ and +test_files+.
    #
    # Names are compared with their extensions removed (see #normalized_name),
    # so a configured "train.csv" also matches "train.parquet" or
    # "train.csv.parquet" in the datasource.
    class PredefinedSplitter < BaseSplitter
      validates :train_files, :test_files, :valid_files, presence: true
      validate :files_must_be_unique
      validate :at_least_one_file_specified

      add_configuration_attributes :train_files, :test_files, :valid_files

      # Default configuration: one CSV file per split.
      def self.default_config
        {
          train_files: ["train.csv"],
          test_files: ["test.csv"],
          valid_files: ["valid.csv"],
        }
      end

      # Validates the configuration, resolves the files belonging to each
      # split and yields the result of querying them through the reader.
      #
      # @param datasource [#all_files] source whose files are partitioned
      # @yield [Array] a single three-element array of reader.query results,
      #   in train, valid, test order
      # @raise whatever +validate!+ raises when the configuration is invalid
      def split(datasource, &block)
        validate!

        train, valid, test = match_files(datasource.all_files)

        yield [reader.query(train), reader.query(valid), reader.query(test)]
      end

      # Partitions +files+ into the three splits.
      #
      # @param files [Array<String>] candidate file paths
      # @return [Array<Array<String>>] [train, valid, test] file lists
      def match_files(files)
        train, test, valid = [train_files, test_files, valid_files].map do |configured|
          select_preferred_files(files.select { |file| match_file(file, configured) })
        end

        [train, valid, test]
      end

      # Collapses files that share a normalised name to a single entry,
      # preferring the ".parquet" variant and otherwise the first one seen.
      #
      # @param files [Array<String>]
      # @return [Array<String>] one file per normalised name
      def select_preferred_files(files)
        files.group_by { |file| normalized_name(file) }.values.map do |candidates|
          candidates.find { |file| file.end_with?(".parquet") } || candidates.first
        end
      end

      # Whether +file+ corresponds to one of the configured names in +type+.
      #
      # Both sides go through the same normalisation. Previously configured
      # names only lost their last extension, so a configured
      # "train.csv.parquet" could never match the file of the same name.
      #
      # @param file [String] path of a datasource file
      # @param type [Array<String>, nil] configured file names for one split
      # @return [Boolean]
      def match_file(file, type)
        target = normalized_name(file)
        Array(type).any? { |configured| normalized_name(configured) == target }
      end

      private

      def reader
        @reader ||= EasyML::Data::PolarsReader.new
      end

      # Basename of +name+ with a trailing ".parquet" and then one further
      # extension removed ("dir/train.csv.parquet" => "train").
      def normalized_name(name)
        Pathname.new(name).basename.to_s.gsub(/\.parquet$/, "").gsub(/\.[^.]+$/, "")
      end

      # All configured file names across the three splits. Unset (nil) lists
      # count as empty so the validators below report errors instead of
      # raising NoMethodError.
      def configured_files
        [train_files, test_files, valid_files].flat_map { |files| Array(files) }
      end

      def files_must_be_unique
        all_files = configured_files
        return if all_files.uniq.length == all_files.length

        errors.add(:base, "Files must be unique across splits")
      end

      def at_least_one_file_specified
        return unless configured_files.empty?

        errors.add(:base, "At least one file must be specified")
      end
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_splitters
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# splitter_type :string not null
|
7
|
+
# configuration :json
|
8
|
+
# dataset_id :bigint not null
|
9
|
+
# created_at :datetime not null
|
10
|
+
# updated_at :datetime not null
|
11
|
+
#
|
12
|
+
require_relative "base_splitter"
|
13
|
+
|
14
|
+
module EasyML
  module Splitters
    # Splitter that shuffles row positions and cuts them into train, valid
    # and test partitions by ratio.
    #
    # When +seed+ is set the shuffle uses Random.new(seed.to_i), making the
    # partition reproducible; otherwise a fresh Random is used.
    class RandomSplitter < BaseSplitter
      validates :train_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validates :valid_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validates :test_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validate :ratios_sum_to_one

      attr_accessor :train_ratio, :valid_ratio, :test_ratio, :seed

      add_configuration_attributes :train_ratio, :valid_ratio, :test_ratio, :seed

      def self.default_config
        {}
      end

      # Splits +df+ into three frames.
      #
      # The train and valid sizes are floor(rows * ratio). The test frame
      # receives every remaining row, so +test_ratio+ only takes part in
      # validation, not in the sizing done here.
      #
      # @param df [Polars::DataFrame] frame to split (must respond to
      #   +height+, +columns+, +with_columns+)
      # @return [Array] [train_df, valid_df, test_df]
      def split_df(df)
        set_defaults

        rng = seed ? Random.new(seed.to_i) : Random.new
        n_rows = df.height
        shuffled_indices = (0...n_rows).to_a.shuffle(random: rng)

        train_size = (n_rows * train_ratio).floor
        valid_size = (n_rows * valid_ratio).floor

        partitions = [
          shuffled_indices[0...train_size],
          shuffled_indices[train_size...(train_size + valid_size)],
          shuffled_indices[(train_size + valid_size)..],
        ]

        # Temporary row-position column used only for filtering. Its name is
        # chosen so it cannot clobber (and then drop) a real column that
        # happens to be called "index".
        index_column = unused_column_name(df, "index")
        indexed = df.with_columns([
          Polars.arange(0, n_rows).alias(index_column),
        ])

        partitions.map do |indices|
          indexed.filter(Polars.col(index_column).is_in(indices)).drop(index_column)
        end
      end

      private

      # Returns +base+, prefixed with underscores until it does not collide
      # with an existing column of +df+.
      def unused_column_name(df, base)
        existing = df.columns
        name = base
        name = "_#{name}" while existing.include?(name)
        name
      end

      def set_defaults
        self.train_ratio ||= 0.6
        self.valid_ratio ||= 0.2
        self.test_ratio ||= 0.2
      end

      def ratios_sum_to_one
        return unless train_ratio && valid_ratio && test_ratio

        sum = train_ratio + valid_ratio + test_ratio
        return if (sum - 1.0).abs < 1e-10 # Using small epsilon for float comparison

        errors.add(:base, "Split ratios must sum to 1.0 (got #{sum})")
      end
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_tuner_jobs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# config :json not null
|
7
|
+
# best_tuner_run_id :bigint
|
8
|
+
# model_id :bigint not null
|
9
|
+
# status :string
|
10
|
+
# direction :string default("minimize")
|
11
|
+
# started_at :datetime
|
12
|
+
# completed_at :datetime
|
13
|
+
# metadata :jsonb
|
14
|
+
# wandb_url :string
|
15
|
+
# created_at :datetime not null
|
16
|
+
# updated_at :datetime not null
|
17
|
+
#
|
18
|
+
module EasyML
  # A hyperparameter tuning job for a model. Each job owns many tuner runs
  # and may point at the run recorded as its best.
  class TunerJob < ActiveRecord::Base
    self.table_name = "easy_ml_tuner_jobs"

    belongs_to :model
    belongs_to :best_tuner_run, class_name: "EasyML::TunerRun", optional: true
    has_many :tuner_runs, dependent: :destroy

    validates :config, presence: true
    validates :direction, inclusion: { in: %w[minimize maximize] }

    enum status: {
      pending: "pending",
      running: "running",
      success: "success",
      failed: "failed",
    }

    # The run with the best objective value for this job's direction.
    #
    # Runs without a value are excluded: +value+ is nullable, and with a
    # descending sort some databases order NULLs first, which would report a
    # run that never produced a value as the best one.
    #
    # @return [EasyML::TunerRun, nil] nil when no run has a value yet
    def best_run
      tuner_runs.where.not(value: nil).order(value: direction_order).first
    end

    # Hyperparameter constants for every model type in
    # EasyML::Model::MODEL_OPTIONS, keyed like MODEL_OPTIONS.
    #
    # NOTE(review): this shadows Ruby's built-in Module#constants for this
    # class, so reflection code calling TunerJob.constants gets this hash
    # instead of constant names. The name is kept for existing callers.
    #
    # @return [Hash] model option key => hyperparameter_constants of its class
    def self.constants
      EasyML::Model::MODEL_OPTIONS.each_with_object({}) do |(key, class_name), result|
        result[key] = class_name.constantize.hyperparameter_constants
      end
    end

    private

    # Sort direction that puts the best value first.
    def direction_order
      direction == "minimize" ? :asc : :desc
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_tuner_runs
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# tuner_job_id :bigint not null
|
7
|
+
# hyperparameters :json not null
|
8
|
+
# value :float
|
9
|
+
# trial_number :integer
|
10
|
+
# status :string
|
11
|
+
# wandb_url :string
|
12
|
+
# created_at :datetime not null
|
13
|
+
# updated_at :datetime not null
|
14
|
+
#
|
15
|
+
module EasyML
  # One trial of a tuner job: the hyperparameters tried, the resulting
  # value, and the trial's status.
  class TunerRun < ActiveRecord::Base
    self.table_name = "easy_ml_tuner_runs"

    belongs_to :tuner_job

    validates :hyperparameters, :trial_number, presence: true
    # A trial number may only be used once within a given tuner job.
    validates :trial_number, uniqueness: { scope: :tuner_job_id }

    # Each status is stored as its own name ("pending", "running", ...).
    enum status: %i[pending running success failed].to_h { |state| [state, state.to_s] }
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_columns
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# dataset_id :bigint not null
|
7
|
+
# name :string not null
|
8
|
+
# description :string
|
9
|
+
# datatype :string
|
10
|
+
# polars_datatype :string
|
11
|
+
# is_target :boolean
|
12
|
+
# hidden :boolean default(FALSE)
|
13
|
+
# drop_if_null :boolean default(FALSE)
|
14
|
+
# preprocessing_steps :json
|
15
|
+
# sample_values :json
|
16
|
+
# statistics :json
|
17
|
+
# created_at :datetime not null
|
18
|
+
# updated_at :datetime not null
|
19
|
+
#
|
20
|
+
module EasyML
  # JSON:API serializer for a dataset column. Every attribute is read
  # straight from the record under the same name.
  class ColumnSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id
        name
        description
        dataset_id
        datatype
        polars_datatype
        preprocessing_steps
        hidden
        drop_if_null
        sample_values
        statistics
        is_target
      ]
    )
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require_relative "./column_serializer"
|
2
|
+
|
3
|
+
# == Schema Information
|
4
|
+
#
|
5
|
+
# Table name: easy_ml_datasets
|
6
|
+
#
|
7
|
+
# id :bigint not null, primary key
|
8
|
+
# name :string not null
|
9
|
+
# description :string
|
10
|
+
# dataset_type :string
|
11
|
+
# status :string
|
12
|
+
# version :string
|
13
|
+
# datasource_id :bigint
|
14
|
+
# root_dir :string
|
15
|
+
# configuration :json
|
16
|
+
# num_rows :bigint
|
17
|
+
# workflow_status :string
|
18
|
+
# statistics :json
|
19
|
+
# preprocessor_statistics :json
|
20
|
+
# schema :json
|
21
|
+
# refreshed_at :datetime
|
22
|
+
# created_at :datetime not null
|
23
|
+
# updated_at :datetime not null
|
24
|
+
#
|
25
|
+
module EasyML
  # JSON:API serializer for a dataset, embedding its columns, features,
  # splitter, a small data sample and failure details.
  class DatasetSerializer
    include JSONAPI::Serializer

    attributes :id, :name, :description, :target, :num_rows, :status,
               :datasource_id, :preprocessing_steps, :workflow_status, :statistics

    attribute :splitter do |dataset|
      dataset.splitter
    end

    # Attribute hashes of the dataset's columns, ordered by id.
    attribute :columns do |dataset|
      dataset.columns.order(:id).map do |column|
        ColumnSerializer.new(column).serializable_hash.dig(:data, :attributes)
      end
    end

    # Up to 10 rows of data as hashes; nil while the dataset is analyzing.
    # workflow_status is compared as a string because the column is nullable
    # and nil.to_sym would raise.
    attribute :sample_data do |dataset|
      unless dataset.workflow_status.to_s == "analyzing"
        dataset.data(limit: 10, all_columns: true)&.to_hashes
      end
    end

    # Reports the datasource's last_updated_at rather than the dataset
    # record's own updated_at.
    attribute :updated_at do |dataset|
      dataset.datasource&.last_updated_at
    end

    # Attribute hashes of the dataset's features, in their `ordered` scope.
    attribute :features do |dataset|
      dataset.features.ordered.map do |feature|
        FeatureSerializer.new(feature).serializable_hash.dig(:data, :attributes)
      end
    end

    attribute :needs_refresh do |dataset|
      dataset.needs_refresh?
    end

    # Stacktrace of the most recent event, but only when the dataset has
    # failed and that event is itself a failure; nil otherwise.
    attribute :stacktrace do |object|
      if object.failed?
        # `first` on the descending order is the newest event, or nil when
        # there are none, so no separate emptiness query is needed.
        last_event = object.events.order(id: :desc).first
        last_event.stacktrace if last_event&.status == "failed"
      end
    end
  end
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_datasources
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# name :string not null
|
7
|
+
# datasource_type :string
|
8
|
+
# root_dir :string
|
9
|
+
# configuration :json
|
10
|
+
# created_at :datetime not null
|
11
|
+
# updated_at :datetime not null
|
12
|
+
#
|
13
|
+
require "jsonapi/serializer"
|
14
|
+
|
15
|
+
module EasyML
  # JSON:API serializer for a datasource, including its sync state and the
  # stacktrace of a failed last sync.
  class DatasourceSerializer
    include JSONAPI::Serializer

    set_type :datasource # Optional type for JSON:API

    attributes :id, :name, :datasource_type, :s3_bucket, :s3_prefix, :s3_region, :schema, :columns, :available_files

    # "Syncing..." during a sync, "Not Synced" when there is no
    # last_updated_at, otherwise that time in the configured timezone.
    attribute :last_synced_at do |datasource|
      next "Syncing..." if datasource.is_syncing

      synced_at = datasource.last_updated_at
      if synced_at
        synced_at.in_time_zone(EasyML::Configuration.timezone)
      else
        "Not Synced"
      end
    end

    attribute :created_at do |datasource|
      zone = EasyML::Configuration.timezone
      datasource.created_at.in_time_zone(zone).iso8601
    end

    attribute :updated_at do |datasource|
      zone = EasyML::Configuration.timezone
      datasource.updated_at.in_time_zone(zone).iso8601
    end

    attribute :is_synced do |datasource|
      datasource.last_updated_at.present?
    end

    attribute :is_syncing do |datasource|
      datasource.is_syncing
    end

    # nil while syncing; otherwise whether the newest event failed.
    attribute :sync_failed do |datasource|
      next nil if datasource.is_syncing

      newest_event = datasource.events.order(id: :desc).limit(1).last
      newest_event&.status == "failed"
    end

    # Stacktrace of the newest event when it failed; nil while syncing or
    # when the newest event is not a failure.
    attribute :stacktrace do |datasource|
      next nil if datasource.is_syncing

      newest_event = datasource.events.order(id: :desc).limit(1).last
      newest_event&.stacktrace if newest_event&.status == "failed"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_features
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# dataset_id :bigint not null
|
7
|
+
# name :string
|
8
|
+
# feature_class :string not null
|
9
|
+
# feature_method :string not null
|
10
|
+
# feature_position :integer
|
11
|
+
# applied_at :datetime
|
12
|
+
# created_at :datetime not null
|
13
|
+
# updated_at :datetime not null
|
14
|
+
#
|
15
|
+
require "jsonapi/serializer"
|
16
|
+
|
17
|
+
module EasyML
  # JSON:API serializer for a dataset feature.
  class FeatureSerializer
    include JSONAPI::Serializer

    attributes(*%i[id feature_class feature_position name])

    # Description looked up in the feature registry by the feature's name;
    # nil when the registry has no entry for it.
    attribute :description do |feature|
      registry_entry = EasyML::Features::Registry.find(feature.name) || {}
      registry_entry.dig(:description)
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_models
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# name :string not null
|
7
|
+
# model_type :string
|
8
|
+
# status :string
|
9
|
+
# dataset_id :bigint
|
10
|
+
# model_file_id :bigint
|
11
|
+
# configuration :json
|
12
|
+
# version :string not null
|
13
|
+
# root_dir :string
|
14
|
+
# file :json
|
15
|
+
# sha :string
|
16
|
+
# created_at :datetime not null
|
17
|
+
# updated_at :datetime not null
|
18
|
+
#
|
19
|
+
require "jsonapi/serializer"
|
20
|
+
|
21
|
+
module EasyML
  # JSON:API serializer for a model, embedding its dataset, retraining job,
  # last run and a page of retraining runs.
  class ModelSerializer
    include JSONAPI::Serializer

    attributes :id,
               :name,
               :model_type,
               :task,
               :objective,
               :metrics,
               :dataset_id,
               :status,
               :deployment_status,
               :configuration,
               :created_at,
               :updated_at,
               :last_run_at

    attribute :is_training do |object|
      object.training?
    end

    # Attribute hash of the model's last retraining run.
    attribute :last_run do |object|
      RetrainingRunSerializer.new(object.last_run).serializable_hash.dig(:data, :attributes)
    end

    attribute :metrics_url do |object|
      object.last_run&.wandb_url
    end

    # One page of retraining runs, newest first, with paging metadata.
    #
    # Reads params[:limit] (default 20) and params[:offset] (default 0).
    # Both are coerced to integers: values taken from a request arrive as
    # strings, and the offset arithmetic below would otherwise raise
    # TypeError or concatenate strings.
    attribute :retraining_runs do |object, params|
      limit = (params[:limit] || 20).to_i
      offset = (params[:offset] || 0).to_i

      runs = object.retraining_runs
                   .order(created_at: :desc)
                   .offset(offset)
                   .limit(limit)

      {
        runs: RetrainingRunSerializer.new(runs).serializable_hash[:data].map { |run| run[:attributes] },
        total_count: object.retraining_runs.count,
        limit: limit,
        offset: offset,
        # NOTE: these are not clamped; prev_offset is negative on the first
        # page and next_offset may point past total_count.
        next_offset: offset + limit,
        prev_offset: offset - limit,
      }
    end

    attribute :version do |object|
      object.formatted_version
    end

    attribute :formatted_model_type do |object|
      object.formatted_model_type
    end

    # Formatted frequency of the retraining job; nil when there is none.
    attribute :formatted_frequency do |object|
      object.retraining_job&.formatted_frequency
    end

    # Attribute hash of the model's dataset.
    attribute :dataset do |object|
      DatasetSerializer.new(object.dataset).serializable_hash.dig(:data, :attributes)
    end

    # Attribute hash of the model's retraining job.
    attribute :retraining_job do |object|
      RetrainingJobSerializer.new(object.retraining_job).serializable_hash.dig(:data, :attributes)
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require "jsonapi/serializer"
|
2
|
+
|
3
|
+
module EasyML
  # JSON:API serializer for a retraining job. Every attribute is read
  # straight from the record under the same name.
  class RetrainingJobSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id
        active
        frequency
        formatted_frequency
        tuning_frequency
        at
        metric
        threshold
        tuner_config
        batch_mode
        batch_size
        batch_overlap
        batch_key
        tuning_enabled
      ]
    )
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require "jsonapi/serializer"
|
2
|
+
|
3
|
+
module EasyML
  # JSON:API serializer for a single retraining run.
  class RetrainingRunSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id
        deployable
        metrics
        metric_value
        threshold
        threshold_direction
        status
        error_message
        is_deploying
        deployed
      ]
    )

    # The run's wandb_url, exposed under the name metrics_url.
    attribute :metrics_url do |run|
      run.wandb_url
    end

    # Start time in the configured timezone (nil when unset).
    attribute :started_at do |run|
      zone = EasyML::Configuration.timezone
      run.started_at&.in_time_zone(zone)
    end

    # Completion time in the configured timezone (nil when unset).
    attribute :completed_at do |run|
      zone = EasyML::Configuration.timezone
      run.completed_at&.in_time_zone(zone)
    end

    # Stacktrace of the newest event when it failed; nil while the run is
    # running or when the newest event is not a failure.
    attribute :stacktrace do |object|
      next nil if object.status.to_s == "running"

      newest_event = object.events.order(id: :desc).limit(1).last
      newest_event&.stacktrace if newest_event&.status.to_s == "failed"
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html>
|
3
|
+
<head>
|
4
|
+
<title>EasyML</title>
|
5
|
+
<%= csrf_meta_tags %>
|
6
|
+
<%= csp_meta_tag %>
|
7
|
+
|
8
|
+
<%= vite_client_tag %>
|
9
|
+
<%= vite_react_refresh_tag %>
|
10
|
+
<%= vite_typescript_tag 'Application.tsx' %>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<%= yield %>
|
14
|
+
</body>
|
15
|
+
</html>
|
data/config/routes.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Routes of the EasyML engine. The landing page is the model list.
EasyML::Engine.routes.draw do
  root to: "models#index"

  # Models, with training and run history per model and nested deploys
  resources :models, as: :easy_ml_models do
    post :train, on: :member
    get :retraining_runs, on: :member, to: "retraining_runs#index"

    resources :deploys, only: %i[create]
    get "new", on: :collection, as: "new"
  end

  resources :retraining_runs, only: %i[show]

  # Datasources
  resources :datasources, as: :easy_ml_datasources do
    post :sync, on: :member
  end

  # Datasets
  resources :datasets, as: :easy_ml_datasets do
    post :refresh, on: :member
  end

  # Transformations
  resources :transformations, only: %i[index new edit], as: :easy_ml_transformations

  # Settings (listed and updated on the collection)
  resources :settings, only: %i[index] do
    patch :update, on: :collection
  end

  # Columns
  resources :columns, only: %i[update], as: :easy_ml_columns
end
|
data/config/spring.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Spring.application_root = "./spec/internal"
|