easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,74 @@
1
module EasyML
  module Splitters
    # Splitter that assigns whole files of a datasource to the train, valid
    # and test sets according to configured file names.
    #
    # File names are compared with their extensions removed, and when the same
    # base name is available in several formats the parquet file is preferred.
    class PredefinedSplitter < BaseSplitter
      validates :train_files, :test_files, :valid_files, presence: true
      validate :files_must_be_unique
      validate :at_least_one_file_specified

      add_configuration_attributes :train_files, :test_files, :valid_files

      # Default file names used for each split.
      #
      # @return [Hash{Symbol => Array<String>}]
      def self.default_config
        {
          train_files: ["train.csv"],
          test_files: ["test.csv"],
          valid_files: ["valid.csv"],
        }
      end

      # Validates the splitter, matches the datasource's files against the
      # configured names and yields the three query results.
      #
      # @param datasource [#all_files] object exposing the available files
      # @yield [Array] a three-element array: train, valid and test results
      #   of `reader.query`, in that order
      # @raise whatever `validate!` raises when the configuration is invalid
      def split(datasource, &block)
        validate!

        files = datasource.all_files
        train, valid, test = match_files(files)

        yield [reader.query(train), reader.query(valid), reader.query(test)]
      end

      # Partitions +files+ into the configured splits.
      #
      # @param files [Array<String>] candidate file paths
      # @return [Array<Array<String>>] matched files as [train, valid, test]
      def match_files(files)
        train = select_preferred_files(files.select { |file| match_file(file, train_files) })
        test = select_preferred_files(files.select { |file| match_file(file, test_files) })
        valid = select_preferred_files(files.select { |file| match_file(file, valid_files) })

        [train, valid, test]
      end

      # Collapses files sharing the same base name into a single entry.
      #
      # @param files [Array<String>] file paths, possibly in several formats
      # @return [Array<String>] one path per base name: the parquet file if
      #   the group has one, otherwise the first file of the group
      def select_preferred_files(files)
        files.group_by { |path| strip_extensions(path) }.map do |_, group_files|
          group_files.find { |path| path.end_with?(".parquet") } || group_files.first
        end
      end

      # Tells whether +file+ corresponds to one of the configured names.
      #
      # @param file [String] path of a file in the datasource
      # @param type [Array<String>] configured file names for one split
      # @return [Boolean]
      def match_file(file, type)
        # Configured names only lose their last extension; they are not
        # reduced to a basename.
        type.map { |f| f.gsub(/\.[^.]+$/, "") }.include?(strip_extensions(file))
      end

      private

      def reader
        @reader ||= EasyML::Data::PolarsReader.new
      end

      # Basename of +path+ with a trailing ".parquet" removed first and then
      # one further extension removed, so "dir/train.csv.parquet",
      # "train.parquet" and "train.csv" all reduce to "train".
      def strip_extensions(path)
        Pathname.new(path).basename.to_s.gsub(/\.parquet$/, "").gsub(/\.[^.]+$/, "")
      end

      # Array() keeps the validators nil-safe: a missing list must surface as
      # a validation error (the presence validation reports it), not as a
      # NoMethodError raised from inside a validator.
      def files_must_be_unique
        all_files = Array(train_files) + Array(test_files) + Array(valid_files)
        return if all_files.uniq.length == all_files.length

        errors.add(:base, "Files must be unique across splits")
      end

      def at_least_one_file_specified
        return unless [train_files, test_files, valid_files].all? { |list| Array(list).empty? }

        errors.add(:base, "At least one file must be specified")
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_splitters
4
+ #
5
+ # id :bigint not null, primary key
6
+ # splitter_type :string not null
7
+ # configuration :json
8
+ # dataset_id :bigint not null
9
+ # created_at :datetime not null
10
+ # updated_at :datetime not null
11
+ #
12
+ require_relative "base_splitter"
13
+
14
module EasyML
  module Splitters
    # Splitter that shuffles row positions and cuts them into train, valid
    # and test partitions according to the configured ratios.
    class RandomSplitter < BaseSplitter
      validates :train_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validates :valid_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validates :test_ratio, presence: true, numericality: { greater_than: 0, less_than: 1 }
      validate :ratios_sum_to_one

      attr_accessor :train_ratio, :valid_ratio, :test_ratio, :seed

      add_configuration_attributes :train_ratio, :valid_ratio, :test_ratio, :seed

      def self.default_config
        {}
      end

      # Splits +df+ into three data frames.
      #
      # Missing ratios are filled in with defaults first. When a seed is set
      # the shuffle is driven by Random.new(seed.to_i), so the same seed gives
      # the same partitioning; otherwise a fresh generator is used.
      #
      # @param df [Polars::DataFrame] frame to split
      # @return [Array] [train_df, valid_df, test_df]; the test set receives
      #   every row left over after the floored train and valid sizes
      def split_df(df)
        set_defaults

        generator = seed ? Random.new(seed.to_i) : Random.new
        total = df.height
        order = (0...total).to_a.shuffle(random: generator)

        train_count = (total * train_ratio).floor
        valid_count = (total * valid_ratio).floor
        valid_end = train_count + valid_count

        partitions = [
          order[0...train_count],
          order[train_count...valid_end],
          order[valid_end..],
        ]

        # A temporary positional column lets each partition be selected by
        # membership; it is dropped again from every result.
        indexed = df.with_columns([
          Polars.arange(0, total).alias("index"),
        ])

        partitions.map do |row_ids|
          indexed.filter(Polars.col("index").is_in(row_ids)).drop("index")
        end
      end

      private

      # Applies the 60/20/20 defaults to any ratio that is not set.
      def set_defaults
        self.train_ratio = 0.6 unless train_ratio
        self.valid_ratio = 0.2 unless valid_ratio
        self.test_ratio = 0.2 unless test_ratio
      end

      # Adds a base error unless the three ratios add up to 1.0 (within a
      # small tolerance). Skipped while any ratio is missing.
      def ratios_sum_to_one
        return unless train_ratio && valid_ratio && test_ratio

        sum = train_ratio + valid_ratio + test_ratio
        # Tolerance instead of == because the ratios are floats.
        errors.add(:base, "Split ratios must sum to 1.0 (got #{sum})") unless (sum - 1.0).abs < 1e-10
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_tuner_jobs
4
+ #
5
+ # id :bigint not null, primary key
6
+ # config :json not null
7
+ # best_tuner_run_id :bigint
8
+ # model_id :bigint not null
9
+ # status :string
10
+ # direction :string default("minimize")
11
+ # started_at :datetime
12
+ # completed_at :datetime
13
+ # metadata :jsonb
14
+ # wandb_url :string
15
+ # created_at :datetime not null
16
+ # updated_at :datetime not null
17
+ #
18
module EasyML
  # ActiveRecord model for the easy_ml_tuner_jobs table: one hyperparameter
  # tuning job of a model, owning the individual tuner runs.
  class TunerJob < ActiveRecord::Base
    self.table_name = "easy_ml_tuner_jobs"

    belongs_to :model
    belongs_to :best_tuner_run, class_name: "EasyML::TunerRun", optional: true
    has_many :tuner_runs, dependent: :destroy

    validates :config, presence: true
    validates :direction, inclusion: { in: %w[minimize maximize] }

    enum status: {
      pending: "pending",
      running: "running",
      success: "success",
      failed: "failed",
    }

    # The run with the best objective value for this job's direction.
    #
    # Runs without a value are excluded: databases that sort NULLs first in
    # descending order (PostgreSQL's default) would otherwise report a
    # value-less run as the best one when maximizing.
    #
    # @return [EasyML::TunerRun, nil] nil when no run has a value yet
    def best_run
      tuner_runs.where.not(value: nil).order(value: direction_order).first
    end

    # Hyperparameter constants of every model option, keyed like
    # EasyML::Model::MODEL_OPTIONS.
    #
    # NOTE(review): this shadows Module#constants on this class; confirm
    # nothing relies on the built-in reflection method for TunerJob.
    #
    # @return [Hash]
    def self.constants
      EasyML::Model::MODEL_OPTIONS.each_with_object({}) do |(key, class_name), result|
        result[key] = class_name.constantize.hyperparameter_constants
      end
    end

    private

    # Sort order that puts the best value first.
    def direction_order
      direction == "minimize" ? :asc : :desc
    end
  end
end
@@ -0,0 +1,31 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_tuner_runs
4
+ #
5
+ # id :bigint not null, primary key
6
+ # tuner_job_id :bigint not null
7
+ # hyperparameters :json not null
8
+ # value :float
9
+ # trial_number :integer
10
+ # status :string
11
+ # wandb_url :string
12
+ # created_at :datetime not null
13
+ # updated_at :datetime not null
14
+ #
15
module EasyML
  # ActiveRecord model for the easy_ml_tuner_runs table: a single trial that
  # belongs to a tuner job.
  class TunerRun < ActiveRecord::Base
    self.table_name = "easy_ml_tuner_runs"

    belongs_to :tuner_job

    validates :hyperparameters, presence: true
    validates :trial_number, presence: true
    # Trial numbers may repeat across jobs but not within one job.
    validates :trial_number, uniqueness: { scope: :tuner_job_id }

    # Each status is stored under its own name as a string.
    enum status: %i[pending running success failed].to_h { |state| [state, state.to_s] }
  end
end
@@ -0,0 +1,6 @@
1
module EasyML
  # History model stored in the easy_ml_splitter_histories table; its
  # behaviour comes from Historiographer::History.
  class SplitterHistory < ActiveRecord::Base
    # The table name is assigned before the mixin is included, as in the
    # original declaration order.
    self.table_name = "easy_ml_splitter_histories"
    include Historiographer::History
  end
end
@@ -0,0 +1,27 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_columns
4
+ #
5
+ # id :bigint not null, primary key
6
+ # dataset_id :bigint not null
7
+ # name :string not null
8
+ # description :string
9
+ # datatype :string
10
+ # polars_datatype :string
11
+ # is_target :boolean
12
+ # hidden :boolean default(FALSE)
13
+ # drop_if_null :boolean default(FALSE)
14
+ # preprocessing_steps :json
15
+ # sample_values :json
16
+ # statistics :json
17
+ # created_at :datetime not null
18
+ # updated_at :datetime not null
19
+ #
20
module EasyML
  # JSON:API serializer exposing a dataset column's stored attributes.
  class ColumnSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id name description dataset_id datatype polars_datatype preprocessing_steps
        hidden drop_if_null sample_values statistics is_target
      ]
    )
  end
end
@@ -0,0 +1,73 @@
1
+ require_relative "./column_serializer"
2
+
3
+ # == Schema Information
4
+ #
5
+ # Table name: easy_ml_datasets
6
+ #
7
+ # id :bigint not null, primary key
8
+ # name :string not null
9
+ # description :string
10
+ # dataset_type :string
11
+ # status :string
12
+ # version :string
13
+ # datasource_id :bigint
14
+ # root_dir :string
15
+ # configuration :json
16
+ # num_rows :bigint
17
+ # workflow_status :string
18
+ # statistics :json
19
+ # preprocessor_statistics :json
20
+ # schema :json
21
+ # refreshed_at :datetime
22
+ # created_at :datetime not null
23
+ # updated_at :datetime not null
24
+ #
25
module EasyML
  # JSON:API serializer for a dataset, including its splitter, columns,
  # features, a small data sample and the stacktrace of a failed last event.
  class DatasetSerializer
    include JSONAPI::Serializer

    attributes :id, :name, :description, :target, :num_rows, :status,
               :datasource_id, :preprocessing_steps, :workflow_status, :statistics

    attribute :splitter do |dataset|
      dataset.splitter
    end

    # Columns in id order, each reduced to its serialized attributes.
    attribute :columns do |dataset|
      dataset.columns.order(:id).map do |column|
        ColumnSerializer.new(column).serializable_hash.dig(:data, :attributes)
      end
    end

    # Up to ten rows of data, or nil while the dataset is being analyzed.
    attribute :sample_data do |dataset|
      # to_s rather than to_sym: workflow_status is a nullable column and
      # nil.to_sym would raise.
      if dataset.workflow_status.to_s == "analyzing"
        nil
      else
        dataset.data(limit: 10, all_columns: true)&.to_hashes
      end
    end

    # Reports the datasource's last update time, not the dataset row's own
    # updated_at column.
    attribute :updated_at do |dataset|
      dataset.datasource&.last_updated_at
    end

    attribute :features do |dataset|
      dataset.features.ordered.map do |feature|
        FeatureSerializer.new(feature).serializable_hash.dig(:data, :attributes)
      end
    end

    attribute :needs_refresh do |dataset|
      dataset.needs_refresh?
    end

    # Stacktrace of the most recent event, only when the dataset is failed
    # and that event itself failed; nil otherwise.
    attribute :stacktrace do |object|
      if !object.failed? || object.events.empty?
        nil
      else
        last_event = object.events.order(id: :desc).first
        last_event&.stacktrace if last_event&.status == "failed"
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_datasources
4
+ #
5
+ # id :bigint not null, primary key
6
+ # name :string not null
7
+ # datasource_type :string
8
+ # root_dir :string
9
+ # configuration :json
10
+ # created_at :datetime not null
11
+ # updated_at :datetime not null
12
+ #
13
+ require "jsonapi/serializer"
14
+
15
module EasyML
  # JSON:API serializer for a datasource, including its sync state and the
  # stacktrace of a failed last event.
  class DatasourceSerializer
    include JSONAPI::Serializer

    set_type :datasource # Optional type for JSON:API

    attributes :id, :name, :datasource_type, :s3_bucket, :s3_prefix, :s3_region, :schema, :columns, :available_files

    # Most recent event of a datasource (highest id), or nil if it has none.
    newest_event = ->(datasource) { datasource.events.order(id: :desc).limit(1).last }

    # Either a status label ("Syncing..." / "Not Synced") or the last update
    # time converted to the configured time zone.
    attribute :last_synced_at do |datasource|
      next "Syncing..." if datasource.is_syncing

      synced_at = datasource.last_updated_at
      if synced_at
        synced_at.in_time_zone(EasyML::Configuration.timezone)
      else
        "Not Synced"
      end
    end

    attribute :created_at do |datasource|
      datasource.created_at.in_time_zone(EasyML::Configuration.timezone).iso8601
    end

    attribute :updated_at do |datasource|
      datasource.updated_at.in_time_zone(EasyML::Configuration.timezone).iso8601
    end

    attribute :is_synced do |datasource|
      datasource.last_updated_at.present?
    end

    attribute :is_syncing do |datasource|
      datasource.is_syncing
    end

    # nil while a sync is in progress; otherwise whether the most recent
    # event has the "failed" status.
    attribute :sync_failed do |datasource|
      next nil if datasource.is_syncing

      newest_event.call(datasource)&.status == "failed"
    end

    # Stacktrace of the most recent event when that event failed; nil while
    # syncing or when the last event did not fail.
    attribute :stacktrace do |datasource|
      next nil if datasource.is_syncing

      event = newest_event.call(datasource)
      event&.stacktrace if event&.status == "failed"
    end
  end
end
@@ -0,0 +1,27 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_features
4
+ #
5
+ # id :bigint not null, primary key
6
+ # dataset_id :bigint not null
7
+ # name :string
8
+ # feature_class :string not null
9
+ # feature_method :string not null
10
+ # feature_position :integer
11
+ # applied_at :datetime
12
+ # created_at :datetime not null
13
+ # updated_at :datetime not null
14
+ #
15
+ require "jsonapi/serializer"
16
+
17
module EasyML
  # JSON:API serializer for a dataset feature.
  class FeatureSerializer
    include JSONAPI::Serializer

    attributes :id, :feature_class, :feature_position, :name

    # Description from the feature registry entry found under the feature's
    # name; nil when the registry has no entry for it.
    attribute :description do |feature|
      registered = EasyML::Features::Registry.find(feature.name)
      registered ? registered.dig(:description) : nil
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_models
4
+ #
5
+ # id :bigint not null, primary key
6
+ # name :string not null
7
+ # model_type :string
8
+ # status :string
9
+ # dataset_id :bigint
10
+ # model_file_id :bigint
11
+ # configuration :json
12
+ # version :string not null
13
+ # root_dir :string
14
+ # file :json
15
+ # sha :string
16
+ # created_at :datetime not null
17
+ # updated_at :datetime not null
18
+ #
19
+ require "jsonapi/serializer"
20
+
21
module EasyML
  # JSON:API serializer for a model, including its last run, a page of
  # retraining runs, its dataset and its retraining job.
  class ModelSerializer
    include JSONAPI::Serializer

    attributes :id,
               :name,
               :model_type,
               :task,
               :objective,
               :metrics,
               :dataset_id,
               :status,
               :deployment_status,
               :configuration,
               :created_at,
               :updated_at,
               :last_run_at

    attribute :is_training do |object|
      object.training?
    end

    attribute :last_run do |object|
      RetrainingRunSerializer.new(object.last_run).serializable_hash.dig(:data, :attributes)
    end

    attribute :metrics_url do |object|
      object.last_run&.wandb_url
    end

    # One page of retraining runs, newest first, plus paging metadata.
    #
    # Serializer params:
    #   :limit  - page size (default 20)
    #   :offset - number of runs to skip (default 0)
    #
    # Both are coerced with to_i so values that arrive as strings (e.g.
    # straight from request parameters) still produce numeric offsets rather
    # than string concatenation or a NoMethodError.
    # next_offset and prev_offset are plain offset +/- limit and are not
    # clamped to the available range.
    attribute :retraining_runs do |object, params|
      limit = (params[:limit] || 20).to_i
      offset = (params[:offset] || 0).to_i

      runs = object.retraining_runs
                   .order(created_at: :desc)
                   .offset(offset)
                   .limit(limit)

      {
        runs: RetrainingRunSerializer.new(runs).serializable_hash[:data].map { |run| run[:attributes] },
        total_count: object.retraining_runs.count,
        limit: limit,
        offset: offset,
        next_offset: offset + limit,
        prev_offset: offset - limit,
      }
    end

    attribute :version do |object|
      object.formatted_version
    end

    attribute :formatted_model_type do |object|
      object.formatted_model_type
    end

    # nil when the model has no retraining job.
    attribute :formatted_frequency do |object|
      object.retraining_job.present? ? object.retraining_job.formatted_frequency : nil
    end

    attribute :dataset do |object|
      DatasetSerializer.new(object.dataset).serializable_hash.dig(:data, :attributes)
    end

    attribute :retraining_job do |object|
      RetrainingJobSerializer.new(object.retraining_job).serializable_hash.dig(:data, :attributes)
    end
  end
end
@@ -0,0 +1,22 @@
1
+ require "jsonapi/serializer"
2
+
3
module EasyML
  # JSON:API serializer exposing a retraining job's scheduling, threshold,
  # tuning and batching settings.
  class RetrainingJobSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id active frequency formatted_frequency tuning_frequency at metric
        threshold tuner_config batch_mode batch_size batch_overlap batch_key
        tuning_enabled
      ]
    )
  end
end
@@ -0,0 +1,39 @@
1
+ require "jsonapi/serializer"
2
+
3
module EasyML
  # JSON:API serializer for a retraining run, including localized timestamps
  # and the stacktrace of a failed last event.
  class RetrainingRunSerializer
    include JSONAPI::Serializer

    attributes(
      *%i[
        id deployable metrics metric_value threshold threshold_direction
        status error_message is_deploying deployed
      ]
    )

    attribute :metrics_url do |run|
      run.wandb_url
    end

    # Start time in the configured time zone; nil when not started.
    attribute :started_at do |run|
      started = run.started_at
      started&.in_time_zone(EasyML::Configuration.timezone)
    end

    # Completion time in the configured time zone; nil when not completed.
    attribute :completed_at do |run|
      completed = run.completed_at
      completed&.in_time_zone(EasyML::Configuration.timezone)
    end

    # Stacktrace of the most recent event when that event failed; nil while
    # the run is still running or when the last event did not fail.
    attribute :stacktrace do |object|
      next nil if object.status.to_s == "running"

      newest = object.events.order(id: :desc).limit(1).last
      newest&.stacktrace if newest&.status.to_s == "failed"
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require "jsonapi/serializer"
2
+
3
module EasyML
  # JSON:API serializer exposing every configuration attribute declared by
  # EasyML::Settings.
  class SettingsSerializer
    include JSONAPI::Serializer

    exposed = EasyML::Settings.configuration_attributes
    attributes(*exposed)
  end
end
@@ -0,0 +1,15 @@
1
<%# Layout for the EasyML engine: loads the Vite-built React entrypoint and renders the page body. %>
<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8">
    <meta name="viewport" content="width=device-width, initial-scale=1">
    <title>EasyML</title>
    <%= csrf_meta_tags %>
    <%= csp_meta_tag %>

    <%= vite_client_tag %>
    <%= vite_react_refresh_tag %>
    <%= vite_typescript_tag 'Application.tsx' %>
  </head>
  <body>
    <%= yield %>
  </body>
</html>
@@ -0,0 +1,3 @@
1
require 'resque'

# Point Resque at the Redis instance named by REDIS_URL, falling back to a
# local default when the variable is not set.
Resque.redis = ENV.fetch('REDIS_URL', 'redis://localhost:6379')
3
+
@@ -0,0 +1,6 @@
1
# Worker counts per environment; '*' is the queue key used for both entries.
development:
  '*': 2

# Production worker count comes from WORKER_COUNT, defaulting to 5.
production:
  '*': <%= ENV.fetch('WORKER_COUNT', 5) %>
6
+
data/config/routes.rb ADDED
@@ -0,0 +1,39 @@
1
# Routes of the EasyML engine. The root page is the models index.
EasyML::Engine.routes.draw do
  root to: "models#index"

  # Models, with training, per-model retraining runs and deploys
  resources :models, as: :easy_ml_models do
    post :train, on: :member
    get :retraining_runs, to: "retraining_runs#index", on: :member
    resources :deploys, only: %i[create]
    collection do
      get "new", as: "new"
    end
  end

  resources :retraining_runs, only: %i[show]

  # Datasources
  resources :datasources, as: :easy_ml_datasources do
    post :sync, on: :member
  end

  # Datasets
  resources :datasets, as: :easy_ml_datasets do
    post :refresh, on: :member
  end

  # Transformations
  resources :transformations, only: %i[index new edit], as: :easy_ml_transformations

  # Settings (updated through the collection, not a member)
  resources :settings, only: %i[index] do
    collection do
      patch :update
    end
  end

  # Columns
  resources :columns, only: %i[update], as: :easy_ml_columns
end
data/config/spring.rb ADDED
@@ -0,0 +1 @@
1
# Use the application under spec/internal as Spring's application root.
Spring.application_root = "./spec/internal"