easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,31 @@
1
+ class CreateEasyMLDatasets < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_datasets do |t|
4
+ t.string :name, null: false
5
+ t.string :description
6
+ t.string :dataset_type
7
+ t.string :status
8
+ t.string :version
9
+ t.bigint :datasource_id
10
+ t.string :root_dir
11
+ t.json :configuration
12
+ t.bigint :num_rows
13
+ t.string :workflow_status
14
+ t.json :statistics
15
+ t.json :preprocessor_statistics
16
+ t.json :schema
17
+ t.datetime :refreshed_at
18
+
19
+ t.timestamps
20
+
21
+ t.index :created_at
22
+ t.index :refreshed_at
23
+ t.index :name
24
+ t.index :status
25
+ t.index [:name, :status]
26
+ t.index :datasource_id
27
+ t.index :dataset_type
28
+ t.index :workflow_status
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,9 @@
1
+ require "historiographer/postgres_migration"
2
+
3
+ class CreateEasyMLDatasourceHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
4
+ def change
5
+ create_table :easy_ml_datasource_histories do |t|
6
+ t.histories(foreign_key: :datasource_id)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,16 @@
1
+ class CreateEasyMLDatasources < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_datasources do |t|
4
+ t.string :name, null: false
5
+ t.string :datasource_type
6
+ t.string :root_dir
7
+ t.json :configuration
8
+ t.datetime :refreshed_at
9
+
10
+ t.timestamps
11
+ t.index :created_at
12
+ t.index :datasource_type
13
+ t.index :refreshed_at
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,24 @@
1
+ class CreateEasyMLDeploys < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_deploys do |t|
4
+ t.bigint :model_id
5
+ t.bigint :model_history_id
6
+ t.bigint :retraining_run_id
7
+ t.bigint :model_file_id
8
+ t.string :status, null: false
9
+ t.string :trigger, default: 'manual'
10
+ t.text :stacktrace
11
+ t.string :snapshot_id
12
+ t.timestamps
13
+
14
+ t.index :created_at
15
+ t.index :model_id
16
+ t.index :model_history_id
17
+ t.index :snapshot_id
18
+ t.index :model_file_id
19
+ t.index :retraining_run_id
20
+ t.index :status
21
+ t.index :trigger
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,20 @@
1
+ class CreateEasyMLEvents < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_events do |t|
4
+ t.string :name, null: false
5
+ t.string :status, null: false
6
+ t.string :eventable_type
7
+ t.bigint :eventable_id
8
+ t.text :stacktrace
9
+
10
+ t.timestamps
11
+
12
+ t.index :name
13
+ t.index :status
14
+ t.index :eventable_type
15
+ t.index :eventable_id
16
+ t.index :created_at
17
+ t.index [:eventable_type, :eventable_id]
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,14 @@
1
+ require "historiographer/postgres_migration"
2
+
3
+ class CreateEasyMLFeatureHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
4
+ def change
5
+ create_table :easy_ml_feature_histories do |t|
6
+ t.histories(
7
+ foreign_key: :feature_id,
8
+ index_names: {
9
+ [:dataset_id, :feature_position] => "idx_feature_histories_on_dataset_and_position"
10
+ }
11
+ )
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,32 @@
1
+ class CreateEasyMLFeatures < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_features do |t|
4
+ t.bigint :dataset_id, null: false
5
+ t.string :name
6
+ t.bigint :version
7
+ t.string :feature_class, null: false
8
+ t.integer :feature_position
9
+ t.integer :batch_size
10
+ t.boolean :needs_fit
11
+ t.string :sha
12
+ t.string :primary_key, array: true
13
+ t.datetime :applied_at
14
+ t.datetime :fit_at
15
+ t.bigint :refresh_every
16
+
17
+ t.timestamps
18
+
19
+ t.index %i[dataset_id feature_position], name: "idx_features_on_dataset_and_position"
20
+ t.index %i[dataset_id name], unique: true, name: "idx_features_on_dataset_and_name"
21
+ t.index :feature_class
22
+ t.index :applied_at
23
+ t.index :name
24
+ t.index :version
25
+ t.index :sha
26
+ t.index :batch_size
27
+ t.index :needs_fit
28
+ t.index :fit_at
29
+ t.index :refresh_every
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,9 @@
1
+ require "historiographer/postgres_migration"
2
+
3
+ class CreateEasyMLModelFileHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
4
+ def change
5
+ create_table :easy_ml_model_file_histories do |t|
6
+ t.histories(foreign_key: :model_file_id)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,17 @@
1
+ class CreateEasyMLModelFiles < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_model_files do |t|
4
+ t.string :filename, null: false
5
+ t.string :path, null: false
6
+ t.json :configuration
7
+ t.string :model_type
8
+ t.bigint :model_id
9
+ t.timestamps
10
+
11
+ t.index :created_at
12
+ t.index :filename
13
+ t.index [:model_type]
14
+ t.index :model_id
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,9 @@
1
+ require "historiographer/postgres_migration"
2
+
3
+ class CreateEasyMLModelHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
4
+ def change
5
+ create_table :easy_ml_model_histories do |t|
6
+ t.histories(foreign_key: :model_id)
7
+ end
8
+ end
9
+ end
@@ -1,23 +1,34 @@
1
1
  # lib/railtie/generators/templates/migration/create_easy_ml_models.rb.tt
2
- class CreateEasyMLModels < ActiveRecord::Migration[6.0]
2
+ class CreateEasyMLModels < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
3
3
  def change
4
4
  create_table :easy_ml_models do |t|
5
5
  t.string :name, null: false
6
- t.boolean :is_live, default: false
6
+ t.string :model_type
7
+ t.string :status
8
+ t.bigint :dataset_id
9
+ t.bigint :model_file_id
10
+ t.json :configuration
7
11
  t.string :version, null: false
8
- t.string :ml_model
9
- t.string :task
10
- t.json :metrics, default: []
11
- t.json :file, null: false
12
+ t.string :root_dir
13
+ t.json :file
14
+ t.string :sha
15
+ t.datetime :last_trained_at
16
+ t.boolean :is_training
12
17
 
13
18
  t.timestamps
14
19
 
15
20
  t.index :created_at
21
+ t.index :last_trained_at
16
22
  t.index :name
17
23
  t.index :version
18
- t.index :is_live
19
- t.index [:name, :version], unique: true
20
- t.index [:name, :version, :is_live]
24
+ t.index :status
25
+ t.index [:name, :status]
26
+ t.index [:name, :version]
27
+ t.index :dataset_id
28
+ t.index :model_type
29
+ t.index :model_file_id
30
+ t.index :sha
31
+ t.index :is_training
21
32
  end
22
33
  end
23
34
  end
@@ -0,0 +1,17 @@
1
+ class CreateEasyMLPredictions < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_predictions do |t|
4
+ t.bigint :model_id, null: false
5
+ t.bigint :model_history_id
6
+ t.string :prediction_type
7
+ t.jsonb :prediction_value
8
+ t.jsonb :raw_input
9
+ t.jsonb :normalized_input
10
+ t.timestamps
11
+
12
+ t.index :model_id
13
+ t.index :model_history_id
14
+ t.index :created_at
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,77 @@
1
+ class CreateEasyMLRetrainingJobs < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_retraining_jobs do |t|
4
+ t.bigint :model_id
5
+ t.string :frequency, null: false # day, week, month, hour
6
+ t.json :at, null: false # hour of day (0-23)
7
+ t.json :evaluator # Model evaluator
8
+ t.boolean :tuning_enabled, default: false
9
+ t.json :tuner_config # configuration for the tuner
10
+ t.string :tuning_frequency # day, week, month, hour - when to run with tuner
11
+ t.datetime :last_tuning_at # track last tuning run
12
+ t.boolean :active, default: true
13
+ t.string :status, default: "pending"
14
+ t.datetime :last_run_at
15
+ t.string :metric, null: false
16
+ t.string :direction, null: false
17
+ t.float :threshold, null: false
18
+ t.boolean :auto_deploy, default: false
19
+ t.boolean :batch_mode
20
+ t.integer :batch_size
21
+ t.integer :batch_overlap
22
+ t.string :batch_key
23
+
24
+ t.timestamps
25
+
26
+ t.index :model_id
27
+ t.index :active
28
+ t.index :last_run_at
29
+ t.index :last_tuning_at
30
+ t.index :batch_mode
31
+ t.index :auto_deploy
32
+ t.index :tuning_enabled
33
+ end
34
+
35
+ create_table :easy_ml_retraining_runs do |t|
36
+ t.bigint :model_id
37
+ t.bigint :model_history_id
38
+ t.bigint :model_file_id
39
+ t.bigint :retraining_job_id, null: false
40
+ t.bigint :tuner_job_id, null: true
41
+ t.string :status, default: 'pending'
42
+ t.float :metric_value
43
+ t.float :threshold
44
+ t.string :trigger, default: 'manual'
45
+ t.string :threshold_direction
46
+ t.datetime :started_at
47
+ t.datetime :completed_at
48
+ t.text :error_message
49
+ t.jsonb :metadata
50
+ t.jsonb :metrics
51
+ t.jsonb :best_params
52
+ t.string :wandb_url
53
+ t.string :snapshot_id
54
+ t.boolean :deployable
55
+ t.boolean :is_deploying
56
+ t.boolean :deployed
57
+ t.bigint :deploy_id
58
+
59
+ t.timestamps
60
+
61
+ t.index :status
62
+ t.index :started_at
63
+ t.index :completed_at
64
+ t.index :created_at
65
+ t.index :tuner_job_id
66
+ t.index :retraining_job_id
67
+ t.index :model_id
68
+ t.index :trigger
69
+ t.index :wandb_url
70
+ t.index :snapshot_id
71
+ t.index :deploy_id
72
+ t.index :model_history_id
73
+ t.index :deployable
74
+ t.index :is_deploying
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,9 @@
1
+ class CreateEasyMLSettings < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_settings do |t|
4
+ t.json :configuration
5
+
6
+ t.timestamps
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require "historiographer/postgres_migration"
2
+
3
+ class CreateEasyMLSplitterHistories < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
4
+ def change
5
+ create_table :easy_ml_splitter_histories do |t|
6
+ t.histories(foreign_key: :splitter_id)
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,15 @@
1
+ class CreateEasyMLSplitters < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_splitters do |t|
4
+ t.string :splitter_type, null: false
5
+ t.json :configuration
6
+ t.bigint :dataset_id, null: false
7
+
8
+ t.timestamps
9
+
10
+ t.index :splitter_type
11
+ t.index :created_at
12
+ t.index :dataset_id
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,40 @@
1
+ class CreateEasyMLTunerJobs < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ create_table :easy_ml_tuner_jobs do |t|
4
+ t.json :config, null: false
5
+ t.bigint :best_tuner_run_id
6
+ t.bigint :model_id, null: false
7
+ t.string :status
8
+ t.string :direction, default: 'minimize'
9
+ t.datetime :started_at
10
+ t.datetime :completed_at
11
+ t.jsonb :metadata
12
+ t.string :wandb_url
13
+
14
+ t.timestamps
15
+
16
+ t.index :status
17
+ t.index :started_at
18
+ t.index :completed_at
19
+ t.index :model_id
20
+ t.index :best_tuner_run_id
21
+ t.index :wandb_url
22
+ end
23
+
24
+ create_table :easy_ml_tuner_runs do |t|
25
+ t.bigint :tuner_job_id, null: false
26
+ t.json :hyperparameters, null: false
27
+ t.float :value
28
+ t.integer :trial_number
29
+ t.string :status
30
+ t.string :wandb_url
31
+
32
+ t.timestamps
33
+
34
+ t.index [:tuner_job_id, :value]
35
+ t.index [:tuner_job_id, :trial_number], name: "idx_tuner_runs_and_trial_number"
36
+ t.index :status
37
+ t.index :wandb_url
38
+ end
39
+ end
40
+ end
@@ -1 +1,5 @@
module EasyML
  module Support
    # Time zone object for "America/New_York".
    #
    # The guard checks this module only (inherit = false). A bare
    # `defined?(EST)` also resolves a top-level ::EST, which would skip this
    # assignment and leave EasyML::Support::EST undefined. Checking the module
    # itself still avoids "already initialized constant" warnings on reload.
    EST = ActiveSupport::TimeZone.new("America/New_York") unless const_defined?(:EST, false)
  end
end
@@ -1,22 +1,86 @@
require "etc"
require "fileutils"
require "set"

module EasyML
  module Support
    # Prunes a directory tree down to an allow-list of paths.
    #
    # Every regular file under +directory+ (optionally restricted to names
    # matching +allowed_endings+) is deleted unless its path is listed in
    # +files_to_keep+. A directory listed in +files_to_keep+ is left untouched,
    # including everything beneath it. Directories left empty by the rotation
    # are removed; the root +directory+ itself is never removed.
    class FileRotate
      # Group passed to chown when normalizing ownership of inspected files.
      OWNER_GROUP = "staff"
      # Mode applied to every inspected file.
      # NOTE(review): world-writable; kept for backward compatibility.
      FILE_MODE = 0o777

      # @param directory [String, Pathname, nil] root of the tree to rotate
      # @param files_to_keep [Array<String, Pathname>] file or directory paths
      #   to preserve; compared as strings against the paths built from
      #   +directory+
      # @param verbose [Boolean] print progress to stdout when true
      def initialize(directory, files_to_keep, verbose: false)
        @directory = directory.to_s
        # Normalize to strings so Pathname entries match the String paths
        # produced while walking the tree; a Set keeps lookups O(1).
        @files_to_keep = Array(files_to_keep).map(&:to_s)
        @keep_lookup = @files_to_keep.to_set
        @stats = { checked: 0, kept: 0, deleted: 0 }
        @verbose = verbose
      end

      # Runs the rotation.
      #
      # @param allowed_endings [Array<String>] glob suffixes (e.g. "json");
      #   when empty, every non-hidden file is a candidate
      # @return [nil]
      def cleanup(allowed_endings = [])
        return if @directory.strip.empty?

        # Start each run from zero so the summary reflects this run only.
        @stats = { checked: 0, kept: 0, deleted: 0 }

        log "\nStarting file rotation in: #{@directory}"
        log "Files to keep: #{@files_to_keep.count}"

        process_directory(@directory, allowed_endings)
        cleanup_empty_directories(@directory)

        log "\nFile rotation complete:"
        log "Files checked: #{@stats[:checked]}"
        log "Files kept: #{@stats[:kept]}"
        log "Files deleted: #{@stats[:deleted]}"
      end

      private

      # Deletes non-kept candidate files in +dir+, then recurses into its
      # subdirectories.
      def process_directory(dir, allowed_endings)
        return unless Dir.exist?(dir)

        log "\nProcessing directory: #{dir}"

        if keep?(dir)
          log "  Keeping entire directory: #{dir}"
          @stats[:kept] += 1
          return
        end

        candidate_files(dir, allowed_endings).each do |file|
          next unless File.file?(file)

          @stats[:checked] += 1
          normalize_permissions(file)

          if !keep?(file) && File.exist?(file)
            log "  Deleting: #{file}"
            File.delete(file)
            @stats[:deleted] += 1
          else
            log "  Keeping: #{file}"
            @stats[:kept] += 1
          end
        end

        Dir.each_child(dir) do |child|
          full_path = File.join(dir, child)
          process_directory(full_path, allowed_endings) if File.directory?(full_path)
        end
      end

      # Lists entries directly inside +dir+ that match the allowed endings.
      # Globbing relative to +base:+ keeps glob metacharacters in the directory
      # name (e.g. "run[1]") from being interpreted as part of the pattern.
      def candidate_files(dir, allowed_endings)
        patterns = Array(allowed_endings).map { |ending| "*#{ending}" }
        patterns = ["*"] if patterns.empty?

        Dir.glob(patterns, base: dir).uniq.map { |name| File.join(dir, name) }
      end

      # Removes directories under +dir+ that ended up empty, depth-first.
      # Directories on the keep list are skipped entirely, matching the
      # "keeping entire directory" decision made while processing.
      def cleanup_empty_directories(dir)
        return unless Dir.exist?(dir)
        return if keep?(dir)

        Dir.each_child(dir) do |child|
          full_path = File.join(dir, child)
          cleanup_empty_directories(full_path) if File.directory?(full_path)
        end

        if Dir.empty?(dir) && dir != @directory
          log "Removing empty directory: #{dir}"
          Dir.rmdir(dir)
        end
      rescue Errno::ENOTEMPTY, Errno::EACCES
        # Directory not empty or permission denied - skip it
      end

      # True when +path+ is on the keep list.
      def keep?(path)
        @keep_lookup.include?(path.to_s)
      end

      # Best-effort ownership/mode normalization. A missing group or lack of
      # privilege must not abort the whole rotation, so failures are only
      # logged.
      def normalize_permissions(file)
        best_effort(file) { FileUtils.chown_R(current_user, OWNER_GROUP, file) }
        best_effort(file) { FileUtils.chmod_R(FILE_MODE, file) }
      end

      def best_effort(file)
        yield
      rescue ArgumentError, SystemCallError => e
        log "  Skipping permission change on #{file}: #{e.message}"
      end

      # Name of the effective user, resolved once instead of shelling out to
      # `whoami` for every file.
      def current_user
        @current_user ||= Etc.getpwuid(Process.euid)&.name || Etc.getlogin
      end

      def log(message)
        puts message if @verbose
      end
    end
  end
end
@@ -0,0 +1,9 @@
module EasyML
  module Support
    # Mixin with small filesystem helpers.
    module FileSupport
      # Creates +dir+ (including missing parents) when it is not already a
      # directory.
      #
      # @param dir [String] directory path
      # @return [nil, Array<String>] nil when the directory already exists,
      #   otherwise whatever FileUtils.mkdir_p returns
      def ensure_directory_exists(dir)
        return if File.directory?(dir)

        FileUtils.mkdir_p(dir)
      end
    end
  end
end
@@ -0,0 +1,24 @@
module EasyML
  module Support
    # File handle whose transfer operations are no-ops: upload and download
    # hand back the path they were given, and the file always reports itself
    # as synced.
    class LocalFile
      attr_accessor :root_dir, :filename

      # @param options [Hash] reads the :root_dir and :filename keys
      def initialize(options = {})
        @root_dir, @filename = options.values_at(:root_dir, :filename)
      end

      # @param file_path [String] local path
      # @return [String] +file_path+, unchanged
      def upload(file_path)
        file_path
      end

      # @param full_path [String] local path
      # @return [String] +full_path+, unchanged
      def download(full_path)
        full_path
      end

      # @return [true] always
      def synced?
        true
      end
    end
  end
end
@@ -0,0 +1,62 @@
module EasyML
  module Support
    # Module-level helpers for Redis-backed locks, built on Suo::Client::Redis.
    # Every lock key is namespaced with the "easy_ml:" prefix.
    module Lockable
      KEYS_HASH = "easy_ml:lock_keys"

      class << self
        # Releases every lock currently held under +key+.
        def unlock!(key)
          holder = lock_client(key)

          tokens = holder.locks.map(&:last)
          tokens.each { |token| holder.unlock(token) }
        end

        # Whether +key+ is currently locked, as reported by the Suo client.
        def locked?(key)
          lock_client(key).locked?
        end

        # Raw lock entries for +key+, as reported by the Suo client.
        def locks(key)
          lock_client(key).locks
        end

        # Builds a Suo Redis client for +key+.
        #
        # @param wait_timeout [Numeric] passed as Suo's acquisition_timeout
        # @param stale_timeout [Numeric] passed as Suo's stale_lock_expiry
        # @param resources [Integer] passed as Suo's resources
        def lock_client(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
          suo_options = {
            acquisition_timeout: wait_timeout,
            stale_lock_expiry: stale_timeout,
            resources: resources,
            client: client,
          }

          Suo::Client::Redis.new(prefixed_key(key), suo_options)
        end

        def prefixed_key(key)
          "easy_ml:#{key}"
        end

        # Execute a block with a Redis lock.
        #
        # The block receives the Suo client and runs only when the lock was
        # acquired; otherwise nothing is yielded and nil is returned. An
        # acquired lock is always released, even if the block raises.
        def with_lock(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
          holder = lock_client(key, wait_timeout: wait_timeout, stale_timeout: stale_timeout, resources: resources)
          token = nil

          begin
            token = holder.lock
            yield holder if token
          ensure
            holder.unlock(token) if token
          end
        end

        # Redis client, created once and reused.
        def client
          @client ||= Redis.new(host: redis_host)
        end

        # Determine Redis host: REDIS_HOST wins, then Resque's connection when
        # Resque is loaded, then "localhost".
        def redis_host
          return ENV["REDIS_HOST"] if ENV["REDIS_HOST"]

          defined?(Resque) ? Resque.redis.client.host : "localhost"
        end
      end
    end
  end
end