easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,133 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_model_files
4
+ #
5
+ # id :bigint not null, primary key
6
+ # filename :string not null
7
+ # path :string not null
8
+ # configuration :json
9
+ # model_type :string
10
+ # model_id :bigint
11
+ # created_at :datetime not null
12
+ # updated_at :datetime not null
13
+ #
14
module EasyML
  # Record describing a model artifact: a file kept in a directory under the
  # Rails application root and transferred through EasyML::Support::SyncedFile.
  class ModelFile < ActiveRecord::Base
    self.table_name = "easy_ml_model_files"
    include Historiographer::Silent
    historiographer_mode :snapshot_only

    # Keep the configuration column out of filtered/inspected output
    # (presumably because it carries the s3_* credentials declared below).
    self.filter_attributes += [:configuration]

    validates :filename, presence: true
    belongs_to :model, class_name: "EasyML::Model"

    include EasyML::Concerns::Configurable
    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key, :root_dir

    # Builds the SyncedFile helper from this record's filename, S3 settings and root_dir.
    def synced_file
      EasyML::Support::SyncedFile.new(
        filename: filename,
        s3_bucket: s3_bucket,
        s3_prefix: s3_prefix,
        s3_region: s3_region,
        s3_access_key_id: s3_access_key_id,
        s3_secret_access_key: s3_secret_access_key,
        root_dir: root_dir,
      )
    end

    # Alias-style predicate; see #fit?.
    def exist?
      fit?
    end

    # @return [Boolean] true when root_dir and the full path are known and the
    #   file is present on the local filesystem
    def fit?
      return false if root_dir.nil?
      return false if full_path.nil?

      File.exist?(full_path)
    end

    # @return [String] contents of the local file at #full_path
    def read
      File.read(full_path)
    end

    # Uploads +path+ through the synced file, then records it via #set_path.
    def upload(path)
      synced_file.upload(path)
      set_path(path)
    end

    # Records +path+ as this record's file. A file located outside #full_dir is
    # first copied into it. Sets +filename+ (basename) and +path+ (directory
    # relative to Rails.root).
    def set_path(path)
      # get_full_path returns a Pathname for relative input; the standard
      # library Pathname has no #start_with?, so normalize to a String first.
      path = get_full_path(path).to_s
      basename = File.basename(path)
      unless path.start_with?(full_dir)
        new_path = File.join(full_dir, basename)
        FileUtils.mkdir_p(File.dirname(new_path))
        FileUtils.cp(path, new_path)
        path = new_path
      end
      self.filename = basename
      self.path = get_relative_path(path)
    end

    # Anchors +path+ under Rails.root unless it already contains Rails.root.
    #
    # @return [String, Pathname] the input as a String when it already contains
    #   Rails.root, otherwise the Pathname produced by Rails.root.join
    def get_full_path(path)
      path = path.to_s
      # Literal containment check: building a Regexp from the root path would
      # treat characters such as "." or "+" in directory names as metacharacters.
      return path if path.include?(Rails.root.to_s)

      Rails.root.join(path)
    end

    # @return [String] the directory part of +path+ (final segment dropped),
    #   taken from whatever follows the last occurrence of Rails.root
    def get_relative_path(path)
      below_root = path.to_s.split(Rails.root.to_s).last
      below_root.to_s.split("/")[0..-2].reject(&:empty?).join("/")
    end

    # Fetches the file through the synced file when it is not present locally.
    #
    # @return [String, nil] the local path, or nil when no full path can be built
    def download
      local_path = full_path
      return unless local_path.present?

      synced_file.download(local_path) unless File.exist?(local_path)
      local_path
    end

    # @return [String] SHA-256 hex digest of the local file
    def sha
      Digest::SHA256.file(full_path).hexdigest
    end

    # @param filename [String, nil] defaults to this record's filename
    # @return [String, nil] absolute path under Rails.root, or nil when the
    #   filename or the relative directory is unknown
    def full_path(filename = nil)
      filename = self.filename if filename.nil?
      return nil if filename.nil?
      return nil if relative_dir.nil?

      Rails.root.join(relative_dir, filename).to_s
    end

    # root_dir expressed relative to Rails.root, without a leading slash.
    #
    # @return [String, nil] nil when root_dir is blank or equals Rails.root
    def relative_dir
      stripped = root_dir.to_s.gsub(Rails.root.to_s, "")
      return nil if stripped.empty?

      # Non-destructive on purpose: the bang variant returns nil when there is
      # no leading slash, which made an already-relative root_dir look unset.
      stripped.delete_prefix("/")
    end

    # @return [String] absolute directory for this file; expects root_dir to be set
    def full_dir
      Rails.root.join(relative_dir).to_s
    end

    # @return [String] parent directory of #full_dir
    def model_root
      File.expand_path("..", full_dir)
    end

    # Runs the file rotation over #model_root keeping no files.
    def cleanup!
      cleanup([])
    end

    # Runs the file rotation over #model_root, keeping +files_to_keep+ and
    # limiting the rotation to #extension_allowlist.
    def cleanup(files_to_keep)
      [model_root].each do |dir|
        EasyML::Support::FileRotate.new(dir, files_to_keep).cleanup(extension_allowlist)
      end
    end

    # @return [Array<String>] extensions handed to FileRotate#cleanup
    def extension_allowlist
      %w[bin model json]
    end
  end
end
@@ -0,0 +1,24 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_model_file_histories
4
+ #
5
+ # id :bigint not null, primary key
6
+ # model_file_id :integer not null
7
+ # filename :string not null
8
+ # path :string not null
9
+ # configuration :json
10
+ # model_type :string
11
+ # model_id :integer
12
+ # created_at :datetime not null
13
+ # updated_at :datetime not null
14
+ # history_started_at :datetime not null
15
+ # history_ended_at :datetime
16
+ # history_user_id :integer
17
+ # snapshot_id :string
18
+ #
19
module EasyML
  # History table counterpart of EasyML::ModelFile; all behaviour comes from
  # the Historiographer::History mixin.
  class ModelFileHistory < ActiveRecord::Base
    self.table_name = "easy_ml_model_file_histories"

    include(Historiographer::History)
  end
end
@@ -0,0 +1,51 @@
1
+ # == Schema Information
2
+ #
3
+ # Table name: easy_ml_model_histories
4
+ #
5
+ # id :bigint not null, primary key
6
+ # model_id :integer not null
7
+ # name :string not null
8
+ # model_type :string
9
+ # status :string
10
+ # dataset_id :integer
11
+ # model_file_id :integer
12
+ # configuration :json
13
+ # version :string not null
14
+ # root_dir :string
15
+ # file :json
16
+ # sha :string
17
+ # last_trained_at :datetime
18
+ # is_training :boolean
19
+ # created_at :datetime not null
20
+ # updated_at :datetime not null
21
+ # history_started_at :datetime not null
22
+ # history_ended_at :datetime
23
+ # history_user_id :integer
24
+ # snapshot_id :string
25
+ #
26
module EasyML
  # History (snapshot) row of an EasyML::Model. A snapshot is either the
  # version referenced by the latest deploy or a retired one; it cannot be
  # trained again.
  class ModelHistory < ActiveRecord::Base
    self.table_name = "easy_ml_model_histories"
    include Historiographer::History

    # Snapshots that are the model_version of one of the latest deploys.
    scope :deployed, lambda {
      deployed_versions = EasyML::Deploy.latest.includes(:model_version).map(&:model_version).compact
      where(id: deployed_versions.map(&:id))
    }

    # Computed status (memoized), replacing the stored status attribute.
    #
    # @return [Symbol] :inference when deployed, :retired otherwise
    def status
      @status ||= is_deployed? ? :inference : :retired
    end

    # @return [Boolean] whether the latest deploy of this snapshot's model
    #   points at this snapshot
    def is_deployed?
      latest_deploy = EasyML::Deploy.latest.where(model_id: model_id)&.first
      latest_deploy&.model_version&.id == id
    end

    # Snapshots are read-only with respect to training.
    #
    # @raise [RuntimeError] always
    def fit
      raise "Cannot train inference model"
    end
  end
end
@@ -0,0 +1,58 @@
1
module EasyML
  module Models
    # Abstract adapter wrapped around a model record. It forwards task,
    # dataset and hyperparameters to that record and declares the interface
    # each concrete adapter has to provide.
    class BaseModel
      include ActiveModel::Validations
      include EasyML::Concerns::Configurable
      include EasyML::Support::FileSupport

      attr_reader :model

      add_configuration_attributes :hyperparameters

      # @param model the model record this adapter operates on
      def initialize(model)
        @model = model
      end

      delegate :task, :dataset, :hyperparameters, to: :model

      # --- Interface every subclass must implement ---------------------------

      def predict(_xs)
        not_implemented!(__method__)
      end

      def fit(_x_train = nil)
        not_implemented!(__method__)
      end

      def model_changed?
        not_implemented!(__method__)
      end

      def feature_importances
        not_implemented!(__method__)
      end

      def save_model_file(path)
        not_implemented!(__method__)
      end

      def load_model_file(path)
        not_implemented!(__method__)
      end

      def loaded?
        not_implemented!(__method__)
      end

      protected

      def validate_objective
        not_implemented!(__method__)
      end

      def validate_hyperparameters
        not_implemented!(__method__)
      end

      private

      # Shared failure path for the abstract methods above.
      #
      # @param method_name [Symbol] name of the method the subclass is missing
      # @raise [NotImplementedError] always
      def not_implemented!(method_name)
        raise NotImplementedError, "#{self.class} must implement ##{method_name}"
      end
    end
  end
end
@@ -0,0 +1,99 @@
1
module EasyML
  module Models
    module Hyperparameters
      # Generic hyperparameter container. Values are held in instance
      # variables, which is what #to_h exports, so subclasses that assign
      # further instance variables are exported as well. The class-level
      # helpers return metadata (label, description, min, max, step) for
      # parameters shared by several boosters.
      class Base
        attr_accessor :learning_rate, :max_iterations, :batch_size,
                      :regularization, :early_stopping_rounds

        # @param options [Hash] overrides looked up by symbol key; missing or
        #   falsy values fall back to the defaults below
        def initialize(options = {})
          @learning_rate = options[:learning_rate] || 0.01
          @max_iterations = options[:max_iterations] || 100
          @batch_size = options[:batch_size] || 32
          @regularization = options[:regularization] || 0.0001
          @early_stopping_rounds = options[:early_stopping_rounds]
        end

        # @return [Hash] metadata for the parameters common to tree boosters
        def self.common_tree_params
          {
            learning_rate: {
              label: "Learning Rate",
              description: "Step size shrinkage used to prevent overfitting",
              min: 0.001,
              max: 0.3,
              step: 0.001,
            },
            max_depth: {
              label: "Maximum Tree Depth",
              description: "Maximum depth of a tree",
              min: 3,
              max: 10,
              step: 1,
            },
            n_estimators: {
              label: "Number of Trees",
              description: "Number of boosting rounds",
              min: 100,
              max: 1000,
              step: 1,
            },
            early_stopping_rounds: {
              label: "Early Stopping Rounds",
              description: "Number of rounds to check for early stopping",
              min: 1,
              max: 5,
              step: 1,
            },
          }
        end

        # @return [Hash] metadata for the regularization parameters
        def self.common_regularization_params
          {
            lambda: {
              label: "L2 Regularization",
              description: "L2 regularization term on weights",
              min: 0,
              max: 10,
              step: 0.1,
            },
            alpha: {
              label: "L1 Regularization",
              description: "L1 regularization term on weights",
              min: 0,
              max: 1,
              step: 0.1,
            },
            early_stopping_rounds: {
              label: "Early Stopping Rounds",
              description: "Number of rounds to check for early stopping",
              min: 1,
              max: 5,
              step: 1,
            },
          }
        end

        # @return [Hash] every instance variable keyed by its name (without the
        #   "@"), wrapped for indifferent access
        def to_h
          instance_variables.each_with_object({}) do |var, hash|
            hash[var.to_s.delete("@").to_sym] = instance_variable_get(var)
          end.with_indifferent_access
        end

        # Overlays +other+ on this object's values.
        #
        # @param other [Base, Hash, nil] another hyperparameter object or a hash
        # @return [Base] +self+ when +other+ is nil, otherwise a new instance of
        #   the receiver's class
        def merge(other)
          return self if other.nil?

          # Hyperparameters is only a namespace module, so no object is ever
          # `is_a?(Hyperparameters)`; hyperparameter objects descend from Base.
          overrides = other.is_a?(Base) ? other.to_h : other
          # Hand the hash over positionally: #initialize takes an options hash,
          # and double-splatting an indifferent-access hash can degrade it to a
          # plain Hash with string keys that the symbol lookups there would miss.
          self.class.new(to_h.merge(overrides))
        end

        # Hash-style read; returns nil for names this object does not respond to.
        def [](key)
          public_send(key) if respond_to?(key)
        end

        # Hash-style write; silently ignored when there is no matching writer.
        def []=(key, value)
          writer = "#{key}="
          public_send(writer, value) if respond_to?(writer)
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the XGBoost "dart" booster: the parent's
        # parameters plus the dropout and sampling settings below.
        class Dart < XGBoost
          attr_accessor :rate_drop, :skip_drop, :sample_type, :normalize_type,
                        :subsample, :colsample_bytree

          # @param options [Hash] overrides looked up by symbol key
          # @raise [ArgumentError] when sample_type or normalize_type is invalid
          def initialize(options = {})
            super
            {
              rate_drop: 0.0,
              skip_drop: 0.0,
              sample_type: "uniform",
              normalize_type: "tree",
              subsample: 1.0,
              colsample_bytree: 1.0,
            }.each do |name, default|
              instance_variable_set(:"@#{name}", options[name] || default)
            end
            validate!
          end

          # Checks the two enumerated DART settings.
          #
          # @raise [ArgumentError] on an unknown sample_type or normalize_type
          def validate!
            raise ArgumentError, "Invalid sample_type: #{@sample_type}" unless %w[uniform weighted].include?(@sample_type)
            raise ArgumentError, "Invalid normalize_type: #{@normalize_type}" unless %w[tree forest].include?(@normalize_type)
          end

          # @return [Hash] parameter metadata: shared tree and regularization
          #   entries, the DART-specific entries, then GBTree's entries (which
          #   win on key collisions because they are merged last)
          def self.hyperparameter_constants
            dart_params = {
              rate_drop: {
                label: "Dropout Rate",
                description: "Dropout rate (a fraction of previous trees to drop)",
                min: 0,
                max: 1,
                step: 0.1,
              },
              skip_drop: {
                label: "Skip Dropout",
                description: "Probability of skipping the dropout procedure during iteration",
                min: 0,
                max: 1,
                step: 0.1,
              },
              sample_type: {
                label: "Sample Type",
                options: [
                  {
                    value: "uniform",
                    label: "Uniform",
                    description: "Dropped trees are selected uniformly",
                  },
                  {
                    value: "weighted",
                    label: "Weighted",
                    description: "Dropped trees are selected in proportion to weight",
                  },
                ],
              },
              normalize_type: {
                label: "Normalize Type",
                options: [
                  {
                    value: "tree",
                    label: "Tree",
                    description: "New trees have the same weight of dropped trees divided by k",
                  },
                  {
                    value: "forest",
                    label: "Forest",
                    description: "New trees have the same weight of sum of dropped trees",
                  },
                ],
              },
            }

            # DART is tree-based, so it starts from the shared tree parameters
            # and finishes with everything GBTree exposes.
            Base.common_tree_params
                .merge(Base.common_regularization_params)
                .merge(dart_params)
                .merge(GBTree.hyperparameter_constants)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
1
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the XGBoost "gblinear" booster.
        class GBLinear < XGBoost
          VALID_UPDATERS = %w[shotgun coord_descent].freeze
          # Must stay in step with the options listed by
          # .hyperparameter_constants; "random" was offered there but rejected
          # by #validate!.
          VALID_FEATURE_SELECTORS = %w[cyclic shuffle random greedy thrifty].freeze

          attr_accessor :updater, :feature_selector, :lambda, :alpha

          # @param options [Hash] overrides looked up by symbol key
          # @raise [ArgumentError] when updater or feature_selector is invalid
          def initialize(options = {})
            super
            @updater = options[:updater] || "shotgun"
            @feature_selector = options[:feature_selector] || "cyclic"
            @lambda = options[:lambda] || 1.0
            @alpha = options[:alpha] || 0.0
            validate!
          end

          # Checks the two enumerated gblinear settings.
          #
          # @raise [ArgumentError] on an unknown updater or feature_selector
          def validate!
            raise ArgumentError, "Invalid updater: #{@updater}" unless VALID_UPDATERS.include?(@updater)
            return if VALID_FEATURE_SELECTORS.include?(@feature_selector)

            raise ArgumentError, "Invalid feature_selector: #{@feature_selector}"
          end

          # @return [Hash] parameter metadata: learning_rate (the only shared
          #   tree parameter gblinear uses), the regularization entries, and the
          #   gblinear-specific option lists
          def self.hyperparameter_constants
            { learning_rate: Base.common_tree_params[:learning_rate] }
              .merge(Base.common_regularization_params)
              .merge(
                feature_selector: {
                  label: "Feature Selector",
                  options: [
                    {
                      value: "cyclic",
                      label: "Cyclic",
                      description: "Update features in a cyclic order",
                    },
                    {
                      value: "shuffle",
                      label: "Shuffle",
                      description: "Update features in a random order",
                    },
                    {
                      value: "random",
                      label: "Random",
                      description: "Randomly select features to update",
                    },
                    {
                      value: "greedy",
                      label: "Greedy",
                      description: "Select features with the highest gradient magnitude",
                    },
                    {
                      value: "thrifty",
                      label: "Thrifty",
                      description: "Thrifty, approximated greedy algorithm",
                    },
                  ],
                },
                updater: {
                  label: "Updater",
                  options: [
                    {
                      value: "shotgun",
                      label: "Shotgun",
                      description: "Parallel coordinate descent algorithm",
                    },
                    {
                      value: "coord_descent",
                      label: "Coordinate Descent",
                      description: "Ordinary coordinate descent algorithm",
                    },
                  ],
                },
              )
          end
        end
      end
    end
  end
end
@@ -0,0 +1,97 @@
1
module EasyML
  module Models
    module Hyperparameters
      class XGBoost
        # Hyperparameters for the XGBoost "gbtree" booster.
        class GBTree < XGBoost
          attr_accessor :max_depth, :min_child_weight, :max_delta_step, :subsample,
                        :colsample_bytree, :colsample_bylevel, :colsample_bynode,
                        :tree_method, :gamma, :scale_pos_weight

          # @param options [Hash] overrides looked up by symbol key
          # @raise [ArgumentError] when tree_method is invalid
          def initialize(options = {})
            super
            {
              max_depth: 6,
              min_child_weight: 1,
              max_delta_step: 0,
              subsample: 1.0,
              colsample_bytree: 1.0,
              colsample_bylevel: 1.0,
              colsample_bynode: 1.0,
              tree_method: "auto",
              gamma: 0.0,
              scale_pos_weight: 1.0,
            }.each do |name, default|
              instance_variable_set(:"@#{name}", options[name] || default)
            end
            validate!
          end

          # @raise [ArgumentError] on an unknown tree_method
          def validate!
            known_methods = %w[auto exact approx hist gpu_hist]
            raise ArgumentError, "Invalid tree_method: #{@tree_method}" unless known_methods.include?(@tree_method)
          end

          # @return [Hash] parameter metadata: shared tree and regularization
          #   entries followed by the gbtree-specific ones
          def self.hyperparameter_constants
            tree_specific = {
              min_child_weight: {
                label: "Minimum Child Weight",
                description: "Minimum sum of instance weight needed in a child",
                min: 0,
                max: 10,
                step: 0.1,
              },
              gamma: {
                label: "Gamma",
                description: "Minimum loss reduction required to make a further partition",
                min: 0,
                max: 10,
                step: 0.1,
              },
              subsample: {
                label: "Subsample Ratio",
                description: "Subsample ratio of the training instances",
                min: 0.1,
                max: 1,
                step: 0.1,
              },
              colsample_bytree: {
                label: "Column Sample by Tree",
                description: "Subsample ratio of columns when constructing each tree",
                min: 0.1,
                max: 1,
                step: 0.1,
              },
              tree_method: {
                label: "Tree Construction Method",
                options: [
                  {
                    value: "auto",
                    label: "Auto",
                    description: "Use heuristic to choose the fastest method",
                  },
                  {
                    value: "exact",
                    label: "Exact",
                    description: "Exact greedy algorithm",
                  },
                  {
                    value: "approx",
                    label: "Approximate",
                    description: "Approximate greedy algorithm using sketching and histogram",
                  },
                  {
                    value: "hist",
                    label: "Histogram",
                    description: "Fast histogram optimized approximate greedy algorithm",
                  },
                  {
                    value: "gpu_hist",
                    label: "GPU Histogram",
                    description: "GPU implementation of hist algorithm",
                  },
                ],
              },
            }

            shared = Base.common_tree_params.merge(Base.common_regularization_params)
            shared.merge(tree_specific)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ require_relative "base"
2
+
3
module EasyML
  module Models
    module Hyperparameters
      # Hyperparameters shared by every XGBoost booster. The booster-specific
      # subclasses (GBTree, GBLinear, Dart) add their own settings.
      class XGBoost < Base
        attr_accessor :learning_rate, :max_depth, :n_estimators, :booster,
                      :objective, :lambda, :alpha

        VALID_OBJECTIVES = %w[binary:logistic binary:hinge multi:softmax multi:softprob reg:squarederror reg:logistic].freeze
        VALID_BOOSTERS = %w[gbtree gblinear dart].freeze

        # @param options [Hash] overrides looked up by symbol key
        # @raise [ArgumentError] when instantiated directly with an invalid
        #   objective or booster
        def initialize(options = {})
          super
          {
            learning_rate: 0.1,
            max_depth: 6,
            n_estimators: 100,
            booster: "gbtree",
            objective: "reg:squarederror",
            lambda: 1.0,
            alpha: 0.0,
          }.each do |name, default|
            instance_variable_set(:"@#{name}", options[name] || default)
          end

          # Subclasses run their own validate! once their extra settings are
          # assigned, so only validate here for the base XGBoost class itself.
          instantiated_directly = self.class.name == "EasyML::Models::Hyperparameters::XGBoost"
          validate! if instantiated_directly
        end

        # @raise [ArgumentError] on an objective or booster outside the allowed lists
        def validate!
          raise ArgumentError, "Invalid objective. Must be one of: #{VALID_OBJECTIVES.join(", ")}" unless VALID_OBJECTIVES.include?(@objective)
          raise ArgumentError, "Invalid booster. Must be one of: #{VALID_BOOSTERS.join(", ")}" unless VALID_BOOSTERS.include?(@booster)
        end

        # @return [Hash] the booster choices plus, under :hyperparameters, the
        #   per-booster parameter metadata keyed by booster name
        def self.hyperparameter_constants
          booster_choices = [
            {
              value: "gbtree",
              label: "Gradient Boosted Tree",
              description: "Traditional Gradient Boosting Decision Tree",
            },
            {
              value: "gblinear",
              label: "Gradient Boosted Linear",
              description: "Generalized Linear Model with gradient boosting",
            },
            {
              value: "dart",
              label: "DART",
              description: "Dropouts meet Multiple Additive Regression Trees",
            },
          ]

          {
            booster: {
              label: "XGBoost Booster",
              options: booster_choices,
            },
            hyperparameters: {
              depends_on: "booster",
              gbtree: GBTree.hyperparameter_constants,
              gblinear: GBLinear.hyperparameter_constants,
              dart: Dart.hyperparameter_constants,
            },
          }
        end
      end
    end
  end
end
+ end
68
+
69
+ require_relative "xgboost/gbtree"
70
+ require_relative "xgboost/gblinear"
71
+ require_relative "xgboost/dart"