easy_ml 0.1.3 → 0.2.0.pre.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -4
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,272 @@
1
+ import type { Model, RetrainingJob, RetrainingRun, Dataset, Prediction, Feature, FeatureGroup } from './types';
2
+
3
/**
 * Builds an ISO-8601 timestamp for the current moment shifted back by a
 * number of calendar days.
 *
 * @param days - How many days to subtract from today's day-of-month.
 * @returns The shifted instant as formatted by `Date.prototype.toISOString`.
 */
const daysAgo = (days: number) => {
  const shifted = new Date();
  const targetDayOfMonth = shifted.getDate() - days;
  // setDate accepts out-of-range values and rolls the month/year over accordingly.
  shifted.setDate(targetDayOfMonth);
  return shifted.toISOString();
};
9
+
10
/**
 * Single customer-churn dataset fixture: five described columns (three
 * numeric, one categorical, one datetime) and two sample rows, the second
 * of which carries nulls.
 */
const churnDataset: Dataset = {
  id: 1,
  name: 'Customer Churn Dataset',
  description: 'Historical customer data for churn prediction',
  columns: [
    {
      name: 'usage_days',
      type: 'numeric',
      description: 'Number of days customer has used the product',
      statistics: { mean: 145.7, median: 130, min: 1, max: 365, nullCount: 0 }
    },
    {
      name: 'total_spend',
      type: 'numeric',
      description: 'Total customer spend in USD',
      statistics: { mean: 487.32, median: 425.5, min: 0, max: 2500, nullCount: 1250 }
    },
    {
      name: 'support_tickets',
      type: 'numeric',
      description: 'Number of support tickets opened',
      statistics: { mean: 2.3, median: 1, min: 0, max: 15, nullCount: 3750 }
    },
    {
      name: 'subscription_tier',
      type: 'categorical',
      description: 'Customer subscription level',
      statistics: { uniqueCount: 3, nullCount: 125 }
    },
    {
      name: 'last_login',
      type: 'datetime',
      description: 'Last time the customer logged in',
      statistics: { nullCount: 5000 }
    }
  ],
  sampleData: [
    {
      usage_days: 234,
      total_spend: 567.89,
      support_tickets: 1,
      subscription_tier: 'premium',
      last_login: '2024-03-01'
    },
    {
      usage_days: 45,
      total_spend: null,
      support_tickets: null,
      subscription_tier: 'basic',
      last_login: null
    }
  ],
  rowCount: 25000,
  updatedAt: '2024-03-10T12:00:00Z'
};

/** Mock datasets exported for use as fixture data. */
export const mockDatasets: Dataset[] = [churnDataset];
90
+
91
/**
 * Churn classifier fixture tied to dataset id 1. Timestamps are relative to
 * load time: created 30 days ago, updated today.
 */
const churnModel: Model = {
  id: 1,
  name: 'Customer Churn Predictor',
  modelType: 'classification',
  status: 'completed',
  deploymentStatus: 'inference',
  deployed: true,
  datasetId: 1,
  configuration: {
    algorithm: 'xgboost',
    features: ['usage_days', 'total_spend', 'support_tickets'],
    objective: 'binary:logistic',
    metrics: ['accuracy', 'f1']
  },
  version: '2.1.0',
  rootDir: '/models/churn_predictor',
  file: { path: 'model.joblib' },
  createdAt: daysAgo(30),
  updatedAt: daysAgo(0)
};

/** Mock models exported for use as fixture data. */
export const mockModels: Model[] = [churnModel];
113
+
114
/**
 * Daily retraining job fixture for the churn model, with an f1_score
 * evaluator (threshold 0.85, maximize) and a weekly tuning configuration.
 */
const churnRetrainingJob: RetrainingJob = {
  id: 1,
  model: 'Customer Churn Predictor',
  frequency: 'daily',
  at: 2,
  evaluator: { metric: 'f1_score', threshold: 0.85, direction: 'maximize' },
  tunerConfig: {
    trials: 10,
    metrics: ['f1_score'],
    parameters: {
      max_depth: { min: 3, max: 10 },
      learning_rate: { min: 0.01, max: 0.1 }
    }
  },
  tuningFrequency: 'weekly',
  lastTuningAt: daysAgo(7),
  active: true,
  status: 'completed',
  lastRunAt: daysAgo(1),
  lockedAt: null,
  createdAt: daysAgo(30),
  updatedAt: daysAgo(0)
};

/** Mock retraining jobs exported for use as fixture data. */
export const mockRetrainingJobs: RetrainingJob[] = [churnRetrainingJob];
143
+
144
/**
 * Four retraining-run fixtures for model 1 / retraining job 1, one per day
 * over the last four days: two completed runs above the 0.85 threshold, one
 * failed run with no metric or metadata, and one completed run below the
 * threshold.
 *
 * NOTE(review): the `shoulddeploy` key is all-lowercase, unlike the other
 * camelCase keys here — confirm it matches the `RetrainingRun` type.
 */
export const mockRetrainingRuns: RetrainingRun[] = [
  // Run 1: yesterday, above threshold, no tuner job.
  {
    id: 1,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'completed',
    metricValue: 0.89,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: true,
    startedAt: daysAgo(1),
    completedAt: daysAgo(1),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.92, precision: 0.88, recall: 0.9, f1: 0.89 },
      parameters: { max_depth: 6, learning_rate: 0.05 }
    },
    createdAt: daysAgo(1),
    updatedAt: daysAgo(1)
  },
  // Run 2: two days ago, above threshold, linked to tuner job 1.
  {
    id: 2,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: 1,
    status: 'completed',
    metricValue: 0.86,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: true,
    startedAt: daysAgo(2),
    completedAt: daysAgo(2),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.9, precision: 0.85, recall: 0.87, f1: 0.86 },
      parameters: { max_depth: 5, learning_rate: 0.03 }
    },
    createdAt: daysAgo(2),
    updatedAt: daysAgo(2)
  },
  // Run 3: three days ago, failed; carries an error message instead of metrics.
  {
    id: 3,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'failed',
    metricValue: null,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: false,
    startedAt: daysAgo(3),
    completedAt: daysAgo(3),
    errorMessage: 'Training failed due to insufficient memory',
    metadata: null,
    createdAt: daysAgo(3),
    updatedAt: daysAgo(3)
  },
  // Run 4: four days ago, completed but below threshold.
  {
    id: 4,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'completed',
    metricValue: 0.83,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: false,
    startedAt: daysAgo(4),
    completedAt: daysAgo(4),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.87, precision: 0.82, recall: 0.84, f1: 0.83 },
      parameters: { max_depth: 4, learning_rate: 0.02 }
    },
    createdAt: daysAgo(4),
    updatedAt: daysAgo(4)
  }
];
247
+
248
/**
 * Single feature fixture ("Normalize state") that reads and writes the
 * `state` column, belongs to group 1 and is tested against dataset 1.
 * Its `code` is an empty string.
 */
const normalizeStateFeature: Feature = {
  id: 1,
  name: 'Normalize state',
  description: 'Turn state names into 2 letter state abbreviations',
  groupId: 1,
  testDatasetId: 1,
  inputColumns: ['state'],
  outputColumns: ['state'],
  code: '',
  createdAt: daysAgo(30),
  updatedAt: daysAgo(0)
};

/** Mock features exported for use as fixture data. */
export const mockFeatures: Feature[] = [normalizeStateFeature];
262
+
263
/**
 * Single feature-group fixture whose `features` is the very same array
 * instance as `mockFeatures` (shared by reference, not copied).
 */
const churnFeatureGroup: FeatureGroup = {
  id: 1,
  name: 'Customer Churn',
  description: 'Features for customer churn dataset',
  features: mockFeatures,
  createdAt: daysAgo(30),
  updatedAt: daysAgo(0)
};

/** Mock feature groups exported for use as fixture data. */
export const mockFeatureGroups: FeatureGroup[] = [churnFeatureGroup];
@@ -0,0 +1,103 @@
1
+ import React, { useState, useCallback } from 'react';
2
+ import { usePage, router } from '@inertiajs/react';
3
+ import { Settings } from 'lucide-react';
4
+ import { isEqual } from 'lodash';
5
+ import { DatasetPreview } from '../components/DatasetPreview';
6
+ import { ColumnConfigModal } from '../components/dataset/ColumnConfigModal';
7
+ import type { Dataset, Column } from '../types/dataset';
8
+ import type { PreprocessingConstants } from '../types';
9
+
10
/** Props accepted by `DatasetDetailsPage`. */
interface Props {
  /** Dataset used to seed the page's local, editable copy. */
  dataset: Dataset;
  /** Preprocessing constants, passed through unchanged to `ColumnConfigModal`. */
  constants: PreprocessingConstants;
}
14
+
15
/**
 * Dataset detail screen: renders a preview of the dataset plus a modal for
 * configuring its columns.
 *
 * When the modal saves, the updated dataset is diffed against the locally
 * held copy. Only changed top-level fields and changed column fields are
 * sent, together with the full feature list, in a single PATCH request.
 * The local copy is then replaced with the updated dataset whether or not
 * a request was sent.
 */
export default function DatasetDetailsPage({ dataset, constants }: Props) {
  const [showColumnConfig, setShowColumnConfig] = useState(false);
  const [currentDataset, setCurrentDataset] = useState<Dataset>(dataset);
  const { rootPath } = usePage().props;

  const onSave = useCallback((updatedDataset: Dataset) => {
    // Dataset-level diff: every top-level key except the nested collections,
    // which are handled separately below.
    const datasetChanges = {} as Partial<Dataset>;
    for (const [key, value] of Object.entries(updatedDataset)) {
      if (key === 'columns' || key === 'features') continue;
      if (isEqual(currentDataset[key as keyof Dataset], value)) continue;
      datasetChanges[key as keyof Dataset] = value;
    }

    // Column-level diff, keyed by column id. A column with no local
    // counterpart contributes all of its fields.
    const columnChanges = {} as Record<number, Record<string, any>>;
    for (const newColumn of updatedDataset.columns) {
      const oldColumn = currentDataset.columns.find(c => c.id === newColumn.id);
      if (oldColumn && isEqual(oldColumn, newColumn)) continue;

      const changedFields = {} as Record<string, any>;
      for (const [key, value] of Object.entries(newColumn)) {
        if (!oldColumn || !isEqual(oldColumn[key as keyof Column], value)) {
          changedFields[key] = value;
        }
      }

      if (Object.keys(changedFields).length === 0) continue;
      columnChanges[newColumn.id] = {
        ...changedFields,
        id: newColumn.id
      };
    }

    // Features are always sent in full (when present); the array index
    // becomes the feature's position.
    const transformChanges = updatedDataset.features?.map((feature, index) => ({
      id: feature.id,
      name: feature.name,
      feature_class: feature.feature_class,
      feature_position: index,
      _destroy: feature._destroy
    }));

    // Skip the request entirely when nothing differs from the local copy.
    const hasChanges =
      Object.keys(datasetChanges).length > 0 ||
      Object.keys(columnChanges).length > 0 ||
      !isEqual(currentDataset.features, updatedDataset.features);

    if (hasChanges) {
      const payload = {
        dataset: {
          ...datasetChanges,
          columns_attributes: columnChanges,
          features_attributes: transformChanges
        }
      };
      router.patch(`${rootPath}/datasets/${dataset.id}`, payload, {
        preserveState: true,
        preserveScroll: true
      });
    }

    setCurrentDataset(updatedDataset);
  }, [currentDataset, dataset.id, rootPath]);

  return (
    <div className="p-8 space-y-6">
      <div className="flex justify-end">
        <button
          onClick={() => setShowColumnConfig(true)}
          className="flex items-center gap-2 px-4 py-2 bg-white border border-gray-300 rounded-md text-sm font-medium text-gray-700 hover:bg-gray-50"
        >
          <Settings className="w-4 h-4" />
          Configure Columns
        </button>
      </div>

      <DatasetPreview dataset={currentDataset} />

      <ColumnConfigModal
        isOpen={showColumnConfig}
        onClose={() => setShowColumnConfig(false)}
        initialDataset={currentDataset}
        constants={constants}
        onSave={onSave}
      />
    </div>
  );
}
@@ -0,0 +1,261 @@
1
+ import React, { useState, useMemo, useEffect } from 'react';
2
+ import { Link, usePage, router } from '@inertiajs/react';
3
+ import { Database, Plus, Trash2, ExternalLink, Loader2, AlertCircle, ChevronDown, ChevronUp } from 'lucide-react';
4
+ import { EmptyState } from '../components/EmptyState';
5
+ import { SearchInput } from '../components/SearchInput';
6
+ import { Pagination } from '../components/Pagination';
7
+ import { Dataset, DatasetWorkflowStatus, Column } from "@types/dataset";
8
/** Props accepted by `DatasetsPage`. */
interface Props {
  /** Datasets to list; the page filters and paginates them client-side. */
  datasets: Dataset[];
}
11
+
12
/** Number of dataset cards shown on each page of the grid. */
const ITEMS_PER_PAGE = 6;
13
+
14
/** Visual treatment of a status badge: background class, text class and optional icon. */
type StatusStyle = { bg: string; text: string; icon: React.ReactNode };

/**
 * Badge styling for each dataset workflow status. `ready` has no icon;
 * `analyzing` shows a spinning loader and `failed` an alert icon.
 */
const STATUS_STYLES: Record<DatasetWorkflowStatus, StatusStyle> = {
  analyzing: { bg: 'bg-blue-100', text: 'text-blue-800', icon: <Loader2 className="w-4 h-4 animate-spin" /> },
  ready: { bg: 'bg-green-100', text: 'text-green-800', icon: null },
  failed: { bg: 'bg-red-100', text: 'text-red-800', icon: <AlertCircle className="w-4 h-4" /> },
};
31
+
32
/**
 * Dataset index screen: a searchable, paginated grid of dataset cards.
 *
 * - Search matches the dataset name or description, case-insensitively.
 * - While any dataset is in the `analyzing` state the `datasets` prop is
 *   re-fetched every two seconds.
 * - Failed datasets that carry a stacktrace get a collapsible error panel.
 *
 * @param props.datasets - Datasets to list.
 */
export default function DatasetsPage({ datasets }: Props) {
  const { rootPath } = usePage().props;
  const [searchQuery, setSearchQuery] = useState('');
  const [currentPage, setCurrentPage] = useState(1);
  const [expandedErrors, setExpandedErrors] = useState<number[]>([]);

  const filteredDatasets = useMemo(() => {
    // Lowercase the query once rather than once per dataset.
    const needle = searchQuery.toLowerCase();
    return datasets.filter(dataset =>
      dataset.name.toLowerCase().includes(needle) ||
      // A dataset without a description must not break the search.
      (dataset.description ?? '').toLowerCase().includes(needle)
    );
  }, [datasets, searchQuery]);

  const totalPages = Math.ceil(filteredDatasets.length / ITEMS_PER_PAGE);
  // The list can shrink (search, delete, poll refresh) while a later page is
  // selected; clamp so a page past the end is never rendered as "no results".
  const activePage = Math.min(currentPage, Math.max(totalPages, 1));
  const paginatedDatasets = filteredDatasets.slice(
    (activePage - 1) * ITEMS_PER_PAGE,
    activePage * ITEMS_PER_PAGE
  );

  // A new search starts again from the first page of results.
  const handleSearchChange = (query: string) => {
    setSearchQuery(query);
    setCurrentPage(1);
  };

  const handleDelete = (datasetId: number) => {
    if (confirm('Are you sure you want to delete this dataset?')) {
      router.delete(`${rootPath}/datasets/${datasetId}`);
    }
  };

  useEffect(() => {
    const isAnyAnalyzing = datasets.some(d => d.workflow_status === 'analyzing');
    if (!isAnyAnalyzing) {
      return;
    }

    // Re-request only the `datasets` prop, keeping scroll position and local state.
    const pollInterval = window.setInterval(() => {
      router.get(window.location.href, {}, {
        preserveScroll: true,
        preserveState: true,
        only: ['datasets']
      });
    }, 2000);

    return () => {
      window.clearInterval(pollInterval);
    };
  }, [datasets]);

  const toggleError = (id: number) => {
    setExpandedErrors(prev =>
      prev.includes(id)
        ? prev.filter(expandedId => expandedId !== id)
        : [...prev, id]
    );
  };

  if (datasets.length === 0) {
    return (
      <div className="p-8">
        <EmptyState
          icon={Database}
          title="Create your first dataset"
          description="Create a dataset to start training your machine learning models"
          actionLabel="Create Dataset"
          onAction={() => { router.visit(`${rootPath}/datasets/new`) }}
        />
      </div>
    );
  }

  return (
    <div className="p-8">
      <div className="space-y-6">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-4">
            <h2 className="text-xl font-semibold text-gray-900">Datasets</h2>
            <SearchInput
              value={searchQuery}
              onChange={handleSearchChange}
              placeholder="Search datasets..."
            />
          </div>
          <Link
            href={`${rootPath}/datasets/new`}
            className="inline-flex items-center gap-2 px-4 py-2 bg-blue-600 text-white text-sm font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
          >
            <Plus className="w-4 h-4" />
            New Dataset
          </Link>
        </div>

        {paginatedDatasets.length === 0 ? (
          <div className="text-center py-12 bg-white rounded-lg shadow">
            <Database className="mx-auto h-12 w-12 text-gray-400" />
            <h3 className="mt-2 text-sm font-medium text-gray-900">No datasets found</h3>
            <p className="mt-1 text-sm text-gray-500">
              No datasets match your search criteria. Try adjusting your search or create a new dataset.
            </p>
            <div className="mt-6">
              <Link
                href={`${rootPath}/datasets/new`}
                className="inline-flex items-center px-4 py-2 border border-transparent shadow-sm text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
              >
                <Plus className="w-4 h-4 mr-2" />
                New Dataset
              </Link>
            </div>
          </div>
        ) : (
          <>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
              {paginatedDatasets.map((dataset) => {
                const statusStyle = STATUS_STYLES[dataset.workflow_status];
                const statusLabel =
                  dataset.workflow_status.charAt(0).toUpperCase() + dataset.workflow_status.slice(1);
                const isAnalyzing = dataset.workflow_status === 'analyzing';
                const isErrorExpanded = expandedErrors.includes(dataset.id);

                return (
                  <div
                    key={dataset.id}
                    className="bg-white rounded-lg shadow-md p-6 hover:shadow-lg transition-shadow"
                  >
                    <div className="flex justify-between items-start mb-4">
                      <div className="flex items-start gap-3">
                        <Database className="w-5 h-5 text-blue-600 mt-1" />
                        <div>
                          <div className="flex items-center gap-2">
                            <h3 className="text-lg font-semibold text-gray-900">
                              {dataset.name}
                            </h3>
                            <div className={`inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium ${statusStyle.bg} ${statusStyle.text}`}>
                              {statusStyle.icon}
                              <span>{statusLabel}</span>
                            </div>
                          </div>
                          <p className="text-sm text-gray-500 mt-1">
                            {dataset.description}
                          </p>
                        </div>
                      </div>
                      <div className="flex gap-2">
                        {/* pointer-events-none only blocks the mouse; also take the link out of the tab order while analyzing. */}
                        <Link
                          href={`${rootPath}/datasets/${dataset.id}`}
                          className={`transition-colors ${
                            isAnalyzing
                              ? 'text-gray-300 cursor-not-allowed pointer-events-none'
                              : 'text-gray-400 hover:text-blue-600'
                          }`}
                          title={isAnalyzing ? 'Dataset is being analyzed' : 'View details'}
                          aria-disabled={isAnalyzing}
                          tabIndex={isAnalyzing ? -1 : undefined}
                        >
                          <ExternalLink className="w-5 h-5" />
                        </Link>
                        <button
                          type="button"
                          className="text-gray-400 hover:text-red-600 transition-colors"
                          title="Delete dataset"
                          aria-label="Delete dataset"
                          onClick={() => handleDelete(dataset.id)}
                        >
                          <Trash2 className="w-5 h-5" />
                        </button>
                      </div>
                    </div>

                    <div className="grid grid-cols-2 gap-4 mt-4">
                      <div>
                        <span className="text-sm text-gray-500">Columns</span>
                        <p className="text-sm font-medium text-gray-900">
                          {dataset.columns.length} columns
                        </p>
                      </div>
                      <div>
                        <span className="text-sm text-gray-500">Rows</span>
                        <p className="text-sm font-medium text-gray-900">
                          {dataset.num_rows.toLocaleString()}
                        </p>
                      </div>
                    </div>

                    <div className="mt-4 pt-4 border-t border-gray-100">
                      <div className="flex flex-wrap gap-2">
                        {dataset.columns.slice(0, 3).map((column: Column) => (
                          <span
                            key={column.name}
                            className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800"
                          >
                            {column.name}
                          </span>
                        ))}
                        {dataset.columns.length > 3 && (
                          <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 text-gray-800">
                            +{dataset.columns.length - 3} more
                          </span>
                        )}
                      </div>
                    </div>

                    {dataset.workflow_status === 'failed' && dataset.stacktrace && (
                      <div className="mt-4 pt-4 border-t border-gray-100">
                        <button
                          type="button"
                          onClick={() => toggleError(dataset.id)}
                          className="flex items-center gap-2 text-sm text-red-600 hover:text-red-700"
                        >
                          <AlertCircle className="w-4 h-4" />
                          <span>View Error Details</span>
                          {isErrorExpanded ? (
                            <ChevronUp className="w-4 h-4" />
                          ) : (
                            <ChevronDown className="w-4 h-4" />
                          )}
                        </button>
                        {isErrorExpanded && (
                          <div className="mt-2 p-3 bg-red-50 rounded-md">
                            <pre className="text-xs text-red-700 whitespace-pre-wrap font-mono">
                              {dataset.stacktrace}
                            </pre>
                          </div>
                        )}
                      </div>
                    )}
                  </div>
                );
              })}
            </div>

            {totalPages > 1 && (
              <Pagination
                currentPage={activePage}
                totalPages={totalPages}
                onPageChange={setCurrentPage}
              />
            )}
          </>
        )}
      </div>
    </div>
  );
}