easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,272 @@
1
+ import type { Model, RetrainingJob, RetrainingRun, Dataset, Prediction, Feature, FeatureGroup } from './types';
2
+
3
/**
 * Returns the ISO-8601 (UTC) timestamp of the moment `days` days before `from`.
 *
 * The subtraction is done on the local calendar day via `setDate`, so the
 * local wall-clock time of `from` is preserved across month boundaries.
 *
 * @param days - Number of calendar days to go back; `0` yields `from` itself.
 * @param from - Reference moment. Defaults to the current time, which keeps
 *   existing single-argument callers working; pass a fixed date to get
 *   reproducible fixtures.
 * @returns The shifted moment formatted with `Date.prototype.toISOString`.
 */
const daysAgo = (days: number, from: Date = new Date()): string => {
  // Work on a copy so the caller's Date instance is never mutated.
  const date = new Date(from.getTime());
  date.setDate(date.getDate() - days);
  return date.toISOString();
};
9
+
10
/**
 * Dataset fixtures for UI development.
 *
 * Holds a single "Customer Churn Dataset" entry: five described columns
 * (three numeric, one categorical, one datetime), two preview rows and
 * summary metadata.
 */
export const mockDatasets: Dataset[] = [
  {
    id: 1,
    name: 'Customer Churn Dataset',
    description: 'Historical customer data for churn prediction',
    columns: [
      {
        name: 'usage_days',
        type: 'numeric',
        description: 'Number of days customer has used the product',
        statistics: { mean: 145.7, median: 130, min: 1, max: 365, nullCount: 0 },
      },
      {
        name: 'total_spend',
        type: 'numeric',
        description: 'Total customer spend in USD',
        statistics: { mean: 487.32, median: 425.5, min: 0, max: 2500, nullCount: 1250 },
      },
      {
        name: 'support_tickets',
        type: 'numeric',
        description: 'Number of support tickets opened',
        statistics: { mean: 2.3, median: 1, min: 0, max: 15, nullCount: 3750 },
      },
      {
        name: 'subscription_tier',
        type: 'categorical',
        description: 'Customer subscription level',
        statistics: { uniqueCount: 3, nullCount: 125 },
      },
      {
        name: 'last_login',
        type: 'datetime',
        description: 'Last time the customer logged in',
        statistics: { nullCount: 5000 },
      },
    ],
    // Preview rows; the second row carries nulls in three of its fields.
    sampleData: [
      {
        usage_days: 234,
        total_spend: 567.89,
        support_tickets: 1,
        subscription_tier: 'premium',
        last_login: '2024-03-01',
      },
      {
        usage_days: 45,
        total_spend: null,
        support_tickets: null,
        subscription_tier: 'basic',
        last_login: null,
      },
    ],
    rowCount: 25000,
    updatedAt: '2024-03-10T12:00:00Z',
  },
];
90
+
91
/**
 * Model fixtures for UI development.
 *
 * One deployed XGBoost classifier that points at dataset id 1; it was
 * created 30 days before load time and updated at load time.
 */
export const mockModels: Model[] = [
  {
    id: 1,
    name: 'Customer Churn Predictor',
    modelType: 'classification',
    status: 'completed',
    deploymentStatus: 'inference',
    deployed: true,
    datasetId: 1,
    configuration: {
      algorithm: 'xgboost',
      features: ['usage_days', 'total_spend', 'support_tickets'],
      objective: 'binary:logistic',
      metrics: ['accuracy', 'f1'],
    },
    version: '2.1.0',
    rootDir: '/models/churn_predictor',
    file: { path: 'model.joblib' },
    createdAt: daysAgo(30),
    updatedAt: daysAgo(0),
  },
];
113
+
114
/**
 * Retraining-job fixtures for UI development.
 *
 * One active daily job for the churn predictor: it is evaluated on
 * `f1_score` against a 0.85 threshold and carries a weekly tuning
 * configuration with two parameter ranges.
 */
export const mockRetrainingJobs: RetrainingJob[] = [
  {
    id: 1,
    model: 'Customer Churn Predictor',
    frequency: 'daily',
    at: 2,
    evaluator: { metric: 'f1_score', threshold: 0.85, direction: 'maximize' },
    tunerConfig: {
      trials: 10,
      metrics: ['f1_score'],
      parameters: {
        max_depth: { min: 3, max: 10 },
        learning_rate: { min: 0.01, max: 0.1 },
      },
    },
    tuningFrequency: 'weekly',
    lastTuningAt: daysAgo(7),
    active: true,
    status: 'completed',
    lastRunAt: daysAgo(1),
    lockedAt: null,
    createdAt: daysAgo(30),
    updatedAt: daysAgo(0),
  },
];
143
+
144
/**
 * Retraining-run fixtures for UI development.
 *
 * Four runs of retraining job 1 on model 1, one per day over the last four
 * days, all measured against a 0.85 "maximize" threshold:
 * two completed runs above the threshold, one failed run without metrics,
 * and one completed run below the threshold.
 *
 * NOTE(review): the deploy flag is spelled `shoulddeploy` (all lowercase)
 * in every entry — confirm this matches the `RetrainingRun` type.
 */
export const mockRetrainingRuns: RetrainingRun[] = [
  // Run 1: completed, 0.89 against a 0.85 threshold.
  {
    id: 1,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'completed',
    metricValue: 0.89,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: true,
    startedAt: daysAgo(1),
    completedAt: daysAgo(1),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.92, precision: 0.88, recall: 0.9, f1: 0.89 },
      parameters: { max_depth: 6, learning_rate: 0.05 },
    },
    createdAt: daysAgo(1),
    updatedAt: daysAgo(1),
  },
  // Run 2: completed with a tuner job attached, 0.86 against 0.85.
  {
    id: 2,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: 1,
    status: 'completed',
    metricValue: 0.86,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: true,
    startedAt: daysAgo(2),
    completedAt: daysAgo(2),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.9, precision: 0.85, recall: 0.87, f1: 0.86 },
      parameters: { max_depth: 5, learning_rate: 0.03 },
    },
    createdAt: daysAgo(2),
    updatedAt: daysAgo(2),
  },
  // Run 3: failed; carries an error message and no metric or metadata.
  {
    id: 3,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'failed',
    metricValue: null,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: false,
    startedAt: daysAgo(3),
    completedAt: daysAgo(3),
    errorMessage: 'Training failed due to insufficient memory',
    metadata: null,
    createdAt: daysAgo(3),
    updatedAt: daysAgo(3),
  },
  // Run 4: completed, 0.83 against 0.85, flagged as not deployable.
  {
    id: 4,
    modelId: 1,
    retrainingJobId: 1,
    tunerJobId: null,
    status: 'completed',
    metricValue: 0.83,
    threshold: 0.85,
    thresholdDirection: 'maximize',
    shoulddeploy: false,
    startedAt: daysAgo(4),
    completedAt: daysAgo(4),
    errorMessage: null,
    metadata: {
      metrics: { accuracy: 0.87, precision: 0.82, recall: 0.84, f1: 0.83 },
      parameters: { max_depth: 4, learning_rate: 0.02 },
    },
    createdAt: daysAgo(4),
    updatedAt: daysAgo(4),
  },
];
247
+
248
/**
 * Feature fixtures for UI development.
 *
 * One feature in group 1 that reads and writes the `state` column; its
 * `code` is left as an empty string.
 */
export const mockFeatures: Feature[] = [
  {
    id: 1,
    name: 'Normalize state',
    description: 'Turn state names into 2 letter state abbreviations',
    groupId: 1,
    testDatasetId: 1,
    inputColumns: ['state'],
    outputColumns: ['state'],
    code: '',
    createdAt: daysAgo(30),
    updatedAt: daysAgo(0),
  },
];
262
+
263
/**
 * Feature-group fixtures for UI development.
 *
 * One group whose `features` property is the `mockFeatures` array itself
 * (shared by reference, not copied).
 */
export const mockFeatureGroups: FeatureGroup[] = [
  {
    id: 1,
    name: 'Customer Churn',
    description: 'Features for customer churn dataset',
    features: mockFeatures,
    createdAt: daysAgo(30),
    updatedAt: daysAgo(0),
  },
];
@@ -0,0 +1,103 @@
1
+ import React, { useState, useCallback } from 'react';
2
+ import { usePage, router } from '@inertiajs/react';
3
+ import { Settings } from 'lucide-react';
4
+ import { isEqual } from 'lodash';
5
+ import { DatasetPreview } from '../components/DatasetPreview';
6
+ import { ColumnConfigModal } from '../components/dataset/ColumnConfigModal';
7
+ import type { Dataset, Column } from '../types/dataset';
8
+ import type { PreprocessingConstants } from '../types';
9
+
10
+ interface Props {
11
+ dataset: Dataset;
12
+ constants: PreprocessingConstants;
13
+ }
14
+
15
+ export default function DatasetDetailsPage({ dataset, constants }: Props) {
16
+ const [showColumnConfig, setShowColumnConfig] = useState(false);
17
+ const [currentDataset, setCurrentDataset] = useState<Dataset>(dataset);
18
+ const { rootPath } = usePage().props;
19
+
20
+ const onSave = useCallback((updatedDataset: Dataset) => {
21
+ // Find dataset-level changes
22
+ const datasetChanges = Object.entries(updatedDataset).reduce((acc, [key, value]) => {
23
+ if (key !== 'columns' && key !== 'features' && !isEqual(currentDataset[key as keyof Dataset], value)) {
24
+ acc[key as keyof Dataset] = value;
25
+ }
26
+ return acc;
27
+ }, {} as Partial<Dataset>);
28
+
29
+ // Find column changes
30
+ const columnChanges = updatedDataset.columns.reduce((acc, newColumn) => {
31
+ const oldColumn = currentDataset.columns.find(c => c.id === newColumn.id);
32
+
33
+ if (!oldColumn || !isEqual(oldColumn, newColumn)) {
34
+ const changedFields = Object.entries(newColumn).reduce((fields, [key, value]) => {
35
+ if (!oldColumn || !isEqual(oldColumn[key as keyof Column], value)) {
36
+ fields[key] = value;
37
+ }
38
+ return fields;
39
+ }, {} as Record<string, any>);
40
+
41
+ if (Object.keys(changedFields).length > 0) {
42
+ acc[newColumn.id] = {
43
+ ...changedFields,
44
+ id: newColumn.id
45
+ };
46
+ }
47
+ }
48
+ return acc;
49
+ }, {} as Record<number, Record<string, any>>);
50
+
51
+ // Format features for nested attributes
52
+ const transformChanges = updatedDataset.features?.map((feature, index) => ({
53
+ id: feature.id,
54
+ name: feature.name,
55
+ feature_class: feature.feature_class,
56
+ feature_position: index,
57
+ _destroy: feature._destroy
58
+ }));
59
+
60
+ // Only make the API call if there are actual changes
61
+ if (Object.keys(datasetChanges).length > 0 ||
62
+ Object.keys(columnChanges).length > 0 ||
63
+ !isEqual(currentDataset.features, updatedDataset.features)) {
64
+ router.patch(`${rootPath}/datasets/${dataset.id}`, {
65
+ dataset: {
66
+ ...datasetChanges,
67
+ columns_attributes: columnChanges,
68
+ features_attributes: transformChanges
69
+ }
70
+ }, {
71
+ preserveState: true,
72
+ preserveScroll: true
73
+ });
74
+ }
75
+
76
+ // Update local state
77
+ setCurrentDataset(updatedDataset);
78
+ }, [currentDataset, dataset.id, rootPath]);
79
+
80
+ return (
81
+ <div className="p-8 space-y-6">
82
+ <div className="flex justify-end">
83
+ <button
84
+ onClick={() => setShowColumnConfig(true)}
85
+ className="flex items-center gap-2 px-4 py-2 bg-white border border-gray-300 rounded-md text-sm font-medium text-gray-700 hover:bg-gray-50"
86
+ >
87
+ <Settings className="w-4 h-4" />
88
+ Configure Columns
89
+ </button>
90
+ </div>
91
+
92
+ <DatasetPreview dataset={currentDataset} />
93
+
94
+ <ColumnConfigModal
95
+ isOpen={showColumnConfig}
96
+ onClose={() => setShowColumnConfig(false)}
97
+ initialDataset={currentDataset}
98
+ constants={constants}
99
+ onSave={onSave}
100
+ />
101
+ </div>
102
+ );
103
+ }
@@ -0,0 +1,261 @@
1
+ import React, { useState, useMemo, useEffect } from 'react';
2
+ import { Link, usePage, router } from '@inertiajs/react';
3
+ import { Database, Plus, Trash2, ExternalLink, Loader2, AlertCircle, ChevronDown, ChevronUp } from 'lucide-react';
4
+ import { EmptyState } from '../components/EmptyState';
5
+ import { SearchInput } from '../components/SearchInput';
6
+ import { Pagination } from '../components/Pagination';
7
+ import { Dataset, DatasetWorkflowStatus, Column } from "@types/dataset";
8
/** Props supplied to the datasets index page. */
interface Props {
  datasets: Dataset[];
}

/** Number of dataset cards shown per page. */
const ITEMS_PER_PAGE = 6;

/** Badge styling for one workflow status: background class, text class, optional icon. */
type StatusStyle = { bg: string; text: string; icon: React.ReactNode };

/** Badge styling for each dataset workflow status; `ready` has no icon. */
const STATUS_STYLES: Record<DatasetWorkflowStatus, StatusStyle> = {
  analyzing: {
    bg: 'bg-blue-100',
    text: 'text-blue-800',
    icon: <Loader2 className="w-4 h-4 animate-spin" />,
  },
  ready: {
    bg: 'bg-green-100',
    text: 'text-green-800',
    icon: null,
  },
  failed: {
    bg: 'bg-red-100',
    text: 'text-red-800',
    icon: <AlertCircle className="w-4 h-4" />,
  },
};
31
+
32
/**
 * Datasets index page.
 *
 * Shows a searchable, paginated grid of dataset cards. While any dataset is
 * in the `analyzing` state the page re-requests the `datasets` prop every
 * two seconds. Failed datasets with a stacktrace get a collapsible error
 * section.
 *
 * @param datasets - All datasets to list; filtering and pagination happen
 *   client-side.
 */
export default function DatasetsPage({ datasets }: Props) {
  const { rootPath } = usePage().props;
  const [searchQuery, setSearchQuery] = useState('');
  const [currentPage, setCurrentPage] = useState(1);
  const [expandedErrors, setExpandedErrors] = useState<number[]>([]);

  const filteredDatasets = useMemo(() => {
    // Lower-case the query once instead of once per dataset.
    const needle = searchQuery.toLowerCase();
    return datasets.filter(dataset =>
      dataset.name.toLowerCase().includes(needle) ||
      // Guard against a missing description so one bad row cannot break the page.
      (dataset.description ?? '').toLowerCase().includes(needle)
    );
  }, [datasets, searchQuery]);

  const totalPages = Math.ceil(filteredDatasets.length / ITEMS_PER_PAGE);
  // Clamp the page so a shrinking result set (search, delete, poll refresh)
  // never leaves the user on a page that no longer exists.
  const activePage = Math.min(Math.max(currentPage, 1), Math.max(totalPages, 1));
  const paginatedDatasets = filteredDatasets.slice(
    (activePage - 1) * ITEMS_PER_PAGE,
    activePage * ITEMS_PER_PAGE
  );

  // A new query starts from the first page of its results.
  const handleSearchChange = (query: string) => {
    setSearchQuery(query);
    setCurrentPage(1);
  };

  const handleDelete = (datasetId: number) => {
    if (confirm('Are you sure you want to delete this dataset?')) {
      router.delete(`${rootPath}/datasets/${datasetId}`);
    }
  };

  useEffect(() => {
    let pollInterval: number | undefined;

    const isAnyAnalyzing = datasets.some(d => d.workflow_status === 'analyzing');

    if (isAnyAnalyzing) {
      // Re-request only the `datasets` prop, keeping scroll and local state.
      pollInterval = window.setInterval(() => {
        router.get(window.location.href, {}, {
          preserveScroll: true,
          preserveState: true,
          only: ['datasets']
        });
      }, 2000);
    }

    return () => {
      if (pollInterval) {
        window.clearInterval(pollInterval);
      }
    };
  }, [datasets]);

  const toggleError = (id: number) => {
    setExpandedErrors(prev =>
      prev.includes(id)
        ? prev.filter(expandedId => expandedId !== id)
        : [...prev, id]
    );
  };

  if (datasets.length === 0) {
    return (
      <div className="p-8">
        <EmptyState
          icon={Database}
          title="Create your first dataset"
          description="Create a dataset to start training your machine learning models"
          actionLabel="Create Dataset"
          onAction={() => { router.visit(`${rootPath}/datasets/new`) }}
        />
      </div>
    );
  }

  return (
    <div className="p-8">
      <div className="space-y-6">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-4">
            <h2 className="text-xl font-semibold text-gray-900">Datasets</h2>
            <SearchInput
              value={searchQuery}
              onChange={handleSearchChange}
              placeholder="Search datasets..."
            />
          </div>
          <Link
            href={`${rootPath}/datasets/new`}
            className="inline-flex items-center gap-2 px-4 py-2 bg-blue-600 text-white text-sm font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
          >
            <Plus className="w-4 h-4" />
            New Dataset
          </Link>
        </div>

        {paginatedDatasets.length === 0 ? (
          <div className="text-center py-12 bg-white rounded-lg shadow">
            <Database className="mx-auto h-12 w-12 text-gray-400" />
            <h3 className="mt-2 text-sm font-medium text-gray-900">No datasets found</h3>
            <p className="mt-1 text-sm text-gray-500">
              No datasets match your search criteria. Try adjusting your search or create a new dataset.
            </p>
            <div className="mt-6">
              <Link
                href={`${rootPath}/datasets/new`}
                className="inline-flex items-center px-4 py-2 border border-transparent shadow-sm text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
              >
                <Plus className="w-4 h-4 mr-2" />
                New Dataset
              </Link>
            </div>
          </div>
        ) : (
          <>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
              {paginatedDatasets.map((dataset) => (
                <div
                  key={dataset.id}
                  className="bg-white rounded-lg shadow-md p-6 hover:shadow-lg transition-shadow"
                >
                  <div className="flex justify-between items-start mb-4">
                    <div className="flex items-start gap-3">
                      <Database className="w-5 h-5 text-blue-600 mt-1" />
                      <div>
                        <div className="flex items-center gap-2">
                          <h3 className="text-lg font-semibold text-gray-900">
                            {dataset.name}
                          </h3>
                          <div className={`inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium ${STATUS_STYLES[dataset.workflow_status].bg} ${STATUS_STYLES[dataset.workflow_status].text}`}>
                            {STATUS_STYLES[dataset.workflow_status].icon}
                            <span>{dataset.workflow_status.charAt(0).toUpperCase() + dataset.workflow_status.slice(1)}</span>
                          </div>
                        </div>
                        <p className="text-sm text-gray-500 mt-1">
                          {dataset.description}
                        </p>
                      </div>
                    </div>
                    <div className="flex gap-2">
                      {/* The details link is visually disabled while analysis is running. */}
                      <Link
                        href={`${rootPath}/datasets/${dataset.id}`}
                        className={`transition-colors ${
                          dataset.workflow_status === 'analyzing'
                            ? 'text-gray-300 cursor-not-allowed pointer-events-none'
                            : 'text-gray-400 hover:text-blue-600'
                        }`}
                        title={dataset.workflow_status === 'analyzing' ? 'Dataset is being analyzed' : 'View details'}
                      >
                        <ExternalLink className="w-5 h-5" />
                      </Link>
                      <button
                        className="text-gray-400 hover:text-red-600 transition-colors"
                        title="Delete dataset"
                        onClick={() => handleDelete(dataset.id)}
                      >
                        <Trash2 className="w-5 h-5" />
                      </button>
                    </div>
                  </div>

                  <div className="grid grid-cols-2 gap-4 mt-4">
                    <div>
                      <span className="text-sm text-gray-500">Columns</span>
                      <p className="text-sm font-medium text-gray-900">
                        {dataset.columns.length} columns
                      </p>
                    </div>
                    <div>
                      <span className="text-sm text-gray-500">Rows</span>
                      <p className="text-sm font-medium text-gray-900">
                        {dataset.num_rows.toLocaleString()}
                      </p>
                    </div>
                  </div>

                  {/* First three column names as chips, then a "+N more" counter. */}
                  <div className="mt-4 pt-4 border-t border-gray-100">
                    <div className="flex flex-wrap gap-2">
                      {dataset.columns.slice(0, 3).map((column: Column) => (
                        <span
                          key={column.name}
                          className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800"
                        >
                          {column.name}
                        </span>
                      ))}
                      {dataset.columns.length > 3 && (
                        <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 text-gray-800">
                          +{dataset.columns.length - 3} more
                        </span>
                      )}
                    </div>
                  </div>

                  {dataset.workflow_status === 'failed' && dataset.stacktrace && (
                    <div className="mt-4 pt-4 border-t border-gray-100">
                      <button
                        onClick={() => toggleError(dataset.id)}
                        className="flex items-center gap-2 text-sm text-red-600 hover:text-red-700"
                      >
                        <AlertCircle className="w-4 h-4" />
                        <span>View Error Details</span>
                        {expandedErrors.includes(dataset.id) ? (
                          <ChevronUp className="w-4 h-4" />
                        ) : (
                          <ChevronDown className="w-4 h-4" />
                        )}
                      </button>
                      {expandedErrors.includes(dataset.id) && (
                        <div className="mt-2 p-3 bg-red-50 rounded-md">
                          <pre className="text-xs text-red-700 whitespace-pre-wrap font-mono">
                            {dataset.stacktrace}
                          </pre>
                        </div>
                      )}
                    </div>
                  )}

                </div>
              ))}
            </div>

            {totalPages > 1 && (
              <Pagination
                currentPage={activePage}
                totalPages={totalPages}
                onPageChange={setCurrentPage}
              />
            )}
          </>
        )}
      </div>
    </div>
  );
}