easy_ml 0.1.4 → 0.2.0.pre.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,168 @@
1
+ import type { ColumnType } from '../../../types/datasource';
2
+ import type { Datasource } from '../types/datasource';
3
+
4
/**
 * Props for the new-dataset form.
 */
export type NewDatasetFormProps = {
  /** Datasource records handed to the form. */
  datasources: Datasource[];
  /** Constant lookup data handed to the form. */
  constants: {
    /** Column type definitions. */
    columns: ColumnType[];
  };
};
10
/**
 * String identifiers accepted as a splitter type.
 *
 * NOTE(review): `validateSplitterConfig` in this file has no branch for
 * 'group_shuffle', so that value is currently reported as invalid.
 */
export type SplitterType =
  | 'date'
  | 'random'
  | 'predefined'
  | 'stratified'
  | 'stratified_kfold'
  | 'group_kfold'
  | 'group_shuffle'
  | 'leave_p_out';
19
+
20
/**
 * Configuration for the 'date' splitter.
 * `validateDateSplitter` requires a non-empty `date_column` and both month
 * counts to be greater than 0.
 */
export interface DateSplitConfig {
  /** Name of the column to split on. */
  date_column: string;
  /** Number of months for the test set. */
  months_test: number;
  /** Number of months for the validation set. */
  months_valid: number;
}
25
+
26
/**
 * Configuration for the 'random' splitter. Every field is optional;
 * `validateRandomSplitter` substitutes 0.6 / 0.2 / 0.2 for missing
 * train / test / valid ratios.
 */
export interface RandomSplitConfig {
  /** Fraction assigned to the training set. */
  train_ratio?: number;
  /** Fraction assigned to the test set. */
  test_ratio?: number;
  /** Fraction assigned to the validation set. */
  valid_ratio?: number;
  /** Optional seed value. */
  seed?: number;
}
32
+
33
/**
 * Configuration for the 'predefined' splitter: one file list per split.
 *
 * This interface used to be declared twice with identical members; the
 * redundant second declaration has been removed. The merged shape is unchanged.
 */
export interface PredefinedSplitConfig {
  /** Files making up the training set. */
  train_files: string[];
  /** Files making up the test set. */
  test_files: string[];
  /** Files making up the validation set. */
  valid_files: string[];
}

/**
 * Minimal description of a column: its name and its type label.
 */
export interface ColumnConfig {
  name: string;
  type: string;
}
49
+
50
/**
 * Configuration for the 'stratified' splitter.
 * `validateStratifiedSplitter` requires a non-empty `stratify_column` and
 * ratios that sum to 1.0 (within 0.001).
 */
export interface StratifiedSplitConfig {
  /** Name of the column to stratify on. */
  stratify_column: string;
  /** Fraction assigned to the training set. */
  train_ratio: number;
  /** Fraction assigned to the test set. */
  test_ratio: number;
  /** Fraction assigned to the validation set. */
  valid_ratio: number;
}
56
+
57
/**
 * Configuration validated for the 'stratified_kfold' and 'group_kfold'
 * splitter types. `validateKFoldSplitter` requires `n_splits` greater than 1.
 */
export interface KFoldConfig {
  /** Number of folds. */
  n_splits: number;
  /** Shuffle flag. */
  shuffle: boolean;
  /** Optional random-state value. */
  random_state?: number;
}
62
+
63
/**
 * Configuration for the 'leave_p_out' splitter.
 * `validateLeavePOutSplitter` requires `p` greater than 0.
 */
export interface LeavePOutConfig {
  /** The P value of the leave-P-out scheme. */
  p: number;
  /** Shuffle flag. */
  shuffle: boolean;
  /** Optional random-state value. */
  random_state?: number;
}
68
+
69
/**
 * Union of every splitter configuration shape.
 *
 * NOTE(review): `RandomSplitConfig` is not a member; the random split is
 * represented here by the empty-object type.
 */
export type SplitConfig =
  | DateSplitConfig
  | PredefinedSplitConfig
  | StratifiedSplitConfig
  | KFoldConfig
  | LeavePOutConfig
  | Record<string, never>; // For random split
76
+
77
/**
 * Outcome returned by the splitter validators in this file.
 */
export interface ValidationResult {
  /** True when the configuration passed validation. */
  isValid: boolean;
  /** Human-readable reason; set by the validators when `isValid` is false. */
  error?: string;
}
81
+
82
+ // Validation functions for each splitter type
83
/**
 * Validates a date-splitter configuration.
 *
 * Checks, in order: a date column is set, the test month count is a positive
 * number, and the validation month count is a positive number. The first
 * failing check determines the returned error.
 *
 * @param config Date splitter settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validateDateSplitter = (config: DateSplitConfig): ValidationResult => {
  const reject = (error: string): ValidationResult => ({ isValid: false, error });
  const { date_column: dateColumn, months_test: monthsTest, months_valid: monthsValid } = config;

  if (!dateColumn) {
    return reject("Please select a date column");
  }

  const testMissing = !monthsTest || monthsTest <= 0;
  if (testMissing) {
    return reject("Test months must be greater than 0");
  }

  const validMissing = !monthsValid || monthsValid <= 0;
  if (validMissing) {
    return reject("Validation months must be greater than 0");
  }

  return { isValid: true };
};
95
+
96
/**
 * Validates a random-splitter configuration.
 *
 * Missing ratios default to 0.6 (train), 0.2 (test) and 0.2 (valid). Each
 * ratio must be a finite number in [0, 1], and together they must sum to 1.0
 * within a tolerance of 0.001.
 *
 * @param config Random splitter settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validateRandomSplitter = (config: RandomSplitConfig): ValidationResult => {
  const train = config.train_ratio ?? 0.6;
  const test = config.test_ratio ?? 0.2;
  const valid = config.valid_ratio ?? 0.2;

  // Without this guard a NaN ratio slips through: the sum becomes NaN and
  // `Math.abs(NaN - 1) >= 0.001` is false. It also rejects negative or >1
  // ratios that happen to sum to 1.
  const outOfRange = [train, test, valid].some(
    (ratio) => !Number.isFinite(ratio) || ratio < 0 || ratio > 1
  );
  if (outOfRange) {
    return { isValid: false, error: "Split ratios must be numbers between 0 and 1" };
  }

  const total = train + test + valid;
  if (Math.abs(total - 1.0) >= 0.001) {
    return {
      isValid: false,
      error: `Split ratios must sum to 1.0 (current sum: ${total.toFixed(2)})`
    };
  }
  return { isValid: true };
};
109
+
110
/**
 * Validates a predefined-splitter configuration.
 *
 * The configuration is valid when at least one file is listed across
 * `train_files`, `test_files` and `valid_files`. The previous implementation
 * read `config.files`, a property `PredefinedSplitConfig` does not declare,
 * so correctly shaped configurations were always rejected.
 *
 * @param config Predefined splitter settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validatePredefinedSplitter = (config: PredefinedSplitConfig): ValidationResult => {
  // A flat `files` list is still counted so callers that relied on the old
  // check keep passing. NOTE(review): drop once no caller sends `files`.
  const { train_files, test_files, valid_files, files } =
    (config ?? {}) as Partial<PredefinedSplitConfig> & { files?: unknown[] };

  const selectedCount =
    (train_files?.length ?? 0) +
    (test_files?.length ?? 0) +
    (valid_files?.length ?? 0) +
    (files?.length ?? 0);

  if (selectedCount === 0) {
    return { isValid: false, error: "Please select at least one file for splitting" };
  }
  return { isValid: true };
};
116
+
117
/**
 * Validates a stratified-splitter configuration.
 *
 * Requires a stratify column, each ratio to be a finite number in [0, 1]
 * (missing ratios count as 0), and the ratios to sum to 1.0 within a
 * tolerance of 0.001.
 *
 * @param config Stratified splitter settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validateStratifiedSplitter = (config: StratifiedSplitConfig): ValidationResult => {
  if (!config.stratify_column) {
    return { isValid: false, error: "Please select a column to stratify on" };
  }

  const train = config.train_ratio ?? 0;
  const test = config.test_ratio ?? 0;
  const valid = config.valid_ratio ?? 0;

  // Without this guard a NaN ratio slips through: the sum becomes NaN and
  // `Math.abs(NaN - 1) >= 0.001` is false. It also rejects negative or >1
  // ratios that happen to sum to 1.
  const outOfRange = [train, test, valid].some(
    (ratio) => !Number.isFinite(ratio) || ratio < 0 || ratio > 1
  );
  if (outOfRange) {
    return { isValid: false, error: "Split ratios must be numbers between 0 and 1" };
  }

  const total = train + test + valid;
  if (Math.abs(total - 1.0) >= 0.001) {
    return {
      isValid: false,
      error: `Split ratios must sum to 1.0 (current sum: ${total.toFixed(2)})`
    };
  }
  return { isValid: true };
};
134
+
135
/**
 * Validates a k-fold splitter configuration: `n_splits` must be present and
 * greater than 1.
 *
 * @param config K-fold settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validateKFoldSplitter = (config: KFoldConfig): ValidationResult => {
  const splits = config.n_splits;
  const tooFewSplits = !splits || splits <= 1;

  return tooFewSplits
    ? { isValid: false, error: "Number of splits must be greater than 1" }
    : { isValid: true };
};
141
+
142
/**
 * Validates a leave-P-out splitter configuration: `p` must be present and
 * greater than 0.
 *
 * @param config Leave-P-out settings to check.
 * @returns `{ isValid: true }` or `{ isValid: false, error }`.
 */
export const validateLeavePOutSplitter = (config: LeavePOutConfig): ValidationResult => {
  const p = config.p;
  const invalidP = !p || p <= 0;

  return invalidP
    ? { isValid: false, error: "P value must be greater than 0" }
    : { isValid: true };
};
148
+
149
+ // Main validation function
150
/**
 * Dispatches a splitter configuration to the validator for its type.
 *
 * 'stratified_kfold' and 'group_kfold' share the k-fold validator. Any type
 * without a dedicated validator yields an "Invalid splitter type" failure.
 *
 * NOTE(review): 'group_shuffle' is a member of `SplitterType` but has no
 * branch here, so it is reported as invalid. Confirm whether that is intended.
 *
 * @param type   Splitter type identifier.
 * @param config Configuration to validate; cast to the shape matching `type`.
 * @returns The selected validator's result.
 */
export const validateSplitterConfig = (type: SplitterType, config: SplitConfig): ValidationResult => {
  if (type === 'date') {
    return validateDateSplitter(config as DateSplitConfig);
  }
  if (type === 'random') {
    return validateRandomSplitter(config as RandomSplitConfig);
  }
  if (type === 'predefined') {
    return validatePredefinedSplitter(config as PredefinedSplitConfig);
  }
  if (type === 'stratified') {
    return validateStratifiedSplitter(config as StratifiedSplitConfig);
  }
  if (type === 'stratified_kfold' || type === 'group_kfold') {
    return validateKFoldSplitter(config as KFoldConfig);
  }
  if (type === 'leave_p_out') {
    return validateLeavePOutSplitter(config as LeavePOutConfig);
  }
  return { isValid: false, error: "Invalid splitter type" };
};
@@ -0,0 +1,53 @@
1
+ import type { Column } from '../../types';
2
+
3
/**
 * Collects the names of all columns whose `type` is exactly 'datetime',
 * preserving their original order.
 *
 * @param columns Columns to inspect.
 * @returns Names of the datetime columns.
 */
export function getDateColumns(columns: Column[]): string[] {
  return columns.reduce<string[]>((names, column) => {
    if (column.type === 'datetime') {
      names.push(column.name);
    }
    return names;
  }, []);
}
8
+
9
/**
 * Validates a split configuration and returns every problem found.
 *
 * Unlike a first-failure validator, this accumulates all applicable messages.
 * Types without rules (for example 'random') yield an empty list.
 *
 * @param type   Splitter type identifier.
 * @param config Splitter settings; a null/undefined value is treated as empty.
 * @returns Error messages, empty when the configuration is acceptable.
 */
export function validateSplitConfig(type: string, config: any): string[] {
  const errors: string[] = [];
  // Tolerate a missing config instead of throwing on property access.
  const cfg = config ?? {};

  switch (type) {
    case 'date':
      if (!cfg.dateColumn) {
        errors.push('Date column is required');
      }
      // `!(x >= 1)` also rejects undefined and NaN, which `x < 1` let through.
      if (!(cfg.monthsTest >= 1)) {
        errors.push('Test set must be at least 1 month');
      }
      if (!(cfg.monthsValid >= 1)) {
        errors.push('Validation set must be at least 1 month');
      }
      break;

    case 'predefined': {
      // Typed locally so the callbacks below are not implicit-any.
      const files: Array<{ type?: string } | null | undefined> =
        Array.isArray(cfg.files) ? cfg.files : [];

      if (files.length === 0) {
        errors.push('At least one file must be selected');
      }
      if (!files.some((file) => file?.type === 'train')) {
        errors.push('Training set file is required');
      }
      if (!files.some((file) => file?.type === 'test')) {
        errors.push('Test set file is required');
      }
      break;
    }

    case 'stratified':
    case 'stratified_kfold':
      if (!cfg.targetColumn) {
        errors.push('Target column is required');
      }
      break;

    case 'group_kfold':
    case 'group_shuffle':
      if (!cfg.groupColumn) {
        errors.push('Group column is required');
      }
      break;
  }

  return errors;
}
@@ -0,0 +1,46 @@
1
+ import React from 'react';
2
+ import { AlertCircle } from 'lucide-react';
3
+
4
/** Props accepted by `CodeEditor`. */
interface CodeEditorProps {
  /** Current editor text (controlled value of the textarea). */
  value: string;
  /** Called with the new text on every textarea change. */
  onChange: (value: string) => void;
  /** Label rendered in the badge at the right of the editor header. */
  language: string;
}
9
+
10
/**
 * Controlled, textarea-based code editor with a header bar and a panel of
 * authoring guidelines.
 *
 * The guideline about the function name now says 'transform', matching the
 * `def transform(df)` skeleton shown in the textarea placeholder. It
 * previously said 'feature', which contradicted the example.
 *
 * @param value    Current editor text.
 * @param onChange Receives the new text on every edit.
 * @param language Label shown in the header badge.
 */
export function CodeEditor({ value, onChange, language }: CodeEditorProps) {
  return (
    <div className="space-y-4">
      <div className="bg-gray-900 rounded-lg overflow-hidden">
        <div className="flex items-center justify-between px-4 py-2 bg-gray-800">
          <span className="text-sm text-gray-400">Ruby Feature</span>
          <span className="text-xs px-2 py-1 bg-gray-700 rounded text-gray-300">
            {language}
          </span>
        </div>
        <textarea
          value={value}
          onChange={(e) => onChange(e.target.value)}
          className="w-full h-64 p-4 bg-gray-900 text-gray-100 font-mono text-sm focus:outline-none"
          placeholder={`def transform(df)\n # Your feature code here\n # Example:\n # df["column"] = df["column"].map { |value| value.upcase }\n df\nend`}
          spellCheck={false}
        />
      </div>

      <div className="bg-blue-50 rounded-lg p-4">
        <div className="flex gap-2">
          <AlertCircle className="w-5 h-5 text-blue-500 flex-shrink-0" />
          <div className="text-sm text-blue-700">
            <p className="font-medium mb-1">Feature Guidelines</p>
            <ul className="list-disc pl-4 space-y-1">
              <li>The function must be named 'transform'</li>
              <li>It should accept a DataFrame as its only parameter</li>
              <li>All operations should be performed on the DataFrame object</li>
              <li>The function must return the modified DataFrame</li>
              <li>Use standard Ruby syntax and DataFrame operations</li>
            </ul>
          </div>
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,150 @@
1
+ import React, { useState } from 'react';
2
+ import { Play, AlertTriangle } from 'lucide-react';
3
+ import type { Dataset } from '../../types';
4
+
5
/** Props accepted by `DataPreview`. */
interface DataPreviewProps {
  /** Dataset whose `sampleData` rows are displayed. */
  dataset: Dataset;
  /** Feature source code. Accepted by the component but not read by it yet. */
  code: string;
  /** Column names rendered in the input table. */
  inputColumns: string[];
  /** Column names rendered in the output table. */
  outputColumns: string[];
}
11
+
12
/**
 * Renders one preview table: a header cell per column and one body row per
 * data row, with every cell value passed through `String(...)`.
 */
function PreviewTable({ columns, rows }: { columns: string[]; rows: Record<string, any>[] }) {
  return (
    <table className="min-w-full divide-y divide-gray-200">
      <thead className="bg-gray-50">
        <tr>
          {columns.map((column) => (
            <th
              key={column}
              className="px-4 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
            >
              {column}
            </th>
          ))}
        </tr>
      </thead>
      <tbody className="bg-white divide-y divide-gray-200">
        {rows.map((row, i) => (
          <tr key={i}>
            {columns.map((column) => (
              <td
                key={column}
                className="px-4 py-2 text-sm text-gray-900 whitespace-nowrap"
              >
                {String(row[column])}
              </td>
            ))}
          </tr>
        ))}
      </tbody>
    </table>
  );
}

/**
 * Side-by-side preview of a dataset's sample rows before and after running a
 * feature.
 *
 * Running is simulated: after a one-second delay the original sample data is
 * shown unchanged as the output. The `code` prop is not executed.
 *
 * Changes from the previous version:
 * - the pending timer is cleared on unmount, so no state update fires on an
 *   unmounted component;
 * - the run button is `type="button"`, so it cannot submit an enclosing form;
 * - the output heading typo 'Featureed Data' is corrected.
 *
 * @param dataset       Dataset providing `sampleData`.
 * @param code          Feature source (currently unused).
 * @param inputColumns  Columns shown in the input table.
 * @param outputColumns Columns shown in the output table.
 */
export function DataPreview({ dataset, code, inputColumns, outputColumns }: DataPreviewProps) {
  const [isRunning, setIsRunning] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [previewData, setPreviewData] = useState<Record<string, any>[] | null>(null);
  // Handle of the in-flight simulated run, kept so it can be cancelled.
  const timerRef = React.useRef<ReturnType<typeof setTimeout> | null>(null);

  React.useEffect(() => {
    return () => {
      if (timerRef.current !== null) {
        clearTimeout(timerRef.current);
      }
    };
  }, []);

  const runFeature = () => {
    setIsRunning(true);
    setError(null);

    // Simulate feature execution
    timerRef.current = setTimeout(() => {
      timerRef.current = null;
      try {
        // In a real implementation, this would execute the Ruby code
        // For now, we'll just show the original data
        setPreviewData(dataset.sampleData);
      } catch (err) {
        setError(err instanceof Error ? err.message : 'An error occurred');
      } finally {
        setIsRunning(false);
      }
    }, 1000);
  };

  return (
    <div className="space-y-4">
      <div className="flex justify-between items-center">
        <h4 className="text-sm font-medium text-gray-900">Data Preview</h4>
        <button
          type="button"
          onClick={runFeature}
          disabled={isRunning}
          className="inline-flex items-center gap-2 px-3 py-1.5 bg-blue-600 text-white text-sm font-medium rounded-md hover:bg-blue-700 disabled:opacity-50"
        >
          <Play className="w-4 h-4" />
          {isRunning ? 'Running...' : 'Run Preview'}
        </button>
      </div>

      {error && (
        <div className="bg-red-50 border border-red-200 rounded-lg p-4">
          <div className="flex items-start gap-2">
            <AlertTriangle className="w-5 h-5 text-red-500 mt-0.5" />
            <div>
              <h4 className="text-sm font-medium text-red-800">
                Feature Error
              </h4>
              <pre className="mt-1 text-sm text-red-700 whitespace-pre-wrap font-mono">
                {error}
              </pre>
            </div>
          </div>
        </div>
      )}

      <div className="border border-gray-200 rounded-lg overflow-hidden">
        <div className="grid grid-cols-2 divide-x divide-gray-200">
          <div>
            <div className="px-4 py-2 bg-gray-50 border-b border-gray-200">
              <h5 className="text-sm font-medium text-gray-700">Input Data</h5>
            </div>
            <div className="overflow-x-auto">
              <PreviewTable columns={inputColumns} rows={dataset.sampleData} />
            </div>
          </div>

          <div>
            <div className="px-4 py-2 bg-gray-50 border-b border-gray-200">
              <h5 className="text-sm font-medium text-gray-700">
                {previewData ? 'Transformed Data' : 'Output Preview'}
              </h5>
            </div>
            <div className="overflow-x-auto">
              {previewData ? (
                <PreviewTable columns={outputColumns} rows={previewData} />
              ) : (
                <div className="p-8 text-center text-sm text-gray-500">
                  Run the feature to see the preview
                </div>
              )}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,88 @@
1
+ import React from 'react';
2
+ // import { Link } from 'react-router-dom';
3
+ import { Code2, Settings, Trash2, FolderOpen } from 'lucide-react';
4
+ import type { Feature, FeatureGroup } from '../../types';
5
+
6
/** Props accepted by `FeatureCard`. */
interface FeatureCardProps {
  /** Feature whose name, description, columns and update date are shown. */
  feature: Feature;
  /** Group shown (and linked) in the card footer. */
  group: FeatureGroup;
}
10
+
11
/**
 * Summary card for a single feature: name, description, input and output
 * column badges, a link to its group, and the last-updated date.
 *
 * The card previously rendered `<Link>` from react-router-dom, but that import
 * is commented out in this file, so rendering threw a ReferenceError. Plain
 * anchors are used instead, which need no import.
 * NOTE(review): if the app uses a client-side router, swap the anchors for
 * that router's link component and confirm the paths against its base URL.
 *
 * The delete button has no click handler; it is now `type="button"` so it
 * cannot submit an enclosing form.
 *
 * @param feature Feature to display.
 * @param group   Group shown in the footer.
 */
export function FeatureCard({ feature, group }: FeatureCardProps) {
  return (
    <div className="bg-white rounded-lg shadow-md p-6 hover:shadow-lg transition-shadow">
      <div className="flex justify-between items-start mb-4">
        <div className="flex items-start gap-3">
          <Code2 className="w-5 h-5 text-blue-600 mt-1" />
          <div>
            <h3 className="text-lg font-semibold text-gray-900">
              {feature.name}
            </h3>
            <p className="text-sm text-gray-500 mt-1">
              {feature.description}
            </p>
          </div>
        </div>
        <div className="flex gap-2">
          <a
            href={`/features/${feature.id}/edit`}
            className="text-gray-400 hover:text-blue-600 transition-colors"
            title="Edit feature"
          >
            <Settings className="w-5 h-5" />
          </a>
          <button
            type="button"
            className="text-gray-400 hover:text-red-600 transition-colors"
            title="Delete feature"
          >
            <Trash2 className="w-5 h-5" />
          </button>
        </div>
      </div>

      <div className="grid grid-cols-2 gap-4 mt-4">
        <div>
          <span className="text-sm text-gray-500">Input Columns</span>
          <div className="flex flex-wrap gap-2 mt-1">
            {feature.inputColumns.map((column) => (
              <span
                key={column}
                className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 text-gray-800"
              >
                {column}
              </span>
            ))}
          </div>
        </div>
        <div>
          <span className="text-sm text-gray-500">Output Columns</span>
          <div className="flex flex-wrap gap-2 mt-1">
            {feature.outputColumns.map((column) => (
              <span
                key={column}
                className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800"
              >
                {column}
              </span>
            ))}
          </div>
        </div>
      </div>

      <div className="mt-4 pt-4 border-t border-gray-100">
        <div className="flex items-center justify-between">
          <a
            href={`/features/groups/${group.id}`}
            className="flex items-center gap-2 text-sm text-gray-500 hover:text-gray-700"
          >
            <FolderOpen className="w-4 h-4" />
            {group.name}
          </a>
          <span className="text-sm text-gray-500">
            Last updated {new Date(feature.updatedAt).toLocaleDateString()}
          </span>
        </div>
      </div>
    </div>
  );
}