easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,168 @@
1
+ import type { ColumnType } from '../../../types/datasource';
2
+ import type { Datasource } from '../types/datasource';
3
+
4
/** Props for the new-dataset form: the selectable datasources plus shared constants. */
export type NewDatasetFormProps = {
  datasources: Datasource[];
  constants: {
    columns: ColumnType[];
  };
};

/** Identifier of every splitting strategy known to the UI. */
export type SplitterType =
  | 'date'
  | 'random'
  | 'predefined'
  | 'stratified'
  | 'stratified_kfold'
  | 'group_kfold'
  | 'group_shuffle'
  | 'leave_p_out';

/** Configuration checked by `validateDateSplitter`. */
export interface DateSplitConfig {
  date_column: string;
  months_test: number;
  months_valid: number;
}

/** Configuration checked by `validateRandomSplitter`; every field is optional. */
export interface RandomSplitConfig {
  train_ratio?: number;
  test_ratio?: number;
  valid_ratio?: number;
  seed?: number;
}

/**
 * Configuration for the 'predefined' splitter: one file list per split.
 * (Previously declared twice with identical members; the redundant copy was removed.)
 */
export interface PredefinedSplitConfig {
  train_files: string[];
  test_files: string[];
  valid_files: string[];
}

/** Name/type pair describing a single column. */
export interface ColumnConfig {
  name: string;
  type: string;
}

/** Configuration checked by `validateStratifiedSplitter`. */
export interface StratifiedSplitConfig {
  stratify_column: string;
  train_ratio: number;
  test_ratio: number;
  valid_ratio: number;
}

/** Configuration checked by `validateKFoldSplitter`. */
export interface KFoldConfig {
  n_splits: number;
  shuffle: boolean;
  random_state?: number;
}

/** Configuration checked by `validateLeavePOutSplitter`. */
export interface LeavePOutConfig {
  p: number;
  shuffle: boolean;
  random_state?: number;
}

/**
 * Union of every configuration shape accepted by `validateSplitterConfig`.
 * `RandomSplitConfig` is part of the union so a random split carrying explicit
 * ratios can be passed without a cast; the empty-record member is kept for
 * callers that pass `{}`.
 */
export type SplitConfig =
  | DateSplitConfig
  | RandomSplitConfig
  | PredefinedSplitConfig
  | StratifiedSplitConfig
  | KFoldConfig
  | LeavePOutConfig
  | Record<string, never>; // Empty config (random split with no explicit values)

/** Outcome of a validator: `error` is set only when `isValid` is false. */
export interface ValidationResult {
  isValid: boolean;
  error?: string;
}
81
+
82
+ // Validation functions for each splitter type
83
/**
 * Validates a date-based split configuration.
 *
 * Checks, in order: a date column is selected, `months_test` is present and
 * positive, `months_valid` is present and positive. The first failing check
 * determines the returned error message.
 */
export const validateDateSplitter = (config: DateSplitConfig): ValidationResult => {
  const reject = (error: string): ValidationResult => ({ isValid: false, error });
  // Mirrors the "falsy or <= 0" rule applied to both month counts.
  const missingOrNonPositive = (months: number): boolean => !months || months <= 0;

  if (!config.date_column) return reject("Please select a date column");
  if (missingOrNonPositive(config.months_test)) return reject("Test months must be greater than 0");
  if (missingOrNonPositive(config.months_valid)) return reject("Validation months must be greater than 0");

  return { isValid: true };
};
95
+
96
/**
 * Validates a random split configuration.
 *
 * Missing ratios fall back to 0.6 (train), 0.2 (test) and 0.2 (valid). The
 * configuration is rejected when the ratios differ from 1.0 by 0.001 or more.
 */
export const validateRandomSplitter = (config: RandomSplitConfig): ValidationResult => {
  const trainShare = config.train_ratio ?? 0.6;
  const testShare = config.test_ratio ?? 0.2;
  const validShare = config.valid_ratio ?? 0.2;
  // Summed left to right in train/test/valid order.
  const ratioSum = trainShare + testShare + validShare;
  const deviation = Math.abs(ratioSum - 1.0);

  if (deviation >= 0.001) {
    const error = `Split ratios must sum to 1.0 (current sum: ${ratioSum.toFixed(2)})`;
    return { isValid: false, error };
  }

  return { isValid: true };
};
109
+
110
/**
 * Validates a predefined (file-based) split configuration.
 *
 * The configuration is valid when at least one file is listed in any of
 * `train_files`, `test_files` or `valid_files`. The previous implementation
 * read `config.files`, a property `PredefinedSplitConfig` does not declare,
 * so correctly typed configs were always rejected. A `files` array is still
 * honoured when present so callers sending that shape keep working.
 *
 * @param config Predefined split configuration.
 * @returns `{ isValid: true }`, or `{ isValid: false, error }` when no file is selected.
 */
export const validatePredefinedSplitter = (config: PredefinedSplitConfig): ValidationResult => {
  // `files` is not part of the declared type; it is tolerated for legacy callers only.
  const { train_files, test_files, valid_files, files } =
    config as Partial<PredefinedSplitConfig> & { files?: unknown[] };

  const fileLists: Array<unknown[] | undefined> = [train_files, test_files, valid_files, files];
  const selectedCount = fileLists.reduce<number>(
    (count, list) => count + (list?.length ?? 0),
    0
  );

  if (selectedCount === 0) {
    return { isValid: false, error: "Please select at least one file for splitting" };
  }
  return { isValid: true };
};
116
+
117
/**
 * Validates a stratified split configuration.
 *
 * A stratify column must be selected, and the three ratios (each treated as 0
 * when null/undefined) must differ from 1.0 by less than 0.001.
 */
export const validateStratifiedSplitter = (config: StratifiedSplitConfig): ValidationResult => {
  const hasColumn = Boolean(config.stratify_column);
  if (!hasColumn) {
    return { isValid: false, error: "Please select a column to stratify on" };
  }

  const trainShare = config.train_ratio ?? 0;
  const testShare = config.test_ratio ?? 0;
  const validShare = config.valid_ratio ?? 0;
  // Summed left to right in train/test/valid order.
  const ratioSum = trainShare + testShare + validShare;
  const deviation = Math.abs(ratioSum - 1.0);

  if (deviation >= 0.001) {
    const error = `Split ratios must sum to 1.0 (current sum: ${ratioSum.toFixed(2)})`;
    return { isValid: false, error };
  }

  return { isValid: true };
};
134
+
135
/**
 * Validates a k-fold configuration: `n_splits` must be present and greater than 1.
 */
export const validateKFoldSplitter = (config: KFoldConfig): ValidationResult => {
  const folds = config.n_splits;
  const tooFewFolds = !folds || folds <= 1;

  return tooFewFolds
    ? { isValid: false, error: "Number of splits must be greater than 1" }
    : { isValid: true };
};
141
+
142
/**
 * Validates a leave-p-out configuration: `p` must be present and greater than 0.
 */
export const validateLeavePOutSplitter = (config: LeavePOutConfig): ValidationResult => {
  const heldOut = config.p;
  const missingOrNonPositive = !heldOut || heldOut <= 0;

  return missingOrNonPositive
    ? { isValid: false, error: "P value must be greater than 0" }
    : { isValid: true };
};
148
+
149
+ // Main validation function
150
/**
 * Routes a configuration to the validator matching the splitter type.
 *
 * 'stratified_kfold' and 'group_kfold' share the k-fold validator. Any other
 * value, including 'group_shuffle', which has no dedicated branch, yields
 * "Invalid splitter type".
 */
export const validateSplitterConfig = (type: SplitterType, config: SplitConfig): ValidationResult => {
  if (type === 'date') {
    return validateDateSplitter(config as DateSplitConfig);
  }
  if (type === 'random') {
    return validateRandomSplitter(config as RandomSplitConfig);
  }
  if (type === 'predefined') {
    return validatePredefinedSplitter(config as PredefinedSplitConfig);
  }
  if (type === 'stratified') {
    return validateStratifiedSplitter(config as StratifiedSplitConfig);
  }
  if (type === 'stratified_kfold' || type === 'group_kfold') {
    return validateKFoldSplitter(config as KFoldConfig);
  }
  if (type === 'leave_p_out') {
    return validateLeavePOutSplitter(config as LeavePOutConfig);
  }

  // NOTE(review): 'group_shuffle' is a declared SplitterType but lands here — confirm whether that is intended.
  return { isValid: false, error: "Invalid splitter type" };
};
@@ -0,0 +1,53 @@
1
+ import type { Column } from '../../types';
2
+
3
/**
 * Collects the names of all columns whose `type` is exactly 'datetime',
 * preserving their order in `columns`.
 */
export function getDateColumns(columns: Column[]): string[] {
  return columns.reduce<string[]>((names, column) => {
    if (column.type === 'datetime') {
      names.push(column.name);
    }
    return names;
  }, []);
}
8
+
9
/**
 * Collects human-readable validation errors for a split configuration.
 *
 * Unlike a first-failure validator, every failing rule for the given type
 * contributes a message, in rule order. Types with no rules (anything not
 * handled below) produce an empty list.
 */
export function validateSplitConfig(type: string, config: any): string[] {
  const problems: string[] = [];

  // True when at least one selected file is tagged with the given kind.
  const hasFileOfKind = (kind: string): boolean =>
    Boolean(config.files?.some((file: any) => file.type === kind));

  if (type === 'date') {
    if (!config.dateColumn) {
      problems.push('Date column is required');
    }
    if (config.monthsTest < 1) {
      problems.push('Test set must be at least 1 month');
    }
    if (config.monthsValid < 1) {
      problems.push('Validation set must be at least 1 month');
    }
  } else if (type === 'predefined') {
    if (!config.files?.length) {
      problems.push('At least one file must be selected');
    }
    if (!hasFileOfKind('train')) {
      problems.push('Training set file is required');
    }
    if (!hasFileOfKind('test')) {
      problems.push('Test set file is required');
    }
  } else if (type === 'stratified' || type === 'stratified_kfold') {
    if (!config.targetColumn) {
      problems.push('Target column is required');
    }
  } else if (type === 'group_kfold' || type === 'group_shuffle') {
    if (!config.groupColumn) {
      problems.push('Group column is required');
    }
  }

  return problems;
}
@@ -0,0 +1,46 @@
1
+ import React from 'react';
2
+ import { AlertCircle } from 'lucide-react';
3
+
4
/** Props for {@link CodeEditor}. */
interface CodeEditorProps {
  /** Current editor contents. */
  value: string;
  /** Called with the full new contents whenever the text changes. */
  onChange: (value: string) => void;
  /** Label rendered in the badge at the top-right of the editor header. */
  language: string;
}

/**
 * Plain textarea-based code editor with a header bar and a guidelines panel.
 *
 * The component is controlled: it renders `value` and reports edits through
 * `onChange`. The guidelines now name the function 'transform', matching the
 * `def transform(df)` skeleton shown in the placeholder (they previously said
 * 'feature', contradicting the example directly above).
 */
export function CodeEditor({ value, onChange, language }: CodeEditorProps) {
  return (
    <div className="space-y-4">
      <div className="bg-gray-900 rounded-lg overflow-hidden">
        <div className="flex items-center justify-between px-4 py-2 bg-gray-800">
          <span className="text-sm text-gray-400">Ruby Feature</span>
          <span className="text-xs px-2 py-1 bg-gray-700 rounded text-gray-300">
            {language}
          </span>
        </div>
        <textarea
          value={value}
          onChange={(e) => onChange(e.target.value)}
          className="w-full h-64 p-4 bg-gray-900 text-gray-100 font-mono text-sm focus:outline-none"
          placeholder={`def transform(df)\n # Your feature code here\n # Example:\n # df["column"] = df["column"].map { |value| value.upcase }\n df\nend`}
          spellCheck={false}
        />
      </div>

      <div className="bg-blue-50 rounded-lg p-4">
        <div className="flex gap-2">
          <AlertCircle className="w-5 h-5 text-blue-500 flex-shrink-0" />
          <div className="text-sm text-blue-700">
            <p className="font-medium mb-1">Feature Guidelines</p>
            <ul className="list-disc pl-4 space-y-1">
              <li>The function must be named 'transform'</li>
              <li>It should accept a DataFrame as its only parameter</li>
              <li>All operations should be performed on the DataFrame object</li>
              <li>The function must return the modified DataFrame</li>
              <li>Use standard Ruby syntax and DataFrame operations</li>
            </ul>
          </div>
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,150 @@
1
+ import React, { useState } from 'react';
2
+ import { Play, AlertTriangle } from 'lucide-react';
3
+ import type { Dataset } from '../../types';
4
+
5
/** Props for {@link DataPreview}. */
interface DataPreviewProps {
  /** Dataset whose `sampleData` rows are displayed. */
  dataset: Dataset;
  /** Feature source code; accepted but not yet used by the simulated run. */
  code: string;
  /** Column names shown in the "Input Data" table. */
  inputColumns: string[];
  /** Column names shown in the output table once a preview has run. */
  outputColumns: string[];
}

/**
 * Side-by-side preview of a dataset's sample rows before and after a feature runs.
 *
 * Execution is simulated: pressing "Run Preview" waits one second and then shows
 * the unmodified sample data in the output pane.
 *
 * Changes from the previous version: the output heading typo "Featureed Data"
 * is corrected to "Transformed Data", the running flag is reset in `finally`
 * rather than duplicated in both branches, and the two identical table layouts
 * share one local renderer.
 */
export function DataPreview({ dataset, code, inputColumns, outputColumns }: DataPreviewProps) {
  const [isRunning, setIsRunning] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [previewData, setPreviewData] = useState<Record<string, any>[] | null>(null);

  const runFeature = () => {
    setIsRunning(true);
    setError(null);

    // Simulate feature execution
    setTimeout(() => {
      try {
        // In a real implementation, this would execute the Ruby code
        // For now, we'll just show the original data
        setPreviewData(dataset.sampleData);
      } catch (err) {
        setError(err instanceof Error ? err.message : 'An error occurred');
      } finally {
        // Always leave the running state, whether the preview succeeded or failed.
        setIsRunning(false);
      }
    }, 1000);
  };

  // Renders one table: a header cell per column and one row per data record.
  const renderTable = (columns: string[], rows: Record<string, any>[]) => (
    <table className="min-w-full divide-y divide-gray-200">
      <thead className="bg-gray-50">
        <tr>
          {columns.map((column) => (
            <th
              key={column}
              className="px-4 py-2 text-left text-xs font-medium text-gray-500 uppercase tracking-wider"
            >
              {column}
            </th>
          ))}
        </tr>
      </thead>
      <tbody className="bg-white divide-y divide-gray-200">
        {rows.map((row, rowIndex) => (
          <tr key={rowIndex}>
            {columns.map((column) => (
              <td
                key={column}
                className="px-4 py-2 text-sm text-gray-900 whitespace-nowrap"
              >
                {String(row[column])}
              </td>
            ))}
          </tr>
        ))}
      </tbody>
    </table>
  );

  return (
    <div className="space-y-4">
      <div className="flex justify-between items-center">
        <h4 className="text-sm font-medium text-gray-900">Data Preview</h4>
        <button
          onClick={runFeature}
          disabled={isRunning}
          className="inline-flex items-center gap-2 px-3 py-1.5 bg-blue-600 text-white text-sm font-medium rounded-md hover:bg-blue-700 disabled:opacity-50"
        >
          <Play className="w-4 h-4" />
          {isRunning ? 'Running...' : 'Run Preview'}
        </button>
      </div>

      {error && (
        <div className="bg-red-50 border border-red-200 rounded-lg p-4">
          <div className="flex items-start gap-2">
            <AlertTriangle className="w-5 h-5 text-red-500 mt-0.5" />
            <div>
              <h4 className="text-sm font-medium text-red-800">
                Feature Error
              </h4>
              <pre className="mt-1 text-sm text-red-700 whitespace-pre-wrap font-mono">
                {error}
              </pre>
            </div>
          </div>
        </div>
      )}

      <div className="border border-gray-200 rounded-lg overflow-hidden">
        <div className="grid grid-cols-2 divide-x divide-gray-200">
          <div>
            <div className="px-4 py-2 bg-gray-50 border-b border-gray-200">
              <h5 className="text-sm font-medium text-gray-700">Input Data</h5>
            </div>
            <div className="overflow-x-auto">
              {renderTable(inputColumns, dataset.sampleData)}
            </div>
          </div>

          <div>
            <div className="px-4 py-2 bg-gray-50 border-b border-gray-200">
              <h5 className="text-sm font-medium text-gray-700">
                {previewData ? 'Transformed Data' : 'Output Preview'}
              </h5>
            </div>
            <div className="overflow-x-auto">
              {previewData ? (
                renderTable(outputColumns, previewData)
              ) : (
                <div className="p-8 text-center text-sm text-gray-500">
                  Run the feature to see the preview
                </div>
              )}
            </div>
          </div>
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,88 @@
1
+ import React from 'react';
2
+ // import { Link } from 'react-router-dom';
3
+ import { Code2, Settings, Trash2, FolderOpen } from 'lucide-react';
4
+ import type { Feature, FeatureGroup } from '../../types';
5
+
6
/** Props for {@link FeatureCard}. */
interface FeatureCardProps {
  /** Feature whose name, description, columns and update date are shown. */
  feature: Feature;
  /** Group the feature belongs to; linked in the card footer. */
  group: FeatureGroup;
}

/**
 * Summary card for a single feature: title and description, edit/delete
 * controls, input and output column chips, and a footer with the owning group
 * and last-updated date.
 *
 * Navigation uses plain `<a href>` anchors. The previous markup rendered
 * `<Link to=...>`, but the only `Link` import in this file is commented out,
 * so the identifier was undefined. Anchors keep the same target paths without
 * adding a router dependency.
 * NOTE(review): if the app has a client-side router link component, swap it in
 * here to avoid full page loads — confirm which one the project uses.
 */
export function FeatureCard({ feature, group }: FeatureCardProps) {
  return (
    <div className="bg-white rounded-lg shadow-md p-6 hover:shadow-lg transition-shadow">
      <div className="flex justify-between items-start mb-4">
        <div className="flex items-start gap-3">
          <Code2 className="w-5 h-5 text-blue-600 mt-1" />
          <div>
            <h3 className="text-lg font-semibold text-gray-900">
              {feature.name}
            </h3>
            <p className="text-sm text-gray-500 mt-1">
              {feature.description}
            </p>
          </div>
        </div>
        <div className="flex gap-2">
          <a
            href={`/features/${feature.id}/edit`}
            className="text-gray-400 hover:text-blue-600 transition-colors"
            title="Edit feature"
          >
            <Settings className="w-5 h-5" />
          </a>
          <button
            className="text-gray-400 hover:text-red-600 transition-colors"
            title="Delete feature"
          >
            <Trash2 className="w-5 h-5" />
          </button>
        </div>
      </div>

      <div className="grid grid-cols-2 gap-4 mt-4">
        <div>
          <span className="text-sm text-gray-500">Input Columns</span>
          <div className="flex flex-wrap gap-2 mt-1">
            {feature.inputColumns.map((column) => (
              <span
                key={column}
                className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 text-gray-800"
              >
                {column}
              </span>
            ))}
          </div>
        </div>
        <div>
          <span className="text-sm text-gray-500">Output Columns</span>
          <div className="flex flex-wrap gap-2 mt-1">
            {feature.outputColumns.map((column) => (
              <span
                key={column}
                className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800"
              >
                {column}
              </span>
            ))}
          </div>
        </div>
      </div>

      <div className="mt-4 pt-4 border-t border-gray-100">
        <div className="flex items-center justify-between">
          <a
            href={`/features/groups/${group.id}`}
            className="flex items-center gap-2 text-sm text-gray-500 hover:text-gray-700"
          >
            <FolderOpen className="w-4 h-4" />
            {group.name}
          </a>
          <span className="text-sm text-gray-500">
            Last updated {new Date(feature.updatedAt).toLocaleDateString()}
          </span>
        </div>
      </div>
    </div>
  );
}