easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239)
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,704 @@
1
+ import React, { useState, useEffect } from 'react';
2
+ import { Settings2, Wrench, ArrowRight, Pencil, Trash2, Database } from 'lucide-react';
3
+ import type { Dataset, Column, ColumnType, PreprocessingConstants, PreprocessingSteps, PreprocessingStep } from '../../types/dataset';
4
+ import { Badge } from "@/components/ui/badge";
5
+
6
/**
 * Callback fired after every preprocessing edit made in the component.
 * `inference` is `undefined` when a training-side edit is reported while the
 * distinct-inference option is switched off.
 */
type PreprocessingUpdateHandler = (
  training: PreprocessingStep,
  inference: PreprocessingStep | undefined,
  useDistinctInference: boolean
) => void;

/** Props accepted by the PreprocessingConfig component. */
interface PreprocessingConfigProps {
  /** Column whose preprocessing is being configured. */
  column: Column;
  /** Dataset owning the column; a copy with updated `columns` is sent to `setDataset`. */
  dataset: Dataset;
  /** NOTE(review): not referenced in the visible part of the component — confirm it is still needed. */
  setColumnType: (columnName: string, columnType: string) => void;
  /** Receives the replacement dataset when a column's description or drop_if_null flag changes. */
  setDataset: (dataset: Dataset) => void;
  /** Static option lists; `column_types` feeds the (disabled) column type selector. */
  constants: PreprocessingConstants;
  /** Notified with the current training/inference steps after each strategy or parameter edit. */
  onUpdate: PreprocessingUpdateHandler;
}
18
+
19
/** Reports whether a column type is numeric, i.e. `float` or `integer`. */
const isNumericType = (type: ColumnType): boolean => {
  switch (type) {
    case 'float':
    case 'integer':
      return true;
    default:
      return false;
  }
};
21
+
22
/**
 * Builds a fully keyed preprocessing step from an optional, possibly partial one.
 *
 * A missing or empty `method` becomes `'none'`. Missing params fall back to
 * `categorical_min: 100`, `one_hot: true` and `ordinal_encoding: false`;
 * `constant` and `clip` are copied through as-is (possibly `undefined`).
 *
 * @param steps Existing step to normalise, if any.
 * @returns A new step object; the input is not mutated.
 */
const createPreprocessingStep = (steps?: PreprocessingStep): PreprocessingStep => {
  const given = steps?.params;
  const params: PreprocessingStep['params'] = {
    constant: given?.constant,
    categorical_min: given?.categorical_min ?? 100,
    one_hot: given?.one_hot ?? true,
    ordinal_encoding: given?.ordinal_encoding ?? false,
    clip: given?.clip
  };
  return { method: steps?.method || 'none', params };
};
32
+
33
+ export function PreprocessingConfig({
34
+ column,
35
+ dataset,
36
+ setColumnType,
37
+ setDataset,
38
+ constants,
39
+ onUpdate
40
+ }: PreprocessingConfigProps) {
41
// True when the current column already carries an inference step other than 'none'.
const columnHasDistinctInference = Boolean(
  column.preprocessing_steps?.inference?.method &&
  column.preprocessing_steps.inference.method !== 'none'
);

// Backs the "distinct inference strategy" checkbox.
const [useDistinctInference, setUseDistinctInference] = useState(columnHasDistinctInference);

const selectedType = column.datatype as ColumnType;

// Local working copies of the column's training and inference steps.
const [training, setTraining] = useState<PreprocessingStep>(() =>
  createPreprocessingStep(column.preprocessing_steps?.training)
);

const [inference, setInference] = useState<PreprocessingStep>(() =>
  createPreprocessingStep(column.preprocessing_steps?.inference)
);

// Update all states when column changes. The checkbox flag is re-derived too;
// previously it kept the value belonging to the column that was shown before.
useEffect(() => {
  setUseDistinctInference(columnHasDistinctInference);
  setTraining(createPreprocessingStep(column.preprocessing_steps?.training));
  setInference(createPreprocessingStep(column.preprocessing_steps?.inference));
}, [column.id]); // Only re-run when column changes
61
+
62
/**
 * Switches the training or inference strategy to `method`.
 *
 * The strategy's params are replaced (not merged) with defaults chosen from the
 * column type, the method and whether the column is the target; the result is
 * stored locally and reported through `onUpdate`.
 *
 * @param type   Which strategy to replace.
 * @param method The newly selected preprocessing method.
 */
const handleStrategyChange = (
  type: 'training' | 'inference',
  method: PreprocessingStep['method']
) => {
  let defaultParams: PreprocessingStep['params'] = {};

  if (selectedType === 'categorical') {
    if (method === 'categorical') {
      defaultParams = {
        ...defaultParams,
        categorical_min: 100,
        one_hot: true
      };
    } else if (method !== 'none') {
      // Strict comparison: both sides are strings, no coercion is wanted.
      defaultParams = {
        ...defaultParams,
        one_hot: true
      };
    }
  }

  if (column.is_target) {
    defaultParams = {
      ...defaultParams,
      ordinal_encoding: true
    };
  }

  const newStrategy: PreprocessingStep = {
    method,
    params: defaultParams
  };

  if (type === 'training') {
    setTraining(newStrategy);
    // Inference is only forwarded when the user opted into a distinct inference strategy.
    onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference);
  } else {
    setInference(newStrategy);
    onUpdate(training, newStrategy, useDistinctInference);
  }
};
103
+
104
/**
 * Applies a partial update to the categorical params of the training or
 * inference strategy and reports the result through `onUpdate`.
 *
 * All existing params are carried over before `updates` is applied, so
 * `constant` and `clip` survive a categorical edit. This matches
 * handleClipChange and handleConstantValueChange, which also spread
 * `strategy.params`.
 *
 * @param type    Which strategy to modify.
 * @param updates Param values to overwrite.
 */
const handleCategoricalParamChange = (
  type: 'training' | 'inference',
  updates: Partial<PreprocessingStep['params']>
) => {
  const strategy = type === 'training' ? training : inference;
  const setStrategy = type === 'training' ? setTraining : setInference;

  const newStrategy: PreprocessingStep = {
    ...strategy,
    params: {
      ...strategy.params,
      ...updates
    }
  };

  setStrategy(newStrategy);
  if (type === 'training') {
    // Inference is only forwarded when the user opted into a distinct inference strategy.
    onUpdate(newStrategy, useDistinctInference ? inference : undefined, useDistinctInference);
  } else {
    onUpdate(training, newStrategy, useDistinctInference);
  }
};
129
+
130
/**
 * Merges `clipUpdates` into the `clip` param of the training or inference
 * strategy, stores the result locally and reports it through `onUpdate`.
 *
 * @param type        Which strategy to modify.
 * @param clipUpdates New `min` and/or `max` bounds; omitted bounds are kept.
 */
const handleClipChange = (
  type: 'training' | 'inference',
  clipUpdates: Partial<{ min?: number; max?: number }>
) => {
  const editingTraining = type === 'training';
  const current = editingTraining ? training : inference;

  const mergedClip = { ...current.params.clip, ...clipUpdates };
  const updated: PreprocessingStep = {
    ...current,
    params: { ...current.params, clip: mergedClip }
  };

  if (editingTraining) {
    setTraining(updated);
    // Inference is only forwarded when a distinct inference strategy is enabled.
    onUpdate(updated, useDistinctInference ? inference : undefined, useDistinctInference);
    return;
  }

  setInference(updated);
  onUpdate(training, updated, useDistinctInference);
};
156
+
157
/**
 * Stores `value` as the `constant` param of the training or inference
 * strategy and reports the result through `onUpdate`.
 *
 * @param type  Which strategy to modify.
 * @param value Raw input text; it is stored as a string even for numeric columns.
 */
const handleConstantValueChange = (
  type: 'training' | 'inference',
  value: string
) => {
  const editingTraining = type === 'training';
  const current = editingTraining ? training : inference;

  const updated: PreprocessingStep = {
    ...current,
    params: { ...current.params, constant: value }
  };

  if (editingTraining) {
    setTraining(updated);
    // Inference is only forwarded when a distinct inference strategy is enabled.
    onUpdate(updated, useDistinctInference ? inference : undefined, useDistinctInference);
    return;
  }

  setInference(updated);
  onUpdate(training, updated, useDistinctInference);
};
179
+
180
/**
 * Renders the "Constant Value" field for the given strategy, or nothing when
 * that strategy's method is not `constant`. Numeric columns get a number
 * input, every other column type a text input.
 */
const renderConstantValueInput = (type: 'training' | 'inference') => {
  const strategy = type === 'training' ? training : inference;
  if (strategy.method !== 'constant') return null;

  const numeric = isNumericType(selectedType);

  return (
    <div className="mt-4">
      <label className="block text-sm font-medium text-gray-700 mb-1">
        Constant Value
      </label>
      <input
        type={numeric ? 'number' : 'text'}
        value={strategy.params?.constant ?? ''}
        onChange={(e) => handleConstantValueChange(type, e.target.value)}
        className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        placeholder={numeric ? 'Enter a number...' : 'Enter a value...'}
      />
    </div>
  );
};
209
+
210
+ const [isEditingDescription, setIsEditingDescription] = useState(false);
211
+
212
/**
 * Checkbox handler: sets `drop_if_null` on the column whose name matches the
 * current column and hands a copy of the dataset with the new column list to
 * `setDataset`.
 */
const onToggleDropIfNull = (e: React.ChangeEvent<HTMLInputElement>) => {
  const shouldDrop = e.target.checked;

  const columns = dataset.columns.map(c => {
    const drop_if_null = c.name === column.name ? shouldDrop : c.drop_if_null;
    return { ...c, drop_if_null };
  });

  setDataset({ ...dataset, columns });
};
223
+
224
/**
 * Textarea handler: writes the typed text into the `description` of the column
 * whose name matches the current column and hands a copy of the dataset with
 * the new column list to `setDataset`. Runs on every change event.
 */
const handleDescriptionChange = (e: React.ChangeEvent<HTMLTextAreaElement>) => {
  const text = e.target.value;

  const columns = dataset.columns.map(c => {
    const description = c.name === column.name ? text : c.description;
    return { ...c, description };
  });

  setDataset({ ...dataset, columns });
};
235
+
236
/** Leaves description edit mode; the text itself is already stored on each change event. */
const handleDescriptionSave = () => setIsEditingDescription(false);
239
+
240
/**
 * Keyboard handler for the description editor: Enter (default action
 * suppressed) and Escape both leave edit mode; other keys are ignored.
 *
 * NOTE(review): Escape only closes the editor. Text typed so far has already
 * been pushed to the dataset by the change handler, so nothing is reverted —
 * confirm whether "cancel" should restore the previous description.
 */
const handleDescriptionKeyDown = (e: React.KeyboardEvent) => {
  switch (e.key) {
    case 'Enter':
      e.preventDefault();
      setIsEditingDescription(false);
      break;
    case 'Escape':
      setIsEditingDescription(false);
      break;
  }
};
248
+
249
/** Enters description edit mode. */
const handleDescriptionClick = () => setIsEditingDescription(true);
252
+
253
// Null count behind `nullPercentage`: the processed count when it is non-zero,
// otherwise the raw count, otherwise 0. `processed` and `raw` are accessed with
// optional chaining so a column that has statistics but lacks either section
// does not throw during render.
// NOTE(review): the "Raw Data Statistics" panel displays this percentage next to
// the raw null count, so the two can disagree when the processed count is non-zero.
const nullCount =
  (column.statistics?.processed?.null_count || column.statistics?.raw?.null_count) || 0;

// Share of null values relative to the raw row count, as a percentage (0 when unknown).
const nullPercentage = nullCount && column.statistics?.raw?.num_rows
  ? ((nullCount / column.statistics.raw.num_rows) * 100)
  : 0;

// Same ratio using only the processed null count; the denominator is still the raw row count.
const nullPercentageProcessed = column.statistics?.processed?.null_count && column.statistics?.raw?.num_rows
  ? ((column.statistics.processed.null_count / column.statistics.raw.num_rows) * 100)
  : 0;

// Raw row count shown in the statistics panel; 0 when statistics are unavailable.
const totalRows = column.statistics?.raw?.num_rows ?? 0;
263
+
264
/**
 * Renders a highlighted note with the raw statistic that the selected strategy
 * relies on (most frequent value, last value, median or mean).
 *
 * Returns `null` when the method has no matching statistic or the statistic is
 * missing. Presence is tested with `!= null` rather than truthiness so that
 * legitimate values such as a mean or median of `0` are still displayed.
 *
 * @param type Which strategy to describe.
 */
const renderStrategySpecificInfo = (type: 'training' | 'inference') => {
  const strategy = type === 'training' ? training : inference;
  const raw = column.statistics?.raw;

  let content: string;
  if (strategy.method === 'most_frequent' && raw?.most_frequent_value != null) {
    content = `Most Frequent Value: ${raw.most_frequent_value}`;
  } else if (strategy.method === 'ffill' && raw?.last_value != null) {
    content = `Last Value: ${raw.last_value}`;
  } else if (strategy.method === 'median' && raw?.median != null) {
    content = `Median: ${raw.median}`;
  } else if (strategy.method === 'mean' && raw?.mean != null) {
    content = `Mean: ${raw.mean}`;
  } else {
    return null;
  }

  return (
    <div className="mt-4 bg-yellow-50 rounded-lg p-4">
      <span className="text-sm font-medium text-yellow-700">
        {content}
      </span>
    </div>
  );
};
286
+
287
+ return (
288
+ <div className="space-y-8">
289
+ {/* Column Header Section */}
290
+ <div className="bg-white rounded-lg border border-gray-200 p-6">
291
+ <div className="flex items-center justify-between mb-4">
292
+ <div className="flex-1 max-w-[70%]">
293
+ <h2 className="text-2xl font-semibold text-gray-900">{column.name}</h2>
294
+ <div className="mt-1 flex items-start gap-1">
295
+ {isEditingDescription ? (
296
+ <div className="flex-1">
297
+ <textarea
298
+ value={column.description || ''}
299
+ onChange={handleDescriptionChange}
300
+ onBlur={handleDescriptionSave}
301
+ onKeyDown={handleDescriptionKeyDown}
302
+ className="w-full px-2 py-1 text-sm text-gray-900 border border-blue-500 rounded-md focus:outline-none focus:ring-2 focus:ring-blue-500"
303
+ rows={2}
304
+ autoFocus
305
+ placeholder="Enter column description..."
306
+ />
307
+ <p className="mt-1 text-xs text-gray-500">
308
+ Press Enter to save, Escape to cancel
309
+ </p>
310
+ </div>
311
+ ) : (
312
+ <div className="flex-3/4 flex items-start gap-1">
313
+ <p
314
+ className="text-sm text-gray-500 cursor-pointer flex-grow truncate"
315
+ onClick={handleDescriptionClick}
316
+ >
317
+ {column.description || 'No description provided'}
318
+ </p>
319
+ <button
320
+ onClick={handleDescriptionClick}
321
+ className="p-1 text-gray-400 hover:text-gray-600 rounded-md hover:bg-gray-100 flex-shrink-0"
322
+ >
323
+ <Pencil className="w-4 h-4" />
324
+ </button>
325
+ </div>
326
+ )}
327
+ </div>
328
+ </div>
329
+ <div className="flex items-center gap-4 flex-shrink-0">
330
+ {column.is_target ? (
331
+ <span className="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-purple-100 text-purple-800">
332
+ Target Column
333
+ </span>
334
+ ) : (
335
+ <div className="flex items-center gap-2">
336
+ <label className="flex items-center gap-2 text-sm">
337
+ <input
338
+ type="checkbox"
339
+ checked={column.drop_if_null}
340
+ onChange={onToggleDropIfNull}
341
+ className="rounded border-gray-300 text-red-600 focus:ring-red-500"
342
+ />
343
+ <span className="flex items-center gap-1 text-gray-700">
344
+ <Trash2 className="w-4 h-4 text-gray-400" />
345
+ Drop if null
346
+ </span>
347
+ </label>
348
+ </div>
349
+ )}
350
+ </div>
351
+ </div>
352
+
353
+ {/* Null Value Statistics */}
354
+ <div className="mt-6 grid grid-cols-2 gap-6">
355
+ <div className="bg-gray-50 rounded-lg p-4">
356
+ <div className="flex items-center gap-2 mb-3">
357
+ <Database className="w-4 h-4 text-gray-500" />
358
+ <h3 className="text-sm font-medium text-gray-900">Raw Data Statistics</h3>
359
+ </div>
360
+ <div className="space-y-2">
361
+ <div className="flex justify-between text-sm">
362
+ <span className="text-gray-600">Null Values:</span>
363
+ <span className="font-medium text-gray-900">{column.statistics?.raw?.null_count.toLocaleString()}</span>
364
+ </div>
365
+ <div className="flex justify-between text-sm">
366
+ <span className="text-gray-600">Total Rows:</span>
367
+ <span className="font-medium text-gray-900">{totalRows.toLocaleString()}</span>
368
+ </div>
369
+ <div className="flex justify-between text-sm">
370
+ <span className="text-gray-600">Null Percentage:</span>
371
+ <span className="font-medium text-gray-900">{nullPercentage.toFixed(2)}%</span>
372
+ </div>
373
+ <div className="mt-2">
374
+ <div className="w-full h-2 bg-gray-200 rounded-full overflow-hidden">
375
+ <div
376
+ className="h-full bg-blue-600 rounded-full"
377
+ style={{ width: `${nullPercentage}%` }}
378
+ />
379
+ </div>
380
+ </div>
381
+ </div>
382
+ </div>
383
+
384
+ <div className="bg-gray-50 rounded-lg p-4">
385
+ <div className="flex items-center gap-2 mb-3">
386
+ <Wrench className="w-4 h-4 text-gray-500" />
387
+ <h3 className="text-sm font-medium text-gray-900">Processed Data Statistics</h3>
388
+ </div>
389
+ {dataset?.preprocessing_steps?.training ? (
390
+ <div className="space-y-2">
391
+ <div className="flex justify-between text-sm">
392
+ <span className="text-gray-600">Null Values:</span>
393
+ <span className="font-medium text-gray-900">{column.statistics?.processed?.null_count?.toLocaleString()}</span>
394
+ </div>
395
+ <div className="flex justify-between text-sm">
396
+ <span className="text-gray-600">Total Rows:</span>
397
+ <span className="font-medium text-gray-900">{column.statistics?.processed?.num_rows?.toLocaleString()}</span>
398
+ </div>
399
+ <div className="flex justify-between text-sm">
400
+ <span className="text-gray-600">Null Percentage:</span>
401
+ <span className="font-medium text-gray-900">{nullPercentageProcessed.toFixed(2)}%</span>
402
+ </div>
403
+ <div className="mt-2">
404
+ <div className="w-full h-2 bg-gray-200 rounded-full overflow-hidden">
405
+ <div
406
+ className="h-full bg-blue-600 rounded-full"
407
+ style={{ width: `${nullPercentageProcessed}%` }}
408
+ />
409
+ </div>
410
+ </div>
411
+ </div>
412
+ ) : (
413
+ <div className="text-sm text-gray-500 text-center py-2">
414
+ No preprocessing configured
415
+ </div>
416
+ )}
417
+ </div>
418
+ </div>
419
+
420
+ <div className="grid grid-cols-3 gap-4 mt-6">
421
+ <div className="bg-gray-50 rounded-lg p-4">
422
+ <span className="text-sm text-gray-500">Type</span>
423
+ <p className="text-lg font-medium text-gray-900 mt-1">{column.datatype}</p>
424
+ </div>
425
+ <div className="bg-gray-50 rounded-lg p-4">
426
+ <span className="text-sm text-gray-500">Unique Values</span>
427
+ <p className="text-lg font-medium text-gray-900 mt-1">
428
+ {column.statistics?.processed?.unique_count?.toLocaleString() ?? 'N/A'}
429
+ </p>
430
+ </div>
431
+ <div className="bg-gray-50 rounded-lg p-4">
432
+ <span className="text-sm text-gray-500">Null Values</span>
433
+ <p className="text-lg font-medium text-gray-900 mt-1">
434
+ {column.statistics?.processed?.null_count?.toLocaleString() ?? '0'}
435
+ </p>
436
+ </div>
437
+ </div>
438
+
439
+ {column.statistics?.processed.null_count ? (
440
+ <div className="mt-6">
441
+ <div className="flex items-center justify-between mb-2">
442
+ <span className="text-sm font-medium text-gray-700">Null Distribution</span>
443
+ <span className="text-sm text-gray-500">
444
+ {nullPercentage}% of values are null
445
+ </span>
446
+ </div>
447
+ <div className="relative h-2 bg-gray-100 rounded-full overflow-hidden">
448
+ <div
449
+ className="absolute top-0 left-0 h-full bg-yellow-400 rounded-full"
450
+ style={{ width: `${nullPercentage}%` }}
451
+ />
452
+ </div>
453
+ </div>
454
+ ) : (
455
+ <div className="mt-6 bg-green-50 rounded-lg p-4">
456
+ <div className="flex items-center gap-2">
457
+ <div className="w-2 h-2 bg-green-400 rounded-full" />
458
+ <span className="text-sm text-green-700">This column has no null values</span>
459
+ </div>
460
+ </div>
461
+ )}
462
+
463
+ {column.statistics?.raw?.sample_data && (
464
+ <div className="mt-6">
465
+ <h4 className="text-sm font-medium text-gray-700 mb-2">Sample Values</h4>
466
+ <div className="bg-gray-50 rounded-lg p-4">
467
+ <div className="flex flex-wrap gap-2">
468
+ {column.statistics?.raw?.sample_data && column.statistics.raw.sample_data.map((value, index) => (
469
+ <span key={index} className="px-2 py-1 bg-gray-100 rounded text-sm text-gray-700">
470
+ {String(value)}
471
+ </span>
472
+ ))}
473
+ </div>
474
+ </div>
475
+ </div>
476
+ )}
477
+ </div>
478
+
479
+ {/* Data Type Section */}
480
+ <div className="bg-white rounded-lg border border-gray-200 p-6">
481
+ <h3 className="text-lg font-medium text-gray-900 mb-4 flex items-center gap-2">
482
+ <Settings2 className="w-5 h-5 text-gray-500" />
483
+ Data Type
484
+ </h3>
485
+
486
+ <div className="space-y-4">
487
+ <div>
488
+ <label className="block text-sm font-medium text-gray-700 mb-1">
489
+ Column Type
490
+ </label>
491
+ <select
492
+ value={selectedType}
493
+ disabled
494
+ className="w-full rounded-md border-gray-300 bg-gray-50 shadow-sm text-gray-700 cursor-not-allowed"
495
+ >
496
+ {constants.column_types.map(type => (
497
+ <option key={type.value} value={type.value}>
498
+ {type.label}
499
+ </option>
500
+ ))}
501
+ </select>
502
+ <p className="mt-1 text-sm text-gray-500">
503
+ Column type cannot be changed after creation
504
+ </p>
505
+ </div>
506
+
507
+ <div className="bg-gray-50 rounded-md p-4">
508
+ <h4 className="text-sm font-medium text-gray-900 mb-2">Sample Data</h4>
509
+ <div className="space-y-2">
510
+ {Array.isArray(column.sample_values) ? column.sample_values.slice(0, 3).map((value: any, index: number) => (
511
+ <span key={index} className="m-1 flex-items items-center">
512
+ <Badge>
513
+ {String(value)}
514
+ </Badge>
515
+ </span>
516
+ )) : []}
517
+ </div>
518
+ </div>
519
+ </div>
520
+ </div>
521
+
522
+ {/* Preprocessing Strategy Section */}
523
+ <div className="bg-white rounded-lg border border-gray-200 p-6">
524
+ <h3 className="text-lg font-medium text-gray-900 mb-4 flex items-center gap-2">
525
+ <Wrench className="w-5 h-5 text-gray-500" />
526
+ Preprocessing Strategy
527
+ </h3>
528
+
529
+ <div className="space-y-6">
530
+ <div>
531
+ <div className="flex items-center justify-between mb-4">
532
+ <label className="block text-sm font-medium text-gray-700">
533
+ Training Strategy
534
+ </label>
535
+ <div className="flex items-center gap-2">
536
+ <input
537
+ type="checkbox"
538
+ id="useDistinctInference"
539
+ checked={useDistinctInference}
540
+ onChange={(e) => {
541
+ setUseDistinctInference(e.target.checked);
542
+ onUpdate(
543
+ training,
544
+ e.target.checked ? inference : undefined,
545
+ e.target.checked
546
+ );
547
+ }}
548
+ className="rounded border-gray-300 text-blue-600 focus:ring-blue-500"
549
+ />
550
+ <label htmlFor="useDistinctInference" className="text-sm text-gray-700">
551
+ Use different strategy for inference
552
+ </label>
553
+ </div>
554
+ </div>
555
+
556
+ <div className={useDistinctInference ? "grid grid-cols-2 gap-6" : ""}>
557
+ <div>
558
+ <select
559
+ value={training.method}
560
+ onChange={(e) => handleStrategyChange('training', e.target.value as PreprocessingStep['method'])}
561
+ className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
562
+ >
563
+ <option value="none">No preprocessing</option>
564
+ {constants.preprocessing_strategies[selectedType]?.map((strategy: { value: string; label: string; }) => (
565
+ <option key={strategy.value} value={strategy.value}>
566
+ {strategy.label}
567
+ </option>
568
+ ))}
569
+ </select>
570
+
571
+ {renderStrategySpecificInfo('training')}
572
+ {renderConstantValueInput('training')}
573
+ {(column.datatype === 'categorical' && training.method === 'categorical') && (
574
+ <div className="mt-4 space-y-4 bg-gray-50 rounded-lg p-4">
575
+ <div>
576
+ <label className="block text-sm font-medium text-gray-700 mb-1">
577
+ Minimum Category Instances
578
+ </label>
579
+ <input
580
+ type="number"
581
+ min="1"
582
+ value={training.params.categorical_min}
583
+ onChange={(e) => handleCategoricalParamChange('training', {
584
+ categorical_min: parseInt(e.target.value)
585
+ })}
586
+ className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
587
+ />
588
+ <p className="mt-1 text-sm text-gray-500">
589
+ Categories with fewer instances will be grouped as "OTHER"
590
+ </p>
591
+ </div>
592
+ </div>
593
+ )}
594
+ {(column.datatype === 'categorical' && training.method !== 'none') && (
595
+ <div className="mt-4 space-y-4 bg-gray-50 rounded-lg p-4">
596
+ <h4 className="text-sm font-medium text-gray-900 mb-2">Encoding</h4>
597
+ <div className="flex items-center gap-2">
598
+ <input
599
+ type="radio"
600
+ id="oneHotEncode"
601
+ name="encoding"
602
+ checked={training.params.one_hot}
603
+ onChange={() => handleCategoricalParamChange('training', {
604
+ one_hot: true,
605
+ ordinal_encoding: false
606
+ })}
607
+ className="rounded border-gray-300 text-blue-600 focus:ring-blue-500"
608
+ />
609
+ <label htmlFor="oneHotEncode" className="text-sm text-gray-700">
610
+ One-hot encode categories
611
+ </label>
612
+ </div>
613
+ <div className="flex items-center gap-2">
614
+ <input
615
+ type="radio"
616
+ id="ordinalEncode"
617
+ name="encoding"
618
+ checked={training.params.ordinal_encoding}
619
+ onChange={() => handleCategoricalParamChange('training', {
620
+ one_hot: false,
621
+ ordinal_encoding: true
622
+ })}
623
+ className="rounded border-gray-300 text-blue-600 focus:ring-blue-500"
624
+ />
625
+ <label htmlFor="ordinalEncode" className="text-sm text-gray-700">
626
+ Ordinal encode categories
627
+ </label>
628
+ </div>
629
+ </div>
630
+ )}
631
+ </div>
632
+
633
+ {useDistinctInference && (
634
+ <div>
635
+ <div className="flex items-center gap-2 mb-2">
636
+ <ArrowRight className="w-4 h-4 text-gray-400" />
637
+ <span className="text-sm font-medium text-gray-700">
638
+ Inference Strategy
639
+ </span>
640
+ </div>
641
+ <select
642
+ value={inference.method}
643
+ onChange={(e) => handleStrategyChange('inference', e.target.value as PreprocessingStep['method'])}
644
+ className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
645
+ >
646
+ <option value="none">No preprocessing</option>
647
+ {constants.preprocessing_strategies[selectedType]?.map((strategy: { value: string; label: string; }) => (
648
+ <option key={strategy.value} value={strategy.value}>
649
+ {strategy.label}
650
+ </option>
651
+ ))}
652
+ </select>
653
+
654
+ {renderConstantValueInput('inference')}
655
+ </div>
656
+ )}
657
+ </div>
658
+ </div>
659
+
660
+ {isNumericType(selectedType) && training.method !== 'none' && (
661
+ <div className="border-t pt-4">
662
+ <h4 className="text-sm font-medium text-gray-900 mb-2">Clip Values</h4>
663
+ <div className="grid grid-cols-2 gap-4">
664
+ <div>
665
+ <label className="block text-sm font-medium text-gray-700 mb-1">
666
+ Min Value
667
+ </label>
668
+ <input
669
+ type="number"
670
+ value={training.params?.clip?.min ?? ''}
671
+ onChange={(e) => {
672
+ handleClipChange('training', {
673
+ min: e.target.value ? Number(e.target.value) : undefined
674
+ });
675
+ }}
676
+ className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
677
+ placeholder="No minimum"
678
+ />
679
+ </div>
680
+ <div>
681
+ <label className="block text-sm font-medium text-gray-700 mb-1">
682
+ Max Value
683
+ </label>
684
+ <input
685
+ type="number"
686
+ value={training.params?.clip?.max ?? ''}
687
+ onChange={(e) => {
688
+ handleClipChange('training', {
689
+ max: e.target.value ? Number(e.target.value) : undefined
690
+ });
691
+ }}
692
+ className="w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
693
+ placeholder="No maximum"
694
+ />
695
+ </div>
696
+ </div>
697
+ </div>
698
+ )}
699
+ </div>
700
+ </div>
701
+
702
+ </div>
703
+ );
704
+ }