easy_ml 0.1.3 → 0.2.0.pre.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -4
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,120 @@
1
+ import React, { Fragment } from 'react';
2
+ import { Tab } from '@headlessui/react';
3
+ import { Info } from 'lucide-react';
4
+ import { SearchableSelect } from '../SearchableSelect';
5
+ import { DateSplitter } from './splitters/DateSplitter';
6
+ import { RandomSplitter } from './splitters/RandomSplitter';
7
+ import { PredefinedSplitter } from './splitters/PredefinedSplitter';
8
+ import { StratifiedSplitter } from './splitters/StratifiedSplitter';
9
+ import { KFoldSplitter } from './splitters/KFoldSplitter';
10
+ import { LeavePOutSplitter } from './splitters/LeavePOutSplitter';
11
+ import { SPLITTER_OPTIONS, DEFAULT_CONFIGS } from './splitters/constants';
12
+ import type { SplitterType, SplitConfig, ColumnConfig } from './splitters/types';
13
+
14
/** Props accepted by SplitConfigurator. */
interface SplitConfiguratorProps {
  /** Currently selected splitter strategy. */
  type: SplitterType;
  /** Settings for the selected strategy; forwarded to the matching splitter form. */
  splitter_attributes: SplitConfig;
  /** Dataset columns; those whose type is 'datetime' are offered to the date splitter. */
  columns: ColumnConfig[];
  /** File paths forwarded to the predefined splitter. */
  available_files: string[];
  /** Invoked with the strategy and its settings whenever either one changes. */
  onChange: (type: SplitterType, attributes: SplitConfig) => void;
}

/**
 * Lets the user pick a split strategy and edit its settings.
 *
 * Renders a strategy selector followed by the form for the selected strategy.
 * Picking a different strategy reports that strategy's entry from
 * DEFAULT_CONFIGS; edits made inside a form are reported under the current
 * strategy. Unknown strategies render no form.
 */
export function SplitConfigurator({
  type,
  splitter_attributes,
  columns,
  available_files,
  onChange,
}: SplitConfiguratorProps) {
  // Only datetime columns make sense as the axis of a date split.
  const dateColumns = columns.filter(col => col.type === 'datetime').map(col => col.name);

  const handleTypeChange = (newType: SplitterType) => {
    // Switching strategy discards the old settings and starts from the defaults.
    onChange(newType, DEFAULT_CONFIGS[newType]);
  };

  const handleSplitterChange = (splitterType: SplitterType, newAttributes: SplitConfig) => {
    onChange(splitterType, newAttributes);
  };

  const renderSplitter = () => {
    switch (type) {
      case 'date':
        return (
          <DateSplitter
            attributes={splitter_attributes}
            columns={dateColumns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'random':
        return (
          <RandomSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'predefined':
        return (
          <PredefinedSplitter
            attributes={splitter_attributes}
            available_files={available_files}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      // NOTE(review): the cases below pass `attributes`, but StratifiedSplitter,
      // KFoldSplitter and LeavePOutSplitter declare flat props (targetColumn,
      // testSize, nSplits, p, ...). These strategies are also commented out of
      // SPLITTER_OPTIONS, so the wiring looks stale — reconcile the prop shapes
      // before enabling them.
      case 'stratified':
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'stratified_kfold':
      case 'group_kfold':
        return (
          <KFoldSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'group_shuffle':
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            // The stratified form reports its column as `targetColumn`;
            // group shuffle stores it as `groupColumn`.
            onChange={(attrs) => handleSplitterChange(type, {
              groupColumn: attrs.targetColumn,
              testSize: attrs.testSize,
              validSize: attrs.validSize
            })}
          />
        );
      case 'leave_p_out':
        return (
          <LeavePOutSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      default:
        return null;
    }
  };

  return (
    <div className="space-y-6">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Split Type
        </label>
        <SearchableSelect
          options={SPLITTER_OPTIONS}
          value={type}
          onChange={(value) => handleTypeChange(value as SplitterType)}
        />
      </div>

      <div className="bg-gray-50 rounded-lg p-4">
        {renderSplitter()}
      </div>
    </div>
  );
}
118
+
119
+ export type { SplitterType };
120
+ export type { ColumnConfig };
@@ -0,0 +1,58 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+ import type { DateSplitConfig } from '../types';
4
+
5
/** Props accepted by DateSplitter. */
interface DateSplitterProps {
  /** Current date-split settings. */
  attributes: DateSplitConfig;
  /** Names of the columns that may be chosen as the date column. */
  columns: string[];
  /** Invoked with the complete, updated settings after every edit. */
  onChange: (attributes: DateSplitConfig) => void;
}

/** Smallest month count the inputs accept; mirrors their `min` attribute. */
const MIN_MONTHS = 1;

/**
 * Converts the raw text of a month input into a month count.
 *
 * A cleared field parses to NaN, which would otherwise be written into the
 * settings and fed back into the controlled input. Such values, and anything
 * below the minimum, fall back to MIN_MONTHS.
 */
function parseMonths(raw: string): number {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? MIN_MONTHS : Math.max(MIN_MONTHS, parsed);
}

/**
 * Form for a date-based split: the date column plus the number of months
 * reserved for the test and validation sets.
 */
export function DateSplitter({ attributes, columns, onChange }: DateSplitterProps) {
  return (
    <div className="space-y-4">
      <div>
        <label htmlFor="date_column" className="block text-sm font-medium text-gray-700">
          Date Column
        </label>
        <SearchableSelect
          id="date_column"
          value={attributes.date_column}
          options={columns.map(col => ({ value: col, label: col }))}
          onChange={(value) => onChange({ ...attributes, date_column: value })}
          placeholder="Select date column"
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          <label htmlFor="months_test" className="block text-sm font-medium text-gray-700">
            Test Months
          </label>
          <input
            type="number"
            id="months_test"
            value={attributes.months_test}
            onChange={(e) => onChange({ ...attributes, months_test: parseMonths(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="1"
          />
        </div>

        <div>
          <label htmlFor="months_valid" className="block text-sm font-medium text-gray-700">
            Validation Months
          </label>
          <input
            type="number"
            id="months_valid"
            value={attributes.months_valid}
            onChange={(e) => onChange({ ...attributes, months_valid: parseMonths(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="1"
          />
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,68 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+
4
/** Props accepted by KFoldSplitter. */
interface KFoldSplitterProps {
  /** K-fold variant; 'stratified' and 'group' additionally ask for a column. */
  type: 'kfold' | 'stratified' | 'group';
  targetColumn?: string;
  groupColumn?: string;
  nSplits: number;
  columns: Array<{ name: string; type: string }>;
  onChange: (config: {
    targetColumn?: string;
    groupColumn?: string;
    nSplits: number;
  }) => void;
}

/**
 * Form for the k-fold family of splitters.
 *
 * Always shows the number of splits. For the 'stratified' and 'group' variants
 * it also shows a column picker that edits the target or group column
 * respectively. Every edit reports the full configuration through `onChange`.
 */
export function KFoldSplitter(props: KFoldSplitterProps) {
  const { type, targetColumn, groupColumn, nSplits, columns, onChange } = props;

  const isStratified = type === 'stratified';
  const isGrouped = type === 'group';

  // Built lazily so the plain 'kfold' variant never touches `columns`.
  const renderColumnPicker = () => {
    const choices = columns.map(col => ({
      value: col.name,
      label: col.name,
      description: `Type: ${col.type}`
    }));
    const heading = isStratified ? 'Target Column' : 'Group Column';
    const selected = isStratified ? targetColumn : groupColumn;
    const hint = `Select ${isStratified ? 'target' : 'group'} column...`;

    return (
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          {heading}
        </label>
        <SearchableSelect
          options={choices}
          value={selected}
          onChange={(value) => onChange({
            // Only the column belonging to the active variant is replaced.
            targetColumn: isStratified ? value as string : targetColumn,
            groupColumn: isGrouped ? value as string : groupColumn,
            nSplits
          })}
          placeholder={hint}
        />
      </div>
    );
  };

  const handleSplitsInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    // An empty or zero entry falls back to two splits.
    const requested = parseInt(event.target.value) || 2;
    onChange({ targetColumn, groupColumn, nSplits: requested });
  };

  return (
    <div className="space-y-4">
      {(isStratified || isGrouped) && renderColumnPicker()}

      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of Splits
        </label>
        <input
          type="number"
          min={2}
          max={10}
          value={nSplits}
          onChange={handleSplitsInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
      </div>
    </div>
  );
}
@@ -0,0 +1,29 @@
1
+ import React from 'react';
2
+
3
/** Props accepted by LeavePOutSplitter. */
interface LeavePOutSplitterProps {
  /** Number of samples held out per fold. */
  p: number;
  /** Invoked with the new value of P after every edit. */
  onChange: (p: number) => void;
}

/**
 * Form for the leave-P-out splitter: a single numeric field for P.
 * An empty or zero entry is reported as 1.
 */
export function LeavePOutSplitter(props: LeavePOutSplitterProps) {
  const { p, onChange } = props;

  const handleInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    const requested = parseInt(event.target.value);
    onChange(requested || 1);
  };

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of samples to leave out (P)
        </label>
        <input
          type="number"
          min={1}
          max={100}
          value={p}
          onChange={handleInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
        <p className="mt-1 text-sm text-gray-500">
          Each training set will have P samples removed, which form the test set.
        </p>
      </div>
    </div>
  );
}
@@ -0,0 +1,146 @@
1
+ import React from 'react';
2
+ import { FileCheck } from 'lucide-react';
3
+ import { SearchableSelect } from '../../SearchableSelect';
4
+
5
/** A selected file together with the split it is assigned to. */
interface FileConfig {
  path: string;
  type: 'train' | 'test' | 'valid';
}

/** Settings of the predefined splitter: one list of file paths per split. */
interface PredefinedSplitConfig {
  splitter_type: 'predefined';
  train_files: string[];
  test_files: string[];
  valid_files: string[];
}

/** Props accepted by PredefinedSplitter. */
interface PredefinedSplitterProps {
  /** Current assignment of files to splits. */
  attributes: PredefinedSplitConfig;
  /** All file paths that may be assigned to a split. */
  available_files: string[];
  /** Invoked with the complete, updated settings after every edit. */
  onChange: (attributes: PredefinedSplitConfig) => void;
}

/**
 * Form that assigns files to the train, test and validation splits.
 *
 * Files are added from the not-yet-selected part of `available_files`, start
 * out in the training split, and can be reassigned or removed. Warnings are
 * shown while no training or no test file is selected.
 */
export function PredefinedSplitter({ attributes, available_files, onChange }: PredefinedSplitterProps) {
  const [selectedFiles, setSelectedFiles] = React.useState<FileConfig[]>([]);

  // Rebuild the flat UI list whenever the per-split lists change.
  // Missing lists (e.g. settings saved without one of the keys) count as empty.
  React.useEffect(() => {
    const files: FileConfig[] = [
      ...(attributes.train_files ?? []).map(path => ({ path, type: 'train' as const })),
      ...(attributes.test_files ?? []).map(path => ({ path, type: 'test' as const })),
      ...(attributes.valid_files ?? []).map(path => ({ path, type: 'valid' as const }))
    ];
    setSelectedFiles(files);
  }, [attributes.train_files, attributes.test_files, attributes.valid_files]);

  // Groups the flat list back into per-split path lists and reports them.
  const updateAttributes = (files: FileConfig[]) => {
    const pathsOf = (role: FileConfig['type']) =>
      files.filter(f => f.type === role).map(f => f.path);

    onChange({
      splitter_type: 'predefined',
      train_files: pathsOf('train'),
      test_files: pathsOf('test'),
      valid_files: pathsOf('valid')
    });
  };

  // Updates the local list and notifies the parent in one step.
  const commit = (files: FileConfig[]) => {
    setSelectedFiles(files);
    updateAttributes(files);
  };

  const addFile = (path: string) => {
    // The select may report an empty value when it is cleared.
    if (!path) return;
    // Explicit annotation keeps `type` as the literal 'train' instead of `string`.
    const added: FileConfig = { path, type: 'train' };
    commit([...selectedFiles, added]);
  };

  const updateFileType = (index: number, type: FileConfig['type']) => {
    commit(selectedFiles.map((file, i) => (i === index ? { ...file, type } : file)));
  };

  const removeFile = (index: number) => {
    commit(selectedFiles.filter((_, i) => i !== index));
  };

  const unusedFiles = available_files.filter(
    path => !selectedFiles.some(f => f.path === path)
  );

  return (
    <div className="space-y-4">
      {/* File Selection */}
      <div>
        <label className="block text-sm font-medium text-gray-700">Add File</label>
        <SearchableSelect
          options={unusedFiles.map(path => ({
            value: path,
            label: path.split('/').pop() || path,
            description: path
          }))}
          value={null}
          onChange={(value) => addFile(value as string)}
          placeholder="Select a file..."
        />
      </div>

      {/* Selected files */}
      {selectedFiles.length > 0 ? (
        <div className="space-y-2">
          {selectedFiles.map((file, index) => (
            <div
              key={file.path}
              className="flex items-center justify-between p-3 bg-gray-50 rounded-lg"
            >
              <div className="flex items-center gap-2 min-w-0">
                <FileCheck className="w-4 h-4 text-gray-400 flex-shrink-0" />
                <span className="text-sm text-gray-900 truncate">
                  {file.path.split('/').pop()}
                </span>
              </div>
              <div className="flex items-center gap-2">
                <select
                  value={file.type}
                  onChange={(e) => updateFileType(index, e.target.value as FileConfig['type'])}
                  className="text-sm rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                >
                  <option value="train">Training Set</option>
                  <option value="test">Test Set</option>
                  <option value="valid">Validation Set</option>
                </select>
                {/* type="button" keeps this from submitting an enclosing form. */}
                <button
                  type="button"
                  onClick={() => removeFile(index)}
                  className="text-sm text-red-600 hover:text-red-700"
                >
                  Remove
                </button>
              </div>
            </div>
          ))}
        </div>
      ) : (
        <div className="text-center py-4 bg-gray-50 rounded-lg border-2 border-dashed border-gray-200">
          <p className="text-sm text-gray-500">
            Select files to create your train/test/validation splits
          </p>
        </div>
      )}

      {/* Validation messages */}
      {selectedFiles.length > 0 && (
        <div className="space-y-1 text-sm">
          {!selectedFiles.some(f => f.type === 'train') && (
            <p className="text-yellow-600">
              • You need at least one training set file
            </p>
          )}
          {!selectedFiles.some(f => f.type === 'test') && (
            <p className="text-yellow-600">
              • You need at least one test set file
            </p>
          )}
        </div>
      )}

    </div>
  );
}
@@ -0,0 +1,85 @@
1
+ import React from 'react';
2
+ import { Info } from 'lucide-react';
3
+ import type { RandomSplitConfig } from '../types';
4
+
5
/** Props accepted by RandomSplitter. */
interface RandomSplitterProps {
  /** Current random-split settings; unset ratios are displayed with their defaults. */
  attributes: RandomSplitConfig;
  /** Invoked with the complete, updated settings after every edit. */
  onChange: (attributes: RandomSplitConfig) => void;
}

/** Ratio inputs in display order; each field name doubles as the input's DOM id. */
const RATIO_FIELDS: ReadonlyArray<{
  field: 'train_ratio' | 'test_ratio' | 'valid_ratio';
  label: string;
  fallback: number;
}> = [
  { field: 'train_ratio', label: 'Training Ratio', fallback: 0.6 },
  { field: 'test_ratio', label: 'Test Ratio', fallback: 0.2 },
  { field: 'valid_ratio', label: 'Validation Ratio', fallback: 0.2 },
];

/**
 * Maps a parse result to an optional setting: NaN (cleared or unparsable
 * input) becomes `undefined` so it is never stored or rendered as NaN.
 */
function toOptionalNumber(parsed: number): number | undefined {
  return Number.isNaN(parsed) ? undefined : parsed;
}

/**
 * Form for a random split: train/test/validation ratios and an optional seed.
 * Clearing a ratio unsets it, so the field shows its default again; clearing
 * the seed unsets the seed.
 */
export function RandomSplitter({ attributes, onChange }: RandomSplitterProps) {
  return (
    <div className="space-y-4">
      <div className="flex items-start gap-2">
        <Info className="w-5 h-5 text-blue-500 mt-0.5" />
        <p className="text-sm text-blue-700">
          Random splitting will automatically split your data into 60% training, 20% test, and 20% validation sets.
        </p>
      </div>

      <div className="grid grid-cols-3 gap-4">
        {RATIO_FIELDS.map(({ field, label, fallback }) => (
          <div key={field}>
            <label htmlFor={field} className="block text-sm font-medium text-gray-700">
              {label}
            </label>
            <input
              type="number"
              id={field}
              value={attributes[field] ?? fallback}
              onChange={(e) => onChange({
                ...attributes,
                [field]: toOptionalNumber(Number.parseFloat(e.target.value))
              })}
              className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
              min="0"
              max="1"
              step="0.1"
            />
          </div>
        ))}
      </div>

      <div>
        <label htmlFor="seed" className="block text-sm font-medium text-gray-700">
          Random Seed (optional)
        </label>
        <input
          type="number"
          id="seed"
          value={attributes.seed ?? ''}
          onChange={(e) => onChange({
            ...attributes,
            seed: toOptionalNumber(Number.parseInt(e.target.value, 10))
          })}
          className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
          placeholder="Enter a random seed"
        />
      </div>
    </div>
  );
}
@@ -0,0 +1,79 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+
4
/** Props accepted by StratifiedSplitter. */
interface StratifiedSplitterProps {
  targetColumn: string;
  /** Test set size, shown to the user as a percentage. */
  testSize: number;
  /** Validation set size, shown to the user as a percentage. */
  validSize: number;
  columns: Array<{ name: string; type: string }>;
  onChange: (config: { targetColumn: string; testSize: number; validSize: number }) => void;
}

/**
 * Form for a stratified split: the target column plus test and validation
 * set sizes. Every edit reports the full configuration through `onChange`;
 * an empty or zero size entry is reported as 0.
 */
export function StratifiedSplitter(props: StratifiedSplitterProps) {
  const { targetColumn, testSize, validSize, columns, onChange } = props;

  const columnChoices = columns.map(col => ({
    value: col.name,
    label: col.name,
    description: `Type: ${col.type}`
  }));

  // Renders one percentage field; `report` receives the parsed entry.
  const renderSizeField = (caption: string, current: number, report: (size: number) => void) => (
    <div>
      <label className="block text-sm font-medium text-gray-700 mb-1">
        {caption}
      </label>
      <input
        type="number"
        min={1}
        max={40}
        value={current}
        onChange={(e) => report(parseInt(e.target.value) || 0)}
        className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
      />
    </div>
  );

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Target Column
        </label>
        <SearchableSelect
          options={columnChoices}
          value={targetColumn}
          onChange={(value) => onChange({
            targetColumn: value as string,
            testSize,
            validSize
          })}
          placeholder="Select target column..."
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        {renderSizeField('Test Set Size (%)', testSize, (size) =>
          onChange({ targetColumn, testSize: size, validSize })
        )}
        {renderSizeField('Validation Set Size (%)', validSize, (size) =>
          onChange({ targetColumn, testSize, validSize: size })
        )}
      </div>
    </div>
  );
}
@@ -0,0 +1,77 @@
1
/**
 * Split strategies offered in the "Split Type" selector, in display order.
 * Each entry carries the strategy identifier (`value`), a short `label` and a
 * one-line `description`.
 */
export const SPLITTER_OPTIONS = [
  {
    value: 'date',
    label: 'Date Split',
    description: 'Split data based on a date/time column'
  },
  {
    value: 'random',
    label: 'Random Split',
    // Ratios match the defaults used by the random splitter form (0.6 / 0.2 / 0.2).
    description: 'Randomly split data into train/test/validation sets (60/20/20)'
  },
  {
    value: 'predefined',
    label: 'Predefined Splits',
    description: 'Use separate files for train/test/validation sets'
  },
  // The strategies below are currently disabled in the selector.
  // {
  //   value: 'stratified',
  //   label: 'Stratified Shuffle Split',
  //   description: 'Maintain the percentage of samples for each class'
  // },
  // {
  //   value: 'stratified_kfold',
  //   label: 'Stratified K-Fold',
  //   description: 'K-fold with preserved class distribution'
  // },
  // {
  //   value: 'group_kfold',
  //   label: 'Group K-Fold',
  //   description: 'K-fold ensuring group integrity'
  // },
  // {
  //   value: 'group_shuffle',
  //   label: 'Group Shuffle Split',
  //   description: 'Random split respecting group boundaries'
  // },
  // {
  //   value: 'leave_p_out',
  //   label: 'Leave P Out',
  //   description: 'Use P samples for testing in each fold'
  // }
] as const;
43
+
44
// Defaults shared by more than one strategy.
const DEFAULT_TEST_SIZE = 20;
const DEFAULT_VALID_SIZE = 10;
const DEFAULT_N_SPLITS = 5;

/**
 * Initial settings for each split strategy, keyed by strategy identifier.
 * The entry for a strategy is what gets reported when the user switches to it.
 */
export const DEFAULT_CONFIGS = {
  date: { date_column: '', months_test: 2, months_valid: 1 },
  // No stored defaults for the random strategy.
  random: {},
  predefined: { train_files: [], test_files: [], valid_files: [] },
  stratified: {
    targetColumn: '',
    testSize: DEFAULT_TEST_SIZE,
    validSize: DEFAULT_VALID_SIZE
  },
  stratified_kfold: { targetColumn: '', nSplits: DEFAULT_N_SPLITS },
  group_kfold: { groupColumn: '', nSplits: DEFAULT_N_SPLITS },
  group_shuffle: {
    groupColumn: '',
    testSize: DEFAULT_TEST_SIZE,
    validSize: DEFAULT_VALID_SIZE
  },
  leave_p_out: { p: 1 }
} as const;