easy_ml 0.1.4 → 0.2.0.pre.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (239) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +234 -26
  3. data/Rakefile +45 -0
  4. data/app/controllers/easy_ml/application_controller.rb +67 -0
  5. data/app/controllers/easy_ml/columns_controller.rb +38 -0
  6. data/app/controllers/easy_ml/datasets_controller.rb +156 -0
  7. data/app/controllers/easy_ml/datasources_controller.rb +88 -0
  8. data/app/controllers/easy_ml/deploys_controller.rb +20 -0
  9. data/app/controllers/easy_ml/models_controller.rb +151 -0
  10. data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
  11. data/app/controllers/easy_ml/settings_controller.rb +59 -0
  12. data/app/frontend/components/AlertProvider.tsx +108 -0
  13. data/app/frontend/components/DatasetPreview.tsx +161 -0
  14. data/app/frontend/components/EmptyState.tsx +28 -0
  15. data/app/frontend/components/ModelCard.tsx +255 -0
  16. data/app/frontend/components/ModelDetails.tsx +334 -0
  17. data/app/frontend/components/ModelForm.tsx +384 -0
  18. data/app/frontend/components/Navigation.tsx +300 -0
  19. data/app/frontend/components/Pagination.tsx +72 -0
  20. data/app/frontend/components/Popover.tsx +55 -0
  21. data/app/frontend/components/PredictionStream.tsx +105 -0
  22. data/app/frontend/components/ScheduleModal.tsx +726 -0
  23. data/app/frontend/components/SearchInput.tsx +23 -0
  24. data/app/frontend/components/SearchableSelect.tsx +132 -0
  25. data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
  26. data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
  27. data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
  28. data/app/frontend/components/dataset/ColumnList.tsx +101 -0
  29. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
  30. data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
  31. data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
  32. data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
  33. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
  34. data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
  35. data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
  36. data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
  37. data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
  38. data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
  39. data/app/frontend/components/dataset/splitters/constants.ts +77 -0
  40. data/app/frontend/components/dataset/splitters/types.ts +168 -0
  41. data/app/frontend/components/dataset/splitters/utils.ts +53 -0
  42. data/app/frontend/components/features/CodeEditor.tsx +46 -0
  43. data/app/frontend/components/features/DataPreview.tsx +150 -0
  44. data/app/frontend/components/features/FeatureCard.tsx +88 -0
  45. data/app/frontend/components/features/FeatureForm.tsx +235 -0
  46. data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
  47. data/app/frontend/components/settings/PluginSettings.tsx +81 -0
  48. data/app/frontend/components/ui/badge.tsx +44 -0
  49. data/app/frontend/components/ui/collapsible.tsx +9 -0
  50. data/app/frontend/components/ui/scroll-area.tsx +46 -0
  51. data/app/frontend/components/ui/separator.tsx +29 -0
  52. data/app/frontend/entrypoints/App.tsx +40 -0
  53. data/app/frontend/entrypoints/Application.tsx +24 -0
  54. data/app/frontend/hooks/useAutosave.ts +61 -0
  55. data/app/frontend/layouts/Layout.tsx +38 -0
  56. data/app/frontend/lib/utils.ts +6 -0
  57. data/app/frontend/mockData.ts +272 -0
  58. data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
  59. data/app/frontend/pages/DatasetsPage.tsx +261 -0
  60. data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
  61. data/app/frontend/pages/DatasourcesPage.tsx +261 -0
  62. data/app/frontend/pages/EditModelPage.tsx +45 -0
  63. data/app/frontend/pages/EditTransformationPage.tsx +56 -0
  64. data/app/frontend/pages/ModelsPage.tsx +115 -0
  65. data/app/frontend/pages/NewDatasetPage.tsx +366 -0
  66. data/app/frontend/pages/NewModelPage.tsx +45 -0
  67. data/app/frontend/pages/NewTransformationPage.tsx +43 -0
  68. data/app/frontend/pages/SettingsPage.tsx +272 -0
  69. data/app/frontend/pages/ShowModelPage.tsx +30 -0
  70. data/app/frontend/pages/TransformationsPage.tsx +95 -0
  71. data/app/frontend/styles/application.css +100 -0
  72. data/app/frontend/types/dataset.ts +146 -0
  73. data/app/frontend/types/datasource.ts +33 -0
  74. data/app/frontend/types/preprocessing.ts +1 -0
  75. data/app/frontend/types.ts +113 -0
  76. data/app/helpers/easy_ml/application_helper.rb +10 -0
  77. data/app/jobs/easy_ml/application_job.rb +21 -0
  78. data/app/jobs/easy_ml/batch_job.rb +46 -0
  79. data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
  80. data/app/jobs/easy_ml/deploy_job.rb +13 -0
  81. data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
  82. data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
  83. data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
  84. data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
  85. data/app/jobs/easy_ml/training_job.rb +62 -0
  86. data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
  87. data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
  88. data/app/models/easy_ml/cleaner.rb +82 -0
  89. data/app/models/easy_ml/column.rb +124 -0
  90. data/app/models/easy_ml/column_history.rb +30 -0
  91. data/app/models/easy_ml/column_list.rb +122 -0
  92. data/app/models/easy_ml/concerns/configurable.rb +61 -0
  93. data/app/models/easy_ml/concerns/versionable.rb +19 -0
  94. data/app/models/easy_ml/dataset.rb +767 -0
  95. data/app/models/easy_ml/dataset_history.rb +56 -0
  96. data/app/models/easy_ml/datasource.rb +182 -0
  97. data/app/models/easy_ml/datasource_history.rb +24 -0
  98. data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
  99. data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
  100. data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
  101. data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
  102. data/app/models/easy_ml/deploy.rb +114 -0
  103. data/app/models/easy_ml/event.rb +79 -0
  104. data/app/models/easy_ml/feature.rb +437 -0
  105. data/app/models/easy_ml/feature_history.rb +38 -0
  106. data/app/models/easy_ml/model.rb +575 -41
  107. data/app/models/easy_ml/model_file.rb +133 -0
  108. data/app/models/easy_ml/model_file_history.rb +24 -0
  109. data/app/models/easy_ml/model_history.rb +51 -0
  110. data/app/models/easy_ml/models/base_model.rb +58 -0
  111. data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
  112. data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
  113. data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
  114. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
  115. data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
  116. data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
  117. data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
  118. data/app/models/easy_ml/models/xgboost.rb +544 -5
  119. data/app/models/easy_ml/prediction.rb +44 -0
  120. data/app/models/easy_ml/retraining_job.rb +278 -0
  121. data/app/models/easy_ml/retraining_run.rb +184 -0
  122. data/app/models/easy_ml/settings.rb +37 -0
  123. data/app/models/easy_ml/splitter.rb +90 -0
  124. data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
  125. data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
  126. data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
  127. data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
  128. data/app/models/easy_ml/tuner_job.rb +56 -0
  129. data/app/models/easy_ml/tuner_run.rb +31 -0
  130. data/app/models/splitter_history.rb +6 -0
  131. data/app/serializers/easy_ml/column_serializer.rb +27 -0
  132. data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
  133. data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
  134. data/app/serializers/easy_ml/feature_serializer.rb +27 -0
  135. data/app/serializers/easy_ml/model_serializer.rb +90 -0
  136. data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
  137. data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
  138. data/app/serializers/easy_ml/settings_serializer.rb +9 -0
  139. data/app/views/layouts/easy_ml/application.html.erb +15 -0
  140. data/config/initializers/resque.rb +3 -0
  141. data/config/resque-pool.yml +6 -0
  142. data/config/routes.rb +39 -0
  143. data/config/spring.rb +1 -0
  144. data/config/vite.json +15 -0
  145. data/lib/easy_ml/configuration.rb +64 -0
  146. data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
  147. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
  148. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
  149. data/lib/easy_ml/core/model_evaluator.rb +161 -89
  150. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
  151. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
  152. data/lib/easy_ml/core/tuner.rb +123 -62
  153. data/lib/easy_ml/core.rb +0 -3
  154. data/lib/easy_ml/core_ext/hash.rb +24 -0
  155. data/lib/easy_ml/core_ext/pathname.rb +11 -5
  156. data/lib/easy_ml/data/date_converter.rb +90 -0
  157. data/lib/easy_ml/data/filter_extensions.rb +31 -0
  158. data/lib/easy_ml/data/polars_column.rb +126 -0
  159. data/lib/easy_ml/data/polars_reader.rb +297 -0
  160. data/lib/easy_ml/data/preprocessor.rb +280 -142
  161. data/lib/easy_ml/data/simple_imputer.rb +255 -0
  162. data/lib/easy_ml/data/splits/file_split.rb +252 -0
  163. data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
  164. data/lib/easy_ml/data/splits/split.rb +95 -0
  165. data/lib/easy_ml/data/splits.rb +9 -0
  166. data/lib/easy_ml/data/statistics_learner.rb +93 -0
  167. data/lib/easy_ml/data/synced_directory.rb +341 -0
  168. data/lib/easy_ml/data.rb +6 -2
  169. data/lib/easy_ml/engine.rb +105 -6
  170. data/lib/easy_ml/feature_store.rb +227 -0
  171. data/lib/easy_ml/features.rb +61 -0
  172. data/lib/easy_ml/initializers/inflections.rb +17 -3
  173. data/lib/easy_ml/logging.rb +2 -2
  174. data/lib/easy_ml/predict.rb +74 -0
  175. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
  176. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
  177. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
  178. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
  179. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
  180. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
  181. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
  182. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
  183. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
  184. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
  185. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
  186. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
  187. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
  188. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
  189. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
  190. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
  191. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
  192. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
  193. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
  194. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
  195. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
  196. data/lib/easy_ml/support/est.rb +5 -1
  197. data/lib/easy_ml/support/file_rotate.rb +79 -15
  198. data/lib/easy_ml/support/file_support.rb +9 -0
  199. data/lib/easy_ml/support/local_file.rb +24 -0
  200. data/lib/easy_ml/support/lockable.rb +62 -0
  201. data/lib/easy_ml/support/synced_file.rb +103 -0
  202. data/lib/easy_ml/support/utc.rb +5 -1
  203. data/lib/easy_ml/support.rb +6 -3
  204. data/lib/easy_ml/version.rb +4 -1
  205. data/lib/easy_ml.rb +7 -2
  206. metadata +355 -72
  207. data/app/models/easy_ml/models.rb +0 -5
  208. data/lib/easy_ml/core/model.rb +0 -30
  209. data/lib/easy_ml/core/model_core.rb +0 -181
  210. data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
  211. data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
  212. data/lib/easy_ml/core/models/xgboost.rb +0 -10
  213. data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
  214. data/lib/easy_ml/core/models.rb +0 -10
  215. data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
  216. data/lib/easy_ml/core/uploaders.rb +0 -7
  217. data/lib/easy_ml/data/dataloader.rb +0 -6
  218. data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
  219. data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
  220. data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
  221. data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
  222. data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
  223. data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
  224. data/lib/easy_ml/data/dataset/splits.rb +0 -11
  225. data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
  226. data/lib/easy_ml/data/dataset/splitters.rb +0 -9
  227. data/lib/easy_ml/data/dataset.rb +0 -430
  228. data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
  229. data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
  230. data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
  231. data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
  232. data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
  233. data/lib/easy_ml/data/datasource.rb +0 -33
  234. data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
  235. data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
  236. data/lib/easy_ml/deployment.rb +0 -5
  237. data/lib/easy_ml/support/synced_directory.rb +0 -134
  238. data/lib/easy_ml/transforms.rb +0 -29
  239. /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,120 @@
1
+ import React, { Fragment } from 'react';
2
+ import { Tab } from '@headlessui/react';
3
+ import { Info } from 'lucide-react';
4
+ import { SearchableSelect } from '../SearchableSelect';
5
+ import { DateSplitter } from './splitters/DateSplitter';
6
+ import { RandomSplitter } from './splitters/RandomSplitter';
7
+ import { PredefinedSplitter } from './splitters/PredefinedSplitter';
8
+ import { StratifiedSplitter } from './splitters/StratifiedSplitter';
9
+ import { KFoldSplitter } from './splitters/KFoldSplitter';
10
+ import { LeavePOutSplitter } from './splitters/LeavePOutSplitter';
11
+ import { SPLITTER_OPTIONS, DEFAULT_CONFIGS } from './splitters/constants';
12
+ import type { SplitterType, SplitConfig, ColumnConfig } from './splitters/types';
13
+
14
/**
 * Props accepted by {@link SplitConfigurator}.
 */
interface SplitConfiguratorProps {
  /** Currently selected splitter strategy. */
  type: SplitterType;
  /** Attributes of the currently selected strategy. */
  splitter_attributes: SplitConfig;
  /** Dataset columns offered to the column pickers. */
  columns: ColumnConfig[];
  /** File paths offered to the predefined splitter. */
  available_files: string[];
  /** Called with the strategy and its attributes whenever either one changes. */
  onChange: (type: SplitterType, attributes: SplitConfig) => void;
}

/**
 * Lets the user pick a splitter strategy and edit that strategy's attributes.
 *
 * The component is fully controlled: it holds no state and reports every edit
 * through `onChange(type, attributes)`. Choosing a different strategy resets
 * the attributes to that strategy's entry in `DEFAULT_CONFIGS`.
 */
export function SplitConfigurator({ type, splitter_attributes, columns, available_files, onChange }: SplitConfiguratorProps) {
  // The date splitter only offers datetime columns, by name.
  const dateColumns = columns.filter(col => col.type === 'datetime').map(col => col.name);

  // Switching strategy discards the previous attributes in favour of the defaults.
  const handleTypeChange = (newType: SplitterType) => {
    onChange(newType, DEFAULT_CONFIGS[newType]);
  };

  // Attribute edits keep the strategy that is currently selected.
  const handleAttributesChange = (newAttributes: SplitConfig) => {
    onChange(type, newAttributes);
  };

  const renderSplitter = () => {
    switch (type) {
      case 'date':
        return (
          <DateSplitter
            attributes={splitter_attributes}
            columns={dateColumns}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      case 'random':
        return (
          <RandomSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      case 'predefined':
        return (
          <PredefinedSplitter
            attributes={splitter_attributes}
            available_files={available_files}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      // NOTE(review): the strategies below are commented out of SPLITTER_OPTIONS,
      // so the select above never produces them. Their components declare flat
      // props (targetColumn, testSize, nSplits, p, ...) rather than `attributes`,
      // so these branches need rewiring before the options are re-enabled.
      case 'stratified':
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      case 'stratified_kfold':
      case 'group_kfold':
        return (
          <KFoldSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      case 'group_shuffle':
        // Reuses the stratified form; its "target" column is stored as the group column.
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleAttributesChange({
              groupColumn: attrs.targetColumn,
              testSize: attrs.testSize,
              validSize: attrs.validSize
            })}
          />
        );
      case 'leave_p_out':
        // NOTE(review): LeavePOutSplitter reports a bare number, not an attributes object.
        return (
          <LeavePOutSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleAttributesChange(attrs)}
          />
        );
      default:
        return null;
    }
  };

  return (
    <div className="space-y-6">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Split Type
        </label>
        <SearchableSelect
          options={SPLITTER_OPTIONS}
          value={type}
          onChange={(value) => handleTypeChange(value as SplitterType)}
        />
      </div>

      <div className="bg-gray-50 rounded-lg p-4">
        {renderSplitter()}
      </div>
    </div>
  );
}
118
+
119
+ export type { SplitterType };
120
+ export type { ColumnConfig };
@@ -0,0 +1,58 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+ import type { DateSplitConfig } from '../types';
4
+
5
/** Props accepted by {@link DateSplitter}. */
interface DateSplitterProps {
  /** Current date-split attributes (date column plus test/validation month counts). */
  attributes: DateSplitConfig;
  /** Column names offered in the date column picker. */
  columns: string[];
  /** Called with a full copy of the attributes after every edit. */
  onChange: (attributes: DateSplitConfig) => void;
}

/** Smallest month count the inputs accept; mirrors their `min` attribute. */
const MIN_MONTHS = 1;

/**
 * Converts the raw text of a month input into a usable month count.
 *
 * An empty or non-numeric field parses to NaN; passing that on would put NaN
 * into the attributes and back into the controlled input's `value`. Fall back
 * to the minimum instead, and never go below it.
 */
function parseMonths(raw: string): number {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? MIN_MONTHS : Math.max(MIN_MONTHS, parsed);
}

/**
 * Form for the date splitter: pick the date column and how many months go to
 * the test and validation sets. Every edit is reported through `onChange` as a
 * new attributes object; the component keeps no state of its own.
 */
export function DateSplitter({ attributes, columns, onChange }: DateSplitterProps) {
  const inputClassName =
    "mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm";

  return (
    <div className="space-y-4">
      <div>
        <label htmlFor="date_column" className="block text-sm font-medium text-gray-700">
          Date Column
        </label>
        <SearchableSelect
          id="date_column"
          value={attributes.date_column}
          options={columns.map(col => ({ value: col, label: col }))}
          onChange={(value) => onChange({ ...attributes, date_column: value })}
          placeholder="Select date column"
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          <label htmlFor="months_test" className="block text-sm font-medium text-gray-700">
            Test Months
          </label>
          <input
            type="number"
            id="months_test"
            value={attributes.months_test}
            onChange={(e) => onChange({ ...attributes, months_test: parseMonths(e.target.value) })}
            className={inputClassName}
            min={MIN_MONTHS}
          />
        </div>

        <div>
          <label htmlFor="months_valid" className="block text-sm font-medium text-gray-700">
            Validation Months
          </label>
          <input
            type="number"
            id="months_valid"
            value={attributes.months_valid}
            onChange={(e) => onChange({ ...attributes, months_valid: parseMonths(e.target.value) })}
            className={inputClassName}
            min={MIN_MONTHS}
          />
        </div>
      </div>
    </div>
  );
}
@@ -0,0 +1,68 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+
4
/** Props accepted by {@link KFoldSplitter}. */
interface KFoldSplitterProps {
  /** K-fold variant; 'stratified' and 'group' additionally ask for a column. */
  type: 'kfold' | 'stratified' | 'group';
  /** Column picked for the 'stratified' variant. */
  targetColumn?: string;
  /** Column picked for the 'group' variant. */
  groupColumn?: string;
  /** Number of folds. */
  nSplits: number;
  /** Columns offered in the column picker. */
  columns: Array<{ name: string; type: string }>;
  /** Called with the complete configuration after every edit. */
  onChange: (config: {
    targetColumn?: string;
    groupColumn?: string;
    nSplits: number;
  }) => void;
}

/**
 * Form for the k-fold splitters. Shows a column picker for the stratified and
 * group variants, and always shows the number-of-splits input.
 */
export function KFoldSplitter({
  type,
  targetColumn,
  groupColumn,
  nSplits,
  columns,
  onChange
}: KFoldSplitterProps) {
  const isStratified = type === 'stratified';
  const needsColumn = isStratified || type === 'group';

  // Report the current configuration with the given fields replaced.
  const emit = (patch: { targetColumn?: string; groupColumn?: string; nSplits?: number }) => {
    onChange({ targetColumn, groupColumn, nSplits, ...patch });
  };

  // The stratified variant edits the target column; the group variant edits the group column.
  const handleColumnPicked = (picked: string) => {
    if (isStratified) {
      emit({ targetColumn: picked });
    } else {
      emit({ groupColumn: picked });
    }
  };

  // A cleared or non-numeric field falls back to 2 splits.
  const handleSplitsInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    emit({ nSplits: parseInt(event.target.value) || 2 });
  };

  const columnPicker = needsColumn ? (
    <div>
      <label className="block text-sm font-medium text-gray-700 mb-1">
        {isStratified ? 'Target Column' : 'Group Column'}
      </label>
      <SearchableSelect
        options={columns.map(({ name, type: columnType }) => ({
          value: name,
          label: name,
          description: `Type: ${columnType}`
        }))}
        value={isStratified ? targetColumn : groupColumn}
        onChange={(value) => handleColumnPicked(value as string)}
        placeholder={`Select ${isStratified ? 'target' : 'group'} column...`}
      />
    </div>
  ) : null;

  return (
    <div className="space-y-4">
      {columnPicker}

      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of Splits
        </label>
        <input
          type="number"
          min={2}
          max={10}
          value={nSplits}
          onChange={handleSplitsInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
      </div>
    </div>
  );
}
@@ -0,0 +1,29 @@
1
+ import React from 'react';
2
+
3
/** Props accepted by {@link LeavePOutSplitter}. */
interface LeavePOutSplitterProps {
  /** Number of samples left out of each training set. */
  p: number;
  /** Called with the new value of P after every edit. */
  onChange: (p: number) => void;
}

/**
 * Form for the leave-P-out splitter: a single numeric input for P.
 */
export function LeavePOutSplitter({ p, onChange }: LeavePOutSplitterProps) {
  // A cleared or non-numeric field (and 0) falls back to 1.
  const handleInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    const parsed = parseInt(event.target.value);
    onChange(parsed || 1);
  };

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of samples to leave out (P)
        </label>
        <input
          type="number"
          min={1}
          max={100}
          value={p}
          onChange={handleInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
        <p className="mt-1 text-sm text-gray-500">
          Each training set will have P samples removed, which form the test set.
        </p>
      </div>
    </div>
  );
}
@@ -0,0 +1,146 @@
1
+ import React from 'react';
2
+ import { FileCheck } from 'lucide-react';
3
+ import { SearchableSelect } from '../../SearchableSelect';
4
+
5
/** A selected file together with the split it is assigned to. */
interface FileConfig {
  path: string;
  type: 'train' | 'test' | 'valid';
}

/** Attributes exchanged with the parent: one list of file paths per split. */
interface PredefinedSplitConfig {
  splitter_type: 'predefined';
  train_files: string[];
  test_files: string[];
  valid_files: string[];
}

/** Props accepted by {@link PredefinedSplitter}. */
interface PredefinedSplitterProps {
  /** Current assignment of files to splits. */
  attributes: PredefinedSplitConfig;
  /** All file paths the user may choose from. */
  available_files: string[];
  /** Called with the complete attributes after every edit. */
  onChange: (attributes: PredefinedSplitConfig) => void;
}

/**
 * Flattens the three per-split path lists into one list of rows, in
 * train, test, valid order.
 *
 * Missing lists are treated as empty. NOTE(review): the declared type makes
 * them required; the guard is defensive in case a partial config is passed.
 */
function toFileConfigs(
  trainFiles: string[] | undefined,
  testFiles: string[] | undefined,
  validFiles: string[] | undefined
): FileConfig[] {
  return [
    ...(trainFiles ?? []).map(path => ({ path, type: 'train' as const })),
    ...(testFiles ?? []).map(path => ({ path, type: 'test' as const })),
    ...(validFiles ?? []).map(path => ({ path, type: 'valid' as const }))
  ];
}

/**
 * Form for the predefined splitter: the user adds files from
 * `available_files` and assigns each one to the train, test or validation
 * split. Every edit is reported through `onChange` as per-split path lists.
 */
export function PredefinedSplitter({ attributes, available_files, onChange }: PredefinedSplitterProps) {
  const { train_files, test_files, valid_files } = attributes;

  // Seed the rows from the attributes so the first render already shows them
  // instead of briefly showing the empty state.
  const [selectedFiles, setSelectedFiles] = React.useState<FileConfig[]>(
    () => toFileConfigs(train_files, test_files, valid_files)
  );

  // Re-sync the rows whenever the parent hands down different lists.
  React.useEffect(() => {
    setSelectedFiles(toFileConfigs(train_files, test_files, valid_files));
  }, [train_files, test_files, valid_files]);

  // Regroup the rows into per-split path lists and report them to the parent.
  const updateAttributes = (files: FileConfig[]) => {
    onChange({
      splitter_type: 'predefined',
      train_files: files.filter(f => f.type === 'train').map(f => f.path),
      test_files: files.filter(f => f.type === 'test').map(f => f.path),
      valid_files: files.filter(f => f.type === 'valid').map(f => f.path)
    });
  };

  // Update the local rows first, then notify the parent.
  const commit = (files: FileConfig[]) => {
    setSelectedFiles(files);
    updateAttributes(files);
  };

  // Newly added files start in the training split.
  const addFile = (path: string) => {
    // Ignore an empty selection and paths already listed (rows are keyed by path).
    if (!path || selectedFiles.some(f => f.path === path)) {
      return;
    }
    commit([...selectedFiles, { path, type: 'train' }]);
  };

  const updateFileType = (index: number, type: FileConfig['type']) => {
    commit(selectedFiles.map((file, i) => (i === index ? { ...file, type } : file)));
  };

  const removeFile = (index: number) => {
    commit(selectedFiles.filter((_, i) => i !== index));
  };

  // Only files that are not selected yet are offered in the picker.
  const unusedFiles = available_files.filter(
    path => !selectedFiles.some(f => f.path === path)
  );

  return (
    <div className="space-y-4">
      {/* File Selection */}
      <div>
        <label className="block text-sm font-medium text-gray-700">Add File</label>
        <SearchableSelect
          options={unusedFiles.map(path => ({
            value: path,
            label: path.split('/').pop() || path,
            description: path
          }))}
          value={null}
          onChange={(value) => addFile(value as string)}
          placeholder="Select a file..."
        />
      </div>

      {/* Selected files */}
      {selectedFiles.length > 0 ? (
        <div className="space-y-2">
          {selectedFiles.map((file, index) => (
            <div
              key={file.path}
              className="flex items-center justify-between p-3 bg-gray-50 rounded-lg"
            >
              <div className="flex items-center gap-2 min-w-0">
                <FileCheck className="w-4 h-4 text-gray-400 flex-shrink-0" />
                <span className="text-sm text-gray-900 truncate">
                  {file.path.split('/').pop()}
                </span>
              </div>
              <div className="flex items-center gap-2">
                <select
                  value={file.type}
                  onChange={(e) => updateFileType(index, e.target.value as FileConfig['type'])}
                  className="text-sm rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                >
                  <option value="train">Training Set</option>
                  <option value="test">Test Set</option>
                  <option value="valid">Validation Set</option>
                </select>
                {/* type="button": a bare <button> submits any enclosing form. */}
                <button
                  type="button"
                  onClick={() => removeFile(index)}
                  className="text-sm text-red-600 hover:text-red-700"
                >
                  Remove
                </button>
              </div>
            </div>
          ))}
        </div>
      ) : (
        <div className="text-center py-4 bg-gray-50 rounded-lg border-2 border-dashed border-gray-200">
          <p className="text-sm text-gray-500">
            Select files to create your train/test/validation splits
          </p>
        </div>
      )}

      {/* Validation messages */}
      {selectedFiles.length > 0 && (
        <div className="space-y-1 text-sm">
          {!selectedFiles.some(f => f.type === 'train') && (
            <p className="text-yellow-600">
              • You need at least one training set file
            </p>
          )}
          {!selectedFiles.some(f => f.type === 'test') && (
            <p className="text-yellow-600">
              • You need at least one test set file
            </p>
          )}
        </div>
      )}

    </div>
  );
}
@@ -0,0 +1,85 @@
1
+ import React from 'react';
2
+ import { Info } from 'lucide-react';
3
+ import type { RandomSplitConfig } from '../types';
4
+
5
/** Props accepted by {@link RandomSplitter}. */
interface RandomSplitterProps {
  /** Current random-split attributes (ratios and optional seed). */
  attributes: RandomSplitConfig;
  /** Called with a full copy of the attributes after every edit. */
  onChange: (attributes: RandomSplitConfig) => void;
}

/**
 * Converts the raw text of a ratio input into a ratio within [0, 1].
 *
 * Returns `undefined` for an empty or non-numeric field so the attribute is
 * cleared and the form's default is shown again, instead of storing NaN.
 */
function parseRatio(raw: string): number | undefined {
  const parsed = Number.parseFloat(raw);
  if (Number.isNaN(parsed)) {
    return undefined;
  }
  // Typed values can bypass the input's min/max, so enforce the bounds here.
  return Math.min(1, Math.max(0, parsed));
}

/** Converts the raw text of the seed input into an integer, or `undefined` when empty/invalid. */
function parseSeed(raw: string): number | undefined {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? undefined : parsed;
}

/**
 * Form for the random splitter: training/test/validation ratios and an
 * optional random seed. Unset ratios are displayed as 0.6 / 0.2 / 0.2.
 */
export function RandomSplitter({ attributes, onChange }: RandomSplitterProps) {
  const inputClassName =
    "mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm";

  // One entry per ratio input; `withRatio` builds the attributes to report.
  const ratioFields = [
    {
      id: 'train_ratio',
      label: 'Training Ratio',
      value: attributes.train_ratio ?? 0.6,
      withRatio: (ratio: number | undefined) => ({ ...attributes, train_ratio: ratio })
    },
    {
      id: 'test_ratio',
      label: 'Test Ratio',
      value: attributes.test_ratio ?? 0.2,
      withRatio: (ratio: number | undefined) => ({ ...attributes, test_ratio: ratio })
    },
    {
      id: 'valid_ratio',
      label: 'Validation Ratio',
      value: attributes.valid_ratio ?? 0.2,
      withRatio: (ratio: number | undefined) => ({ ...attributes, valid_ratio: ratio })
    }
  ];

  return (
    <div className="space-y-4">
      <div className="flex items-start gap-2">
        <Info className="w-5 h-5 text-blue-500 mt-0.5" />
        <p className="text-sm text-blue-700">
          Random splitting will automatically split your data into 60% training, 20% test, and 20% validation sets.
        </p>
      </div>

      <div className="grid grid-cols-3 gap-4">
        {ratioFields.map(field => (
          <div key={field.id}>
            <label htmlFor={field.id} className="block text-sm font-medium text-gray-700">
              {field.label}
            </label>
            <input
              type="number"
              id={field.id}
              value={field.value}
              onChange={(e) => onChange(field.withRatio(parseRatio(e.target.value)))}
              className={inputClassName}
              min="0"
              max="1"
              step="0.1"
            />
          </div>
        ))}
      </div>

      <div>
        <label htmlFor="seed" className="block text-sm font-medium text-gray-700">
          Random Seed (optional)
        </label>
        <input
          type="number"
          id="seed"
          value={attributes.seed ?? ''}
          onChange={(e) => onChange({ ...attributes, seed: parseSeed(e.target.value) })}
          className={inputClassName}
          placeholder="Enter a random seed"
        />
      </div>
    </div>
  );
}
@@ -0,0 +1,79 @@
1
+ import React from 'react';
2
+ import { SearchableSelect } from '../../SearchableSelect';
3
+
4
/** Props accepted by {@link StratifiedSplitter}. */
interface StratifiedSplitterProps {
  /** Currently selected target column. */
  targetColumn: string;
  /** Test set size, shown to the user as a percentage. */
  testSize: number;
  /** Validation set size, shown to the user as a percentage. */
  validSize: number;
  /** Columns offered in the target column picker. */
  columns: Array<{ name: string; type: string }>;
  /** Called with the complete configuration after every edit. */
  onChange: (config: { targetColumn: string; testSize: number; validSize: number }) => void;
}

/**
 * Form for the stratified splitter: a target column picker plus test and
 * validation set sizes.
 */
export function StratifiedSplitter({
  targetColumn,
  testSize,
  validSize,
  columns,
  onChange
}: StratifiedSplitterProps) {
  // Report the current configuration with the given fields replaced.
  const emit = (patch: { targetColumn?: string; testSize?: number; validSize?: number }) => {
    onChange({ targetColumn, testSize, validSize, ...patch });
  };

  // Both size inputs share the same markup; a cleared field is reported as 0.
  const renderSizeInput = (label: string, current: number, apply: (size: number) => void) => (
    <div>
      <label className="block text-sm font-medium text-gray-700 mb-1">
        {label}
      </label>
      <input
        type="number"
        min={1}
        max={40}
        value={current}
        onChange={(event) => apply(parseInt(event.target.value) || 0)}
        className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
      />
    </div>
  );

  const columnOptions = columns.map(({ name, type }) => ({
    value: name,
    label: name,
    description: `Type: ${type}`
  }));

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Target Column
        </label>
        <SearchableSelect
          options={columnOptions}
          value={targetColumn}
          onChange={(value) => emit({ targetColumn: value as string })}
          placeholder="Select target column..."
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        {renderSizeInput('Test Set Size (%)', testSize, (size) => emit({ testSize: size }))}
        {renderSizeInput('Validation Set Size (%)', validSize, (size) => emit({ validSize: size }))}
      </div>
    </div>
  );
}
@@ -0,0 +1,77 @@
1
/**
 * Splitter strategies offered in the "Split Type" select.
 *
 * Each entry carries the strategy's `value`, the `label` shown to the user and
 * a short `description`. The entries that are commented out are not offered.
 */
export const SPLITTER_OPTIONS = [
  {
    value: 'date',
    label: 'Date Split',
    description: 'Split data based on a date/time column'
  },
  {
    value: 'random',
    label: 'Random Split',
    // Matches the ratios the random splitter form shows when none are set (0.6 / 0.2 / 0.2).
    description: 'Randomly split data into train/test/validation sets (60/20/20 by default)'
  },
  {
    value: 'predefined',
    label: 'Predefined Splits',
    description: 'Use separate files for train/test/validation sets'
  },
  // {
  //   value: 'stratified',
  //   label: 'Stratified Shuffle Split',
  //   description: 'Maintain the percentage of samples for each class'
  // },
  // {
  //   value: 'stratified_kfold',
  //   label: 'Stratified K-Fold',
  //   description: 'K-fold with preserved class distribution'
  // },
  // {
  //   value: 'group_kfold',
  //   label: 'Group K-Fold',
  //   description: 'K-fold ensuring group integrity'
  // },
  // {
  //   value: 'group_shuffle',
  //   label: 'Group Shuffle Split',
  //   description: 'Random split respecting group boundaries'
  // },
  // {
  //   value: 'leave_p_out',
  //   label: 'Leave P Out',
  //   description: 'Use P samples for testing in each fold'
  // }
] as const;
43
+
44
/**
 * Starting attributes for each splitter strategy, keyed by strategy value.
 *
 * `random` is intentionally empty: the random splitter form supplies its own
 * fallbacks for unset ratios. `testSize` / `validSize` are percentages.
 */
export const DEFAULT_CONFIGS = {
  date: { date_column: '', months_test: 2, months_valid: 1 },
  random: {},
  predefined: { train_files: [], test_files: [], valid_files: [] },
  stratified: { targetColumn: '', testSize: 20, validSize: 10 },
  stratified_kfold: { targetColumn: '', nSplits: 5 },
  group_kfold: { groupColumn: '', nSplits: 5 },
  group_shuffle: { groupColumn: '', testSize: 20, validSize: 10 },
  leave_p_out: { p: 1 }
} as const;