easy_ml 0.1.3 → 0.2.0.pre.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -4
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,120 @@
|
|
1
|
+
import React, { Fragment } from 'react';
|
2
|
+
import { Tab } from '@headlessui/react';
|
3
|
+
import { Info } from 'lucide-react';
|
4
|
+
import { SearchableSelect } from '../SearchableSelect';
|
5
|
+
import { DateSplitter } from './splitters/DateSplitter';
|
6
|
+
import { RandomSplitter } from './splitters/RandomSplitter';
|
7
|
+
import { PredefinedSplitter } from './splitters/PredefinedSplitter';
|
8
|
+
import { StratifiedSplitter } from './splitters/StratifiedSplitter';
|
9
|
+
import { KFoldSplitter } from './splitters/KFoldSplitter';
|
10
|
+
import { LeavePOutSplitter } from './splitters/LeavePOutSplitter';
|
11
|
+
import { SPLITTER_OPTIONS, DEFAULT_CONFIGS } from './splitters/constants';
|
12
|
+
import type { SplitterType, SplitConfig, ColumnConfig } from './splitters/types';
|
13
|
+
|
14
|
+
/** Props accepted by {@link SplitConfigurator}. */
interface SplitConfiguratorProps {
  /** Currently selected splitter type; decides which splitter form is rendered. */
  type: SplitterType;
  /** Configuration handed to the rendered splitter form. */
  splitter_attributes: SplitConfig;
  /** Dataset columns; those with `type === 'datetime'` are offered to the date splitter. */
  columns: ColumnConfig[];
  /** File paths forwarded to the predefined splitter. */
  available_files: string[];
  /** Invoked with the splitter type and its attributes whenever either one changes. */
  onChange: (type: SplitterType, attributes: SplitConfig) => void;
}

/**
 * Split-strategy picker: a "Split Type" select followed by the form that
 * belongs to the selected splitter type.
 *
 * Choosing a different type reports `DEFAULT_CONFIGS[newType]` through
 * `onChange`; edits made inside a splitter form are reported through
 * `onChange` together with the current type. An unrecognised type renders
 * no form.
 *
 * The previously destructured `onSplitterChange` was removed: it is not part
 * of `SplitConfiguratorProps` and was never read.
 */
export function SplitConfigurator({ type, splitter_attributes, columns, available_files, onChange }: SplitConfiguratorProps) {
  // Only datetime columns are meaningful for a date-based split.
  const dateColumns = columns.filter(col => col.type === 'datetime').map(col => col.name);

  // Switching the type discards the current attributes in favour of that type's defaults.
  const handleTypeChange = (newType: SplitterType) => {
    onChange(newType, DEFAULT_CONFIGS[newType]);
  };

  const handleSplitterChange = (type: SplitterType, newAttributes: SplitConfig) => {
    onChange(type, newAttributes);
  };

  const renderSplitter = () => {
    switch (type) {
      case 'date':
        return (
          <DateSplitter
            attributes={splitter_attributes}
            columns={dateColumns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'random':
        return (
          <RandomSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'predefined':
        return (
          <PredefinedSplitter
            attributes={splitter_attributes}
            available_files={available_files}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      // NOTE(review): StratifiedSplitter, KFoldSplitter and LeavePOutSplitter
      // declare flat props (targetColumn/testSize/validSize, nSplits, p) and no
      // `attributes` prop, so the `attributes` passed below does not match
      // their declared props. Reconcile against the SplitConfig definition.
      case 'stratified':
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'stratified_kfold':
      case 'group_kfold':
        return (
          <KFoldSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      case 'group_shuffle':
        // Reuses the stratified form; its chosen column is reported as the group column.
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleSplitterChange(type, {
              groupColumn: attrs.targetColumn,
              testSize: attrs.testSize,
              validSize: attrs.validSize
            })}
          />
        );
      case 'leave_p_out':
        return (
          <LeavePOutSplitter
            attributes={splitter_attributes}
            onChange={(attrs) => handleSplitterChange(type, attrs)}
          />
        );
      default:
        return null;
    }
  };

  return (
    <div className="space-y-6">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Split Type
        </label>
        <SearchableSelect
          options={SPLITTER_OPTIONS}
          value={type}
          onChange={(value) => handleTypeChange(value as SplitterType)}
        />
      </div>

      <div className="bg-gray-50 rounded-lg p-4">
        {renderSplitter()}
      </div>
    </div>
  );
}
|
118
|
+
|
119
|
+
export type { SplitterType };
|
120
|
+
export type { ColumnConfig };
|
@@ -0,0 +1,58 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
import type { DateSplitConfig } from '../types';
|
4
|
+
|
5
|
+
/** Props accepted by {@link DateSplitter}. */
interface DateSplitterProps {
  /** Current date-split configuration. */
  attributes: DateSplitConfig;
  /** Column names offered in the "Date Column" select. */
  columns: string[];
  /** Invoked with a full copy of the configuration after every edit. */
  onChange: (attributes: DateSplitConfig) => void;
}

/**
 * Form for a date-based split: the date column plus the number of months
 * assigned to the test and validation sets.
 *
 * Every edit calls `onChange` with the existing attributes and the edited
 * field replaced.
 */
export function DateSplitter({ attributes, columns, onChange }: DateSplitterProps) {
  // An emptied number field parses to NaN; fall back to the field minimum (1)
  // so NaN never reaches the configuration. Explicit radix keeps parsing decimal.
  const toMonths = (raw: string): number => {
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) ? 1 : parsed;
  };

  return (
    <div className="space-y-4">
      <div>
        <label htmlFor="date_column" className="block text-sm font-medium text-gray-700">
          Date Column
        </label>
        <SearchableSelect
          id="date_column"
          value={attributes.date_column}
          options={columns.map(col => ({ value: col, label: col }))}
          onChange={(value) => onChange({ ...attributes, date_column: value })}
          placeholder="Select date column"
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          <label htmlFor="months_test" className="block text-sm font-medium text-gray-700">
            Test Months
          </label>
          <input
            type="number"
            id="months_test"
            value={attributes.months_test}
            onChange={(e) => onChange({ ...attributes, months_test: toMonths(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="1"
          />
        </div>

        <div>
          <label htmlFor="months_valid" className="block text-sm font-medium text-gray-700">
            Validation Months
          </label>
          <input
            type="number"
            id="months_valid"
            value={attributes.months_valid}
            onChange={(e) => onChange({ ...attributes, months_valid: toMonths(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="1"
          />
        </div>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
|
4
|
+
/** Props accepted by {@link KFoldSplitter}. */
interface KFoldSplitterProps {
  /** K-fold flavour; `stratified` and `group` additionally show a column picker. */
  type: 'kfold' | 'stratified' | 'group';
  /** Column selected when `type` is `stratified`. */
  targetColumn?: string;
  /** Column selected when `type` is `group`. */
  groupColumn?: string;
  /** Current number of folds. */
  nSplits: number;
  /** Columns offered in the column picker. */
  columns: Array<{ name: string; type: string }>;
  /** Invoked with the complete configuration after every edit. */
  onChange: (config: {
    targetColumn?: string;
    groupColumn?: string;
    nSplits: number;
  }) => void;
}

/**
 * Form for k-fold style splits: an optional target/group column picker and
 * the number of splits. An unparsable or zero split count is reported as 2.
 */
export function KFoldSplitter(props: KFoldSplitterProps) {
  const { type, targetColumn, groupColumn, nSplits, columns, onChange } = props;

  const isStratified = type === 'stratified';
  const showColumnPicker = isStratified || type === 'group';
  const columnKind = isStratified ? 'target' : 'group';

  // Built lazily so the column list is only touched when the picker is shown.
  const renderColumnPicker = () => (
    <div>
      <label className="block text-sm font-medium text-gray-700 mb-1">
        {isStratified ? 'Target Column' : 'Group Column'}
      </label>
      <SearchableSelect
        options={columns.map(({ name, type: columnType }) => ({
          value: name,
          label: name,
          description: `Type: ${columnType}`
        }))}
        value={isStratified ? targetColumn : groupColumn}
        onChange={(value) =>
          onChange(
            isStratified
              ? { targetColumn: value as string, groupColumn, nSplits }
              : { targetColumn, groupColumn: value as string, nSplits }
          )
        }
        placeholder={`Select ${columnKind} column...`}
      />
    </div>
  );

  return (
    <div className="space-y-4">
      {showColumnPicker && renderColumnPicker()}

      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of Splits
        </label>
        <input
          type="number"
          min={2}
          max={10}
          value={nSplits}
          onChange={(event) => {
            const requested = parseInt(event.target.value) || 2;
            onChange({ targetColumn, groupColumn, nSplits: requested });
          }}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
      </div>
    </div>
  );
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
|
3
|
+
/** Props accepted by {@link LeavePOutSplitter}. */
interface LeavePOutSplitterProps {
  /** Current number of samples to leave out. */
  p: number;
  /** Invoked with the new value of P after every edit. */
  onChange: (p: number) => void;
}

/**
 * Single-field form for leave-P-out splitting. An unparsable or zero entry
 * is reported as 1.
 */
export function LeavePOutSplitter(props: LeavePOutSplitterProps) {
  const { p, onChange } = props;

  const handleInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    const requested = parseInt(event.target.value) || 1;
    onChange(requested);
  };

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of samples to leave out (P)
        </label>
        <input
          type="number"
          min={1}
          max={100}
          value={p}
          onChange={handleInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
        <p className="mt-1 text-sm text-gray-500">
          Each training set will have P samples removed, which form the test set.
        </p>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { FileCheck } from 'lucide-react';
|
3
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
4
|
+
|
5
|
+
/** One selected file together with the split it is assigned to (UI-only shape). */
interface FileConfig {
  path: string;
  type: 'train' | 'test' | 'valid';
}

/** Configuration reported to the parent: file paths grouped by split. */
interface PredefinedSplitConfig {
  splitter_type: 'predefined';
  train_files: string[];
  test_files: string[];
  valid_files: string[];
}

/** Props accepted by {@link PredefinedSplitter}. */
interface PredefinedSplitterProps {
  /** Current assignment of files to splits. */
  attributes: PredefinedSplitConfig;
  /** All file paths that may be assigned. */
  available_files: string[];
  /** Invoked with the regrouped configuration after every add/change/remove. */
  onChange: (attributes: PredefinedSplitConfig) => void;
}

/**
 * Form for a predefined split: the user picks files from `available_files`
 * and assigns each one to the training, test or validation set.
 *
 * Newly added files start in the training set. A warning is shown while the
 * selection has no training file or no test file.
 */
export function PredefinedSplitter({ attributes, available_files, onChange }: PredefinedSplitterProps) {
  const [selectedFiles, setSelectedFiles] = React.useState<FileConfig[]>([]);

  // Convert attributes to FileConfig array for UI
  React.useEffect(() => {
    const files: FileConfig[] = [
      ...attributes.train_files.map(path => ({ path, type: 'train' as const })),
      ...attributes.test_files.map(path => ({ path, type: 'test' as const })),
      ...attributes.valid_files.map(path => ({ path, type: 'valid' as const }))
    ];
    setSelectedFiles(files);
  }, [attributes.train_files, attributes.test_files, attributes.valid_files]);

  const addFile = (path: string) => {
    // Explicit annotation: without it the literal 'train' widens to `string`
    // and the array is no longer assignable to FileConfig[].
    const newFiles: FileConfig[] = [...selectedFiles, { path, type: 'train' }];
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  const updateFileType = (index: number, type: 'train' | 'test' | 'valid') => {
    const newFiles = selectedFiles.map((file, i) =>
      i === index ? { ...file, type } : file
    );
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  const removeFile = (index: number) => {
    const newFiles = selectedFiles.filter((_, i) => i !== index);
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  // Regroup the flat UI list into the per-split arrays the parent expects.
  const updateAttributes = (files: FileConfig[]) => {
    onChange({
      splitter_type: 'predefined',
      train_files: files.filter(f => f.type === 'train').map(f => f.path),
      test_files: files.filter(f => f.type === 'test').map(f => f.path),
      valid_files: files.filter(f => f.type === 'valid').map(f => f.path)
    });
  };

  // Files already selected are not offered again.
  const unusedFiles = available_files.filter(
    path => !selectedFiles.find(f => f.path === path)
  );

  return (
    <div className="space-y-4">
      {/* File Selection */}
      <div>
        <label className="block text-sm font-medium text-gray-700">Add File</label>
        <SearchableSelect
          options={unusedFiles.map(path => ({
            value: path,
            label: path.split('/').pop() || path,
            description: path
          }))}
          value={null}
          onChange={(value) => addFile(value as string)}
          placeholder="Select a file..."
        />
      </div>

      {/* Selected files */}
      {selectedFiles.length > 0 ? (
        <div className="space-y-2">
          {selectedFiles.map((file, index) => (
            <div
              key={file.path}
              className="flex items-center justify-between p-3 bg-gray-50 rounded-lg"
            >
              <div className="flex items-center gap-2 min-w-0">
                <FileCheck className="w-4 h-4 text-gray-400 flex-shrink-0" />
                <span className="text-sm text-gray-900 truncate">
                  {file.path.split('/').pop()}
                </span>
              </div>
              <div className="flex items-center gap-2">
                <select
                  value={file.type}
                  onChange={(e) => updateFileType(index, e.target.value as 'train' | 'test' | 'valid')}
                  className="text-sm rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                >
                  <option value="train">Training Set</option>
                  <option value="test">Test Set</option>
                  <option value="valid">Validation Set</option>
                </select>
                {/* type="button": a bare <button> defaults to "submit" and would submit an enclosing form */}
                <button
                  type="button"
                  onClick={() => removeFile(index)}
                  className="text-sm text-red-600 hover:text-red-700"
                >
                  Remove
                </button>
              </div>
            </div>
          ))}
        </div>
      ) : (
        <div className="text-center py-4 bg-gray-50 rounded-lg border-2 border-dashed border-gray-200">
          <p className="text-sm text-gray-500">
            Select files to create your train/test/validation splits
          </p>
        </div>
      )}

      {/* Validation messages */}
      {selectedFiles.length > 0 && (
        <div className="space-y-1 text-sm">
          {!selectedFiles.some(f => f.type === 'train') && (
            <p className="text-yellow-600">
              • You need at least one training set file
            </p>
          )}
          {!selectedFiles.some(f => f.type === 'test') && (
            <p className="text-yellow-600">
              • You need at least one test set file
            </p>
          )}
        </div>
      )}

    </div>
  );
}
|
@@ -0,0 +1,85 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { Info } from 'lucide-react';
|
3
|
+
import type { RandomSplitConfig } from '../types';
|
4
|
+
|
5
|
+
/** Props accepted by {@link RandomSplitter}. */
interface RandomSplitterProps {
  /** Current random-split configuration. */
  attributes: RandomSplitConfig;
  /** Invoked with a full copy of the configuration after every edit. */
  onChange: (attributes: RandomSplitConfig) => void;
}

/**
 * Form for a random split: training/test/validation ratios and an optional
 * seed. Ratios that are not set yet are displayed as 0.6 / 0.2 / 0.2.
 *
 * Every edit calls `onChange` with the existing attributes and the edited
 * field replaced; clearing the seed field reports `seed: undefined`.
 */
export function RandomSplitter({ attributes, onChange }: RandomSplitterProps) {
  // An emptied ratio field parses to NaN, which `?? default` would not catch
  // on the next render; fall back to the field minimum (0) instead.
  const toRatio = (raw: string): number => {
    const parsed = parseFloat(raw);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  return (
    <div className="space-y-4">
      <div className="flex items-start gap-2">
        <Info className="w-5 h-5 text-blue-500 mt-0.5" />
        <p className="text-sm text-blue-700">
          Random splitting will automatically split your data into 60% training, 20% test, and 20% validation sets.
        </p>
      </div>

      <div className="grid grid-cols-3 gap-4">
        <div>
          <label htmlFor="train_ratio" className="block text-sm font-medium text-gray-700">
            Training Ratio
          </label>
          <input
            type="number"
            id="train_ratio"
            value={attributes.train_ratio ?? 0.6}
            onChange={(e) => onChange({ ...attributes, train_ratio: toRatio(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="0"
            max="1"
            step="0.1"
          />
        </div>

        <div>
          <label htmlFor="test_ratio" className="block text-sm font-medium text-gray-700">
            Test Ratio
          </label>
          <input
            type="number"
            id="test_ratio"
            value={attributes.test_ratio ?? 0.2}
            onChange={(e) => onChange({ ...attributes, test_ratio: toRatio(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="0"
            max="1"
            step="0.1"
          />
        </div>

        <div>
          <label htmlFor="valid_ratio" className="block text-sm font-medium text-gray-700">
            Validation Ratio
          </label>
          <input
            type="number"
            id="valid_ratio"
            value={attributes.valid_ratio ?? 0.2}
            onChange={(e) => onChange({ ...attributes, valid_ratio: toRatio(e.target.value) })}
            className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
            min="0"
            max="1"
            step="0.1"
          />
        </div>
      </div>

      <div>
        <label htmlFor="seed" className="block text-sm font-medium text-gray-700">
          Random Seed (optional)
        </label>
        <input
          type="number"
          id="seed"
          value={attributes.seed ?? ''}
          onChange={(e) => onChange({ ...attributes, seed: e.target.value ? parseInt(e.target.value) : undefined })}
          className="mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm"
          placeholder="Enter a random seed"
        />
      </div>
    </div>
  );
}
|
@@ -0,0 +1,79 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
|
4
|
+
interface StratifiedSplitterProps {
|
5
|
+
targetColumn: string;
|
6
|
+
testSize: number;
|
7
|
+
validSize: number;
|
8
|
+
columns: Array<{ name: string; type: string }>;
|
9
|
+
onChange: (config: { targetColumn: string; testSize: number; validSize: number }) => void;
|
10
|
+
}
|
11
|
+
|
12
|
+
/**
 * Controls for a stratified split: a target-column picker plus numeric inputs
 * for the test and validation set sizes.
 *
 * The component keeps no state of its own. Every edit calls `onChange` with the
 * full `{ targetColumn, testSize, validSize }` config, carrying the untouched
 * fields over from the current props.
 *
 * @param targetColumn - Currently selected target column name.
 * @param testSize - Current value of the test set size input.
 * @param validSize - Current value of the validation set size input.
 * @param columns - Columns listed in the picker; each option shows the column
 *   name and a `Type: <type>` description.
 * @param onChange - Receives the complete updated config after any edit.
 */
export function StratifiedSplitter({
  targetColumn,
  testSize,
  validSize,
  columns,
  onChange
}: StratifiedSplitterProps) {
  // Empty or non-numeric input parses to NaN; report 0 in that case so the
  // config always carries a number. The radix is explicit so the text is
  // always read as base 10.
  const toWholeNumber = (raw: string): number => parseInt(raw, 10) || 0;

  // Ids tie each <label> to its <input> so clicking the label focuses the
  // field and assistive technology announces the field name.
  const testSizeId = 'stratified_test_size';
  const validSizeId = 'stratified_valid_size';

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Target Column
        </label>
        <SearchableSelect
          options={columns.map(col => ({
            value: col.name,
            label: col.name,
            description: `Type: ${col.type}`
          }))}
          value={targetColumn}
          onChange={(value) => onChange({
            targetColumn: value as string,
            testSize,
            validSize
          })}
          placeholder="Select target column..."
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          <label htmlFor={testSizeId} className="block text-sm font-medium text-gray-700 mb-1">
            Test Set Size (%)
          </label>
          <input
            type="number"
            id={testSizeId}
            min={1}
            max={40}
            value={testSize}
            onChange={(e) => onChange({
              targetColumn,
              testSize: toWholeNumber(e.target.value),
              validSize
            })}
            className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
          />
        </div>
        <div>
          <label htmlFor={validSizeId} className="block text-sm font-medium text-gray-700 mb-1">
            Validation Set Size (%)
          </label>
          <input
            type="number"
            id={validSizeId}
            min={1}
            max={40}
            value={validSize}
            onChange={(e) => onChange({
              targetColumn,
              testSize,
              validSize: toWholeNumber(e.target.value)
            })}
            className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
          />
        </div>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,77 @@
|
|
1
|
+
/** Picker entry for splitting on a date/time column. */
const dateSplitOption = {
  value: 'date',
  label: 'Date Split',
  description: 'Split data based on a date/time column'
} as const;

/**
 * Picker entry for a random train/test/validation split.
 *
 * NOTE(review): the ratio form that edits `test_ratio`/`valid_ratio` falls back
 * to 0.2 for both, which does not match the "70/20/10" wording below — confirm
 * which is the actual default.
 */
const randomSplitOption = {
  value: 'random',
  label: 'Random Split',
  description: 'Randomly split data into train/test/validation sets (70/20/10)'
} as const;

/** Picker entry for using separate, pre-split files. */
const predefinedSplitOption = {
  value: 'predefined',
  label: 'Predefined Splits',
  description: 'Use separate files for train/test/validation sets'
} as const;

/**
 * Splitter strategies currently exposed, in display order. Each entry carries
 * the strategy `value`, a human-readable `label`, and a short `description`.
 */
export const SPLITTER_OPTIONS = [
  dateSplitOption,
  randomSplitOption,
  predefinedSplitOption
  // Disabled strategies, kept for reference. Each still has an entry in
  // DEFAULT_CONFIGS; to expose one, add it to this tuple:
  // { value: 'stratified', label: 'Stratified Shuffle Split', description: 'Maintain the percentage of samples for each class' },
  // { value: 'stratified_kfold', label: 'Stratified K-Fold', description: 'K-fold with preserved class distribution' },
  // { value: 'group_kfold', label: 'Group K-Fold', description: 'K-fold ensuring group integrity' },
  // { value: 'group_shuffle', label: 'Group Shuffle Split', description: 'Random split respecting group boundaries' },
  // { value: 'leave_p_out', label: 'Leave P Out', description: 'Use P samples for testing in each fold' }
] as const;
|
43
|
+
|
44
|
+
/** Test/validation sizes shared by the size-based strategies below. */
const holdoutSizes = { testSize: 20, validSize: 10 } as const;

/** Fold count shared by the k-fold strategies below. */
const kFoldSplits = { nSplits: 5 } as const;

/**
 * Initial configuration for each splitter strategy, keyed by strategy value.
 *
 * Entries exist for strategies that are commented out of SPLITTER_OPTIONS as
 * well as for the ones currently exposed. Key casing is mixed (snake_case for
 * `date`/`predefined`, camelCase for the rest) and is reproduced as-is.
 */
export const DEFAULT_CONFIGS = {
  date: { date_column: '', months_test: 2, months_valid: 1 },
  random: {},
  predefined: { train_files: [], test_files: [], valid_files: [] },
  stratified: { targetColumn: '', ...holdoutSizes },
  stratified_kfold: { targetColumn: '', ...kFoldSplits },
  group_kfold: { groupColumn: '', ...kFoldSplits },
  group_shuffle: { groupColumn: '', ...holdoutSizes },
  leave_p_out: { p: 1 }
} as const;
|