easy_ml 0.1.4 → 0.2.0.pre.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -5
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,120 @@
|
|
1
|
+
import React, { Fragment } from 'react';
|
2
|
+
import { Tab } from '@headlessui/react';
|
3
|
+
import { Info } from 'lucide-react';
|
4
|
+
import { SearchableSelect } from '../SearchableSelect';
|
5
|
+
import { DateSplitter } from './splitters/DateSplitter';
|
6
|
+
import { RandomSplitter } from './splitters/RandomSplitter';
|
7
|
+
import { PredefinedSplitter } from './splitters/PredefinedSplitter';
|
8
|
+
import { StratifiedSplitter } from './splitters/StratifiedSplitter';
|
9
|
+
import { KFoldSplitter } from './splitters/KFoldSplitter';
|
10
|
+
import { LeavePOutSplitter } from './splitters/LeavePOutSplitter';
|
11
|
+
import { SPLITTER_OPTIONS, DEFAULT_CONFIGS } from './splitters/constants';
|
12
|
+
import type { SplitterType, SplitConfig, ColumnConfig } from './splitters/types';
|
13
|
+
|
14
|
+
/**
 * Props accepted by the SplitConfigurator component.
 */
interface SplitConfiguratorProps {
  /** Identifier of the split strategy that is currently selected. */
  type: SplitterType;

  /** Settings of the selected strategy; handed to the strategy-specific editor. */
  splitter_attributes: SplitConfig;

  /** Dataset columns offered by editors that need a column picker. */
  columns: ColumnConfig[];

  /** File paths offered by the predefined-split editor. */
  available_files: string[];

  /** Receives the strategy type and its settings whenever either one changes. */
  onChange: (type: SplitterType, attributes: SplitConfig) => void;
}
|
21
|
+
|
22
|
+
/**
 * Lets the user choose a split strategy and edit that strategy's settings.
 *
 * Renders a strategy selector followed by the editor that matches `type`.
 * Choosing a different strategy reports it together with that strategy's entry
 * in `DEFAULT_CONFIGS`; edits made inside an editor are reported under the
 * current `type`. The component holds no state of its own.
 *
 * NOTE(review): the Stratified, KFold and LeavePOut editors are handed an
 * `attributes` prop below — confirm this matches the props those components
 * actually declare.
 *
 * @param props.type currently selected split strategy
 * @param props.splitter_attributes settings of the selected strategy
 * @param props.columns dataset columns available for column pickers
 * @param props.available_files file paths available to the predefined editor
 * @param props.onChange called with `(type, attributes)` on every change
 */
export function SplitConfigurator({ type, splitter_attributes, columns, available_files, onChange }: SplitConfiguratorProps) {
  // Only datetime columns are offered to the date-based editor.
  const dateColumns = columns.filter(col => col.type === 'datetime').map(col => col.name);

  // Switching strategy replaces the current settings with the new strategy's defaults.
  const handleTypeChange = (newType: SplitterType) => {
    onChange(newType, DEFAULT_CONFIGS[newType]);
  };

  // Editors only change settings, so every edit is reported under the current strategy.
  const handleAttributesChange = (newAttributes: SplitConfig) => {
    onChange(type, newAttributes);
  };

  const renderSplitter = () => {
    switch (type) {
      case 'date':
        return (
          <DateSplitter
            attributes={splitter_attributes}
            columns={dateColumns}
            onChange={handleAttributesChange}
          />
        );
      case 'random':
        return (
          <RandomSplitter
            attributes={splitter_attributes}
            onChange={handleAttributesChange}
          />
        );
      case 'predefined':
        return (
          <PredefinedSplitter
            attributes={splitter_attributes}
            available_files={available_files}
            onChange={handleAttributesChange}
          />
        );
      case 'stratified':
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={handleAttributesChange}
          />
        );
      case 'stratified_kfold':
      case 'group_kfold':
        return (
          <KFoldSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={handleAttributesChange}
          />
        );
      case 'group_shuffle':
        // Reuses the stratified editor; its target column is reported as the group column.
        return (
          <StratifiedSplitter
            attributes={splitter_attributes}
            columns={columns}
            onChange={(attrs) => handleAttributesChange({
              groupColumn: attrs.targetColumn,
              testSize: attrs.testSize,
              validSize: attrs.validSize
            })}
          />
        );
      case 'leave_p_out':
        return (
          <LeavePOutSplitter
            attributes={splitter_attributes}
            onChange={handleAttributesChange}
          />
        );
      default:
        // Unknown strategy: render nothing inside the settings panel.
        return null;
    }
  };

  return (
    <div className="space-y-6">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Split Type
        </label>
        <SearchableSelect
          options={SPLITTER_OPTIONS}
          value={type}
          onChange={(value) => handleTypeChange(value as SplitterType)}
        />
      </div>

      <div className="bg-gray-50 rounded-lg p-4">
        {renderSplitter()}
      </div>
    </div>
  );
}
|
118
|
+
|
119
|
+
// Re-export the splitter types so consumers can import them from this module.
export type { SplitterType, ColumnConfig };
|
@@ -0,0 +1,58 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
import type { DateSplitConfig } from '../types';
|
4
|
+
|
5
|
+
/**
 * Props accepted by the DateSplitter editor.
 *
 * NOTE(review): `DateSplitConfig` is imported from '../types' in this file —
 * verify that path resolves to the module that declares the splitter types.
 */
interface DateSplitterProps {
  /** Current date-split settings shown in the form. */
  attributes: DateSplitConfig;

  /** Names of the columns offered in the date-column picker. */
  columns: string[];

  /** Receives the full, updated settings object after every edit. */
  onChange: (attributes: DateSplitConfig) => void;
}
|
10
|
+
|
11
|
+
/** Smallest month count the test/validation inputs accept. */
const MIN_MONTHS = 1;

/**
 * Parses the raw text of a months input as a base-10 integer.
 *
 * An empty or non-numeric field falls back to MIN_MONTHS so that NaN is never
 * written into the split settings or fed back into the controlled input.
 */
function parseMonths(raw: string): number {
  const parsed = Number.parseInt(raw, 10);
  return Number.isNaN(parsed) ? MIN_MONTHS : parsed;
}

/**
 * Editor for a date-based split: a date-column picker plus the number of
 * months assigned to the test and validation sets.
 *
 * Every edit calls `onChange` with a copy of `attributes` in which only the
 * edited field is replaced.
 *
 * @param props.attributes current date-split settings
 * @param props.columns column names offered in the date-column picker
 * @param props.onChange receives the updated settings object
 */
export function DateSplitter({ attributes, columns, onChange }: DateSplitterProps) {
  const inputClassName =
    "mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm";

  return (
    <div className="space-y-4">
      <div>
        <label htmlFor="date_column" className="block text-sm font-medium text-gray-700">
          Date Column
        </label>
        <SearchableSelect
          id="date_column"
          value={attributes.date_column}
          options={columns.map(col => ({ value: col, label: col }))}
          onChange={(value) => onChange({ ...attributes, date_column: value })}
          placeholder="Select date column"
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          <label htmlFor="months_test" className="block text-sm font-medium text-gray-700">
            Test Months
          </label>
          <input
            type="number"
            id="months_test"
            value={attributes.months_test}
            onChange={(e) => onChange({ ...attributes, months_test: parseMonths(e.target.value) })}
            className={inputClassName}
            min={MIN_MONTHS}
          />
        </div>

        <div>
          <label htmlFor="months_valid" className="block text-sm font-medium text-gray-700">
            Validation Months
          </label>
          <input
            type="number"
            id="months_valid"
            value={attributes.months_valid}
            onChange={(e) => onChange({ ...attributes, months_valid: parseMonths(e.target.value) })}
            className={inputClassName}
            min={MIN_MONTHS}
          />
        </div>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,68 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
|
4
|
+
/** Settings reported by the K-fold editor after an edit. */
interface KFoldSelection {
  targetColumn?: string;
  groupColumn?: string;
  nSplits: number;
}

/**
 * Props accepted by the KFoldSplitter editor.
 */
interface KFoldSplitterProps {
  /** K-fold variant; 'stratified' and 'group' additionally show a column picker. */
  type: 'kfold' | 'stratified' | 'group';

  /** Column selected for the 'stratified' variant. */
  targetColumn?: string;

  /** Column selected for the 'group' variant. */
  groupColumn?: string;

  /** Number of folds currently shown in the numeric input. */
  nSplits: number;

  /** Columns offered in the picker, each with a name and a type label. */
  columns: { name: string; type: string }[];

  /** Receives the complete settings after every edit. */
  onChange: (config: KFoldSelection) => void;
}
|
16
|
+
|
17
|
+
/**
 * Editor for K-fold style splits.
 *
 * Always shows a "Number of Splits" input. For the 'stratified' and 'group'
 * variants it also shows a column picker that edits `targetColumn` or
 * `groupColumn` respectively. Each edit calls `onChange` with the complete
 * `{ targetColumn, groupColumn, nSplits }` object.
 */
export function KFoldSplitter({ type, targetColumn, groupColumn, nSplits, columns, onChange }: KFoldSplitterProps) {
  const needsColumn = type === 'stratified' || type === 'group';

  // Built lazily so `columns` is only read when the picker is actually shown.
  const renderColumnPicker = () => {
    const stratified = type === 'stratified';
    const options = columns.map(col => ({
      value: col.name,
      label: col.name,
      description: `Type: ${col.type}`
    }));

    const handlePick = (value: unknown) => {
      if (stratified) {
        onChange({ targetColumn: value as string, groupColumn, nSplits });
      } else {
        onChange({ targetColumn, groupColumn: value as string, nSplits });
      }
    };

    return (
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          {stratified ? 'Target Column' : 'Group Column'}
        </label>
        <SearchableSelect
          options={options}
          value={stratified ? targetColumn : groupColumn}
          onChange={(value) => handlePick(value)}
          placeholder={`Select ${stratified ? 'target' : 'group'} column...`}
        />
      </div>
    );
  };

  // An empty or zero entry falls back to 2 folds.
  const handleSplitsInput = (raw: string) => {
    const folds = parseInt(raw) || 2;
    onChange({ targetColumn, groupColumn, nSplits: folds });
  };

  return (
    <div className="space-y-4">
      {needsColumn && renderColumnPicker()}

      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of Splits
        </label>
        <input
          type="number"
          min={2}
          max={10}
          value={nSplits}
          onChange={(e) => handleSplitsInput(e.target.value)}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
      </div>
    </div>
  );
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
|
3
|
+
/**
 * Props accepted by the LeavePOutSplitter editor.
 */
interface LeavePOutSplitterProps {
  /** Number of samples to leave out, as currently shown in the input. */
  p: number;

  /** Receives the new value of P after every edit. */
  onChange: (p: number) => void;
}
|
7
|
+
|
8
|
+
/**
 * Editor for a leave-P-out split: a single numeric input for P.
 *
 * Each edit calls `onChange` with the parsed integer; an empty or zero entry
 * is reported as 1.
 */
export function LeavePOutSplitter({ p, onChange }: LeavePOutSplitterProps) {
  const handleInput = (event: React.ChangeEvent<HTMLInputElement>) => {
    const leftOut = parseInt(event.target.value) || 1;
    onChange(leftOut);
  };

  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Number of samples to leave out (P)
        </label>
        <input
          type="number"
          min={1}
          max={100}
          value={p}
          onChange={handleInput}
          className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
        />
        <p className="mt-1 text-sm text-gray-500">
          Each training set will have P samples removed, which form the test set.
        </p>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,146 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { FileCheck } from 'lucide-react';
|
3
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
4
|
+
|
5
|
+
/**
 * One row of the predefined-split editor: a file and the split it feeds.
 */
interface FileConfig {
  /** Path of the file, as listed in the available files. */
  path: string;

  /** Split the file is assigned to. */
  type: 'train' | 'test' | 'valid';
}
|
9
|
+
|
10
|
+
/**
 * Settings of the predefined split: which files make up each split.
 */
interface PredefinedSplitConfig {
  /** Discriminator; always the literal 'predefined'. */
  splitter_type: 'predefined';

  /** Paths of the files assigned to the training set. */
  train_files: string[];

  /** Paths of the files assigned to the test set. */
  test_files: string[];

  /** Paths of the files assigned to the validation set. */
  valid_files: string[];
}
|
16
|
+
|
17
|
+
/**
 * Props accepted by the PredefinedSplitter editor.
 */
interface PredefinedSplitterProps {
  /** Current file-to-split assignment. */
  attributes: PredefinedSplitConfig;

  /** Paths of all files that can be assigned to a split. */
  available_files: string[];

  /** Receives the complete, updated assignment after every edit. */
  onChange: (attributes: PredefinedSplitConfig) => void;
}
|
22
|
+
|
23
|
+
/**
 * Editor for the predefined split: the user assigns each available file to the
 * training, test or validation set.
 *
 * The three path lists in `attributes` are flattened into one row list for
 * display. Adding, re-assigning or removing a row updates that list and calls
 * `onChange` with the regrouped `train_files` / `test_files` / `valid_files`.
 * Newly added files start in the training set.
 *
 * @param props.attributes current file-to-split assignment
 * @param props.available_files paths that can be assigned
 * @param props.onChange receives the updated assignment
 */
export function PredefinedSplitter({ attributes, available_files, onChange }: PredefinedSplitterProps) {
  const [selectedFiles, setSelectedFiles] = React.useState<FileConfig[]>([]);

  // Convert attributes to FileConfig array for UI
  React.useEffect(() => {
    const files: FileConfig[] = [
      ...attributes.train_files.map(path => ({ path, type: 'train' as const })),
      ...attributes.test_files.map(path => ({ path, type: 'test' as const })),
      ...attributes.valid_files.map(path => ({ path, type: 'valid' as const }))
    ];
    setSelectedFiles(files);
  }, [attributes.train_files, attributes.test_files, attributes.valid_files]);

  // Regroup the flat row list by split and report it to the parent.
  const updateAttributes = (files: FileConfig[]) => {
    onChange({
      splitter_type: 'predefined',
      train_files: files.filter(f => f.type === 'train').map(f => f.path),
      test_files: files.filter(f => f.type === 'test').map(f => f.path),
      valid_files: files.filter(f => f.type === 'valid').map(f => f.path)
    });
  };

  const addFile = (path: string) => {
    // The picker may report an empty selection; there is nothing to add then.
    if (!path) return;
    // Annotated so the literal 'train' is checked against FileConfig instead of widening to string.
    const newFiles: FileConfig[] = [...selectedFiles, { path, type: 'train' }];
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  const updateFileType = (index: number, type: FileConfig['type']) => {
    const newFiles = selectedFiles.map((file, i) =>
      i === index ? { ...file, type } : file
    );
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  const removeFile = (index: number) => {
    const newFiles = selectedFiles.filter((_, i) => i !== index);
    setSelectedFiles(newFiles);
    updateAttributes(newFiles);
  };

  // Files already assigned to a split are hidden from the "Add File" picker.
  const unusedFiles = available_files.filter(
    path => !selectedFiles.some(f => f.path === path)
  );

  return (
    <div className="space-y-4">
      {/* File Selection */}
      <div>
        <label className="block text-sm font-medium text-gray-700">Add File</label>
        <SearchableSelect
          options={unusedFiles.map(path => ({
            value: path,
            label: path.split('/').pop() || path,
            description: path
          }))}
          value={null}
          onChange={(value) => addFile(value as string)}
          placeholder="Select a file..."
        />
      </div>

      {/* Selected files */}
      {selectedFiles.length > 0 ? (
        <div className="space-y-2">
          {selectedFiles.map((file, index) => (
            <div
              key={file.path}
              className="flex items-center justify-between p-3 bg-gray-50 rounded-lg"
            >
              <div className="flex items-center gap-2 min-w-0">
                <FileCheck className="w-4 h-4 text-gray-400 flex-shrink-0" />
                <span className="text-sm text-gray-900 truncate">
                  {file.path.split('/').pop()}
                </span>
              </div>
              <div className="flex items-center gap-2">
                <select
                  value={file.type}
                  onChange={(e) => updateFileType(index, e.target.value as FileConfig['type'])}
                  className="text-sm rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
                >
                  <option value="train">Training Set</option>
                  <option value="test">Test Set</option>
                  <option value="valid">Validation Set</option>
                </select>
                {/* type="button" keeps this from submitting an enclosing form. */}
                <button
                  type="button"
                  onClick={() => removeFile(index)}
                  className="text-sm text-red-600 hover:text-red-700"
                >
                  Remove
                </button>
              </div>
            </div>
          ))}
        </div>
      ) : (
        <div className="text-center py-4 bg-gray-50 rounded-lg border-2 border-dashed border-gray-200">
          <p className="text-sm text-gray-500">
            Select files to create your train/test/validation splits
          </p>
        </div>
      )}

      {/* Validation messages */}
      {selectedFiles.length > 0 && (
        <div className="space-y-1 text-sm">
          {!selectedFiles.some(f => f.type === 'train') && (
            <p className="text-yellow-600">
              • You need at least one training set file
            </p>
          )}
          {!selectedFiles.some(f => f.type === 'test') && (
            <p className="text-yellow-600">
              • You need at least one test set file
            </p>
          )}
        </div>
      )}

    </div>
  );
}
|
@@ -0,0 +1,85 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { Info } from 'lucide-react';
|
3
|
+
import type { RandomSplitConfig } from '../types';
|
4
|
+
|
5
|
+
/**
 * Props accepted by the RandomSplitter editor.
 *
 * NOTE(review): `RandomSplitConfig` is imported from '../types' in this file —
 * verify that path resolves to the module that declares the splitter types.
 */
interface RandomSplitterProps {
  /** Current random-split settings (ratios and optional seed). */
  attributes: RandomSplitConfig;

  /** Receives the full, updated settings object after every edit. */
  onChange: (attributes: RandomSplitConfig) => void;
}
|
9
|
+
|
10
|
+
/** Settings keys edited by the three ratio inputs. */
type RatioKey = 'train_ratio' | 'test_ratio' | 'valid_ratio';

/** Label and displayed default for each ratio input, in render order. */
const RATIO_FIELDS: ReadonlyArray<{ key: RatioKey; label: string; fallback: number }> = [
  { key: 'train_ratio', label: 'Training Ratio', fallback: 0.6 },
  { key: 'test_ratio', label: 'Test Ratio', fallback: 0.2 },
  { key: 'valid_ratio', label: 'Validation Ratio', fallback: 0.2 },
];

/**
 * Parses the raw text of a ratio input.
 *
 * An empty or non-numeric field is reported as 0 (the input's minimum) so that
 * NaN is never written into the settings; `??` would not replace a NaN value.
 */
function parseRatio(raw: string): number {
  const parsed = Number.parseFloat(raw);
  return Number.isFinite(parsed) ? parsed : 0;
}

/**
 * Editor for a random split: training/test/validation ratios plus an optional
 * random seed.
 *
 * Every edit calls `onChange` with a copy of `attributes` in which only the
 * edited field is replaced. A ratio that is unset is displayed as 0.6 / 0.2 /
 * 0.2; clearing the seed field reports `seed: undefined`.
 *
 * @param props.attributes current random-split settings
 * @param props.onChange receives the updated settings object
 */
export function RandomSplitter({ attributes, onChange }: RandomSplitterProps) {
  const inputClassName =
    "mt-1 p-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 sm:text-sm";

  const handleSeedInput = (raw: string) => {
    const parsed = Number.parseInt(raw, 10);
    onChange({ ...attributes, seed: Number.isNaN(parsed) ? undefined : parsed });
  };

  return (
    <div className="space-y-4">
      {/* NOTE(review): this text is static and does not follow the ratios entered below. */}
      <div className="flex items-start gap-2">
        <Info className="w-5 h-5 text-blue-500 mt-0.5" />
        <p className="text-sm text-blue-700">
          Random splitting will automatically split your data into 60% training, 20% test, and 20% validation sets.
        </p>
      </div>

      <div className="grid grid-cols-3 gap-4">
        {RATIO_FIELDS.map(({ key, label, fallback }) => (
          <div key={key}>
            <label htmlFor={key} className="block text-sm font-medium text-gray-700">
              {label}
            </label>
            <input
              type="number"
              id={key}
              value={attributes[key] ?? fallback}
              onChange={(e) => onChange({ ...attributes, [key]: parseRatio(e.target.value) })}
              className={inputClassName}
              min="0"
              max="1"
              step="0.1"
            />
          </div>
        ))}
      </div>

      <div>
        <label htmlFor="seed" className="block text-sm font-medium text-gray-700">
          Random Seed (optional)
        </label>
        <input
          type="number"
          id="seed"
          value={attributes.seed ?? ''}
          onChange={(e) => handleSeedInput(e.target.value)}
          className={inputClassName}
          placeholder="Enter a random seed"
        />
      </div>
    </div>
  );
}
|
@@ -0,0 +1,79 @@
|
|
1
|
+
import React from 'react';
|
2
|
+
import { SearchableSelect } from '../../SearchableSelect';
|
3
|
+
|
4
|
+
/**
 * The three values the stratified splitter form edits and hands back to its
 * parent through `onChange`.
 */
type StratifiedSplitterConfig = {
  /** Name of the currently selected target column. */
  targetColumn: string;
  /** Size of the test set; the form labels this field as a percentage. */
  testSize: number;
  /** Size of the validation set; the form labels this field as a percentage. */
  validSize: number;
};

/**
 * Props for `StratifiedSplitter`: the current config values, the columns that
 * can be offered as a target, and a callback that receives the full updated
 * config.
 */
interface StratifiedSplitterProps extends StratifiedSplitterConfig {
  /** Columns to offer in the target-column picker. */
  columns: { name: string; type: string }[];
  /** Called with the complete config whenever any field changes. */
  onChange: (config: StratifiedSplitterConfig) => void;
}
|
11
|
+
|
12
|
+
/** Upper bound for either hold-out percentage; mirrors the inputs' `max` attribute. */
const MAX_SPLIT_PERCENT = 40;

/**
 * Turns the raw text of a percentage input into the number reported to the parent.
 *
 * Text that does not parse (for example a cleared field) yields 0 so the field
 * can still be emptied while typing. Parsed values are clamped to
 * 0..MAX_SPLIT_PERCENT because the `min`/`max` attributes only drive browser
 * validation and the spinner buttons; they do not stop out-of-range typed input.
 */
function parseSplitPercent(raw: string): number {
  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed)) {
    return 0;
  }
  return Math.min(Math.max(parsed, 0), MAX_SPLIT_PERCENT);
}

/**
 * Form for configuring a stratified split: a target-column picker plus test
 * and validation set sizes.
 *
 * The component holds no state of its own. Every edit calls `onChange` with
 * the complete config (`targetColumn`, `testSize`, `validSize`), carrying the
 * untouched fields over from the current props.
 *
 * @param targetColumn Currently selected target column name.
 * @param testSize     Current test set size, shown in the "Test Set Size (%)" input.
 * @param validSize    Current validation set size, shown in the "Validation Set Size (%)" input.
 * @param columns      Columns offered in the picker; each is listed by name with its type as description.
 * @param onChange     Receives the full updated config after any field changes.
 */
export function StratifiedSplitter({
  targetColumn,
  testSize,
  validSize,
  columns,
  onChange
}: StratifiedSplitterProps) {
  return (
    <div className="space-y-4">
      <div>
        <label className="block text-sm font-medium text-gray-700 mb-1">
          Target Column
        </label>
        <SearchableSelect
          options={columns.map(col => ({
            value: col.name,
            label: col.name,
            description: `Type: ${col.type}`
          }))}
          value={targetColumn}
          onChange={(value) => onChange({
            targetColumn: value as string,
            testSize,
            validSize
          })}
          placeholder="Select target column..."
        />
      </div>

      <div className="grid grid-cols-2 gap-4">
        <div>
          {/* htmlFor/id pair ties the label to its input for assistive tech and click-to-focus. */}
          <label
            htmlFor="stratified_test_size"
            className="block text-sm font-medium text-gray-700 mb-1"
          >
            Test Set Size (%)
          </label>
          <input
            type="number"
            id="stratified_test_size"
            min={1}
            max={MAX_SPLIT_PERCENT}
            value={testSize}
            onChange={(e) => onChange({
              targetColumn,
              testSize: parseSplitPercent(e.target.value),
              validSize
            })}
            className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
          />
        </div>
        <div>
          <label
            htmlFor="stratified_valid_size"
            className="block text-sm font-medium text-gray-700 mb-1"
          >
            Validation Set Size (%)
          </label>
          <input
            type="number"
            id="stratified_valid_size"
            min={1}
            max={MAX_SPLIT_PERCENT}
            value={validSize}
            onChange={(e) => onChange({
              targetColumn,
              testSize,
              validSize: parseSplitPercent(e.target.value)
            })}
            className="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
          />
        </div>
      </div>
    </div>
  );
}
|
@@ -0,0 +1,77 @@
|
|
1
|
+
/**
 * Splitter strategies offered for selection. Each entry carries the
 * strategy's `value`, a display `label`, and a one-line `description`.
 *
 * The 'random' description deliberately names no fixed ratio: the ratio
 * inputs elsewhere in this file default test and validation to 0.2 each and
 * let the user change them, so a hard-coded "70/20/10" would be inaccurate.
 *
 * The commented-out entries below are strategies that are currently not
 * offered; DEFAULT_CONFIGS still carries defaults for them.
 */
export const SPLITTER_OPTIONS = [
  {
    value: 'date',
    label: 'Date Split',
    description: 'Split data based on a date/time column'
  },
  {
    value: 'random',
    label: 'Random Split',
    description: 'Randomly split data into train/test/validation sets'
  },
  {
    value: 'predefined',
    label: 'Predefined Splits',
    description: 'Use separate files for train/test/validation sets'
  },
  // {
  //   value: 'stratified',
  //   label: 'Stratified Shuffle Split',
  //   description: 'Maintain the percentage of samples for each class'
  // },
  // {
  //   value: 'stratified_kfold',
  //   label: 'Stratified K-Fold',
  //   description: 'K-fold with preserved class distribution'
  // },
  // {
  //   value: 'group_kfold',
  //   label: 'Group K-Fold',
  //   description: 'K-fold ensuring group integrity'
  // },
  // {
  //   value: 'group_shuffle',
  //   label: 'Group Shuffle Split',
  //   description: 'Random split respecting group boundaries'
  // },
  // {
  //   value: 'leave_p_out',
  //   label: 'Leave P Out',
  //   description: 'Use P samples for testing in each fold'
  // }
] as const;
|
43
|
+
|
44
|
+
/**
 * Starting configuration for each splitter type, keyed by the splitter's
 * `value` string.
 *
 * Keys are written exactly as consumers read them: the date and predefined
 * entries use snake_case keys, the remaining entries use camelCase.
 */
export const DEFAULT_CONFIGS = {
  // Date split: no column chosen yet; month counts for the test/validation windows.
  date: {
    date_column: '',
    months_test: 2,
    months_valid: 1,
  },

  // Random split: starts with no explicit settings.
  random: {},

  // Predefined split: file lists start empty.
  predefined: {
    train_files: [],
    test_files: [],
    valid_files: [],
  },

  // Stratified shuffle split: no target chosen yet.
  stratified: {
    targetColumn: '',
    testSize: 20,
    validSize: 10,
  },

  // Stratified k-fold.
  stratified_kfold: {
    targetColumn: '',
    nSplits: 5,
  },

  // Group k-fold.
  group_kfold: {
    groupColumn: '',
    nSplits: 5,
  },

  // Group shuffle split.
  group_shuffle: {
    groupColumn: '',
    testSize: 20,
    validSize: 10,
  },

  // Leave-P-out.
  leave_p_out: {
    p: 1,
  },
} as const;
|