easy_ml 0.1.4 → 0.2.0.pre.rc1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +234 -26
- data/Rakefile +45 -0
- data/app/controllers/easy_ml/application_controller.rb +67 -0
- data/app/controllers/easy_ml/columns_controller.rb +38 -0
- data/app/controllers/easy_ml/datasets_controller.rb +156 -0
- data/app/controllers/easy_ml/datasources_controller.rb +88 -0
- data/app/controllers/easy_ml/deploys_controller.rb +20 -0
- data/app/controllers/easy_ml/models_controller.rb +151 -0
- data/app/controllers/easy_ml/retraining_runs_controller.rb +19 -0
- data/app/controllers/easy_ml/settings_controller.rb +59 -0
- data/app/frontend/components/AlertProvider.tsx +108 -0
- data/app/frontend/components/DatasetPreview.tsx +161 -0
- data/app/frontend/components/EmptyState.tsx +28 -0
- data/app/frontend/components/ModelCard.tsx +255 -0
- data/app/frontend/components/ModelDetails.tsx +334 -0
- data/app/frontend/components/ModelForm.tsx +384 -0
- data/app/frontend/components/Navigation.tsx +300 -0
- data/app/frontend/components/Pagination.tsx +72 -0
- data/app/frontend/components/Popover.tsx +55 -0
- data/app/frontend/components/PredictionStream.tsx +105 -0
- data/app/frontend/components/ScheduleModal.tsx +726 -0
- data/app/frontend/components/SearchInput.tsx +23 -0
- data/app/frontend/components/SearchableSelect.tsx +132 -0
- data/app/frontend/components/dataset/AutosaveIndicator.tsx +39 -0
- data/app/frontend/components/dataset/ColumnConfigModal.tsx +431 -0
- data/app/frontend/components/dataset/ColumnFilters.tsx +256 -0
- data/app/frontend/components/dataset/ColumnList.tsx +101 -0
- data/app/frontend/components/dataset/FeatureConfigPopover.tsx +57 -0
- data/app/frontend/components/dataset/FeaturePicker.tsx +205 -0
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +704 -0
- data/app/frontend/components/dataset/SplitConfigurator.tsx +120 -0
- data/app/frontend/components/dataset/splitters/DateSplitter.tsx +58 -0
- data/app/frontend/components/dataset/splitters/KFoldSplitter.tsx +68 -0
- data/app/frontend/components/dataset/splitters/LeavePOutSplitter.tsx +29 -0
- data/app/frontend/components/dataset/splitters/PredefinedSplitter.tsx +146 -0
- data/app/frontend/components/dataset/splitters/RandomSplitter.tsx +85 -0
- data/app/frontend/components/dataset/splitters/StratifiedSplitter.tsx +79 -0
- data/app/frontend/components/dataset/splitters/constants.ts +77 -0
- data/app/frontend/components/dataset/splitters/types.ts +168 -0
- data/app/frontend/components/dataset/splitters/utils.ts +53 -0
- data/app/frontend/components/features/CodeEditor.tsx +46 -0
- data/app/frontend/components/features/DataPreview.tsx +150 -0
- data/app/frontend/components/features/FeatureCard.tsx +88 -0
- data/app/frontend/components/features/FeatureForm.tsx +235 -0
- data/app/frontend/components/features/FeatureGroupCard.tsx +54 -0
- data/app/frontend/components/settings/PluginSettings.tsx +81 -0
- data/app/frontend/components/ui/badge.tsx +44 -0
- data/app/frontend/components/ui/collapsible.tsx +9 -0
- data/app/frontend/components/ui/scroll-area.tsx +46 -0
- data/app/frontend/components/ui/separator.tsx +29 -0
- data/app/frontend/entrypoints/App.tsx +40 -0
- data/app/frontend/entrypoints/Application.tsx +24 -0
- data/app/frontend/hooks/useAutosave.ts +61 -0
- data/app/frontend/layouts/Layout.tsx +38 -0
- data/app/frontend/lib/utils.ts +6 -0
- data/app/frontend/mockData.ts +272 -0
- data/app/frontend/pages/DatasetDetailsPage.tsx +103 -0
- data/app/frontend/pages/DatasetsPage.tsx +261 -0
- data/app/frontend/pages/DatasourceFormPage.tsx +147 -0
- data/app/frontend/pages/DatasourcesPage.tsx +261 -0
- data/app/frontend/pages/EditModelPage.tsx +45 -0
- data/app/frontend/pages/EditTransformationPage.tsx +56 -0
- data/app/frontend/pages/ModelsPage.tsx +115 -0
- data/app/frontend/pages/NewDatasetPage.tsx +366 -0
- data/app/frontend/pages/NewModelPage.tsx +45 -0
- data/app/frontend/pages/NewTransformationPage.tsx +43 -0
- data/app/frontend/pages/SettingsPage.tsx +272 -0
- data/app/frontend/pages/ShowModelPage.tsx +30 -0
- data/app/frontend/pages/TransformationsPage.tsx +95 -0
- data/app/frontend/styles/application.css +100 -0
- data/app/frontend/types/dataset.ts +146 -0
- data/app/frontend/types/datasource.ts +33 -0
- data/app/frontend/types/preprocessing.ts +1 -0
- data/app/frontend/types.ts +113 -0
- data/app/helpers/easy_ml/application_helper.rb +10 -0
- data/app/jobs/easy_ml/application_job.rb +21 -0
- data/app/jobs/easy_ml/batch_job.rb +46 -0
- data/app/jobs/easy_ml/compute_feature_job.rb +19 -0
- data/app/jobs/easy_ml/deploy_job.rb +13 -0
- data/app/jobs/easy_ml/finalize_feature_job.rb +15 -0
- data/app/jobs/easy_ml/refresh_dataset_job.rb +32 -0
- data/app/jobs/easy_ml/schedule_retraining_job.rb +11 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +17 -0
- data/app/jobs/easy_ml/training_job.rb +62 -0
- data/app/models/easy_ml/adapters/base_adapter.rb +45 -0
- data/app/models/easy_ml/adapters/polars_adapter.rb +77 -0
- data/app/models/easy_ml/cleaner.rb +82 -0
- data/app/models/easy_ml/column.rb +124 -0
- data/app/models/easy_ml/column_history.rb +30 -0
- data/app/models/easy_ml/column_list.rb +122 -0
- data/app/models/easy_ml/concerns/configurable.rb +61 -0
- data/app/models/easy_ml/concerns/versionable.rb +19 -0
- data/app/models/easy_ml/dataset.rb +767 -0
- data/app/models/easy_ml/dataset_history.rb +56 -0
- data/app/models/easy_ml/datasource.rb +182 -0
- data/app/models/easy_ml/datasource_history.rb +24 -0
- data/app/models/easy_ml/datasources/base_datasource.rb +54 -0
- data/app/models/easy_ml/datasources/file_datasource.rb +58 -0
- data/app/models/easy_ml/datasources/polars_datasource.rb +89 -0
- data/app/models/easy_ml/datasources/s3_datasource.rb +97 -0
- data/app/models/easy_ml/deploy.rb +114 -0
- data/app/models/easy_ml/event.rb +79 -0
- data/app/models/easy_ml/feature.rb +437 -0
- data/app/models/easy_ml/feature_history.rb +38 -0
- data/app/models/easy_ml/model.rb +575 -41
- data/app/models/easy_ml/model_file.rb +133 -0
- data/app/models/easy_ml/model_file_history.rb +24 -0
- data/app/models/easy_ml/model_history.rb +51 -0
- data/app/models/easy_ml/models/base_model.rb +58 -0
- data/app/models/easy_ml/models/hyperparameters/base.rb +99 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/dart.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gblinear.rb +82 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +97 -0
- data/app/models/easy_ml/models/hyperparameters/xgboost.rb +71 -0
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +138 -0
- data/app/models/easy_ml/models/xgboost/progress_callback.rb +39 -0
- data/app/models/easy_ml/models/xgboost.rb +544 -5
- data/app/models/easy_ml/prediction.rb +44 -0
- data/app/models/easy_ml/retraining_job.rb +278 -0
- data/app/models/easy_ml/retraining_run.rb +184 -0
- data/app/models/easy_ml/settings.rb +37 -0
- data/app/models/easy_ml/splitter.rb +90 -0
- data/app/models/easy_ml/splitters/base_splitter.rb +28 -0
- data/app/models/easy_ml/splitters/date_splitter.rb +91 -0
- data/app/models/easy_ml/splitters/predefined_splitter.rb +74 -0
- data/app/models/easy_ml/splitters/random_splitter.rb +82 -0
- data/app/models/easy_ml/tuner_job.rb +56 -0
- data/app/models/easy_ml/tuner_run.rb +31 -0
- data/app/models/splitter_history.rb +6 -0
- data/app/serializers/easy_ml/column_serializer.rb +27 -0
- data/app/serializers/easy_ml/dataset_serializer.rb +73 -0
- data/app/serializers/easy_ml/datasource_serializer.rb +64 -0
- data/app/serializers/easy_ml/feature_serializer.rb +27 -0
- data/app/serializers/easy_ml/model_serializer.rb +90 -0
- data/app/serializers/easy_ml/retraining_job_serializer.rb +22 -0
- data/app/serializers/easy_ml/retraining_run_serializer.rb +39 -0
- data/app/serializers/easy_ml/settings_serializer.rb +9 -0
- data/app/views/layouts/easy_ml/application.html.erb +15 -0
- data/config/initializers/resque.rb +3 -0
- data/config/resque-pool.yml +6 -0
- data/config/routes.rb +39 -0
- data/config/spring.rb +1 -0
- data/config/vite.json +15 -0
- data/lib/easy_ml/configuration.rb +64 -0
- data/lib/easy_ml/core/evaluators/base_evaluator.rb +53 -0
- data/lib/easy_ml/core/evaluators/classification_evaluators.rb +126 -0
- data/lib/easy_ml/core/evaluators/regression_evaluators.rb +66 -0
- data/lib/easy_ml/core/model_evaluator.rb +161 -89
- data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +28 -18
- data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +4 -25
- data/lib/easy_ml/core/tuner.rb +123 -62
- data/lib/easy_ml/core.rb +0 -3
- data/lib/easy_ml/core_ext/hash.rb +24 -0
- data/lib/easy_ml/core_ext/pathname.rb +11 -5
- data/lib/easy_ml/data/date_converter.rb +90 -0
- data/lib/easy_ml/data/filter_extensions.rb +31 -0
- data/lib/easy_ml/data/polars_column.rb +126 -0
- data/lib/easy_ml/data/polars_reader.rb +297 -0
- data/lib/easy_ml/data/preprocessor.rb +280 -142
- data/lib/easy_ml/data/simple_imputer.rb +255 -0
- data/lib/easy_ml/data/splits/file_split.rb +252 -0
- data/lib/easy_ml/data/splits/in_memory_split.rb +54 -0
- data/lib/easy_ml/data/splits/split.rb +95 -0
- data/lib/easy_ml/data/splits.rb +9 -0
- data/lib/easy_ml/data/statistics_learner.rb +93 -0
- data/lib/easy_ml/data/synced_directory.rb +341 -0
- data/lib/easy_ml/data.rb +6 -2
- data/lib/easy_ml/engine.rb +105 -6
- data/lib/easy_ml/feature_store.rb +227 -0
- data/lib/easy_ml/features.rb +61 -0
- data/lib/easy_ml/initializers/inflections.rb +17 -3
- data/lib/easy_ml/logging.rb +2 -2
- data/lib/easy_ml/predict.rb +74 -0
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +192 -36
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +25 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +31 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +16 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +24 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +20 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +32 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +20 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +17 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +77 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +9 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +15 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +40 -0
- data/lib/easy_ml/support/est.rb +5 -1
- data/lib/easy_ml/support/file_rotate.rb +79 -15
- data/lib/easy_ml/support/file_support.rb +9 -0
- data/lib/easy_ml/support/local_file.rb +24 -0
- data/lib/easy_ml/support/lockable.rb +62 -0
- data/lib/easy_ml/support/synced_file.rb +103 -0
- data/lib/easy_ml/support/utc.rb +5 -1
- data/lib/easy_ml/support.rb +6 -3
- data/lib/easy_ml/version.rb +4 -1
- data/lib/easy_ml.rb +7 -2
- metadata +355 -72
- data/app/models/easy_ml/models.rb +0 -5
- data/lib/easy_ml/core/model.rb +0 -30
- data/lib/easy_ml/core/model_core.rb +0 -181
- data/lib/easy_ml/core/models/hyperparameters/base.rb +0 -34
- data/lib/easy_ml/core/models/hyperparameters/xgboost.rb +0 -19
- data/lib/easy_ml/core/models/xgboost.rb +0 -10
- data/lib/easy_ml/core/models/xgboost_core.rb +0 -220
- data/lib/easy_ml/core/models.rb +0 -10
- data/lib/easy_ml/core/uploaders/model_uploader.rb +0 -24
- data/lib/easy_ml/core/uploaders.rb +0 -7
- data/lib/easy_ml/data/dataloader.rb +0 -6
- data/lib/easy_ml/data/dataset/data/preprocessor/statistics.json +0 -31
- data/lib/easy_ml/data/dataset/data/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/dataset/files/sample_info.json +0 -1
- data/lib/easy_ml/data/dataset/splits/file_split.rb +0 -140
- data/lib/easy_ml/data/dataset/splits/in_memory_split.rb +0 -49
- data/lib/easy_ml/data/dataset/splits/split.rb +0 -98
- data/lib/easy_ml/data/dataset/splits.rb +0 -11
- data/lib/easy_ml/data/dataset/splitters/date_splitter.rb +0 -43
- data/lib/easy_ml/data/dataset/splitters.rb +0 -9
- data/lib/easy_ml/data/dataset.rb +0 -430
- data/lib/easy_ml/data/datasource/datasource_factory.rb +0 -60
- data/lib/easy_ml/data/datasource/file_datasource.rb +0 -40
- data/lib/easy_ml/data/datasource/merged_datasource.rb +0 -64
- data/lib/easy_ml/data/datasource/polars_datasource.rb +0 -41
- data/lib/easy_ml/data/datasource/s3_datasource.rb +0 -89
- data/lib/easy_ml/data/datasource.rb +0 -33
- data/lib/easy_ml/data/preprocessor/preprocessor.rb +0 -205
- data/lib/easy_ml/data/preprocessor/simple_imputer.rb +0 -402
- data/lib/easy_ml/deployment.rb +0 -5
- data/lib/easy_ml/support/synced_directory.rb +0 -134
- data/lib/easy_ml/transforms.rb +0 -29
- /data/{lib/easy_ml/core → app/models/easy_ml}/models/hyperparameters.rb +0 -0
@@ -0,0 +1,272 @@
|
|
1
|
+
import type { Model, RetrainingJob, RetrainingRun, Dataset, Prediction, Feature, FeatureGroup } from './types';
|
2
|
+
|
3
|
+
// Helper function to generate dates
|
4
|
+
/**
 * Returns an ISO-8601 (UTC) timestamp for the current instant moved back by
 * `days` calendar days.
 *
 * The shift is applied to the UTC calendar fields because the value is
 * serialised with `toISOString()`, which is always UTC. Shifting the local
 * calendar fields instead makes the offset drift by an hour whenever a
 * daylight-saving change falls inside the window. With UTC fields, a
 * whole-number `days` always yields exactly `days * 24h` before now.
 *
 * @param days Number of days to go back; `0` returns the current instant.
 * @returns The shifted instant formatted as `YYYY-MM-DDTHH:mm:ss.sssZ`.
 */
const daysAgo = (days: number): string => {
  const shifted = new Date();
  // setUTCDate normalises out-of-range day numbers, so month and year
  // roll-over is handled by the Date object itself.
  shifted.setUTCDate(shifted.getUTCDate() - days);
  return shifted.toISOString();
};
|
9
|
+
|
10
|
+
// Dataset fixture: a single "Customer Churn Dataset" entry with five described
// columns (three numeric, one categorical, one datetime) and two sample rows,
// the second of which carries null values.
export const mockDatasets: Dataset[] = [
|
11
|
+
{
|
12
|
+
id: 1,
|
13
|
+
name: 'Customer Churn Dataset',
|
14
|
+
description: 'Historical customer data for churn prediction',
|
15
|
+
columns: [
|
16
|
+
{
|
17
|
+
name: 'usage_days',
|
18
|
+
type: 'numeric',
|
19
|
+
description: 'Number of days customer has used the product',
|
20
|
+
statistics: {
|
21
|
+
mean: 145.7,
|
22
|
+
median: 130,
|
23
|
+
min: 1,
|
24
|
+
max: 365,
|
25
|
+
nullCount: 0
|
26
|
+
}
|
27
|
+
},
|
28
|
+
{
|
29
|
+
name: 'total_spend',
|
30
|
+
type: 'numeric',
|
31
|
+
description: 'Total customer spend in USD',
|
32
|
+
statistics: {
|
33
|
+
mean: 487.32,
|
34
|
+
median: 425.50,
|
35
|
+
min: 0,
|
36
|
+
max: 2500.00,
|
37
|
+
nullCount: 1250
|
38
|
+
}
|
39
|
+
},
|
40
|
+
{
|
41
|
+
name: 'support_tickets',
|
42
|
+
type: 'numeric',
|
43
|
+
description: 'Number of support tickets opened',
|
44
|
+
statistics: {
|
45
|
+
mean: 2.3,
|
46
|
+
median: 1,
|
47
|
+
min: 0,
|
48
|
+
max: 15,
|
49
|
+
nullCount: 3750
|
50
|
+
}
|
51
|
+
},
|
52
|
+
{
|
53
|
+
name: 'subscription_tier',
|
54
|
+
type: 'categorical',
|
55
|
+
description: 'Customer subscription level',
|
56
|
+
statistics: {
|
57
|
+
uniqueCount: 3,
|
58
|
+
nullCount: 125
|
59
|
+
}
|
60
|
+
},
|
61
|
+
{
|
62
|
+
name: 'last_login',
|
63
|
+
type: 'datetime',
|
64
|
+
description: 'Last time the customer logged in',
|
65
|
+
statistics: {
|
66
|
+
nullCount: 5000
|
67
|
+
}
|
68
|
+
}
|
69
|
+
],
|
70
|
+
sampleData: [
|
71
|
+
{
|
72
|
+
usage_days: 234,
|
73
|
+
total_spend: 567.89,
|
74
|
+
support_tickets: 1,
|
75
|
+
subscription_tier: 'premium',
|
76
|
+
last_login: '2024-03-01'
|
77
|
+
},
|
78
|
+
{
|
79
|
+
usage_days: 45,
|
80
|
+
total_spend: null,
|
81
|
+
support_tickets: null,
|
82
|
+
subscription_tier: 'basic',
|
83
|
+
last_login: null
|
84
|
+
}
|
85
|
+
],
|
86
|
+
rowCount: 25000,
|
87
|
+
updatedAt: '2024-03-10T12:00:00Z'
|
88
|
+
}
|
89
|
+
];
|
90
|
+
|
91
|
+
// Model fixture: one deployed xgboost classification model that references
// dataset id 1. Its timestamps are computed relative to load time via daysAgo().
export const mockModels: Model[] = [
|
92
|
+
{
|
93
|
+
id: 1,
|
94
|
+
name: 'Customer Churn Predictor',
|
95
|
+
modelType: 'classification',
|
96
|
+
status: 'completed',
|
97
|
+
deploymentStatus: 'inference',
|
98
|
+
deployed: true,
|
99
|
+
datasetId: 1,
|
100
|
+
configuration: {
|
101
|
+
algorithm: 'xgboost',
|
102
|
+
features: ['usage_days', 'total_spend', 'support_tickets'],
|
103
|
+
objective: 'binary:logistic',
|
104
|
+
metrics: ['accuracy', 'f1']
|
105
|
+
},
|
106
|
+
version: '2.1.0',
|
107
|
+
rootDir: '/models/churn_predictor',
|
108
|
+
file: { path: 'model.joblib' },
|
109
|
+
createdAt: daysAgo(30),
|
110
|
+
updatedAt: daysAgo(0)
|
111
|
+
}
|
112
|
+
];
|
113
|
+
|
114
|
+
// Retraining-job fixture: one active daily job for the churn model, evaluated
// on f1_score against a 0.85 threshold, with a weekly tuning configuration.
export const mockRetrainingJobs: RetrainingJob[] = [
|
115
|
+
{
|
116
|
+
id: 1,
|
117
|
+
model: 'Customer Churn Predictor',
|
118
|
+
frequency: 'daily',
|
119
|
+
at: 2,
|
120
|
+
evaluator: {
|
121
|
+
metric: 'f1_score',
|
122
|
+
threshold: 0.85,
|
123
|
+
direction: 'maximize'
|
124
|
+
},
|
125
|
+
tunerConfig: {
|
126
|
+
trials: 10,
|
127
|
+
metrics: ['f1_score'],
|
128
|
+
parameters: {
|
129
|
+
max_depth: { min: 3, max: 10 },
|
130
|
+
learning_rate: { min: 0.01, max: 0.1 }
|
131
|
+
}
|
132
|
+
},
|
133
|
+
tuningFrequency: 'weekly',
|
134
|
+
lastTuningAt: daysAgo(7),
|
135
|
+
active: true,
|
136
|
+
status: 'completed',
|
137
|
+
lastRunAt: daysAgo(1),
|
138
|
+
lockedAt: null,
|
139
|
+
createdAt: daysAgo(30),
|
140
|
+
updatedAt: daysAgo(0)
|
141
|
+
}
|
142
|
+
];
|
143
|
+
|
144
|
+
// Retraining-run fixture: four runs for model 1 / retraining job 1. Three are
// completed and one is failed; the failed run has a null metricValue and null
// metadata.
// NOTE(review): the `shoulddeploy` key is all-lowercase while every sibling key
// is camelCase — confirm it matches the RetrainingRun type in ./types.
export const mockRetrainingRuns: RetrainingRun[] = [
|
145
|
+
{
|
146
|
+
id: 1,
|
147
|
+
modelId: 1,
|
148
|
+
retrainingJobId: 1,
|
149
|
+
tunerJobId: null,
|
150
|
+
status: 'completed',
|
151
|
+
metricValue: 0.89,
|
152
|
+
threshold: 0.85,
|
153
|
+
thresholdDirection: 'maximize',
|
154
|
+
shoulddeploy: true,
|
155
|
+
startedAt: daysAgo(1),
|
156
|
+
completedAt: daysAgo(1),
|
157
|
+
errorMessage: null,
|
158
|
+
metadata: {
|
159
|
+
metrics: {
|
160
|
+
accuracy: 0.92,
|
161
|
+
precision: 0.88,
|
162
|
+
recall: 0.90,
|
163
|
+
f1: 0.89
|
164
|
+
},
|
165
|
+
parameters: {
|
166
|
+
max_depth: 6,
|
167
|
+
learning_rate: 0.05
|
168
|
+
}
|
169
|
+
},
|
170
|
+
createdAt: daysAgo(1),
|
171
|
+
updatedAt: daysAgo(1)
|
172
|
+
},
|
173
|
+
{
|
174
|
+
id: 2,
|
175
|
+
modelId: 1,
|
176
|
+
retrainingJobId: 1,
|
177
|
+
tunerJobId: 1,
|
178
|
+
status: 'completed',
|
179
|
+
metricValue: 0.86,
|
180
|
+
threshold: 0.85,
|
181
|
+
thresholdDirection: 'maximize',
|
182
|
+
shoulddeploy: true,
|
183
|
+
startedAt: daysAgo(2),
|
184
|
+
completedAt: daysAgo(2),
|
185
|
+
errorMessage: null,
|
186
|
+
metadata: {
|
187
|
+
metrics: {
|
188
|
+
accuracy: 0.90,
|
189
|
+
precision: 0.85,
|
190
|
+
recall: 0.87,
|
191
|
+
f1: 0.86
|
192
|
+
},
|
193
|
+
parameters: {
|
194
|
+
max_depth: 5,
|
195
|
+
learning_rate: 0.03
|
196
|
+
}
|
197
|
+
},
|
198
|
+
createdAt: daysAgo(2),
|
199
|
+
updatedAt: daysAgo(2)
|
200
|
+
},
|
201
|
+
{
|
202
|
+
id: 3,
|
203
|
+
modelId: 1,
|
204
|
+
retrainingJobId: 1,
|
205
|
+
tunerJobId: null,
|
206
|
+
status: 'failed',
|
207
|
+
metricValue: null,
|
208
|
+
threshold: 0.85,
|
209
|
+
thresholdDirection: 'maximize',
|
210
|
+
shoulddeploy: false,
|
211
|
+
startedAt: daysAgo(3),
|
212
|
+
completedAt: daysAgo(3),
|
213
|
+
errorMessage: 'Training failed due to insufficient memory',
|
214
|
+
metadata: null,
|
215
|
+
createdAt: daysAgo(3),
|
216
|
+
updatedAt: daysAgo(3)
|
217
|
+
},
|
218
|
+
{
|
219
|
+
id: 4,
|
220
|
+
modelId: 1,
|
221
|
+
retrainingJobId: 1,
|
222
|
+
tunerJobId: null,
|
223
|
+
status: 'completed',
|
224
|
+
metricValue: 0.83,
|
225
|
+
threshold: 0.85,
|
226
|
+
thresholdDirection: 'maximize',
|
227
|
+
shoulddeploy: false,
|
228
|
+
startedAt: daysAgo(4),
|
229
|
+
completedAt: daysAgo(4),
|
230
|
+
errorMessage: null,
|
231
|
+
metadata: {
|
232
|
+
metrics: {
|
233
|
+
accuracy: 0.87,
|
234
|
+
precision: 0.82,
|
235
|
+
recall: 0.84,
|
236
|
+
f1: 0.83
|
237
|
+
},
|
238
|
+
parameters: {
|
239
|
+
max_depth: 4,
|
240
|
+
learning_rate: 0.02
|
241
|
+
}
|
242
|
+
},
|
243
|
+
createdAt: daysAgo(4),
|
244
|
+
updatedAt: daysAgo(4)
|
245
|
+
}
|
246
|
+
];
|
247
|
+
|
248
|
+
// Feature fixture: a single "Normalize state" feature with an empty `code`
// string that reads and writes the "state" column.
export const mockFeatures: Feature[] = [
|
249
|
+
{
|
250
|
+
id: 1,
|
251
|
+
name: 'Normalize state',
|
252
|
+
description: 'Turn state names into 2 letter state abbreviations',
|
253
|
+
groupId: 1,
|
254
|
+
testDatasetId: 1,
|
255
|
+
inputColumns: ["state"],
|
256
|
+
outputColumns: ["state"],
|
257
|
+
code: "",
|
258
|
+
createdAt: daysAgo(30),
|
259
|
+
updatedAt: daysAgo(0)
|
260
|
+
}
|
261
|
+
];
|
262
|
+
|
263
|
+
// Feature-group fixture: one group whose `features` field is the mockFeatures
// array itself (shared by reference, not copied).
export const mockFeatureGroups: FeatureGroup[] = [
|
264
|
+
{
|
265
|
+
id: 1,
|
266
|
+
name: 'Customer Churn',
|
267
|
+
description: 'Features for customer churn dataset',
|
268
|
+
features: mockFeatures,
|
269
|
+
createdAt: daysAgo(30),
|
270
|
+
updatedAt: daysAgo(0)
|
271
|
+
}
|
272
|
+
];
|
@@ -0,0 +1,103 @@
|
|
1
|
+
import React, { useState, useCallback } from 'react';
|
2
|
+
import { usePage, router } from '@inertiajs/react';
|
3
|
+
import { Settings } from 'lucide-react';
|
4
|
+
import { isEqual } from 'lodash';
|
5
|
+
import { DatasetPreview } from '../components/DatasetPreview';
|
6
|
+
import { ColumnConfigModal } from '../components/dataset/ColumnConfigModal';
|
7
|
+
import type { Dataset, Column } from '../types/dataset';
|
8
|
+
import type { PreprocessingConstants } from '../types';
|
9
|
+
|
10
|
+
// Page props: the dataset to display and edit, plus the preprocessing
// constants that are forwarded unchanged to ColumnConfigModal.
interface Props {
|
11
|
+
dataset: Dataset;
|
12
|
+
constants: PreprocessingConstants;
|
13
|
+
}
|
14
|
+
|
15
|
+
/**
 * Collects the top-level dataset fields whose value differs between
 * `previous` and `next`. `columns` and `features` are skipped here because
 * they are diffed and submitted separately.
 */
function diffDatasetFields(previous: Dataset, next: Dataset): Partial<Dataset> {
  const changed: Partial<Dataset> = {};
  for (const [key, value] of Object.entries(next)) {
    if (key === 'columns' || key === 'features') continue;
    if (isEqual(previous[key as keyof Dataset], value)) continue;
    changed[key as keyof Dataset] = value;
  }
  return changed;
}

/**
 * Builds a map of column id -> changed fields (always including `id`).
 * A column with no counterpart in `previous` contributes all of its fields;
 * a column with no differing field contributes nothing.
 */
function diffColumns(
  previous: Dataset['columns'],
  next: Dataset['columns']
): Record<number, Record<string, any>> {
  const changes: Record<number, Record<string, any>> = {};

  next.forEach((column) => {
    const prior = previous.find((candidate) => candidate.id === column.id);
    if (prior && isEqual(prior, column)) return;

    const changedFields: Record<string, any> = {};
    Object.entries(column).forEach(([field, value]) => {
      if (!prior || !isEqual(prior[field as keyof Column], value)) {
        changedFields[field] = value;
      }
    });

    if (Object.keys(changedFields).length === 0) return;
    changes[column.id] = { ...changedFields, id: column.id };
  });

  return changes;
}

/**
 * Maps features to the nested-attributes payload, using the array index as
 * the feature position. Yields `undefined` when the dataset has no features.
 */
function toFeatureAttributes(features: Dataset['features']) {
  return features?.map((feature, position) => ({
    id: feature.id,
    name: feature.name,
    feature_class: feature.feature_class,
    feature_position: position,
    _destroy: feature._destroy
  }));
}

/**
 * Dataset details page: shows a preview of the dataset and a modal for
 * configuring its columns. Saving from the modal PATCHes only the fields that
 * changed relative to the locally held dataset, then stores the updated
 * dataset in local state.
 */
export default function DatasetDetailsPage({ dataset, constants }: Props) {
  const [showColumnConfig, setShowColumnConfig] = useState(false);
  const [currentDataset, setCurrentDataset] = useState<Dataset>(dataset);
  const { rootPath } = usePage().props;

  const openColumnConfig = () => setShowColumnConfig(true);
  const closeColumnConfig = () => setShowColumnConfig(false);

  const onSave = useCallback((updatedDataset: Dataset) => {
    const datasetChanges = diffDatasetFields(currentDataset, updatedDataset);
    const columnChanges = diffColumns(currentDataset.columns, updatedDataset.columns);
    const transformChanges = toFeatureAttributes(updatedDataset.features);

    // Skip the request entirely when nothing differs from the local copy.
    const hasChanges =
      Object.keys(datasetChanges).length > 0 ||
      Object.keys(columnChanges).length > 0 ||
      !isEqual(currentDataset.features, updatedDataset.features);

    if (hasChanges) {
      const payload = {
        dataset: {
          ...datasetChanges,
          columns_attributes: columnChanges,
          features_attributes: transformChanges
        }
      };
      router.patch(`${rootPath}/datasets/${dataset.id}`, payload, {
        preserveState: true,
        preserveScroll: true
      });
    }

    // The local copy is updated whether or not a request was sent.
    setCurrentDataset(updatedDataset);
  }, [currentDataset, dataset.id, rootPath]);

  return (
    <div className="p-8 space-y-6">
      <div className="flex justify-end">
        <button
          onClick={openColumnConfig}
          className="flex items-center gap-2 px-4 py-2 bg-white border border-gray-300 rounded-md text-sm font-medium text-gray-700 hover:bg-gray-50"
        >
          <Settings className="w-4 h-4" />
          Configure Columns
        </button>
      </div>

      <DatasetPreview dataset={currentDataset} />

      <ColumnConfigModal
        isOpen={showColumnConfig}
        onClose={closeColumnConfig}
        initialDataset={currentDataset}
        constants={constants}
        onSave={onSave}
      />
    </div>
  );
}
|
@@ -0,0 +1,261 @@
|
|
1
|
+
import React, { useState, useMemo, useEffect } from 'react';
|
2
|
+
import { Link, usePage, router } from '@inertiajs/react';
|
3
|
+
import { Database, Plus, Trash2, ExternalLink, Loader2, AlertCircle, ChevronDown, ChevronUp } from 'lucide-react';
|
4
|
+
import { EmptyState } from '../components/EmptyState';
|
5
|
+
import { SearchInput } from '../components/SearchInput';
|
6
|
+
import { Pagination } from '../components/Pagination';
|
7
|
+
import { Dataset, DatasetWorkflowStatus, Column } from "@types/dataset";
|
8
|
+
// Page props: the datasets to list on the index page.
interface Props {
|
9
|
+
datasets: Dataset[];
|
10
|
+
}
|
11
|
+
|
12
|
+
// Number of dataset cards shown on one page of the grid.
const ITEMS_PER_PAGE = 6;
|
13
|
+
|
14
|
+
// Status badge styling keyed by workflow status: Tailwind background and text
// classes plus an optional icon rendered in front of the status label.
const STATUS_STYLES: Record<DatasetWorkflowStatus, { bg: string; text: string; icon: React.ReactNode }> = {
|
15
|
+
analyzing: {
|
16
|
+
bg: 'bg-blue-100',
|
17
|
+
text: 'text-blue-800',
|
18
|
+
icon: <Loader2 className="w-4 h-4 animate-spin" />
|
19
|
+
},
|
20
|
+
ready: {
|
21
|
+
bg: 'bg-green-100',
|
22
|
+
text: 'text-green-800',
|
23
|
+
icon: null
|
24
|
+
},
|
25
|
+
failed: {
|
26
|
+
bg: 'bg-red-100',
|
27
|
+
text: 'text-red-800',
|
28
|
+
icon: <AlertCircle className="w-4 h-4" />
|
29
|
+
},
|
30
|
+
};
|
31
|
+
|
32
|
+
/**
 * Datasets index page.
 *
 * Renders a searchable, paginated grid of dataset cards. Each card shows the
 * dataset's workflow status, column and row counts, its first three column
 * names, and (for failed datasets with a stacktrace) a collapsible error
 * panel. While any dataset is in the `analyzing` state, the `datasets` prop is
 * re-fetched every two seconds.
 *
 * @param datasets Datasets to list.
 */
export default function DatasetsPage({ datasets }: Props) {
  const { rootPath } = usePage().props;
  const [searchQuery, setSearchQuery] = useState('');
  const [currentPage, setCurrentPage] = useState(1);
  const [expandedErrors, setExpandedErrors] = useState<number[]>([]);

  const filteredDatasets = useMemo(() => {
    const needle = searchQuery.toLowerCase();
    return datasets.filter(dataset =>
      dataset.name.toLowerCase().includes(needle) ||
      // A missing description is treated as empty rather than throwing.
      (dataset.description ?? '').toLowerCase().includes(needle)
    );
  }, [datasets, searchQuery]);

  const totalPages = Math.ceil(filteredDatasets.length / ITEMS_PER_PAGE);
  // Clamp the page so that a shrinking result set (after a search, a delete or
  // a poll) never leaves the user on a page that no longer exists.
  const activePage = Math.min(currentPage, Math.max(totalPages, 1));
  const paginatedDatasets = filteredDatasets.slice(
    (activePage - 1) * ITEMS_PER_PAGE,
    activePage * ITEMS_PER_PAGE
  );

  // A new query starts again from the first page of results.
  const handleSearchChange = (query: string) => {
    setSearchQuery(query);
    setCurrentPage(1);
  };

  const handleDelete = (datasetId: number) => {
    if (confirm('Are you sure you want to delete this dataset?')) {
      router.delete(`${rootPath}/datasets/${datasetId}`);
    }
  };

  // Poll for fresh dataset props while at least one dataset is being analyzed.
  // The interval is torn down whenever `datasets` changes or on unmount.
  useEffect(() => {
    let pollInterval: number | undefined;

    const isAnyAnalyzing = datasets.some(d => d.workflow_status === 'analyzing');

    if (isAnyAnalyzing) {
      pollInterval = window.setInterval(() => {
        router.get(window.location.href, {}, {
          preserveScroll: true,
          preserveState: true,
          only: ['datasets']
        });
      }, 2000);
    }

    return () => {
      if (pollInterval) {
        window.clearInterval(pollInterval);
      }
    };
  }, [datasets]);

  // Expands or collapses the error panel of the given dataset.
  const toggleError = (id: number) => {
    setExpandedErrors(prev =>
      prev.includes(id)
        ? prev.filter(expandedId => expandedId !== id)
        : [...prev, id]
    );
  };

  if (datasets.length === 0) {
    return (
      <div className="p-8">
        <EmptyState
          icon={Database}
          title="Create your first dataset"
          description="Create a dataset to start training your machine learning models"
          actionLabel="Create Dataset"
          onAction={() => { router.visit(`${rootPath}/datasets/new`) }}
        />
      </div>
    );
  }

  return (
    <div className="p-8">
      <div className="space-y-6">
        <div className="flex justify-between items-center">
          <div className="flex items-center gap-4">
            <h2 className="text-xl font-semibold text-gray-900">Datasets</h2>
            <SearchInput
              value={searchQuery}
              onChange={handleSearchChange}
              placeholder="Search datasets..."
            />
          </div>
          <Link
            href={`${rootPath}/datasets/new`}
            className="inline-flex items-center gap-2 px-4 py-2 bg-blue-600 text-white text-sm font-medium rounded-md hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
          >
            <Plus className="w-4 h-4" />
            New Dataset
          </Link>
        </div>

        {paginatedDatasets.length === 0 ? (
          <div className="text-center py-12 bg-white rounded-lg shadow">
            <Database className="mx-auto h-12 w-12 text-gray-400" />
            <h3 className="mt-2 text-sm font-medium text-gray-900">No datasets found</h3>
            <p className="mt-1 text-sm text-gray-500">
              No datasets match your search criteria. Try adjusting your search or create a new dataset.
            </p>
            <div className="mt-6">
              <Link
                href={`${rootPath}/datasets/new`}
                className="inline-flex items-center px-4 py-2 border border-transparent shadow-sm text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500"
              >
                <Plus className="w-4 h-4 mr-2" />
                New Dataset
              </Link>
            </div>
          </div>
        ) : (
          <>
            <div className="grid grid-cols-1 md:grid-cols-2 gap-6">
              {paginatedDatasets.map((dataset) => (
                <div
                  key={dataset.id}
                  className="bg-white rounded-lg shadow-md p-6 hover:shadow-lg transition-shadow"
                >
                  <div className="flex justify-between items-start mb-4">
                    <div className="flex items-start gap-3">
                      <Database className="w-5 h-5 text-blue-600 mt-1" />
                      <div>
                        <div className="flex items-center gap-2">
                          <h3 className="text-lg font-semibold text-gray-900">
                            {dataset.name}
                          </h3>
                          <div className={`inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs font-medium ${STATUS_STYLES[dataset.workflow_status].bg} ${STATUS_STYLES[dataset.workflow_status].text}`}>
                            {STATUS_STYLES[dataset.workflow_status].icon}
                            <span>{dataset.workflow_status.charAt(0).toUpperCase() + dataset.workflow_status.slice(1)}</span>
                          </div>
                        </div>
                        <p className="text-sm text-gray-500 mt-1">
                          {dataset.description}
                        </p>
                      </div>
                    </div>
                    <div className="flex gap-2">
                      <Link
                        href={`${rootPath}/datasets/${dataset.id}`}
                        className={`transition-colors ${
                          dataset.workflow_status === 'analyzing'
                            ? 'text-gray-300 cursor-not-allowed pointer-events-none'
                            : 'text-gray-400 hover:text-blue-600'
                        }`}
                        title={dataset.workflow_status === 'analyzing' ? 'Dataset is being analyzed' : 'View details'}
                      >
                        <ExternalLink className="w-5 h-5" />
                      </Link>
                      <button
                        className="text-gray-400 hover:text-red-600 transition-colors"
                        title="Delete dataset"
                        onClick={() => handleDelete(dataset.id)}
                      >
                        <Trash2 className="w-5 h-5" />
                      </button>
                    </div>
                  </div>

                  <div className="grid grid-cols-2 gap-4 mt-4">
                    <div>
                      <span className="text-sm text-gray-500">Columns</span>
                      <p className="text-sm font-medium text-gray-900">
                        {dataset.columns.length} columns
                      </p>
                    </div>
                    <div>
                      <span className="text-sm text-gray-500">Rows</span>
                      <p className="text-sm font-medium text-gray-900">
                        {dataset.num_rows.toLocaleString()}
                      </p>
                    </div>
                  </div>

                  <div className="mt-4 pt-4 border-t border-gray-100">
                    <div className="flex flex-wrap gap-2">
                      {dataset.columns.slice(0, 3).map((column: Column) => (
                        <span
                          key={column.name}
                          className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-blue-100 text-blue-800"
                        >
                          {column.name}
                        </span>
                      ))}
                      {dataset.columns.length > 3 && (
                        <span className="inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium bg-gray-100 text-gray-800">
                          +{dataset.columns.length - 3} more
                        </span>
                      )}
                    </div>
                  </div>

                  {dataset.workflow_status === 'failed' && dataset.stacktrace && (
                    <div className="mt-4 pt-4 border-t border-gray-100">
                      <button
                        onClick={() => toggleError(dataset.id)}
                        className="flex items-center gap-2 text-sm text-red-600 hover:text-red-700"
                      >
                        <AlertCircle className="w-4 h-4" />
                        <span>View Error Details</span>
                        {expandedErrors.includes(dataset.id) ? (
                          <ChevronUp className="w-4 h-4" />
                        ) : (
                          <ChevronDown className="w-4 h-4" />
                        )}
                      </button>
                      {expandedErrors.includes(dataset.id) && (
                        <div className="mt-2 p-3 bg-red-50 rounded-md">
                          <pre className="text-xs text-red-700 whitespace-pre-wrap font-mono">
                            {dataset.stacktrace}
                          </pre>
                        </div>
                      )}
                    </div>
                  )}

                </div>
              ))}
            </div>

            {totalPages > 1 && (
              <Pagination
                currentPage={activePage}
                totalPages={totalPages}
                onPageChange={setCurrentPage}
              />
            )}
          </>
        )}
      </div>
    </div>
  );
}
|