easy_ml 0.2.0.pre.rc52 → 0.2.0.pre.rc56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/application_controller.rb +1 -1
  3. data/app/controllers/easy_ml/deploys_controller.rb +1 -1
  4. data/app/controllers/easy_ml/models_controller.rb +4 -1
  5. data/app/frontend/components/AlertProvider.tsx +6 -3
  6. data/app/frontend/components/ModelForm.tsx +19 -35
  7. data/app/frontend/components/ScheduleModal.tsx +18 -26
  8. data/app/frontend/components/SearchableSelect.tsx +92 -67
  9. data/app/jobs/easy_ml/refresh_dataset_job.rb +3 -0
  10. data/app/models/easy_ml/column.rb +79 -0
  11. data/app/models/easy_ml/column_list.rb +7 -3
  12. data/app/models/easy_ml/dataset.rb +3 -4
  13. data/app/models/easy_ml/event.rb +5 -3
  14. data/config/initializers/evaluators.rb +41 -0
  15. data/lib/easy_ml/core/evaluators/base_evaluator.rb +4 -3
  16. data/lib/easy_ml/core/evaluators/classification_evaluators.rb +24 -0
  17. data/lib/easy_ml/core/evaluators/regression_evaluators.rb +16 -0
  18. data/lib/easy_ml/core/model_evaluator.rb +22 -4
  19. data/lib/easy_ml/core/tuner.rb +32 -24
  20. data/lib/easy_ml/engine.rb +5 -0
  21. data/lib/easy_ml/evaluators/base.rb +26 -0
  22. data/lib/easy_ml/evaluators.rb +6 -0
  23. data/lib/easy_ml/version.rb +1 -1
  24. data/lib/easy_ml.rb +1 -0
  25. data/public/easy_ml/assets/.vite/manifest.json +2 -2
  26. data/public/easy_ml/assets/assets/{Application-D6L0eW4P.css → Application-BUsRR6b6.css} +1 -1
  27. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DTZ2348z.js +474 -0
  28. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-BWHK2hEi.js.map → Application.tsx-DTZ2348z.js.map} +1 -1
  29. metadata +8 -5
  30. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-BWHK2hEi.js +0 -474
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0b6da194500895ff325b1408757ee328b355da448a5fcc53439d7b14ccea9c8
4
- data.tar.gz: da221df414245aafd56c5a05367146b9aae131885a99fa19e11aaa0f3cd32a20
3
+ metadata.gz: a2eb7d933162cc05c64a1ea7c21c65f9c80283a1ae45f37226282c503607008f
4
+ data.tar.gz: e25a1b7b1970753ae6f65917943607102ad52ef9f90831de0660563441448975
5
5
  SHA512:
6
- metadata.gz: 1d04422529423e09cff72496a29ff41038fdde5adfc0f267065981d04c24c9e9475d9ad0b2ab083e2d5e8cef22581180df97c93e960579cca8657b1771adc977
7
- data.tar.gz: d9904acd55317dc68d774c26f392399990fdd52f45f394f8fdc6addf11456ea1361e2d972a438647c7dc0f679e3642cd8ae1f13cc9e0824c72865de8ecb0b6a9
6
+ metadata.gz: 6fc39e6b2838ab6242df1848411450764b0434b63bb7e4f1cb60151850e794f1d0a71a956b70b7ca78e159003efc6947d7dbaad9010c7c6899a1baeb8c7570b2
7
+ data.tar.gz: f50ceecc6935fea0c1f82e5b76beaef2e6ee329087d0f7b7739d4a88b4738d9ed37d13acd47bf12d118092c0b66774772dab9a30f60e12c3854360329caacfa9
data/app/controllers/easy_ml/application_controller.rb CHANGED
@@ -52,7 +52,7 @@ module EasyML
52
52
 
53
53
  flash_messages << { type: "success", message: flash[:notice] } if flash[:notice]
54
54
 
55
- flash_messages << { type: "error", message: flash[:alert] } if flash[:alert]
55
+ flash_messages << { type: "error", message: flash[:error] } if flash[:error]
56
56
 
57
57
  flash_messages << { type: "info", message: flash[:info] } if flash[:info]
58
58
 
data/app/controllers/easy_ml/deploys_controller.rb CHANGED
@@ -13,7 +13,7 @@ module EasyML
13
13
  flash[:notice] = "Model deployment has started"
14
14
  redirect_to easy_ml_model_path(@deploy.model)
15
15
  rescue => e
16
- flash[:alert] = "Trouble deploying model: #{e.message}"
16
+ flash[:error] = "Trouble deploying model: #{e.message}"
17
17
  redirect_to easy_ml_model_path(@deploy.model)
18
18
  end
19
19
  end
data/app/controllers/easy_ml/models_controller.rb CHANGED
@@ -70,6 +70,9 @@ module EasyML
70
70
  flash[:notice] = "Model was successfully updated."
71
71
  redirect_to easy_ml_models_path
72
72
  else
73
+ errors = model.errors.to_hash(true)
74
+ values = errors.values.flatten
75
+ flash.now[:error] = values.join(", ")
73
76
  render inertia: "pages/EditModelPage", props: {
74
77
  model: model_to_json(model),
75
78
  datasets: EasyML::Dataset.all.map { |dataset| dataset_to_json(dataset) },
@@ -99,7 +102,7 @@ module EasyML
99
102
  flash[:notice] = "Model was successfully deleted."
100
103
  redirect_to easy_ml_models_path
101
104
  else
102
- flash[:alert] = "Failed to delete the model."
105
+ flash[:error] = "Failed to delete the model."
103
106
  redirect_to easy_ml_models_path
104
107
  end
105
108
  end
data/app/frontend/components/AlertProvider.tsx CHANGED
@@ -38,9 +38,12 @@ export function AlertProvider({ children }: { children: React.ReactNode }) {
38
38
  const id = Math.random().toString(36).substring(7);
39
39
  setAlerts(prev => [...prev, { id, type, message }]);
40
40
 
41
- setTimeout(() => {
42
- removeAlert(id);
43
- }, numSeconds * 1000);
41
+ // Only auto-dismiss non-error alerts
42
+ if (type !== 'error') {
43
+ setTimeout(() => {
44
+ removeAlert(id);
45
+ }, numSeconds * 1000);
46
+ }
44
47
  }, [removeAlert]);
45
48
 
46
49
  return (
data/app/frontend/components/ModelForm.tsx CHANGED
@@ -103,29 +103,6 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
103
103
  const objectives: { value: string; label: string; description?: string }[] =
104
104
  constants.objectives[data.model.model_type]?.[data.model.task] || [];
105
105
 
106
- useEffect(() => {
107
- // Only set default metrics if none were provided from the backend
108
- if (!initialData?.metrics) {
109
- const availableMetrics = constants.metrics[data.model.task]?.map(metric => metric.value) || [];
110
- setData({
111
- ...data,
112
- model: {
113
- ...data.model,
114
- objective: data.model.task === 'classification' ? 'binary:logistic' : 'reg:squarederror',
115
- metrics: availableMetrics
116
- }
117
- });
118
- } else {
119
- setData({
120
- ...data,
121
- model: {
122
- ...data.model,
123
- objective: data.model.task === 'classification' ? 'binary:logistic' : 'reg:squarederror'
124
- }
125
- });
126
- }
127
- }, [data.model.task]);
128
-
129
106
  useEffect(() => {
130
107
  if (isDataSet) {
131
108
  save();
@@ -187,11 +164,21 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
187
164
  save();
188
165
  };
189
166
 
190
- console.log(data.model)
191
167
  const selectedDataset = datasets.find(d => d.id === data.model.dataset_id);
192
168
 
193
169
  const filteredTunerJobConstants = constants.tuner_job_constants[data.model.model_type] || {};
194
170
 
171
+ const handleTaskChange = (value: string) => {
172
+ // First update the task
173
+ setData('model.task', value);
174
+
175
+ // Then force reset metrics to empty array
176
+ setData('model.metrics', []);
177
+
178
+ // Update objective based on new task
179
+ setData('model.objective', value === 'classification' ? 'binary:logistic' : 'reg:squarederror');
180
+ };
181
+
195
182
  return (
196
183
  <form onSubmit={handleSubmit} className="space-y-8">
197
184
  <div className="flex justify-between items-center border-b pb-4">
@@ -266,8 +253,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
266
253
  <SearchableSelect
267
254
  options={constants.tasks}
268
255
  value={data.model.task}
269
- onChange={(value) => setData('model.task', value as string)}
270
- placeholder="Select task"
256
+ onChange={handleTaskChange}
271
257
  />
272
258
  <ErrorDisplay error={errors.task} />
273
259
  </div>
@@ -300,24 +286,21 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
300
286
  type="checkbox"
301
287
  checked={data.model.metrics.includes(metric.value)}
302
288
  onChange={(e) => {
303
- const metrics = e.target.checked
289
+ const newMetrics = e.target.checked
304
290
  ? [...data.model.metrics, metric.value]
305
291
  : data.model.metrics.filter(m => m !== metric.value);
306
- setData('model.metrics', metrics);
292
+ setData('model.metrics', newMetrics);
307
293
  }}
308
- className="h-4 w-4 rounded border-gray-300 text-blue-600 focus:ring-blue-500"
294
+ className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
309
295
  />
310
296
  <div className="ml-3">
311
- <span className="block text-sm font-medium text-gray-900">
312
- {metric.label}
313
- </span>
314
- <span className="block text-xs text-gray-500">
315
- {metric.direction === 'maximize' ? 'Higher is better' : 'Lower is better'}
316
- </span>
297
+ <span className="block text-sm font-medium text-gray-900">{metric.label}</span>
298
+ <span className="block text-xs text-gray-500">Direction: {metric.direction}</span>
317
299
  </div>
318
300
  </label>
319
301
  ))}
320
302
  </div>
303
+ <ErrorDisplay error={errors.metrics} />
321
304
  </div>
322
305
  </div>
323
306
 
@@ -374,6 +357,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
374
357
  dataset: selectedDataset,
375
358
  retraining_job: data.model.retraining_job_attributes
376
359
  }}
360
+ metrics={constants.metrics}
377
361
  tunerJobConstants={filteredTunerJobConstants}
378
362
  timezone={constants.timezone}
379
363
  retrainingJobConstants={constants.retraining_job_constants}
data/app/frontend/components/ScheduleModal.tsx CHANGED
@@ -37,27 +37,20 @@ interface ScheduleModalProps {
37
37
  tuning_enabled?: boolean;
38
38
  };
39
39
  };
40
+ metrics: {
41
+ [key: string]: Array<{
42
+ value: string;
43
+ label: string;
44
+ description: string;
45
+ direction: string;
46
+ }>;
47
+ };
40
48
  tunerJobConstants: any;
41
49
  timezone: string;
42
50
  retrainingJobConstants: any;
43
51
  }
44
52
 
45
- const METRICS = {
46
- classification: [
47
- { value: 'accuracy_score', label: 'Accuracy', description: 'Overall prediction accuracy', direction: 'maximize' },
48
- { value: 'precision_score', label: 'Precision', description: 'Ratio of true positives to predicted positives', direction: 'maximize' },
49
- { value: 'recall_score', label: 'Recall', description: 'Ratio of true positives to actual positives', direction: 'maximize' },
50
- { value: 'f1_score', label: 'F1 Score', description: 'Harmonic mean of precision and recall', direction: 'maximize' }
51
- ],
52
- regression: [
53
- { value: 'mean_absolute_error', label: 'Mean Absolute Error', description: 'Average absolute differences between predicted and actual values', direction: 'minimize' },
54
- { value: 'mean_squared_error', label: 'Mean Squared Error', description: 'Average squared differences between predicted and actual values', direction: 'minimize' },
55
- { value: 'root_mean_squared_error', label: 'Root Mean Squared Error', description: 'Square root of mean squared error', direction: 'minimize' },
56
- { value: 'r2_score', label: 'R² Score', description: 'Proportion of variance in the target that is predictable', direction: 'maximize' }
57
- ]
58
- };
59
-
60
- export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobConstants, timezone, retrainingJobConstants }: ScheduleModalProps) {
53
+ export function ScheduleModal({ isOpen, onClose, onSave, initialData, metrics, tunerJobConstants, timezone, retrainingJobConstants }: ScheduleModalProps) {
61
54
  const [showBatchTrainingInfo, setShowBatchTrainingInfo] = useState(false);
62
55
  const [activeBatchPopover, setActiveBatchPopover] = useState<'size' | 'overlap' | null>(null);
63
56
 
@@ -97,8 +90,8 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
97
90
  day_of_week: initialData.retraining_job?.at?.day_of_week ?? 1,
98
91
  day_of_month: initialData.retraining_job?.at?.day_of_month ?? 1
99
92
  },
100
- metric: initialData.retraining_job?.metric || METRICS[initialData.task === 'classification' ? 'classification' : 'regression'][0].value,
101
- threshold: initialData.retraining_job?.threshold || (initialData.task === 'classification' ? 0.85 : 0.1),
93
+ metric: initialData.retraining_job?.metric || (metrics[initialData.task]?.[0]?.value ?? ''),
94
+ threshold: initialData.retraining_job?.threshold ?? (initialData.task === 'classification' ? 0.85 : 0.1),
102
95
  tuner_config: initialData.retraining_job?.tuner_config ? {
103
96
  n_trials: initialData.retraining_job.tuner_config.n_trials || 10,
104
97
  config: {
@@ -336,9 +329,9 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
336
329
  };
337
330
 
338
331
  return (
339
- <div className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50">
340
- <div className="bg-white rounded-lg w-full max-w-6xl max-h-[90vh] overflow-hidden">
341
- <div className="flex justify-between items-center p-4 border-b">
332
+ <div className="fixed inset-0 bg-black bg-opacity-50 flex items-start justify-center pt-[5vh] z-50">
333
+ <div className="bg-white rounded-lg w-full max-w-6xl flex flex-col" style={{ maxHeight: '90vh' }}>
334
+ <div className="flex-none flex justify-between items-center p-4 border-b">
342
335
  <h2 className="text-lg font-semibold">Training Configuration</h2>
343
336
  <button
344
337
  onClick={onClose}
@@ -348,7 +341,7 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
348
341
  </button>
349
342
  </div>
350
343
 
351
- <div className="p-6 grid grid-cols-2 gap-8 max-h-[calc(90vh-8rem)] overflow-y-auto">
344
+ <div className="flex-1 p-6 grid grid-cols-2 gap-8 overflow-y-auto">
352
345
  {/* Left Column */}
353
346
  <div className="space-y-8">
354
347
  {/* Training Schedule */}
@@ -575,7 +568,7 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
575
568
  Metric
576
569
  </label>
577
570
  <SearchableSelect
578
- options={METRICS[initialData.task === 'classification' ? 'classification' : 'regression'].map((metric) => ({
571
+ options={metrics[initialData.task].map((metric) => ({
579
572
  value: metric.value,
580
573
  label: metric.label,
581
574
  description: metric.description
@@ -610,8 +603,7 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
610
603
  <h3 className="text-sm font-medium text-blue-800">Deployment Criteria</h3>
611
604
  <p className="mt-2 text-sm text-blue-700">
612
605
  {(() => {
613
- const metricsList = METRICS[initialData.task === 'classification' ? 'classification' : 'regression'];
614
- const selectedMetric = metricsList.find(m => m.value === formData.retraining_job_attributes.metric);
606
+ const selectedMetric = metrics[initialData.task].find(m => m.value === formData.retraining_job_attributes.metric);
615
607
  const direction = selectedMetric?.direction === 'minimize' ? 'below' : 'above';
616
608
 
617
609
  return `The model will be automatically deployed when the ${selectedMetric?.label} is ${direction} ${formData.retraining_job_attributes.threshold}.`;
@@ -711,7 +703,7 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, tunerJobCo
711
703
  </div>
712
704
  </div>
713
705
 
714
- <div className="flex justify-end gap-4 p-4 border-t">
706
+ <div className="flex-none flex justify-end gap-4 p-4 border-t bg-white">
715
707
  <button
716
708
  onClick={onClose}
717
709
  className="px-4 py-2 text-sm font-medium text-gray-700 hover:text-gray-500"
data/app/frontend/components/SearchableSelect.tsx CHANGED
@@ -1,5 +1,6 @@
1
1
  import React, { useState, useRef, useEffect, forwardRef } from 'react';
2
2
  import { Search, Check } from 'lucide-react';
3
+ import { createPortal } from 'react-dom';
3
4
 
4
5
  interface Option {
5
6
  value: string | number;
@@ -20,6 +21,7 @@ export const SearchableSelect = forwardRef<HTMLButtonElement, SearchableSelectPr
20
21
  ({ options, value, onChange, placeholder = 'Search...', renderOption }, ref) => {
21
22
  const [isOpen, setIsOpen] = useState(false);
22
23
  const [searchQuery, setSearchQuery] = useState('');
24
+ const [dropdownPosition, setDropdownPosition] = useState({ top: 0, left: 0, width: 0 });
23
25
  const containerRef = useRef<HTMLDivElement>(null);
24
26
  const inputRef = useRef<HTMLInputElement>(null);
25
27
 
@@ -47,11 +49,98 @@ export const SearchableSelect = forwardRef<HTMLButtonElement, SearchableSelectPr
47
49
  }
48
50
  }, [isOpen]);
49
51
 
52
+ useEffect(() => {
53
+ if (isOpen && containerRef.current) {
54
+ const rect = containerRef.current.getBoundingClientRect();
55
+ setDropdownPosition({
56
+ top: rect.bottom + window.scrollY,
57
+ left: rect.left + window.scrollX,
58
+ width: rect.width
59
+ });
60
+ }
61
+ }, [isOpen]);
62
+
63
+ const handleOptionClick = (optionValue: Option['value'], e: React.MouseEvent) => {
64
+ debugger;
65
+ e.preventDefault();
66
+ e.stopPropagation();
67
+ onChange(optionValue);
68
+ setIsOpen(false);
69
+ setSearchQuery('');
70
+ };
71
+
72
+ const dropdown = isOpen && createPortal(
73
+ <div
74
+ className="fixed bg-white shadow-lg rounded-md overflow-hidden border border-gray-200"
75
+ style={{
76
+ top: dropdownPosition.top,
77
+ left: dropdownPosition.left,
78
+ width: dropdownPosition.width,
79
+ zIndex: 9999
80
+ }}
81
+ >
82
+ <div className="p-2 border-b">
83
+ <div className="relative">
84
+ <Search className="absolute left-3 top-1/2 transform -translate-y-1/2 w-4 h-4 text-gray-400" />
85
+ <input
86
+ ref={inputRef}
87
+ type="text"
88
+ className="w-full pl-9 pr-4 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-1 focus:ring-blue-500 focus:border-blue-500"
89
+ placeholder="Search..."
90
+ value={searchQuery}
91
+ onChange={(e) => setSearchQuery(e.target.value)}
92
+ onMouseDown={(e) => e.stopPropagation()}
93
+ />
94
+ </div>
95
+ </div>
96
+
97
+ <div className="max-h-60 overflow-y-auto">
98
+ {filteredOptions.length === 0 ? (
99
+ <div className="text-center py-4 text-sm text-gray-500">
100
+ No results found
101
+ </div>
102
+ ) : (
103
+ <ul className="py-1">
104
+ {filteredOptions.map((option) => (
105
+ <li key={option.value}>
106
+ <button
107
+ type="button"
108
+ className={`w-full text-left px-4 py-2 hover:bg-gray-100 ${
109
+ option.value === value ? 'bg-blue-50' : ''
110
+ }`}
111
+ onMouseDown={(e) => handleOptionClick(option.value, e)}
112
+ >
113
+ <div className="flex items-center justify-between">
114
+ <span className="block font-medium">
115
+ {option.label}
116
+ </span>
117
+ {option.value === value && (
118
+ <Check className="w-4 h-4 text-blue-600" />
119
+ )}
120
+ </div>
121
+ {option.description && (
122
+ <span className="block text-sm text-gray-500">
123
+ {option.description}
124
+ </span>
125
+ )}
126
+ </button>
127
+ </li>
128
+ ))}
129
+ </ul>
130
+ )}
131
+ </div>
132
+ </div>,
133
+ document.body
134
+ );
135
+
50
136
  return (
51
137
  <div className="relative" ref={containerRef}>
52
138
  <button
53
139
  type="button"
54
- onClick={() => setIsOpen(!isOpen)}
140
+ onMouseDown={(e) => {
141
+ e.preventDefault();
142
+ setIsOpen(!isOpen);
143
+ }}
55
144
  className="w-full bg-white relative border border-gray-300 rounded-md shadow-sm pl-3 pr-10 py-2 text-left cursor-pointer focus:outline-none focus:ring-1 focus:ring-blue-500 focus:border-blue-500"
56
145
  ref={ref}
57
146
  >
@@ -61,72 +150,8 @@ export const SearchableSelect = forwardRef<HTMLButtonElement, SearchableSelectPr
61
150
  <span className="block truncate text-gray-500">{placeholder}</span>
62
151
  )}
63
152
  </button>
64
-
65
- {isOpen && (
66
- <div className="absolute z-10 mt-1 w-full bg-white shadow-lg max-h-96 rounded-md overflow-hidden">
67
- <div className="p-2 border-b">
68
- <div className="relative">
69
- <Search className="absolute left-3 top-1/2 transform -translate-y-1/2 w-4 h-4 text-gray-400" />
70
- <input
71
- ref={inputRef}
72
- type="text"
73
- className="w-full pl-9 pr-4 py-2 border border-gray-300 rounded-md focus:outline-none focus:ring-1 focus:ring-blue-500 focus:border-blue-500"
74
- placeholder="Search..."
75
- value={searchQuery}
76
- onChange={(e) => setSearchQuery(e.target.value)}
77
- onClick={(e) => e.stopPropagation()}
78
- />
79
- </div>
80
- </div>
81
-
82
- <div className="max-h-60 overflow-y-auto">
83
- {filteredOptions.length === 0 ? (
84
- <div className="text-center py-4 text-sm text-gray-500">
85
- No results found
86
- </div>
87
- ) : (
88
- <ul className="py-1">
89
- {filteredOptions.map((option) => (
90
- <li key={option.value}>
91
- <button
92
- type="button"
93
- className={`w-full text-left px-4 py-2 hover:bg-gray-100 ${
94
- option.value === value ? 'bg-blue-50' : ''
95
- }`}
96
- onClick={() => {
97
- onChange(option.value);
98
- setIsOpen(false);
99
- setSearchQuery('');
100
- }}
101
- >
102
- {renderOption ? (
103
- renderOption(option)
104
- ) : (
105
- <div className="flex items-center justify-between">
106
- <div>
107
- <div className="font-medium">{option.label}</div>
108
- {option.description && (
109
- <div className="text-sm text-gray-500">
110
- {option.description}
111
- </div>
112
- )}
113
- </div>
114
- {option.value === value && (
115
- <Check className="w-4 h-4 text-blue-600" />
116
- )}
117
- </div>
118
- )}
119
- </button>
120
- </li>
121
- ))}
122
- </ul>
123
- )}
124
- </div>
125
- </div>
126
- )}
153
+ {dropdown}
127
154
  </div>
128
155
  );
129
156
  }
130
- );
131
-
132
- SearchableSelect.displayName = 'SearchableSelect';
157
+ );
data/app/jobs/easy_ml/refresh_dataset_job.rb CHANGED
@@ -3,6 +3,8 @@ module EasyML
3
3
  def perform(id)
4
4
  begin
5
5
  dataset = EasyML::Dataset.find(id)
6
+ return if dataset.workflow_status == :analyzing
7
+
6
8
  puts "Refreshing dataset #{dataset.name}"
7
9
  puts "Needs refresh? #{dataset.needs_refresh?}"
8
10
  unless dataset.needs_refresh?
@@ -12,6 +14,7 @@ module EasyML
12
14
  create_event(dataset, "started")
13
15
 
14
16
  puts "Prepare! #{dataset.name}"
17
+ dataset.unlock!
15
18
  dataset.prepare
16
19
  if dataset.features.needs_fit.any?
17
20
  dataset.fit_features(async: true)
data/app/models/easy_ml/column.rb CHANGED
@@ -32,6 +32,7 @@ module EasyML
32
32
  before_save :ensure_valid_datatype
33
33
  after_create :set_date_column_if_date_splitter
34
34
  after_save :handle_date_column_change
35
+ before_save :set_defaults
35
36
 
36
37
  # Scopes
37
38
  scope :visible, -> { where(hidden: false) }
@@ -98,6 +99,84 @@ module EasyML
98
99
 
99
100
  private
100
101
 
102
+ def set_defaults
103
+ self.preprocessing_steps = set_preprocessing_steps_defaults
104
+ end
105
+
106
+ def set_preprocessing_steps_defaults
107
+ preprocessing_steps.inject({}) do |h, (type, config)|
108
+ h.tap do
109
+ h[type] = set_preprocessing_step_defaults(config)
110
+ end
111
+ end
112
+ end
113
+
114
+ ALLOWED_PARAMS = {
115
+ constant: [:constant],
116
+ categorical: %i[categorical_min one_hot ordinal_encoding],
117
+ most_frequent: %i[one_hot ordinal_encoding],
118
+ mean: [:clip],
119
+ median: [:clip],
120
+ }
121
+
122
+ REQUIRED_PARAMS = {
123
+ constant: [:constant],
124
+ categorical: %i[categorical_min one_hot ordinal_encoding],
125
+ }
126
+
127
+ DEFAULT_PARAMS = {
128
+ categorical_min: 1,
129
+ one_hot: true,
130
+ ordinal_encoding: false,
131
+ clip: { min: 0, max: 1_000_000_000 },
132
+ constant: nil,
133
+ }
134
+
135
+ XOR_PARAMS = [{
136
+ params: [:one_hot, :ordinal_encoding],
137
+ default: :one_hot,
138
+ }]
139
+
140
+ def set_preprocessing_step_defaults(config)
141
+ config.deep_symbolize_keys!
142
+ config[:params] ||= {}
143
+ params = config[:params].symbolize_keys
144
+
145
+ required = REQUIRED_PARAMS.fetch(config[:method].to_sym, [])
146
+ allowed = ALLOWED_PARAMS.fetch(config[:method].to_sym, [])
147
+
148
+ missing = required - params.keys
149
+ missing.reject! do |param|
150
+ XOR_PARAMS.any? do |rule|
151
+ if rule[:params].include?(param)
152
+ missing_param = rule[:params].find { |p| p != param }
153
+ params[missing_param] == true
154
+ else
155
+ false
156
+ end
157
+ end
158
+ end
159
+ extra = params.keys - allowed
160
+
161
+ missing.each do |key|
162
+ params[key] = DEFAULT_PARAMS.fetch(key)
163
+ end
164
+
165
+ extra.each do |key|
166
+ params.delete(key)
167
+ end
168
+
169
+ # Only set one of one_hot or ordinal_encoding to true,
170
+ # by default set one_hot to true
171
+ xor = XOR_PARAMS.find { |rule| rule[:params] & params.keys == rule[:params] }
172
+ if xor && xor[:params].all? { |param| params[param] }
173
+ xor[:params].each { |param| params[param] = false }
174
+ params[xor[:default]] = true
175
+ end
176
+
177
+ config.merge!(params: params)
178
+ end
179
+
101
180
  def handle_date_column_change
102
181
  return unless saved_change_to_is_date_column? && is_date_column?
103
182
 
data/app/models/easy_ml/column_list.rb CHANGED
@@ -1,14 +1,18 @@
1
1
  module EasyML
2
2
  module ColumnList
3
- def sync
3
+ def sync(only_new: false)
4
4
  return unless dataset.schema.present?
5
5
 
6
6
  EasyML::Column.transaction do
7
7
  col_names = syncable
8
8
  existing_columns = where(name: col_names)
9
9
  import_new(col_names, existing_columns)
10
- update_existing(existing_columns)
11
- delete_missing(existing_columns)
10
+
11
+ if !only_new
12
+ update_existing(existing_columns)
13
+ delete_missing(existing_columns)
14
+ end
15
+
12
16
  if existing_columns.none? # Totally new dataset
13
17
  dataset.after_create_columns
14
18
  end
data/app/models/easy_ml/dataset.rb CHANGED
@@ -175,7 +175,6 @@ module EasyML
175
175
 
176
176
  def actually_refresh
177
177
  refreshing do
178
- split_data
179
178
  process_data
180
179
  fully_reload
181
180
  learn
@@ -273,10 +272,10 @@ module EasyML
273
272
  raw.split_at.present? && raw.split_at < datasource.last_updated_at
274
273
  end
275
274
 
276
- def learn
275
+ def learn(only_new: false)
277
276
  learn_schema
278
277
  learn_statistics
279
- columns.sync
278
+ columns.sync(only_new: only_new)
280
279
  end
281
280
 
282
281
  def refreshing
@@ -399,7 +398,7 @@ module EasyML
399
398
 
400
399
  # Learn will update columns, so if any features have been added
401
400
  # since the last time columns were learned, we should re-learn the schema
402
- learn if idx == 0 && needs_learn?(df)
401
+ learn(only_new: true) if idx == 1 && needs_learn?(df)
403
402
  df = apply_column_mask(df, inference: inference) unless all_columns
404
403
  raise_on_nulls(df) if inference
405
404
  df, = processed.split_features_targets(df, true, target) if split_ys
data/app/models/easy_ml/event.rb CHANGED
@@ -56,14 +56,16 @@ module EasyML
56
56
  create_event(model, "failed", error)
57
57
  end
58
58
 
59
+ def self.easy_ml_context(stacktrace)
60
+ stacktrace.select { |loc| loc.match?(/easy_ml/) }
61
+ end
62
+
59
63
  def self.format_stacktrace(error)
60
64
  return nil if error.nil?
61
65
 
62
66
  topline = error.inspect
63
67
 
64
- stacktrace = error.backtrace.select do |loc|
65
- loc.match?(/easy_ml/)
66
- end
68
+ stacktrace = easy_ml_context(error.backtrace)
67
69
 
68
70
  %(#{topline}
69
71