easy_ml 0.2.0.pre.rc57 → 0.2.0.pre.rc60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/apis_controller.rb +8 -0
  3. data/app/controllers/easy_ml/application_controller.rb +4 -0
  4. data/app/controllers/easy_ml/datasets_controller.rb +32 -1
  5. data/app/controllers/easy_ml/models_controller.rb +3 -0
  6. data/app/controllers/easy_ml/predictions_controller.rb +10 -5
  7. data/app/frontend/components/DatasetPreview.tsx +50 -19
  8. data/app/frontend/components/ModelForm.tsx +1 -1
  9. data/app/frontend/components/SearchableSelect.tsx +0 -1
  10. data/app/frontend/components/dataset/ColumnConfigModal.tsx +7 -1
  11. data/app/frontend/components/dataset/ColumnFilters.tsx +37 -3
  12. data/app/frontend/components/dataset/ColumnList.tsx +14 -2
  13. data/app/frontend/components/dataset/PreprocessingConfig.tsx +82 -21
  14. data/app/frontend/pages/DatasourcesPage.tsx +0 -2
  15. data/app/frontend/types/dataset.ts +3 -0
  16. data/app/jobs/easy_ml/compute_feature_job.rb +0 -2
  17. data/app/jobs/easy_ml/refresh_dataset_job.rb +0 -6
  18. data/app/models/easy_ml/column/imputers/base.rb +89 -0
  19. data/app/models/easy_ml/column/imputers/categorical.rb +35 -0
  20. data/app/models/easy_ml/column/imputers/clip.rb +30 -0
  21. data/app/models/easy_ml/column/imputers/constant.rb +27 -0
  22. data/app/models/easy_ml/column/imputers/ffill.rb +29 -0
  23. data/app/models/easy_ml/column/imputers/imputer.rb +103 -0
  24. data/app/models/easy_ml/column/imputers/mean.rb +27 -0
  25. data/app/models/easy_ml/column/imputers/median.rb +27 -0
  26. data/app/models/easy_ml/column/imputers/most_frequent.rb +27 -0
  27. data/app/models/easy_ml/column/imputers/null_imputer.rb +15 -0
  28. data/app/models/easy_ml/column/imputers/one_hot_encoder.rb +30 -0
  29. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +78 -0
  30. data/app/models/easy_ml/column/imputers/today.rb +20 -0
  31. data/app/models/easy_ml/column/imputers.rb +126 -0
  32. data/app/models/easy_ml/column/learner.rb +18 -0
  33. data/app/models/easy_ml/column/learners/base.rb +103 -0
  34. data/app/models/easy_ml/column/learners/boolean.rb +11 -0
  35. data/app/models/easy_ml/column/learners/categorical.rb +51 -0
  36. data/app/models/easy_ml/column/learners/datetime.rb +19 -0
  37. data/app/models/easy_ml/column/learners/null.rb +22 -0
  38. data/app/models/easy_ml/column/learners/numeric.rb +33 -0
  39. data/app/models/easy_ml/column/learners/string.rb +15 -0
  40. data/app/models/easy_ml/column/lineage/base.rb +22 -0
  41. data/app/models/easy_ml/column/lineage/computed_by_feature.rb +23 -0
  42. data/app/models/easy_ml/column/lineage/preprocessed.rb +23 -0
  43. data/app/models/easy_ml/column/lineage/raw_dataset.rb +23 -0
  44. data/app/models/easy_ml/column/lineage.rb +28 -0
  45. data/app/models/easy_ml/column/selector.rb +96 -0
  46. data/app/models/easy_ml/column.rb +344 -39
  47. data/app/models/easy_ml/column_history.rb +31 -20
  48. data/app/models/easy_ml/column_list.rb +79 -62
  49. data/app/models/easy_ml/dataset.rb +156 -104
  50. data/app/models/easy_ml/dataset_history.rb +23 -23
  51. data/app/models/easy_ml/datasource.rb +4 -0
  52. data/app/models/easy_ml/datasource_history.rb +1 -0
  53. data/app/models/easy_ml/datasources/file_datasource.rb +1 -1
  54. data/app/models/easy_ml/datasources/polars_datasource.rb +6 -12
  55. data/app/models/easy_ml/datasources/s3_datasource.rb +1 -1
  56. data/app/models/easy_ml/feature.rb +29 -10
  57. data/app/models/easy_ml/feature_history.rb +12 -0
  58. data/app/models/easy_ml/feature_list.rb +15 -0
  59. data/app/models/easy_ml/model.rb +25 -4
  60. data/app/models/easy_ml/model_history.rb +1 -0
  61. data/app/models/easy_ml/retraining_run.rb +1 -0
  62. data/app/serializers/easy_ml/column_serializer.rb +11 -1
  63. data/app/serializers/easy_ml/dataset_serializer.rb +23 -2
  64. data/config/initializers/enumerable.rb +17 -0
  65. data/config/initializers/inflections.rb +2 -0
  66. data/config/routes.rb +3 -0
  67. data/lib/easy_ml/core/tuner.rb +1 -1
  68. data/lib/easy_ml/data/date_converter.rb +137 -30
  69. data/lib/easy_ml/data/polars_column.rb +17 -0
  70. data/lib/easy_ml/data/polars_in_memory.rb +30 -0
  71. data/lib/easy_ml/data/polars_reader.rb +20 -1
  72. data/lib/easy_ml/data/splits/in_memory_split.rb +7 -5
  73. data/lib/easy_ml/data/splits/split.rb +2 -1
  74. data/lib/easy_ml/data/synced_directory.rb +5 -3
  75. data/lib/easy_ml/data.rb +1 -2
  76. data/lib/easy_ml/feature_store.rb +33 -22
  77. data/lib/easy_ml/predict.rb +13 -2
  78. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +7 -0
  79. data/lib/easy_ml/railtie/templates/migration/add_computed_columns_to_easy_ml_columns.rb.tt +18 -0
  80. data/lib/easy_ml/railtie/templates/migration/add_default_to_is_target.rb.tt +6 -0
  81. data/lib/easy_ml/railtie/templates/migration/add_last_feature_sha_to_columns.rb.tt +9 -0
  82. data/lib/easy_ml/railtie/templates/migration/add_learned_at_to_easy_ml_columns.rb.tt +13 -0
  83. data/lib/easy_ml/railtie/templates/migration/add_sha_to_datasources_datasets_and_columns.rb.tt +21 -0
  84. data/lib/easy_ml/railtie/templates/migration/add_slug_to_easy_ml_models.rb.tt +20 -0
  85. data/lib/easy_ml/railtie/templates/migration/remove_preprocessor_statistics_from_easy_ml_datasets.rb.tt +11 -0
  86. data/lib/easy_ml/version.rb +1 -1
  87. data/lib/tasks/profile.rake +40 -0
  88. data/public/easy_ml/assets/.vite/manifest.json +2 -2
  89. data/public/easy_ml/assets/assets/Application-BbFobaXt.css +1 -0
  90. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Dni_GM8r.js +489 -0
  91. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Dni_GM8r.js.map +1 -0
  92. metadata +45 -10
  93. data/app/models/easy_ml/adapters/base_adapter.rb +0 -45
  94. data/app/models/easy_ml/adapters/polars_adapter.rb +0 -77
  95. data/lib/easy_ml/data/preprocessor.rb +0 -383
  96. data/lib/easy_ml/data/simple_imputer.rb +0 -255
  97. data/lib/easy_ml/data/statistics_learner.rb +0 -128
  98. data/public/easy_ml/assets/assets/Application-BUsRR6b6.css +0 -1
  99. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DTZ2348z.js +0 -474
  100. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DTZ2348z.js.map +0 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e52412950fefc02e9b838930f132873c726440ebbc343159504d7d3287a39d05
4
- data.tar.gz: 44ff18d1f1df78b542c8e536427189fce63d147e7e86623d219ed9b89c501ca7
3
+ metadata.gz: 43809758ea028bdeb30b9255c50fd951d7870cd5286d0e3ae9f0e30e09bf22a6
4
+ data.tar.gz: 5b2f0ae171a1043b8ce76dc438cab2931c2562f481de8c024beaeee8b15accfc
5
5
  SHA512:
6
- metadata.gz: 1e543781fb426a6fa7fe6ad6f5b7c924bdab38d88ac8ad7288db3a24f683661b3745a6f2176c993899a9f9737af7e54dfa59cc439a71739d3e2d2d2d75714621
7
- data.tar.gz: 3f012c5a3126eec7a69c3c11dd45017f7c2ded7a2bfd5e6e70bcaa388000b19e50d19ed15dc6b47786f61b698cc081e915abade7ece544a3c8a14d0a8f5c4696
6
+ metadata.gz: 012d1c40e4a4efdf330702effe53eef60b84f8ff7fb5277d0b0030192990f8e86ab9ec7e73ec80972328425c1bf53b8ccf9a275c0e083b7b8e13eeb04be5914b
7
+ data.tar.gz: b2b504db55932b4abcf8acd06ed19ea5eb9f3c79d246ac7a3398494f7220451e886ca35196c94816356542f774c11c12ca599595af2841aa76a14c0e6304e982
@@ -0,0 +1,8 @@
1
+ module EasyML
2
+ class APIsController < ApplicationController
3
+ def show
4
+ model = EasyML::Model.find_by!(slug: params[:model])
5
+ render json: { data: model.api_fields }
6
+ end
7
+ end
8
+ end
@@ -23,6 +23,10 @@ module EasyML
23
23
  SettingsSerializer.new(settings).serializable_hash.dig(:data, :attributes)
24
24
  end
25
25
 
26
+ def dataset_to_json_small(dataset)
27
+ DatasetSerializer::SmallSerializer.new(dataset).serializable_hash.dig(:data, :attributes)
28
+ end
29
+
26
30
  def dataset_to_json(dataset)
27
31
  DatasetSerializer.new(dataset).serializable_hash.dig(:data, :attributes)
28
32
  end
@@ -26,7 +26,7 @@ module EasyML
26
26
  datasets = Dataset.all.order(id: :desc)
27
27
 
28
28
  render inertia: "pages/DatasetsPage", props: {
29
- datasets: datasets.map { |dataset| dataset_to_json(dataset) },
29
+ datasets: datasets.map { |dataset| dataset_to_json_small(dataset) },
30
30
  constants: Dataset.constants,
31
31
  }
32
32
  end
@@ -76,6 +76,37 @@ module EasyML
76
76
  column_attrs[:preprocessing_steps] = nil if column_attrs.dig(:preprocessing_steps, :training, :method) == "none"
77
77
  end
78
78
 
79
+ # Handle feature ID assignment for existing features
80
+ if dataset_params[:features_attributes].present?
81
+ # Clean up any feature IDs that don't exist anymore
82
+ feature_ids = dataset_params[:features_attributes].map { |attrs| attrs[:id] }.compact
83
+ existing_feature_ids = Feature.where(id: feature_ids).pluck(:id)
84
+
85
+ params[:dataset][:features_attributes].each do |attrs|
86
+ if attrs[:id].present? && !existing_feature_ids.include?(attrs[:id].to_i)
87
+ attrs.delete(:id)
88
+ end
89
+ end
90
+
91
+ # Find existing features by feature_class
92
+ feature_classes = dataset_params[:features_attributes].map { |attrs|
93
+ attrs[:feature_class] if attrs[:id].blank?
94
+ }.compact
95
+
96
+ existing_features = Feature.where(feature_class: feature_classes)
97
+
98
+ # Update params with existing feature IDs
99
+ existing_features.each do |feature|
100
+ matching_param_index = params[:dataset][:features_attributes].find_index { |attrs|
101
+ attrs[:feature_class] == feature.feature_class
102
+ }
103
+
104
+ if matching_param_index
105
+ params[:dataset][:features_attributes][matching_param_index][:id] = feature.id
106
+ end
107
+ end
108
+ end
109
+
79
110
  if dataset.update(dataset_params)
80
111
  flash.now[:notice] = "Dataset configuration was successfully updated."
81
112
  render inertia: "pages/DatasetDetailsPage", props: {
@@ -53,6 +53,9 @@ module EasyML
53
53
  flash[:notice] = "Model was successfully created."
54
54
  redirect_to easy_ml_models_path
55
55
  else
56
+ errors = model.errors.to_hash(true)
57
+ values = errors.values.flatten
58
+ flash.now[:error] = values.join(", ")
56
59
  render inertia: "pages/NewModelPage", props: {
57
60
  datasets: EasyML::Dataset.all.map do |dataset|
58
61
  dataset.slice(:id, :name, :num_rows)
@@ -3,6 +3,11 @@ module EasyML
3
3
  skip_before_action :verify_authenticity_token, only: [:create]
4
4
 
5
5
  def create
6
+ slug = params[:model]
7
+ unless EasyML::Model.find_by(slug: slug).inference_version.present?
8
+ return render json: { error: "Model not found" }, status: :not_found
9
+ end
10
+
6
11
  unless params.key?(:input)
7
12
  return render json: { error: "Must provide key: input" }, status: :not_found
8
13
  end
@@ -12,17 +17,17 @@ module EasyML
12
17
  return render json: { error: "Input must be a hash" }, status: :not_found
13
18
  end
14
19
 
15
- model_name = params[:model]
16
- unless EasyML::Model.find_by(name: model_name).present?
17
- return render json: { error: "Model not found" }, status: :not_found
20
+ valid, fields = EasyML::Predict.validate_input(slug, input)
21
+ unless valid
22
+ return render json: { error: "Missing required fields: #{fields}" }, status: :not_found
18
23
  end
19
24
 
20
- prediction = EasyML::Predict.predict(model_name, input)
25
+ prediction = EasyML::Predict.predict(slug, input)
21
26
 
22
27
  render json: { prediction: EasyML::PredictionSerializer.new(prediction).serializable_hash.dig(:data, :attributes) }, status: :ok
23
28
  rescue ActiveRecord::RecordNotFound
24
29
  render json: { error: "Model not found" }, status: :not_found
25
- rescue StandardError => e
30
+ rescue => e
26
31
  render json: { error: e.message }, status: :unprocessable_entity
27
32
  end
28
33
  end
@@ -58,9 +58,11 @@ export function DatasetPreview({ dataset }: DatasetPreviewProps) {
58
58
  key={column.name}
59
59
  className="bg-gray-50 rounded-lg p-4"
60
60
  >
61
- <div className="flex items-center justify-between mb-2">
62
- <h4 className="font-medium text-gray-900">{column.name}</h4>
63
- <span className="text-xs font-medium text-gray-500 px-2 py-1 bg-gray-200 rounded-full">
61
+ <div className="flex items-center justify-between mb-2 gap-2">
62
+ <h4 className="font-medium text-gray-900 break-normal max-w-[70%] word-break:break-word overflow-wrap:anywhere whitespace-pre-wrap">
63
+ {column.name.split('_').join('_\u200B')}
64
+ </h4>
65
+ <span className="text-xs font-medium text-gray-500 px-2 py-1 bg-gray-200 rounded-full flex-shrink-0">
64
66
  {column.datatype}
65
67
  </span>
66
68
  </div>
@@ -68,23 +70,48 @@ export function DatasetPreview({ dataset }: DatasetPreviewProps) {
68
70
  {column.statistics && (
69
71
  <div className="space-y-1">
70
72
  {Object.entries(column.statistics.raw).map(([key, value]) => {
71
- if (key === "counts") {
73
+ // Skip internal keys and null/undefined values
74
+ if (key === "counts" ||
75
+ key === "allowed_categories" ||
76
+ key === "value" ||
77
+ key === "label_encoder" ||
78
+ key === "label_decoder" ||
79
+ value === null ||
80
+ value === undefined) {
72
81
  return null;
73
82
  }
83
+
84
+ // Format the value based on its type
85
+ let displayValue: string;
86
+ if (typeof value === 'number') {
87
+ displayValue = value.toLocaleString(undefined, {
88
+ maximumFractionDigits: 2
89
+ });
90
+ } else if (typeof value === 'object') {
91
+ // Handle arrays or other objects
92
+ displayValue = JSON.stringify(value);
93
+ } else if (typeof value === 'boolean') {
94
+ displayValue = value.toString();
95
+ } else {
96
+ displayValue = String(value);
97
+ }
98
+
99
+ // Truncate long strings
100
+ if (displayValue.length > 50) {
101
+ displayValue = displayValue.slice(0, 47) + '...';
102
+ }
103
+
74
104
  return (
75
- <div key={key} className="flex justify-between text-sm">
76
- <span className="text-gray-500">
77
- {key.charAt(0).toUpperCase() + key.slice(1)}:
78
- </span>
79
- <span className="font-medium text-gray-900">
80
- {typeof value === 'number' ?
81
- value.toLocaleString(undefined, {
82
- maximumFractionDigits: 2
83
- }) :
84
- value}
85
- </span>
86
- </div>
87
- )})}
105
+ <div key={key} className="flex justify-between text-sm gap-2">
106
+ <span className="text-gray-500 flex-shrink-0">
107
+ {key.charAt(0).toUpperCase() + key.slice(1)}:
108
+ </span>
109
+ <span className="font-medium text-gray-900 text-right break-all">
110
+ {displayValue}
111
+ </span>
112
+ </div>
113
+ );
114
+ })}
88
115
  </div>
89
116
  )}
90
117
  </div>
@@ -142,10 +169,14 @@ export function DatasetPreview({ dataset }: DatasetPreviewProps) {
142
169
  <tr key={i}>
143
170
  {columns.map((column) => (
144
171
  <td
145
- key={row[column]}
172
+ key={`${i}-${column}`}
146
173
  className="whitespace-nowrap px-3 py-4 text-sm text-gray-500"
147
174
  >
148
- {row[column]?.toString()}
175
+ {row[column] === null || row[column] === undefined
176
+ ? ''
177
+ : typeof row[column] === 'object'
178
+ ? JSON.stringify(row[column])
179
+ : String(row[column])}
149
180
  </td>
150
181
  ))}
151
182
  </tr>
@@ -74,7 +74,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
74
74
  dataset_id: initialData?.dataset_id || '',
75
75
  task: initialData?.task || 'classification',
76
76
  objective: initialData?.objective || 'binary:logistic',
77
- metrics: initialData?.metrics || ['accuracy'],
77
+ metrics: initialData?.metrics || ['accuracy_score'],
78
78
  retraining_job_attributes: initialData?.retraining_job ? {
79
79
  id: initialData.retraining_job.id,
80
80
  frequency: initialData.retraining_job.frequency,
@@ -61,7 +61,6 @@ export const SearchableSelect = forwardRef<HTMLButtonElement, SearchableSelectPr
61
61
  }, [isOpen]);
62
62
 
63
63
  const handleOptionClick = (optionValue: Option['value'], e: React.MouseEvent) => {
64
- debugger;
65
64
  e.preventDefault();
66
65
  e.stopPropagation();
67
66
  onChange(optionValue);
@@ -58,7 +58,7 @@ export function ColumnConfigModal({
58
58
  const [selectedColumn, setSelectedColumn] = useState<string | null>(null);
59
59
  const [searchQuery, setSearchQuery] = useState("");
60
60
  const [activeFilters, setActiveFilters] = useState<{
61
- view: "all" | "training" | "hidden" | "preprocessed" | "nulls";
61
+ view: "all" | "training" | "hidden" | "preprocessed" | "nulls" | "computed" | "required";
62
62
  types: string[];
63
63
  }>({
64
64
  view: "all",
@@ -103,6 +103,10 @@ export function ColumnConfigModal({
103
103
  return colHasPreprocessingSteps(column);
104
104
  case "nulls":
105
105
  return (column.statistics?.processed?.null_count || 0) > 0;
106
+ case "computed":
107
+ return column.is_computed;
108
+ case "required":
109
+ return column.required;
106
110
  default:
107
111
  return true;
108
112
  }
@@ -124,6 +128,8 @@ export function ColumnConfigModal({
124
128
  withNulls: dataset.columns.filter(
125
129
  (c) => (c.statistics?.processed?.null_count || 0) > 0
126
130
  ).length,
131
+ computed: dataset.columns.filter((c) => c.is_computed === true).length,
132
+ required: dataset.columns.filter((c) => c.required === true).length,
127
133
  }),
128
134
  [dataset.columns, filteredColumns]
129
135
  );
@@ -1,16 +1,16 @@
1
1
  import React, { useState } from 'react';
2
- import { Filter, Database, Wrench, Eye, EyeOff, AlertTriangle, ChevronLeft, ChevronRight } from 'lucide-react';
2
+ import { Filter, Database, Wrench, Eye, EyeOff, AlertTriangle, ChevronLeft, ChevronRight, Calculator, Target } from 'lucide-react';
3
3
  import type { Column } from '../../types';
4
4
 
5
5
  const ITEMS_PER_PAGE = 5;
6
6
  interface ColumnFiltersProps {
7
7
  types: string[];
8
8
  activeFilters: {
9
- view: 'all' | 'training' | 'hidden' | 'preprocessed' | 'nulls';
9
+ view: 'all' | 'training' | 'hidden' | 'preprocessed' | 'nulls' | 'computed' | 'required';
10
10
  types: string[];
11
11
  };
12
12
  onFilterChange: (filters: {
13
- view: 'all' | 'training' | 'hidden' | 'preprocessed' | 'nulls';
13
+ view: 'all' | 'training' | 'hidden' | 'preprocessed' | 'nulls' | 'computed' | 'required';
14
14
  types: string[];
15
15
  }) => void;
16
16
  columnStats: {
@@ -20,6 +20,8 @@ interface ColumnFiltersProps {
20
20
  hidden: number;
21
21
  withPreprocessing: number;
22
22
  withNulls: number;
23
+ computed: number;
24
+ required: number;
23
25
  };
24
26
  colHasPreprocessingSteps: (col: Column) => boolean;
25
27
  columns: Column[];
@@ -43,6 +45,10 @@ export function ColumnFilters({
43
45
  return `${columnStats.withPreprocessing} columns`;
44
46
  case 'nulls':
45
47
  return `${columnStats.withNulls} columns`;
48
+ case 'computed':
49
+ return `${columnStats.computed} columns`;
50
+ case 'required':
51
+ return `${columnStats.required} columns`;
46
52
  default:
47
53
  return `${columnStats.total} columns`;
48
54
  }
@@ -158,6 +164,34 @@ export function ColumnFilters({
158
164
  ({getViewStats('nulls')})
159
165
  </span>
160
166
  </button>
167
+ <button
168
+ onClick={() => onFilterChange({ ...activeFilters, view: 'computed' })}
169
+ className={`inline-flex items-center gap-1 px-3 py-1.5 rounded-md text-sm font-medium ${
170
+ activeFilters.view === 'computed'
171
+ ? 'bg-purple-100 text-purple-900'
172
+ : 'text-gray-600 hover:bg-gray-50'
173
+ }`}
174
+ >
175
+ <Calculator className="w-4 h-4" />
176
+ Computed
177
+ <span className="text-xs text-gray-500 ml-1">
178
+ ({getViewStats('computed')})
179
+ </span>
180
+ </button>
181
+ <button
182
+ onClick={() => onFilterChange({ ...activeFilters, view: 'required' })}
183
+ className={`inline-flex items-center gap-1 px-3 py-1.5 rounded-md text-sm font-medium ${
184
+ activeFilters.view === 'required'
185
+ ? 'bg-blue-100 text-blue-900'
186
+ : 'text-gray-600 hover:bg-gray-50'
187
+ }`}
188
+ >
189
+ <Target className="w-4 h-4" />
190
+ Required
191
+ <span className="text-xs text-gray-500 ml-1">
192
+ ({getViewStats('required')})
193
+ </span>
194
+ </button>
161
195
  </div>
162
196
 
163
197
  {/* Column Types */}
@@ -1,5 +1,5 @@
1
1
  import React from 'react';
2
- import { Settings2, AlertCircle, Target, EyeOff, Eye } from 'lucide-react';
2
+ import { Settings2, AlertCircle, Target, EyeOff, Eye, Calculator, Star } from 'lucide-react';
3
3
  import type { Column } from '../../types';
4
4
  import { usePage } from "@inertiajs/react";
5
5
 
@@ -79,11 +79,23 @@ export function ColumnList({
79
79
  </p>
80
80
  )}
81
81
  <div className="flex flex-wrap gap-2">
82
+ {column.required && (
83
+ <div className="flex items-center gap-1 text-blue-600">
84
+ <Star className="w-3 h-3" />
85
+ <span className="text-xs">required</span>
86
+ </div>
87
+ )}
88
+ {column.is_computed && (
89
+ <div className="flex items-center gap-1 text-purple-600">
90
+ <Calculator className="w-3 h-3" />
91
+ <span className="text-xs">computed</span>
92
+ </div>
93
+ )}
82
94
  {column.preprocessing_steps && column.preprocessing_steps?.training &&
83
95
  column.preprocessing_steps?.training?.method !== 'none' && (
84
96
  <div className="flex items-center gap-1 text-blue-600">
85
97
  <AlertCircle className="w-3 h-3" />
86
- <span className="text-xs">Preprocessing configured</span>
98
+ <span className="text-xs">preprocessing configured</span>
87
99
  </div>
88
100
  )}
89
101
  {column.hidden && (
@@ -1,5 +1,5 @@
1
1
  import React, { useState, useEffect } from 'react';
2
- import { Settings2, Wrench, ArrowRight, Pencil, Trash2, Database } from 'lucide-react';
2
+ import { Settings2, Wrench, ArrowRight, Pencil, Trash2, Database, Calculator, GitBranch } from 'lucide-react';
3
3
  import type { Dataset, Column, ColumnType, PreprocessingConstants, PreprocessingSteps, PreprocessingStep } from '../../types/dataset';
4
4
  import { Badge } from "@/components/ui/badge";
5
5
 
@@ -250,7 +250,7 @@ export function PreprocessingConfig({
250
250
  setIsEditingDescription(true);
251
251
  };
252
252
 
253
- let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw.null_count) || 0;
253
+ let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw?.null_count) || 0;
254
254
  const nullPercentage = nullCount && column.statistics?.raw.num_rows
255
255
  ? ((nullCount / column.statistics.raw.num_rows) * 100)
256
256
  : 0;
@@ -332,26 +332,41 @@ export function PreprocessingConfig({
332
332
  </div>
333
333
  </div>
334
334
  <div className="flex items-center gap-4 flex-shrink-0">
335
- {column.is_target ? (
336
- <span className="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-purple-100 text-purple-800">
337
- Target Column
338
- </span>
339
- ) : (
340
- <div className="flex items-center gap-2">
341
- <label className="flex items-center gap-2 text-sm">
342
- <input
343
- type="checkbox"
344
- checked={column.drop_if_null}
345
- onChange={onToggleDropIfNull}
346
- className="rounded border-gray-300 text-red-600 focus:ring-red-500"
347
- />
348
- <span className="flex items-center gap-1 text-gray-700">
349
- <Trash2 className="w-4 h-4 text-gray-400" />
350
- Drop if null
351
- </span>
352
- </label>
335
+ <div className="relative flex items-center gap-2">
336
+ <div className="absolute right-0 -top-8 flex items-center gap-2">
337
+ {column.required && (
338
+ <Badge variant="secondary" className="bg-blue-100 text-blue-800">
339
+ Required
340
+ </Badge>
341
+ )}
342
+ {column.is_computed && (
343
+ <Badge variant="secondary" className="bg-purple-100 text-purple-800">
344
+ <Calculator className="w-3 h-3 mr-1" />
345
+ Computed
346
+ </Badge>
347
+ )}
353
348
  </div>
354
- )}
349
+ {column.is_target ? (
350
+ <span className="inline-flex items-center px-3 py-1 rounded-full text-sm font-medium bg-purple-100 text-purple-800">
351
+ Target Column
352
+ </span>
353
+ ) : (
354
+ <div className="flex items-center gap-2">
355
+ <label className="flex items-center gap-2 text-sm">
356
+ <input
357
+ type="checkbox"
358
+ checked={column.drop_if_null}
359
+ onChange={onToggleDropIfNull}
360
+ className="rounded border-gray-300 text-red-600 focus:ring-red-500"
361
+ />
362
+ <span className="flex items-center gap-1 text-gray-700">
363
+ <Trash2 className="w-4 h-4 text-gray-400" />
364
+ Drop if null
365
+ </span>
366
+ </label>
367
+ </div>
368
+ )}
369
+ </div>
355
370
  </div>
356
371
  </div>
357
372
 
@@ -481,6 +496,52 @@ export function PreprocessingConfig({
481
496
  )}
482
497
  </div>
483
498
 
499
+ {/* Column Lineage Section */}
500
+ {column.lineage && column.lineage.length > 0 && (
501
+ <div className="bg-white rounded-lg border border-gray-200 p-6">
502
+ <h3 className="text-lg font-medium text-gray-900 mb-4 flex items-center gap-2">
503
+ <GitBranch className="w-5 h-5 text-gray-500" />
504
+ Column Lineage
505
+ </h3>
506
+ <div className="space-y-4">
507
+ {column.lineage.map((step, index) => (
508
+ <div key={index} className="flex items-start gap-3">
509
+ <div className={`w-8 h-8 rounded-full flex items-center justify-center flex-shrink-0 ${
510
+ step.key === 'raw_dataset'
511
+ ? 'bg-gray-100'
512
+ : step.key === 'computed_by_feature'
513
+ ? 'bg-purple-100'
514
+ : 'bg-blue-100'
515
+ }`}>
516
+ {step.key === 'raw_dataset' ? (
517
+ <Database className="w-4 h-4 text-gray-600" />
518
+ ) : step.key === 'computed_by_feature' ? (
519
+ <Calculator className="w-4 h-4 text-purple-600" />
520
+ ) : (
521
+ <Settings2 className="w-4 h-4 text-blue-600" />
522
+ )}
523
+ </div>
524
+ <div className="flex-1">
525
+ <div className="flex items-center justify-between">
526
+ <p className="text-sm font-medium text-gray-900">
527
+ {step.description}
528
+ </p>
529
+ {step.timestamp && (
530
+ <span className="text-xs text-gray-500">
531
+ {new Date(step.timestamp).toLocaleString()}
532
+ </span>
533
+ )}
534
+ </div>
535
+ {index < column.lineage.length - 1 && (
536
+ <div className="ml-4 mt-2 mb-2 w-0.5 h-4 bg-gray-200" />
537
+ )}
538
+ </div>
539
+ </div>
540
+ ))}
541
+ </div>
542
+ </div>
543
+ )}
544
+
484
545
  {/* Data Type Section */}
485
546
  <div className="bg-white rounded-lg border border-gray-200 p-6">
486
547
  <h3 className="text-lg font-medium text-gray-900 mb-4 flex items-center gap-2">
@@ -49,12 +49,10 @@ export default function DatasourcesPage({ datasources }: { datasources: Datasour
49
49
  preserveScroll: true, // Keeps the scroll position
50
50
  preserveState: true, // Keeps the form state
51
51
  onSuccess: (e) => {
52
- debugger;
53
52
  console.log("SUCCESS")
54
53
  // The page will automatically refresh with new data
55
54
  },
56
55
  onError: () => {
57
- debugger;
58
56
  // Handle error case if needed
59
57
  console.error('Failed to sync datasource');
60
58
  }
@@ -84,6 +84,9 @@ export interface Column {
84
84
  sample_values: {};
85
85
  statistics?: Statistics;
86
86
  preprocessing_steps?: PreprocessingSteps;
87
+ lineage?: Array<{ key: string }>;
88
+ required?: boolean;
89
+ is_computed?: boolean;
87
90
  }
88
91
 
89
92
  export interface Dataset {
@@ -13,7 +13,6 @@ module EasyML
13
13
 
14
14
  # Check if any feature has failed before proceeding
15
15
  if dataset.features.any? { |f| f.workflow_status == "failed" }
16
- puts "Aborting feature computation due to previous feature failure"
17
16
  return
18
17
  end
19
18
 
@@ -39,7 +38,6 @@ module EasyML
39
38
  end
40
39
 
41
40
  def self.after_batch_hook(batch_id, *args)
42
- puts "After batch!"
43
41
  batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
44
42
  feature_ids = batch_args.pluck(:feature_id).uniq
45
43
  parent_id = batch_args.pluck(:parent_batch_id).first
@@ -3,28 +3,22 @@ module EasyML
3
3
  def perform(id)
4
4
  begin
5
5
  dataset = EasyML::Dataset.find(id)
6
- return if dataset.workflow_status == :analyzing
7
6
 
8
7
  puts "Refreshing dataset #{dataset.name}"
9
- puts "Needs refresh? #{dataset.needs_refresh?}"
10
8
  unless dataset.needs_refresh?
11
9
  dataset.update(workflow_status: :ready)
12
10
  end
13
11
 
14
12
  create_event(dataset, "started")
15
13
 
16
- puts "Prepare! #{dataset.name}"
17
14
  dataset.unlock!
18
15
  dataset.prepare
19
16
  if dataset.features.needs_fit.any?
20
17
  dataset.fit_features(async: true)
21
- puts "Computing features!"
22
18
  else
23
19
  dataset.actually_refresh
24
- puts "Done!"
25
20
  end
26
21
  rescue StandardError => e
27
- puts "Error #{e.message}"
28
22
  if Rails.env.test?
29
23
  raise e
30
24
  end