easy_ml 0.2.0.pre.rc57 → 0.2.0.pre.rc60

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/apis_controller.rb +8 -0
  3. data/app/controllers/easy_ml/application_controller.rb +4 -0
  4. data/app/controllers/easy_ml/datasets_controller.rb +32 -1
  5. data/app/controllers/easy_ml/models_controller.rb +3 -0
  6. data/app/controllers/easy_ml/predictions_controller.rb +10 -5
  7. data/app/frontend/components/DatasetPreview.tsx +50 -19
  8. data/app/frontend/components/ModelForm.tsx +1 -1
  9. data/app/frontend/components/SearchableSelect.tsx +0 -1
  10. data/app/frontend/components/dataset/ColumnConfigModal.tsx +7 -1
  11. data/app/frontend/components/dataset/ColumnFilters.tsx +37 -3
  12. data/app/frontend/components/dataset/ColumnList.tsx +14 -2
  13. data/app/frontend/components/dataset/PreprocessingConfig.tsx +82 -21
  14. data/app/frontend/pages/DatasourcesPage.tsx +0 -2
  15. data/app/frontend/types/dataset.ts +3 -0
  16. data/app/jobs/easy_ml/compute_feature_job.rb +0 -2
  17. data/app/jobs/easy_ml/refresh_dataset_job.rb +0 -6
  18. data/app/models/easy_ml/column/imputers/base.rb +89 -0
  19. data/app/models/easy_ml/column/imputers/categorical.rb +35 -0
  20. data/app/models/easy_ml/column/imputers/clip.rb +30 -0
  21. data/app/models/easy_ml/column/imputers/constant.rb +27 -0
  22. data/app/models/easy_ml/column/imputers/ffill.rb +29 -0
  23. data/app/models/easy_ml/column/imputers/imputer.rb +103 -0
  24. data/app/models/easy_ml/column/imputers/mean.rb +27 -0
  25. data/app/models/easy_ml/column/imputers/median.rb +27 -0
  26. data/app/models/easy_ml/column/imputers/most_frequent.rb +27 -0
  27. data/app/models/easy_ml/column/imputers/null_imputer.rb +15 -0
  28. data/app/models/easy_ml/column/imputers/one_hot_encoder.rb +30 -0
  29. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +78 -0
  30. data/app/models/easy_ml/column/imputers/today.rb +20 -0
  31. data/app/models/easy_ml/column/imputers.rb +126 -0
  32. data/app/models/easy_ml/column/learner.rb +18 -0
  33. data/app/models/easy_ml/column/learners/base.rb +103 -0
  34. data/app/models/easy_ml/column/learners/boolean.rb +11 -0
  35. data/app/models/easy_ml/column/learners/categorical.rb +51 -0
  36. data/app/models/easy_ml/column/learners/datetime.rb +19 -0
  37. data/app/models/easy_ml/column/learners/null.rb +22 -0
  38. data/app/models/easy_ml/column/learners/numeric.rb +33 -0
  39. data/app/models/easy_ml/column/learners/string.rb +15 -0
  40. data/app/models/easy_ml/column/lineage/base.rb +22 -0
  41. data/app/models/easy_ml/column/lineage/computed_by_feature.rb +23 -0
  42. data/app/models/easy_ml/column/lineage/preprocessed.rb +23 -0
  43. data/app/models/easy_ml/column/lineage/raw_dataset.rb +23 -0
  44. data/app/models/easy_ml/column/lineage.rb +28 -0
  45. data/app/models/easy_ml/column/selector.rb +96 -0
  46. data/app/models/easy_ml/column.rb +344 -39
  47. data/app/models/easy_ml/column_history.rb +31 -20
  48. data/app/models/easy_ml/column_list.rb +79 -62
  49. data/app/models/easy_ml/dataset.rb +156 -104
  50. data/app/models/easy_ml/dataset_history.rb +23 -23
  51. data/app/models/easy_ml/datasource.rb +4 -0
  52. data/app/models/easy_ml/datasource_history.rb +1 -0
  53. data/app/models/easy_ml/datasources/file_datasource.rb +1 -1
  54. data/app/models/easy_ml/datasources/polars_datasource.rb +6 -12
  55. data/app/models/easy_ml/datasources/s3_datasource.rb +1 -1
  56. data/app/models/easy_ml/feature.rb +29 -10
  57. data/app/models/easy_ml/feature_history.rb +12 -0
  58. data/app/models/easy_ml/feature_list.rb +15 -0
  59. data/app/models/easy_ml/model.rb +25 -4
  60. data/app/models/easy_ml/model_history.rb +1 -0
  61. data/app/models/easy_ml/retraining_run.rb +1 -0
  62. data/app/serializers/easy_ml/column_serializer.rb +11 -1
  63. data/app/serializers/easy_ml/dataset_serializer.rb +23 -2
  64. data/config/initializers/enumerable.rb +17 -0
  65. data/config/initializers/inflections.rb +2 -0
  66. data/config/routes.rb +3 -0
  67. data/lib/easy_ml/core/tuner.rb +1 -1
  68. data/lib/easy_ml/data/date_converter.rb +137 -30
  69. data/lib/easy_ml/data/polars_column.rb +17 -0
  70. data/lib/easy_ml/data/polars_in_memory.rb +30 -0
  71. data/lib/easy_ml/data/polars_reader.rb +20 -1
  72. data/lib/easy_ml/data/splits/in_memory_split.rb +7 -5
  73. data/lib/easy_ml/data/splits/split.rb +2 -1
  74. data/lib/easy_ml/data/synced_directory.rb +5 -3
  75. data/lib/easy_ml/data.rb +1 -2
  76. data/lib/easy_ml/feature_store.rb +33 -22
  77. data/lib/easy_ml/predict.rb +13 -2
  78. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +7 -0
  79. data/lib/easy_ml/railtie/templates/migration/add_computed_columns_to_easy_ml_columns.rb.tt +18 -0
  80. data/lib/easy_ml/railtie/templates/migration/add_default_to_is_target.rb.tt +6 -0
  81. data/lib/easy_ml/railtie/templates/migration/add_last_feature_sha_to_columns.rb.tt +9 -0
  82. data/lib/easy_ml/railtie/templates/migration/add_learned_at_to_easy_ml_columns.rb.tt +13 -0
  83. data/lib/easy_ml/railtie/templates/migration/add_sha_to_datasources_datasets_and_columns.rb.tt +21 -0
  84. data/lib/easy_ml/railtie/templates/migration/add_slug_to_easy_ml_models.rb.tt +20 -0
  85. data/lib/easy_ml/railtie/templates/migration/remove_preprocessor_statistics_from_easy_ml_datasets.rb.tt +11 -0
  86. data/lib/easy_ml/version.rb +1 -1
  87. data/lib/tasks/profile.rake +40 -0
  88. data/public/easy_ml/assets/.vite/manifest.json +2 -2
  89. data/public/easy_ml/assets/assets/Application-BbFobaXt.css +1 -0
  90. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Dni_GM8r.js +489 -0
  91. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Dni_GM8r.js.map +1 -0
  92. metadata +45 -10
  93. data/app/models/easy_ml/adapters/base_adapter.rb +0 -45
  94. data/app/models/easy_ml/adapters/polars_adapter.rb +0 -77
  95. data/lib/easy_ml/data/preprocessor.rb +0 -383
  96. data/lib/easy_ml/data/simple_imputer.rb +0 -255
  97. data/lib/easy_ml/data/statistics_learner.rb +0 -128
  98. data/public/easy_ml/assets/assets/Application-BUsRR6b6.css +0 -1
  99. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DTZ2348z.js +0 -474
  100. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DTZ2348z.js.map +0 -1
@@ -0,0 +1,89 @@
module EasyML
  class Column
    class Imputers
      # Shared behaviour for the individual preprocessing adapters
      # (Clip, Mean, Constant, ...).
      #
      # Subclasses declare which preprocessing `method` and/or `params` keys
      # activate them through the `method_applies` / `param_applies` class
      # macros and implement `#transform`.
      class Base
        class << self
          # Registers a preprocessing param key handled by this class.
          #
          # The key is stored as a Symbol in both the global list and the
          # per-class list, because every lookup (see #param_applies?) compares
          # against `key.to_sym`.
          #
          # @param p [String, Symbol] param key
          # @return [Array<Symbol>] the params registered for this class so far
          def param_applies(p)
            key = p.to_sym
            Imputers.supported_params << key
            (Imputers.params_by_class[self] ||= []) << key
          end

          # Registers a preprocessing method name handled by this class.
          #
          # @param m [String, Symbol] method name
          # @return [Array<Symbol>] the methods registered for this class so far
          def method_applies(m)
            name = m.to_sym
            Imputers.supported_methods << name
            (Imputers.methods_by_class[self] ||= []) << name
          end

          # Human readable label; subclasses override it.
          def description
            "Unknown preprocessing method"
          end
        end

        attr_accessor :column, :preprocessing_step

        # @param column the column this adapter works on
        # @param preprocessing_step [Hash] step configuration, read through its
        #   `:method` and `:params` entries
        def initialize(column, preprocessing_step)
          @column = column
          @preprocessing_step = preprocessing_step.with_indifferent_access
        end

        # @return [Boolean] true when the step's method or any of its params
        #   was registered by this adapter's class
        def applies?
          method_applies? || param_applies?
        end

        # A step without a `:method` entry simply does not match (it used to
        # raise NoMethodError on `nil.to_sym`).
        def method_applies?
          imputers_own_methods.include?(method&.to_sym)
        end

        # True when at least one param key belongs to this class and its value
        # is not literally `false`. A step without `:params` does not match
        # (it used to raise NoMethodError on `nil.keys`).
        def param_applies?
          (params || {}).any? do |key, value|
            imputers_own_params.include?(key.to_sym) && value != false
          end
        end

        def imputers_own_methods
          Imputers.methods_by_class[self.class] || []
        end

        def imputers_own_params
          Imputers.params_by_class[self.class] || []
        end

        # @return [Hash, nil] the `:params` entry of the step
        def params
          @preprocessing_step.dig(:params)
        end

        # NOTE: this zero-argument reader replaces Object#method on instances.
        #
        # @return [String, Symbol, nil] the `:method` entry of the step
        def method
          @preprocessing_step.dig(:method)
        end

        # Looks up a learned statistic for the column.
        #
        # Computed columns read from the `:processed` section; all others
        # prefer `:clipped` and fall back to `:raw`.
        def statistics(*args)
          if column.is_computed
            column.statistics.dig(:processed, *args)
          else
            column.statistics.dig(:clipped, *args) || column.statistics.dig(:raw, *args)
          end
        end

        def anything?
          true
        end

        def inspect
          params_str = params ? params.map { |k, v| "#{k}: #{v}" }.join(", ") : "none"
          method_str = method ? method : "none"

          "#<#{self.class.name} method=#{method_str.inspect} params={#{params_str}}>"
        end

        alias_method :to_s, :inspect

        # Subclasses must override this and return the transformed frame.
        def transform(df)
          raise "Method not implemented"
        end

        def description
          self.class.description
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
module EasyML
  class Column
    class Imputers
      # Replaces values outside the column's allowed categories with "other".
      # Only columns whose datatype is :categorical are touched.
      class Categorical < Base
        method_applies :categorical
        param_applies :categorical_min

        def self.description
          "Categorical imputation"
        end

        # @param df the data frame to transform
        # @return the data frame, unchanged when there are no allowed
        #   categories or the column is not categorical
        def transform(df)
          return df unless allowed_categories.present?
          return df unless column.datatype == :categorical

          name = column.name
          bucketed = Polars.when(Polars.col(name).is_in(allowed_categories))
                           .then(Polars.col(name))
                           .otherwise(Polars.lit("other"))
                           .alias(name)
          df.with_column(bucketed)
        end

        def allowed_categories
          column.allowed_categories
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
module EasyML
  class Column
    class Imputers
      # Clips the column's values to the [min, max] range taken from the
      # step's `clip` param.
      class Clip < Base
        attr_accessor :column, :dataset, :preprocessing_step

        param_applies :clip

        def self.description
          "Clip"
        end

        # @param df the data frame to transform
        # @return the data frame with the column clipped in place
        def transform(df)
          clipped = Polars.col(column.name).clip(min, max).alias(column.name)
          df.with_column(clipped)
        end

        # Lower bound; 0 when the step does not provide one.
        def min
          lower = params.dig(:clip, :min)
          lower || 0
        end

        # Upper bound; unbounded when the step does not provide one.
        def max
          upper = params.dig(:clip, :max)
          upper || Float::INFINITY
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls in the column with the constant given in the step's
      # `constant` param.
      class Constant < Base
        method_applies :constant
        param_applies :constant

        def self.description
          "Constant value imputation"
        end

        # @param df the data frame to transform
        # @return the data frame; unchanged when no usable constant is configured
        def transform(df)
          value = constant
          # `false.present?` is false, so a boolean column configured with the
          # constant `false` used to be skipped. Treat `false` as a real value;
          # nil and blank strings are still skipped.
          return df unless value == false || value.present?

          df.with_column(
            Polars.col(column.name).fill_null(value).alias(column.name)
          )
        end

        def constant
          params.dig(:constant)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls with the column's learned `last_value` statistic.
      class Ffill < Base
        method_applies :ffill

        def self.description
          "Forward fill imputation"
        end

        # @param df the data frame to transform
        # @return the data frame; unchanged when no last value was learned
        def transform(df)
          fill = last_value
          # `false.present?` is false, so a boolean column whose last value is
          # `false` used to be skipped. Treat `false` as a real value.
          return df unless fill == false || fill.present?

          dtype = column.polars_datatype
          df.with_column(
            Polars.when(Polars.col(column.name).is_null)
              .then(Polars.lit(fill).cast(dtype))
              .otherwise(Polars.col(column.name).cast(dtype))
              .alias(column.name)
          )
        end

        def last_value
          statistics(:last_value)
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
module EasyML
  class Column
    class Imputers
      # Runs one preprocessing step for a column by chaining every adapter
      # (Clip, Mean, ...) whose `applies?` is true for that step.
      class Imputer
        attr_accessor :dataset, :column, :preprocessing_step

        # @param column the column to preprocess; its `dataset` is captured too
        # @param preprocessing_step [Hash] step config with optional `:method`
        #   and `:params` entries
        # @raise [ArgumentError] when the step names an unsupported method or param
        def initialize(column, preprocessing_step)
          @column = column
          @dataset = column.dataset
          @preprocessing_step = preprocessing_step.with_indifferent_access
          validate_preprocessing_step!
        end

        def inspect
          "#<#{self.class.name} adapters=#{adapters.map(&:inspect).join(", ")}>"
        end

        # Adapter classes in the order they are applied by #transform.
        def ordered_adapters
          [
            Clip,
            Mean,
            Median,
            Constant,
            Ffill,
            Categorical,
            MostFrequent,
            Today,
            OneHotEncoder,
            OrdinalEncoder,
          ]
        end

        # @return [Array] instantiated adapters that apply to this step (memoized)
        def adapters
          @adapters ||= ordered_adapters.map { |klass| klass.new(column, preprocessing_step) }.select(&:applies?)
        end

        # Builds one Imputer per key of the column's preprocessing steps.
        #
        # The previous version passed keyword arguments to the positional
        # initializer and returned the last Imputer from `reduce` instead of
        # the accumulator, so it could not produce the hash.
        #
        # @return [Hash{Symbol => Imputer}, nil] nil when the column has no steps
        def imputers
          return nil if column.preprocessing_steps.blank?

          @imputers ||= column.preprocessing_steps.keys.each_with_object({}) do |key, hash|
            hash[key.to_sym] = Imputer.new(column, column.preprocessing_steps[key])
          end
        end

        def description
          adapters.map(&:description).compact.join(", ")
        end

        def anything?
          adapters.any?
        end

        # Applies every matching adapter in order.
        #
        # @param df the data frame to transform
        # @return the transformed data frame (the input when nothing applies)
        def transform(df)
          return df unless anything?

          adapters.reduce(df) { |frame, adapter| adapter.transform(frame) }
        end

        # Applies only the Clip adapter, when it is active for this step.
        def clip(df)
          return df unless adapters.map(&:class).include?(Clip)

          EasyML::Column::Imputers::Clip.new(column, preprocessing_step).transform(df)
        end

        # Decodes ordinal labels, when the OrdinalEncoder is active for this step.
        def decode_labels(df)
          return df unless adapters.map(&:class).include?(OrdinalEncoder)

          EasyML::Column::Imputers::OrdinalEncoder.new(column, preprocessing_step).decode_labels(df)
        end

        private

        def validate_preprocessing_step!
          validate_params!
          validate_method!
        end

        def validate_params!
          return unless preprocessing_step[:params]

          preprocessing_step[:params].keys.each do |param|
            unless Imputers.supported_params.include?(param.to_sym)
              raise ArgumentError, "Unsupported preprocessing parameter '#{param}'. Supported parameters are: #{Imputers.supported_params.join(", ")}"
            end
          end
        end

        def validate_method!
          return unless preprocessing_step[:method]

          unless Imputers.supported_methods.include?(preprocessing_step[:method].to_sym)
            raise ArgumentError, "Unsupported preprocessing method '#{preprocessing_step[:method]}'. Supported methods are: #{Imputers.supported_methods.join(", ")}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls with the column's learned `mean` statistic.
      class Mean < Base
        method_applies :mean

        def self.description
          "Mean imputation"
        end

        # @param df the data frame to transform
        # @return the data frame; unchanged when no mean was learned
        def transform(df)
          fill = mean
          return df unless fill.present?

          filled = Polars.col(column.name).fill_null(fill).alias(column.name)
          df.with_column(filled)
        end

        def mean
          statistics(:mean)
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls with the column's learned `median` statistic.
      class Median < Base
        method_applies :median

        def self.description
          "Median imputation"
        end

        # @param df the data frame to transform
        # @return the data frame; unchanged when no median was learned
        def transform(df)
          fill = median
          return df unless fill.present?

          filled = Polars.col(column.name).fill_null(fill).alias(column.name)
          df.with_column(filled)
        end

        def median
          statistics(:median)
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls with the column's learned `most_frequent_value` statistic.
      class MostFrequent < Base
        method_applies :most_frequent

        def self.description
          "Most frequent value imputation"
        end

        # @param df the data frame to transform
        # @return the data frame; unchanged when no most-frequent value was learned
        def transform(df)
          fill = most_frequent
          # `false.present?` is false, so a boolean column whose most frequent
          # value is `false` used to be skipped. Treat `false` as a real value.
          return df unless fill == false || fill.present?

          df.with_column(
            Polars.col(column.name).fill_null(fill).alias(column.name)
          )
        end

        def most_frequent
          statistics(:most_frequent_value)
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
module EasyML
  class Column
    class Imputers
      # Null object returned by Imputers when a column has no imputer for the
      # requested group. Any unknown message is answered with its first
      # argument, so `transform(df)`, `clip(df)` and `decode_labels(df)` hand
      # the frame back untouched.
      class NullImputer
        # @return [false] a null imputer never has work to do
        def anything?
          false
        end

        # Returns the first argument, or nil when called without arguments.
        #
        # Accepting any arity keeps zero-argument calls such as `description`
        # from raising ArgumentError; they now return nil.
        def method_missing(_name, *args)
          args.first
        end

        # Advertises the catch-all, except for implicit-conversion hooks
        # (`to_ary`, `to_str`, ...) so Ruby does not try to coerce this object.
        def respond_to_missing?(name, include_private = false)
          !name.to_s.start_with?("to_") || super
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
module EasyML
  class Column
    class Imputers
      # Expands the column into one boolean column per allowed category and
      # drops the original column.
      class OneHotEncoder < Base
        param_applies :one_hot

        def self.description
          "One-hot encoder"
        end

        # @param df the data frame to transform
        # @return the data frame with `<column>_<category>` boolean columns
        #   (dashes in the name replaced by underscores); unchanged when the
        #   column has no allowed categories
        def transform(df)
          categories = allowed_categories # sort once, reuse for guard and loop
          return df unless categories.present?

          categories.each do |value|
            new_col_name = "#{column.name}_#{value}".tr("-", "_")
            df = df.with_column(
              df[column.name].cast(Polars::String).eq(value.to_s).cast(Polars::Boolean).alias(new_col_name)
            )
          end
          df.drop([column.name])
        end

        # Sorted allowed categories. A column without any yields an empty
        # list instead of raising on `nil.sort`.
        def allowed_categories
          (column.allowed_categories || []).sort
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
module EasyML
  class Column
    class Imputers
      # Maps column values to integer labels using the learned
      # `label_encoder` statistic, and back again with `label_decoder`.
      class OrdinalEncoder < Base
        param_applies :ordinal_encoding

        def self.description
          "Ordinal encoder"
        end

        # @param df the data frame to transform
        # @return the data frame with the column replaced by its labels;
        #   unchanged when no encoder was learned
        def transform(df)
          return df unless label_encoder.present?

          # Categorical columns first collapse unknown categories to "other".
          if column.datatype == :categorical
            df = df.with_column(
              Polars.when(Polars.col(column.name).is_in(allowed_categories))
                .then(Polars.col(column.name))
                .otherwise(Polars.lit("other"))
                .alias(column.name)
            )
          end

          fallback = other_value # computed once rather than for every miss
          df.with_column(
            df[column.name].map { |v| label_encoder[column.cast(v)] || fallback }.alias(column.name)
          )
        end

        # Accepts either an Array of labels or a data frame.
        #
        # @return [Array] decoded values when given an Array, otherwise the
        #   data frame with the column decoded
        def decode_labels(df)
          return df.map { |v| label_decoder[v.to_i] } if df.is_a?(Array)

          df.with_column(
            df[column.name].map { |v| label_decoder[v.to_i] }.alias(column.name)
          )
        end

        def categories
          label_encoder.keys
        end

        def values
          label_encoder.values
        end

        def cast_encoder(encoder)
          encoder.transform_keys { |k| column.cast(k) }
        end

        def cast_decoder(decoder)
          decoder.transform_keys { |k| k.to_i }
        end

        # Falls back to an empty mapping when the statistic is missing, so the
        # `present?` guard in #transform is reached instead of raising on
        # `nil.transform_keys`.
        def label_encoder
          @label_encoder ||= cast_encoder(statistics(:label_encoder) || {})
        end

        # Empty mapping when the statistic is missing.
        def label_decoder
          @label_decoder ||= cast_decoder(statistics(:label_decoder) || {})
        end

        # Label used for values absent from the encoder: one past the highest
        # known label (0 when the encoder is empty).
        def other_value
          (label_encoder.values.max || -1) + 1
        end

        def allowed_categories
          column.allowed_categories
        end
      end
    end
  end
end
@@ -0,0 +1,20 @@
module EasyML
  class Column
    class Imputers
      # Fills nulls in the column with the start of the current day, as
      # reported by `UTC.today`.
      class Today < Base
        method_applies :today

        def self.description
          "Current date imputation"
        end

        # @param df the data frame to transform
        # @return the data frame with nulls in the column filled
        def transform(df)
          start_of_today = Polars.lit(UTC.today.beginning_of_day)
          filled = Polars.col(column.name).fill_null(start_of_today).alias(column.name)
          df.with_column(filled)
        end
      end
    end
  end
end
@@ -0,0 +1,126 @@
module EasyML
  class Column
    # Entry point for a column's preprocessing: holds the class-level registry
    # of supported methods/params (filled by the adapter classes) and builds
    # one Imputer per key of the column's preprocessing steps.
    class Imputers
      attr_accessor :dataset, :column

      # Param keys listed per preprocessing method.
      ALLOWED_PARAMS = {
        constant: [:constant],
        categorical: %i[categorical_min one_hot ordinal_encoding],
        most_frequent: %i[one_hot ordinal_encoding],
        mean: [:clip],
        median: [:clip],
      }.freeze

      # Strategies offered per column datatype, as value/label pairs.
      PREPROCESSING_STRATEGIES = {
        float: [
          { value: "ffill", label: "Forward Fill" },
          { value: "mean", label: "Mean" },
          { value: "median", label: "Median" },
          { value: "constant", label: "Constant Value" },
        ],
        integer: [
          { value: "ffill", label: "Forward Fill" },
          { value: "mean", label: "Mean" },
          { value: "median", label: "Median" },
          { value: "constant", label: "Constant Value" },
        ],
        boolean: [
          { value: "ffill", label: "Forward Fill" },
          { value: "most_frequent", label: "Most Frequent" },
          { value: "constant", label: "Constant Value" },
        ],
        datetime: [
          { value: "ffill", label: "Forward Fill" },
          { value: "constant", label: "Constant Value" },
          { value: "today", label: "Current Date" },
        ],
        string: [
          { value: "ffill", label: "Forward Fill" },
          { value: "most_frequent", label: "Most Frequent" },
          { value: "constant", label: "Constant Value" },
        ],
        text: [
          { value: "ffill", label: "Forward Fill" },
          { value: "most_frequent", label: "Most Frequent" },
          { value: "constant", label: "Constant Value" },
        ],
        categorical: [
          { value: "ffill", label: "Forward Fill" },
          { value: "categorical", label: "Categorical" },
          { value: "most_frequent", label: "Most Frequent" },
          { value: "constant", label: "Constant Value" },
        ],
      }.freeze

      # All class-level readers live here once; `supported_params` and
      # `supported_methods` were previously defined twice.
      class << self
        # NOTE: this replaces Module#constants for this class.
        #
        # @return [Hash] the preprocessing strategies keyed by
        #   :preprocessing_strategies
        def constants
          {
            preprocessing_strategies: PREPROCESSING_STRATEGIES,
          }
        end

        # @return [Hash{Class => Array<Symbol>}] params registered per adapter class
        def params_by_class
          @params_by_class ||= {}
        end

        # @return [Hash{Class => Array<Symbol>}] methods registered per adapter class
        def methods_by_class
          @methods_by_class ||= {}
        end

        # @return [Array] every registered param key
        def supported_params
          @supported_params ||= []
        end

        # @return [Array] every registered method name
        def supported_methods
          @supported_methods ||= []
        end
      end

      # @param column the column to preprocess; its `dataset` is captured too
      def initialize(column)
        @column = column
        @dataset = column.dataset
      end

      # @return [Hash{Symbol => Imputer}] one Imputer per preprocessing-step
      #   key; empty when the column has no steps
      def imputers
        return {} if column.preprocessing_steps.blank?

        @imputers ||= column.preprocessing_steps.keys.each_with_object({}) do |key, hash|
          hash[key.to_sym] = Imputer.new(column, column.preprocessing_steps[key])
        end
      end

      # Imputer for the :training step, or a NullImputer when there is none.
      def training
        @training ||= imputer_group(:training)
      end

      # Imputer for the :inference step, or a NullImputer when there is none.
      def inference
        @inference ||= imputer_group(:inference)
      end

      # @return [Array] the training imputer's description, or [] when the
      #   column has no preprocessing steps
      def preprocessing_descriptions
        return [] if column.preprocessing_steps.blank?

        [training.description].compact
      end

      private

      def imputer_group(key)
        imputers.dig(key.to_sym) || NullImputer.new
      end
    end
  end
end
@@ -0,0 +1,18 @@
module EasyML
  class Column
    # Thin wrapper that picks the learner adapter for a column and forwards
    # `learn` to it.
    class Learner
      attr_accessor :dataset, :column

      # @param column the column to learn from; its `dataset` is captured too
      def initialize(column)
        @dataset = column.dataset
        @column = column
      end

      # @return the adapter instance chosen by Learners::Base.adapter (memoized)
      def learner
        @learner ||= begin
          adapter_class = EasyML::Column::Learners::Base.adapter(column)
          adapter_class.new(column)
        end
      end

      delegate :learn, to: :learner
    end
  end
end