easy_ml 0.2.0.pre.rc77 → 0.2.0.pre.rc81

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/datasets_controller.rb +3 -3
  3. data/app/controllers/easy_ml/models_controller.rb +4 -3
  4. data/app/frontend/components/ModelForm.tsx +16 -0
  5. data/app/frontend/components/ScheduleModal.tsx +0 -2
  6. data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
  7. data/app/jobs/easy_ml/application_job.rb +1 -0
  8. data/app/jobs/easy_ml/batch_job.rb +47 -6
  9. data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
  10. data/app/jobs/easy_ml/reaper.rb +14 -10
  11. data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
  12. data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
  13. data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
  14. data/app/models/easy_ml/column/imputers/base.rb +1 -1
  15. data/app/models/easy_ml/column/imputers/imputer.rb +2 -0
  16. data/app/models/easy_ml/column/imputers/today.rb +1 -1
  17. data/app/models/easy_ml/column/selector.rb +0 -8
  18. data/app/models/easy_ml/column.rb +1 -1
  19. data/app/models/easy_ml/column_list.rb +2 -3
  20. data/app/models/easy_ml/dataset/learner/base.rb +2 -2
  21. data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
  22. data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
  23. data/app/models/easy_ml/dataset.rb +47 -38
  24. data/app/models/easy_ml/datasource.rb +0 -6
  25. data/app/models/easy_ml/feature.rb +33 -8
  26. data/app/models/easy_ml/model.rb +27 -4
  27. data/app/models/easy_ml/models/hyperparameters/xgboost/gbtree.rb +21 -5
  28. data/app/models/easy_ml/models/xgboost/evals_callback.rb +9 -5
  29. data/app/models/easy_ml/models/xgboost.rb +58 -36
  30. data/app/models/easy_ml/retraining_run.rb +1 -1
  31. data/app/serializers/easy_ml/model_serializer.rb +1 -0
  32. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +16 -3
  33. data/lib/easy_ml/core/tuner/adapters/xgboost_adapter.rb +0 -17
  34. data/lib/easy_ml/core/tuner.rb +14 -5
  35. data/lib/easy_ml/data/dataset_manager/reader/base.rb +12 -0
  36. data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +8 -3
  37. data/lib/easy_ml/data/dataset_manager/reader/file.rb +5 -0
  38. data/lib/easy_ml/data/dataset_manager/reader.rb +7 -1
  39. data/lib/easy_ml/data/dataset_manager/writer/base.rb +26 -9
  40. data/lib/easy_ml/data/dataset_manager/writer.rb +5 -1
  41. data/lib/easy_ml/data/dataset_manager.rb +18 -4
  42. data/lib/easy_ml/data/embeddings/adapters.rb +56 -0
  43. data/lib/easy_ml/data/embeddings/compression.rb +0 -0
  44. data/lib/easy_ml/data/embeddings.rb +43 -0
  45. data/lib/easy_ml/data/polars_column.rb +19 -5
  46. data/lib/easy_ml/engine.rb +16 -14
  47. data/lib/easy_ml/feature_store.rb +19 -16
  48. data/lib/easy_ml/support/lockable.rb +1 -5
  49. data/lib/easy_ml/version.rb +1 -1
  50. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  51. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
  52. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
  53. metadata +9 -7
  54. data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
  55. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: ee980703e3a768458e43d54a878bfa712d4f026967f3ccd8fa5bb2d1df50304c
- data.tar.gz: eb5eb31b580e9112886527f416d4f360ffe9b0ee73d9e2e7dd70d9a48528ea09
+ metadata.gz: 169873e9ea5e1b00f7a4e499a2aeffc377615757cdd4b5fe6d70c8c454b9d426
+ data.tar.gz: fc1d4509606f011bd3adbdf367e767a3e9dfc4fdbb6b5cd91bb413f72da364b2
  SHA512:
- metadata.gz: ddc8a0005b22caf186c13790e9209d7b843181d62be8e70710e33bd6e244e3f31e6cc02efc4effa82d9f38674ff8ba2d8abdc1e43db8ba339d712f0e12d10ec4
- data.tar.gz: 52193a2c0da5c0aca86bb627afff7efd9cbdfcf80ba2db8ae4e9baec061b716244e14c49f30451f52a221700b2b1a160a71935cb76cd77732a4e8fdcf25bd3a1
+ metadata.gz: f4ea106a66d3185f612e481b607cef21f5453a00ef6eb12558e53f43aa1d68b8f20d1e950c1840d97180ba6271db9b9b42c8a2d480c22ca8c1e33d1b768590d2
+ data.tar.gz: 24885cdecd46d612be8b8ce7d4bd9889bdf60420bc82c01993cfe0168e454ebdaaa70899f5cf9cc879ff89a895fd00b1761d01c945dafffc784f7bc1e4a2fb8e
data/app/controllers/easy_ml/datasets_controller.rb CHANGED
@@ -23,7 +23,7 @@
  module EasyML
  class DatasetsController < ApplicationController
  def index
- datasets = Dataset.all.order(id: :desc)
+ datasets = Dataset.all.includes(:columns, :datasource).order(id: :desc)

  render inertia: "pages/DatasetsPage", props: {
  datasets: datasets.map { |dataset| dataset_to_json_small(dataset) },
@@ -80,7 +80,7 @@ module EasyML
  if dataset_params[:features_attributes].present?
  # Clean up any feature IDs that don't exist anymore
  feature_ids = dataset_params[:features_attributes].map { |attrs| attrs[:id] }.compact
- existing_feature_ids = Feature.where(id: feature_ids).pluck(:id)
+ existing_feature_ids = dataset.features.where(id: feature_ids).pluck(:id)

  params[:dataset][:features_attributes].each do |attrs|
  if attrs[:id].present? && !existing_feature_ids.include?(attrs[:id].to_i)
@@ -93,7 +93,7 @@ module EasyML
  attrs[:feature_class] if attrs[:id].blank?
  }.compact

- existing_features = Feature.where(feature_class: feature_classes)
+ existing_features = dataset.features.where(feature_class: feature_classes)

  # Update params with existing feature IDs
  existing_features.each do |feature|
data/app/controllers/easy_ml/models_controller.rb CHANGED
@@ -30,7 +30,7 @@ module EasyML
  def new
  render inertia: "pages/NewModelPage", props: {
  datasets: EasyML::Dataset.all.map do |dataset|
- dataset.slice(:id, :name, :num_rows)
+ dataset_to_json(dataset)
  end,
  constants: EasyML::Model.constants,
  }
@@ -41,7 +41,7 @@ module EasyML
  render inertia: "pages/EditModelPage", props: {
  model: model_to_json(model),
  datasets: EasyML::Dataset.all.map do |dataset|
- dataset.slice(:id, :name, :num_rows)
+ dataset_to_json_small(dataset)
  end,
  constants: EasyML::Model.constants,
  }
@@ -167,7 +167,7 @@ module EasyML
  private

  def includes_list
- [:retraining_runs, :retraining_job, dataset: [:columns, :features, :splitter]]
+ [:retraining_runs, :retraining_job, dataset: [:features, :splitter, columns: [:lineages]]]
  end

  def model_params
@@ -177,6 +177,7 @@ module EasyML
  :dataset_id,
  :task,
  :objective,
+ :weights_column,
  metrics: [],
  retraining_job_attributes: [
  :id,
data/app/frontend/components/ModelForm.tsx CHANGED
@@ -16,6 +16,7 @@ interface ModelFormProps {
  task: string;
  objective?: string;
  metrics?: string[];
+ weights_column?: string;
  retraining_job?: {
  frequency: string;
  at: {
@@ -75,6 +76,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
  task: initialData?.task || 'classification',
  objective: initialData?.objective || 'binary:logistic',
  metrics: initialData?.metrics || ['accuracy_score'],
+ weights_column: initialData?.weights_column || '',
  retraining_job_attributes: initialData?.retraining_job ? {
  id: initialData.retraining_job.id,
  frequency: initialData.retraining_job.frequency,
@@ -165,6 +167,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
  };

  const selectedDataset = datasets.find(d => d.id === data.model.dataset_id);
+ const columns = selectedDataset?.columns || [];

  const filteredTunerJobConstants = constants.tuner_job_constants[data.model.model_type] || {};

@@ -246,6 +249,19 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
  <ErrorDisplay error={errors.dataset_id} />
  </div>

+ <div>
+ <label className="block text-sm font-medium text-gray-700 mb-1">
+ Weights Column (Optional)
+ </label>
+ <SearchableSelect
+ value={data.model.weights_column}
+ options={columns.map(col => ({ value: col.name, label: col.name }))}
+ onChange={(value) => setData('model.weights_column', value)}
+ isClearable={true}
+ />
+ <ErrorDisplay error={errors.weights_column} />
+ </div>
+
  <div>
  <label className="block text-sm font-medium text-gray-700 mb-1">
  Task
data/app/frontend/components/ScheduleModal.tsx CHANGED
@@ -587,8 +587,6 @@ export function ScheduleModal({ isOpen, onClose, onSave, initialData, metrics, t
  value={formData.retraining_job_attributes.threshold}
  onChange={(e) => handleEvaluatorChange('threshold', parseFloat(e.target.value))}
  step={0.01}
- min={0}
- max={1}
  className="block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500 py-2 px-4 shadow-sm border-gray-300 border"
  />
  </div>
data/app/frontend/components/dataset/PreprocessingConfig.tsx CHANGED
@@ -250,16 +250,17 @@ export function PreprocessingConfig({
  setIsEditingDescription(true);
  };

- let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw?.null_count) || 0;
- const nullPercentage = nullCount && column.statistics?.raw.num_rows
- ? ((nullCount / column.statistics.raw.num_rows) * 100)
+ let nullCount = (column.statistics?.processed?.null_count || column.statistics?.raw?.null_count) || 0;
+ let numRows = (column.statistics?.processed?.num_rows) || (column.statistics?.raw?.num_rows) || 0;
+ const nullPercentage = nullCount && numRows
+ ? ((nullCount / numRows) * 100)
  : 0;

- const nullPercentageProcessed = column.statistics?.processed?.null_count && column.statistics?.raw.num_rows
- ? ((column.statistics.processed.null_count / column.statistics.raw.num_rows) * 100)
+ const nullPercentageProcessed = column.statistics?.processed?.null_count && column.statistics?.processed?.num_rows
+ ? ((column.statistics.processed.null_count / column.statistics.processed.num_rows) * 100)
  : 0;

- const totalRows = column.statistics?.raw.num_rows ?? 0;
+ const totalRows = numRows;

  const renderStrategySpecificInfo = (type: 'training' | 'inference') => {
  const strategy = type === 'training' ? training : inference;
data/app/jobs/easy_ml/application_job.rb CHANGED
@@ -1,5 +1,6 @@
  module EasyML
  class ApplicationJob < ActiveJob::Base
+ @queue = :easy_ml
  queue_as :easy_ml

  def create_event(model, status, error = nil)
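A note on the added `@queue = :easy_ml`: `queue_as` only covers jobs that go through ActiveJob, while Resque resolves a job class's queue from the `@queue` class instance variable when the class is enqueued through Resque's own APIs (as the batch/Resque code later in this diff does). A minimal sketch, assuming Resque is the queue backend:

  # Resque reads the @queue ivar to decide which queue a job class belongs to:
  Resque.queue_from_class(EasyML::RefreshDatasetJob)  # => :easy_ml

  # ActiveJob enqueues still use the queue declared via `queue_as :easy_ml`.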
data/app/jobs/easy_ml/batch_job.rb CHANGED
@@ -39,15 +39,15 @@ module EasyML
  rest.map do |batch|
  Resque.redis.rpush("batch:#{parent_id}:remaining", batch.to_json)
  end
-
+ track_batch(parent_id)
  handle_batch(parent_id, batch)
  end

  def handle_batch(parent_id, batch)
  if batch.size > 1
- enqueue_batch(batch)
+ enqueue_batch(batch, parent_id)
  else
- run_one_batch(parent_id, batch.first)
+ new.perform(parent_id, batch.first)
  after_batch_hook(parent_id, batch)
  end
  end
@@ -60,7 +60,21 @@ module EasyML
  end

  def next_batch?(parent_id)
- batches_remaining(parent_id) > 0
+ (batches_remaining(parent_id) > 0)
+ end
+
+ def list_batches
+ Resque.redis.hkeys("batches:tracking")
+ end
+
+ def track_batch(parent_id)
+ Resque.redis.hset("batches:tracking", parent_id, 1)
+ end
+
+ def cleanup_all
+ list_batches.each do |batch_id|
+ cleanup_batch(batch_id)
+ end
  end

  def batches_remaining(parent_id)
@@ -69,12 +83,39 @@

  def cleanup_batch(parent_id)
  Resque.redis.del("batch:#{parent_id}:remaining")
+ Resque.redis.hdel("batches:tracking", parent_id)
  end

- private
+ def batch_args
+ list_batches.map do |batch_id|
+ fetch_batch_arguments(batch_id)
+ end
+ end
+
+ def select_batches(&block)
+ list_batches.select do |batch_id|
+ yield fetch_batch_arguments(batch_id)
+ end
+ end
+
+ def poll
+ while true
+ sleep 2
+ EasyML::BatchJob.list_batches.map do |batch|
+ puts "Batch #{batch} | Remaining : #{EasyML::BatchJob.batches_remaining(batch)}"
+ end
+ end
+ end

  def get_parent_batch_id(args_list)
- args_list.dup.flatten.first.dig(:parent_batch_id)
+ args_list.dup.flatten.detect { |arg| arg.dig(:parent_batch_id) }.dig(:parent_batch_id)
+ end
+
+ private
+
+ def get_args_list(batch_id)
+ redis_key = "#{batch(batch_id)}:original_args"
+ redis.get(redis_key)
  end

  # Store batch arguments in Redis
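The new helpers make in-flight batches observable from Redis: each parent batch id is registered in the "batches:tracking" hash when work starts, and its outstanding sub-batches live in "batch:<parent_id>:remaining". A rough console sketch using only the methods added in this diff (they appear to be class-level, since poll itself calls EasyML::BatchJob.list_batches):

  # List tracked batches and how many sub-batches each still has queued
  EasyML::BatchJob.list_batches.each do |batch_id|
    puts "#{batch_id}: #{EasyML::BatchJob.batches_remaining(batch_id)} remaining"
  end

  EasyML::BatchJob.poll         # loop that prints the same counts every 2 seconds
  EasyML::BatchJob.cleanup_all  # drop every tracked batch's Redis keys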
data/app/jobs/easy_ml/compute_feature_job.rb CHANGED
@@ -14,31 +14,31 @@ module EasyML
  #
  # https://github.com/drfeelngood/resque-batched-job/blob/master/lib/resque/plugins/batched_job.rb#L86
  batch_args = batch_args.dup
- run_one_batch(batch_id, batch_args)
+ EasyML::ComputeFeatureJob.new.perform(batch_id, batch_args)
  end

- def self.run_one_batch(batch_id, batch_args)
+ def perform(batch_id, batch_args = {})
  EasyML::Feature.fit_one_batch(batch_id, batch_args)
  end

  def self.after_batch_hook(batch_id, *args)
- batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
- feature_ids = batch_args.pluck(:feature_id).uniq
- parent_id = batch_args.pluck(:parent_batch_id).first
+ args = args.flatten.first.with_indifferent_access
+ feature_id = args.dig(:feature_id)

- feature = EasyML::Feature.find_by(id: feature_ids.first)
+ feature = EasyML::Feature.find_by(id: feature_id)

  if feature.failed?
  dataset.features.where(workflow_status: :analyzing).update_all(workflow_status: :ready)
- return BatchJob.cleanup_batch(parent_id)
+ return BatchJob.cleanup_batch(batch_id)
  end

  feature.after_fit

- if BatchJob.next_batch?(parent_id)
- BatchJob.enqueue_next_batch(self, parent_id)
+ if BatchJob.next_batch?(batch_id)
+ BatchJob.enqueue_next_batch(self, batch_id)
  else
- dataset = EasyML::Feature.find_by(id: feature_ids.first).dataset
+ cleanup_batch(batch_id)
+ dataset = feature.dataset
  dataset.after_fit_features
  end
  end
data/app/jobs/easy_ml/reaper.rb CHANGED
@@ -9,8 +9,8 @@ module EasyML
  {
  worker: worker,
  working: true,
- class: args.dig("job_class"),
- args: args.dig("arguments"),
+ class: args.is_a?(Hash) ? args.dig("job_class") : nil,
+ args: args.is_a?(Hash) ? args.dig("arguments") : nil,
  pid: worker.pid,
  }
  else
@@ -19,17 +19,23 @@ module EasyML
  end
  end

- def find_job(worker_class, *args)
+ def find_job(worker_class, *args, &block)
  list_workers.select do |config|
- config.dig(:class) == worker_class.to_s && config.dig(:args) == args
+ selected = config.dig(:class) == worker_class.to_s
+ if block_given?
+ selected &&= yield(config)
+ else
+ selected &= config.dig(:args) == args
+ end
+ selected
  end
  end

- def kill(worker_class, *args)
- find_job(worker_class, *args).each do |job|
+ def kill(worker_class, *args, &block)
+ find_job(worker_class, *args, &block).each do |job|
  begin
- # Send TERM signal to the process
- Process.kill("TERM", job[:pid])
+ # Send HUP signal to the process
+ Process.kill("USR1", job[:pid])

  # Remove the worker from Redis so it doesn't show up as a zombie
  # in the Resque web interface. This is important because:
@@ -37,12 +43,10 @@ module EasyML
  # 2. Prevents confusion about running workers
  # 3. Allows proper worker cleanup in Redis
  job[:worker].done_working
- job[:worker].unregister_worker
  rescue Errno::ESRCH
  # Process already gone, but still try to clean up Redis
  begin
  job[:worker].done_working
- job[:worker].unregister_worker
  rescue => e
  # Redis cleanup failed, worker might already be unregistered
  puts "Failed to unregister worker: #{e.message}"
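find_job and kill now accept an optional block, so callers can match workers on anything in the worker config (:class, :args, :worker, :pid) rather than requiring an exact argument match, and kill now signals workers with USR1. A hedged usage sketch, assuming these are exposed as EasyML::Reaper class methods; the argument filter shown is purely illustrative:

  # Kill only the ComputeFeatureJob workers whose arguments mention a given dataset.
  # The "dataset_id" key here is hypothetical; inspect config[:args] for your own jobs.
  EasyML::Reaper.kill(EasyML::ComputeFeatureJob) do |config|
    Array(config[:args]).flatten.any? { |arg| arg.is_a?(Hash) && arg["dataset_id"] == 123 }
  end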
data/app/jobs/easy_ml/refresh_dataset_job.rb CHANGED
@@ -1,5 +1,7 @@
  module EasyML
  class RefreshDatasetJob < ApplicationJob
+ @queue = :easy_ml
+
  def perform(id)
  begin
  dataset = EasyML::Dataset.find(id)
data/app/jobs/easy_ml/sync_datasource_job.rb CHANGED
@@ -8,6 +8,7 @@ module EasyML

  begin
  datasource.refresh
+ datasource.after_sync
  rescue StandardError => e
  datasource.update!(is_syncing: false)
  handle_error(datasource, e)
data/app/models/concerns/easy_ml/dataframe_serialization.rb CHANGED
@@ -8,23 +8,7 @@ module EasyML
  end

  def deserialize_dataframe(df_data)
- return unless df_data.present? && df_data.key?("columns")
-
- columns = df_data["columns"].map do |col|
- dtype = case col["datatype"]
- when Hash
- if col["datatype"]["Datetime"]
- Polars::Datetime.new(col["datatype"]["Datetime"][0].downcase.to_sym).class
- else
- Polars::Utf8
- end
- else
- Polars.const_get(col["datatype"])
- end
- Polars::Series.new(col["name"], col["values"], dtype: dtype)
- end
-
- Polars::DataFrame.new(columns)
+ Polars::DataFrame.new(df_data)
  end
  end
  end
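deserialize_dataframe now hands the stored payload straight to Polars::DataFrame.new instead of rebuilding each series with an explicit dtype. A minimal sketch of what the simplified path relies on, with illustrative data: Polars::DataFrame.new accepts a hash of column name to values and infers dtypes itself, so (unlike the removed code) any datetime columns get whatever dtype Polars infers from the stored values.

  require "polars-df"

  df_data = { "id" => [1, 2, 3], "name" => ["a", "b", "c"] }
  df = Polars::DataFrame.new(df_data)
  # => shape: (3, 2) DataFrame with an integer "id" column and a string "name" column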
data/app/models/easy_ml/column/imputers/base.rb CHANGED
@@ -63,7 +63,7 @@ module EasyML
  if column.is_computed
  column.statistics.dig(:processed, *args)
  else
- column.statistics.dig(:clipped, *args) || column.statistics.dig(:raw, *args)
+ column.statistics.dig(:raw, *args)
  end
  end

data/app/models/easy_ml/column/imputers/imputer.rb CHANGED
@@ -54,6 +54,8 @@ module EasyML
  return df unless anything?

  adapters.reduce(df) do |df, adapter|
+ next df if df.columns.exclude?(column.name)
+
  adapter.transform(df)
  end
  end
data/app/models/easy_ml/column/imputers/today.rb CHANGED
@@ -10,7 +10,7 @@ module EasyML

  def transform(df)
  df = df.with_column(
- Polars.col(column.name).fill_null(Polars.lit(UTC.today.beginning_of_day)).alias(column.name)
+ Polars.col(column.name).fill_null(Polars.lit(EasyML::Support::UTC.today.beginning_of_day)).alias(column.name)
  )
  df
  end
data/app/models/easy_ml/column/selector.rb CHANGED
@@ -24,14 +24,6 @@ module EasyML
  end
  end

- def clipped
- Selector.new(column, :raw) do |df|
- column.imputers.training.clip(df)
- end
- end
-
- measure_method_timing :clipped
-
  def processed
  Selector.new(column, :processed)
  end
data/app/models/easy_ml/column.rb CHANGED
@@ -140,7 +140,7 @@ module EasyML
  end
  end

- delegate :raw, :processed, :data, :train, :test, :valid, :clipped, to: :data_selector
+ delegate :raw, :processed, :data, :train, :test, :valid, to: :data_selector

  def empty?
  data.blank?
data/app/models/easy_ml/column_list.rb CHANGED
@@ -28,12 +28,11 @@ module EasyML
  if computed
  cols = column_list.computed
  else
- cols = column_list.raw
+ cols = column_list
  end

  by_name = cols.index_by(&:name)
- df.columns.each do |col|
- column = by_name[col]
+ cols.each do |column|
  df = column.transform(df, inference: inference, computed: computed) if column
  end

data/app/models/easy_ml/dataset/learner/base.rb CHANGED
@@ -15,8 +15,8 @@ module EasyML
  (column.one_hot? && type.to_sym == :processed)
  end

- TYPES_ALL = %i(raw clipped processed)
- TYPES_RAW = %i(raw clipped)
+ TYPES_ALL = %i(raw processed)
+ TYPES_RAW = %i(raw)
  TYPES_PROCESSED = %i(processed)

  def types(type = :all)
data/app/models/easy_ml/dataset/learner/eager.rb CHANGED
@@ -19,7 +19,9 @@ module EasyML
  end

  def fetch_df(split, type)
- @dataset.send(type).send(split, all_columns: true)
+ dataset.columns.apply_clip(
+ @dataset.send(type).send(split, all_columns: true)
+ )
  end

  def execute_queries(split, type)
data/app/models/easy_ml/dataset/learner/lazy.rb CHANGED
@@ -21,7 +21,10 @@ module EasyML

  def run_queries(split, type)
  queries = build_queries(split, type)
+
- @dataset.send(type).send(split, all_columns: true, lazy: true).select(queries).collect
+ dataset.columns.apply_clip(
+ @dataset.send(type).send(split, all_columns: true, lazy: true)
+ ).select(queries).collect
  end

  def get_column_statistics(query_results)
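Taken together with the selector.rb, column.rb, and learner/base.rb changes above, clipping is no longer a separate :clipped frame type; both learners now apply it explicitly when they read a split. A hedged sketch of the new read path, assuming apply_clip applies each column's configured clip bounds to the frame it is given (the dataset and split accessors are the ones already used in these hunks):

  # Clip bounds are applied once, at read time, instead of via the removed `clipped` selector.
  df = dataset.columns.apply_clip(
    dataset.raw.train(all_columns: true)
  )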