easy_ml 0.2.0.pre.rc57 → 0.2.0.pre.rc58

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/apis_controller.rb +8 -0
  3. data/app/controllers/easy_ml/models_controller.rb +3 -0
  4. data/app/controllers/easy_ml/predictions_controller.rb +10 -5
  5. data/app/frontend/components/ModelForm.tsx +1 -1
  6. data/app/frontend/components/SearchableSelect.tsx +0 -1
  7. data/app/frontend/components/dataset/PreprocessingConfig.tsx +1 -1
  8. data/app/frontend/pages/DatasourcesPage.tsx +0 -2
  9. data/app/jobs/easy_ml/compute_feature_job.rb +1 -0
  10. data/app/models/easy_ml/column.rb +42 -4
  11. data/app/models/easy_ml/column_history.rb +5 -1
  12. data/app/models/easy_ml/column_list.rb +43 -11
  13. data/app/models/easy_ml/dataset.rb +45 -25
  14. data/app/models/easy_ml/datasource.rb +1 -0
  15. data/app/models/easy_ml/feature.rb +10 -3
  16. data/app/models/easy_ml/model.rb +25 -4
  17. data/app/models/easy_ml/model_history.rb +1 -0
  18. data/app/models/easy_ml/retraining_run.rb +1 -0
  19. data/config/initializers/inflections.rb +2 -0
  20. data/config/routes.rb +3 -0
  21. data/lib/easy_ml/core/tuner.rb +1 -1
  22. data/lib/easy_ml/data/preprocessor.rb +10 -53
  23. data/lib/easy_ml/data/splits/in_memory_split.rb +4 -0
  24. data/lib/easy_ml/data/statistics_learner.rb +79 -14
  25. data/lib/easy_ml/data/synced_directory.rb +4 -2
  26. data/lib/easy_ml/predict.rb +13 -2
  27. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +3 -0
  28. data/lib/easy_ml/railtie/templates/migration/add_computed_columns_to_easy_ml_columns.rb.tt +14 -0
  29. data/lib/easy_ml/railtie/templates/migration/add_default_to_is_target.rb.tt +6 -0
  30. data/lib/easy_ml/railtie/templates/migration/add_slug_to_easy_ml_models.rb.tt +20 -0
  31. data/lib/easy_ml/version.rb +1 -1
  32. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  33. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-DTZ2348z.js → Application.tsx-DmkdJsDd.js} +34 -34
  34. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-DTZ2348z.js.map → Application.tsx-DmkdJsDd.js.map} +1 -1
  35. metadata +8 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e52412950fefc02e9b838930f132873c726440ebbc343159504d7d3287a39d05
4
- data.tar.gz: 44ff18d1f1df78b542c8e536427189fce63d147e7e86623d219ed9b89c501ca7
3
+ metadata.gz: 1a25c50b89c079e7e62f52d1f5a52ef16f3d7bc9b388fcee9a7b0983148de9cd
4
+ data.tar.gz: bfcf0d06fbe498ccc70251c144649d7fd6699c2bdc4a9acbf5866e60ce04c7bd
5
5
  SHA512:
6
- metadata.gz: 1e543781fb426a6fa7fe6ad6f5b7c924bdab38d88ac8ad7288db3a24f683661b3745a6f2176c993899a9f9737af7e54dfa59cc439a71739d3e2d2d2d75714621
7
- data.tar.gz: 3f012c5a3126eec7a69c3c11dd45017f7c2ded7a2bfd5e6e70bcaa388000b19e50d19ed15dc6b47786f61b698cc081e915abade7ece544a3c8a14d0a8f5c4696
6
+ metadata.gz: 77186b1d2d7558db7d128e03c68f8632af6f28be4b1bf2daa71ac804abbcc5a26470fe29a802258d326048155fea46a7f707e46fc88cf8571af5f30cb870d839
7
+ data.tar.gz: a22ba3e21ab32e64674033f0c83d023e41c3c5f158117be5fa8b85f1865a4bfc39bda72042e4c1ff80277d4d664fcc694349f7a377933c7c52f23417c299618b
@@ -0,0 +1,8 @@
1
+ module EasyML
2
+ class APIsController < ApplicationController
3
+ def show
4
+ model = EasyML::Model.find_by!(slug: params[:model])
5
+ render json: { data: model.api_fields }
6
+ end
7
+ end
8
+ end
@@ -53,6 +53,9 @@ module EasyML
53
53
  flash[:notice] = "Model was successfully created."
54
54
  redirect_to easy_ml_models_path
55
55
  else
56
+ errors = model.errors.to_hash(true)
57
+ values = errors.values.flatten
58
+ flash.now[:error] = values.join(", ")
56
59
  render inertia: "pages/NewModelPage", props: {
57
60
  datasets: EasyML::Dataset.all.map do |dataset|
58
61
  dataset.slice(:id, :name, :num_rows)
@@ -3,6 +3,11 @@ module EasyML
3
3
  skip_before_action :verify_authenticity_token, only: [:create]
4
4
 
5
5
  def create
6
+ slug = params[:model]
7
+ unless EasyML::Model.find_by(slug: slug).inference_version.present?
8
+ return render json: { error: "Model not found" }, status: :not_found
9
+ end
10
+
6
11
  unless params.key?(:input)
7
12
  return render json: { error: "Must provide key: input" }, status: :not_found
8
13
  end
@@ -12,17 +17,17 @@ module EasyML
12
17
  return render json: { error: "Input must be a hash" }, status: :not_found
13
18
  end
14
19
 
15
- model_name = params[:model]
16
- unless EasyML::Model.find_by(name: model_name).present?
17
- return render json: { error: "Model not found" }, status: :not_found
20
+ valid, fields = EasyML::Predict.validate_input(slug, input)
21
+ unless valid
22
+ return render json: { error: "Missing required fields: #{fields}" }, status: :not_found
18
23
  end
19
24
 
20
- prediction = EasyML::Predict.predict(model_name, input)
25
+ prediction = EasyML::Predict.predict(slug, input)
21
26
 
22
27
  render json: { prediction: EasyML::PredictionSerializer.new(prediction).serializable_hash.dig(:data, :attributes) }, status: :ok
23
28
  rescue ActiveRecord::RecordNotFound
24
29
  render json: { error: "Model not found" }, status: :not_found
25
- rescue StandardError => e
30
+ rescue => e
26
31
  render json: { error: e.message }, status: :unprocessable_entity
27
32
  end
28
33
  end
@@ -74,7 +74,7 @@ export function ModelForm({ initialData, datasets, constants, isEditing, errors:
74
74
  dataset_id: initialData?.dataset_id || '',
75
75
  task: initialData?.task || 'classification',
76
76
  objective: initialData?.objective || 'binary:logistic',
77
- metrics: initialData?.metrics || ['accuracy'],
77
+ metrics: initialData?.metrics || ['accuracy_score'],
78
78
  retraining_job_attributes: initialData?.retraining_job ? {
79
79
  id: initialData.retraining_job.id,
80
80
  frequency: initialData.retraining_job.frequency,
@@ -61,7 +61,6 @@ export const SearchableSelect = forwardRef<HTMLButtonElement, SearchableSelectPr
61
61
  }, [isOpen]);
62
62
 
63
63
  const handleOptionClick = (optionValue: Option['value'], e: React.MouseEvent) => {
64
- debugger;
65
64
  e.preventDefault();
66
65
  e.stopPropagation();
67
66
  onChange(optionValue);
@@ -250,7 +250,7 @@ export function PreprocessingConfig({
250
250
  setIsEditingDescription(true);
251
251
  };
252
252
 
253
- let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw.null_count) || 0;
253
+ let nullCount = (column.statistics?.processed.null_count || column.statistics?.raw?.null_count) || 0;
254
254
  const nullPercentage = nullCount && column.statistics?.raw.num_rows
255
255
  ? ((nullCount / column.statistics.raw.num_rows) * 100)
256
256
  : 0;
@@ -49,12 +49,10 @@ export default function DatasourcesPage({ datasources }: { datasources: Datasour
49
49
  preserveScroll: true, // Keeps the scroll position
50
50
  preserveState: true, // Keeps the form state
51
51
  onSuccess: (e) => {
52
- debugger;
53
52
  console.log("SUCCESS")
54
53
  // The page will automatically refresh with new data
55
54
  },
56
55
  onError: () => {
57
- debugger;
58
56
  // Handle error case if needed
59
57
  console.error('Failed to sync datasource');
60
58
  }
@@ -5,6 +5,7 @@ module EasyML
5
5
  @queue = :easy_ml
6
6
 
7
7
  def self.perform(batch_id, options = {})
8
+ puts "Performing compute feature job with options #{options}"
8
9
  begin
9
10
  options.symbolize_keys!
10
11
  feature_id = options.dig(:feature_id)
@@ -8,7 +8,7 @@
8
8
  # description :string
9
9
  # datatype :string
10
10
  # polars_datatype :string
11
- # is_target :boolean
11
+ # is_target :boolean default(FALSE)
12
12
  # hidden :boolean default(FALSE)
13
13
  # drop_if_null :boolean default(FALSE)
14
14
  # preprocessing_steps :json
@@ -17,6 +17,8 @@
17
17
  # created_at :datetime not null
18
18
  # updated_at :datetime not null
19
19
  # is_date_column :boolean default(FALSE)
20
+ # computed_by :string
21
+ # is_computed :boolean default(FALSE)
20
22
  #
21
23
  module EasyML
22
24
  class Column < ActiveRecord::Base
@@ -39,8 +41,11 @@ module EasyML
39
41
  scope :categorical, -> { where(datatype: %w[categorical string boolean]) }
40
42
  scope :datetime, -> { where(datatype: "datetime") }
41
43
  scope :date_column, -> { where(is_date_column: true) }
44
+ scope :required, -> { where(is_computed: false, hidden: false, is_target: false).where("preprocessing_steps IS NULL OR preprocessing_steps::text = '{}'::text") }
45
+ scope :api_inputs, -> { where(is_computed: false, hidden: false, is_target: false) }
46
+ scope :computed, -> { where(is_computed: true) }
42
47
 
43
- def columns
48
+ def aliases
44
49
  [name].concat(virtual_columns)
45
50
  end
46
51
 
@@ -100,16 +105,49 @@ module EasyML
100
105
 
101
106
  def allowed_categories
102
107
  return [] unless one_hot?
103
- stats = dataset.preprocessor.statistics
108
+ stats = dataset.statistics
104
109
  return [] if stats.nil? || stats.blank?
105
110
 
106
- stats.dup.to_h.dig(name.to_sym, :allowed_categories).sort.concat(["other"])
111
+ stats = stats.deep_symbolize_keys
112
+ stats = stats.dig(:raw)
113
+
114
+ (stats.dig(name.to_sym, :allowed_categories) || []).sort.concat(["other"])
107
115
  end
108
116
 
109
117
  def date_column?
110
118
  is_date_column
111
119
  end
112
120
 
121
+ def lineage
122
+ [
123
+ present_in_raw_dataset ? "Raw dataset" : nil,
124
+ computed_by ? "Computed by #{computed_by}" : nil,
125
+ preprocessing_steps.present? ? "Preprocessed using #{preprocessing_steps.keys.join(", ")}" : nil,
126
+ ].compact
127
+ end
128
+
129
+ def required?
130
+ is_computed && (preprocessing_steps.nil? || preprocessing_steps == {}) && !hidden && !is_target
131
+ end
132
+
133
+ def present_in_raw_dataset
134
+ dataset.raw.data&.columns&.include?(name) || false
135
+ end
136
+
137
+ def sort_required
138
+ required? ? 0 : 1
139
+ end
140
+
141
+ def to_api
142
+ {
143
+ name: name,
144
+ datatype: datatype,
145
+ description: description,
146
+ required: required?,
147
+ allowed_values: allowed_categories.empty? ? nil : allowed_categories,
148
+ }.compact
149
+ end
150
+
113
151
  private
114
152
 
115
153
  def set_defaults
@@ -9,7 +9,7 @@
9
9
  # description :string
10
10
  # datatype :string
11
11
  # polars_datatype :string
12
- # is_target :boolean
12
+ # is_target :boolean default(FALSE)
13
13
  # hidden :boolean default(FALSE)
14
14
  # drop_if_null :boolean default(FALSE)
15
15
  # preprocessing_steps :json
@@ -22,10 +22,14 @@
22
22
  # history_user_id :integer
23
23
  # snapshot_id :string
24
24
  # is_date_column :boolean default(FALSE)
25
+ # computed_by :string
26
+ # is_computed :boolean default(FALSE)
25
27
  #
26
28
  module EasyML
27
29
  class ColumnHistory < ActiveRecord::Base
28
30
  self.table_name = "easy_ml_column_histories"
29
31
  include Historiographer::History
32
+ scope :required, -> { where(is_computed: false, hidden: false, is_target: false).where("preprocessing_steps IS NULL OR preprocessing_steps::text = '{}'::text") }
33
+ scope :computed, -> { where(is_computed: true) }
30
34
  end
31
35
  end
@@ -1,5 +1,7 @@
1
1
  module EasyML
2
2
  module ColumnList
3
+ include Historiographer::Relation
4
+
3
5
  def sync(delete: true)
4
6
  return unless dataset.schema.present?
5
7
 
@@ -8,9 +10,10 @@ module EasyML
8
10
  existing_columns = where(name: col_names)
9
11
  import_new(col_names, existing_columns)
10
12
  update_existing(existing_columns)
13
+ set_feature_lineage
11
14
 
12
15
  if delete
13
- delete_missing(existing_columns)
16
+ delete_missing(col_names)
14
17
  end
15
18
 
16
19
  if existing_columns.none? # Totally new dataset
@@ -37,14 +40,9 @@ module EasyML
37
40
  end
38
41
  end
39
42
 
40
- def virtual_column?(column)
41
- false
42
- end
43
-
44
43
  def syncable
45
44
  dataset.processed_schema.keys.select do |col|
46
- !one_hot?(col) &&
47
- !virtual_column?(col)
45
+ !one_hot?(col)
48
46
  end
49
47
  end
50
48
 
@@ -56,8 +54,36 @@ module EasyML
56
54
  proxy_association.owner
57
55
  end
58
56
 
57
+ def sort_by_required
58
+ column_list.sort_by { |col| [col.sort_required, col.name] }
59
+ end
60
+
59
61
  private
60
62
 
63
+ def set_feature_lineage
64
+ # Get all features that compute columns
65
+ features_computing_columns = dataset.features.all.map do |feature|
66
+ [feature.name, feature.computes_columns]
67
+ end.compact.to_h
68
+
69
+ updates = column_list.reload.map do |column|
70
+ # Check if column is computed by any feature
71
+ computing_feature = features_computing_columns.find { |_, cols| cols.include?(column.name) }&.first
72
+ is_computed = !computing_feature.nil?
73
+
74
+ column.assign_attributes(
75
+ computed_by: computing_feature,
76
+ is_computed: is_computed,
77
+ )
78
+ next unless column.changed?
79
+
80
+ column
81
+ end.compact
82
+ EasyML::Column.import(updates.to_a, { on_duplicate_key_update: { columns: %i[computed_by is_computed] } })
83
+ cols = EasyML::Column.where(id: updates.map(&:id)).to_a
84
+ column_list.bulk_record_history(cols, { history_user_id: 1 })
85
+ end
86
+
61
87
  def import_new(new_columns, existing_columns)
62
88
  new_columns = new_columns - existing_columns.map(&:name)
63
89
  cols_to_insert = new_columns.map do |col_name|
@@ -67,6 +93,7 @@ module EasyML
67
93
  )
68
94
  end
69
95
  EasyML::Column.import(cols_to_insert)
96
+ column_list.reload
70
97
  end
71
98
 
72
99
  def update_existing(existing_columns)
@@ -116,13 +143,18 @@ module EasyML
116
143
  end
117
144
  EasyML::Column.import(existing_columns.to_a,
118
145
  { on_duplicate_key_update: { columns: %i[statistics datatype polars_datatype
119
- sample_values] } })
146
+ sample_values computed_by is_computed] } })
120
147
  end
121
148
 
122
- def delete_missing(existing_columns)
123
- raw_cols = dataset.raw.train(all_columns: true, limit: 1).columns
149
+ def delete_missing(col_names)
150
+ raw_cols = dataset.best_segment.train(all_columns: true, limit: 1).columns
124
151
  raw_cols = where(name: raw_cols)
125
- columns_to_delete = column_list - existing_columns - raw_cols
152
+ columns_to_delete = column_list.select do |col|
153
+ col_names.exclude?(col.name) &&
154
+ one_hots.map(&:name).exclude?(col.name) &&
155
+ raw_cols.map(&:name).exclude?(col.name) &&
156
+ dataset.features.flat_map(&:computes_columns).exclude?(col.name)
157
+ end
126
158
  columns_to_delete.each(&:destroy!)
127
159
  end
128
160
  end
@@ -140,6 +140,12 @@ module EasyML
140
140
  EasyML::RefreshDatasetJob.perform_later(id)
141
141
  end
142
142
 
143
+ def best_segment
144
+ [processed, raw].detect do |segment|
145
+ segment.send(:train, all_columns: true, limit: 1)&.columns
146
+ end
147
+ end
148
+
143
149
  def raw
144
150
  return @raw if @raw && @raw.dataset
145
151
 
@@ -175,9 +181,10 @@ module EasyML
175
181
 
176
182
  def actually_refresh
177
183
  refreshing do
184
+ learn(delete: false) # After syncing datasource, learn new statistics + sync columns
178
185
  process_data
179
186
  fully_reload
180
- learn
187
+ learn # After processing data, we may have new columns from newly applied features
181
188
  now = UTC.now
182
189
  update(workflow_status: "ready", refreshed_at: now, updated_at: now)
183
190
  fully_reload
@@ -336,21 +343,25 @@ module EasyML
336
343
 
337
344
  def learn_statistics
338
345
  stats = {
339
- raw: EasyML::Data::StatisticsLearner.learn(raw, self),
346
+ raw: EasyML::Data::StatisticsLearner.learn(raw, self, :raw),
340
347
  }
341
- stats.merge!(processed: EasyML::Data::StatisticsLearner.learn(processed, self)) if processed.data.present?
348
+ stats.merge!(processed: EasyML::Data::StatisticsLearner.learn(processed, self, :processed)) if processed.data.present?
349
+
350
+ columns.select(&:is_computed).each do |col|
351
+ if stats.dig(:processed, col.name)
352
+ stats[:raw][col.name] = stats[:processed][col.name]
353
+ end
354
+ end
342
355
 
343
356
  update(statistics: stats)
344
357
  end
345
358
 
346
359
  def process_data
347
- split_data
348
360
  fit
349
361
  normalize_all
350
- # alert_nulls
351
362
  end
352
363
 
353
- def needs_learn?(df)
364
+ def needs_learn?
354
365
  return true if columns_need_refresh?
355
366
 
356
367
  never_learned = columns.none?
@@ -359,6 +370,7 @@ module EasyML
359
370
  new_features = features.any? { |f| f.updated_at > columns.maximum(:updated_at) }
360
371
  return true if new_features
361
372
 
373
+ df = raw.query(limit: 1)
362
374
  new_cols = df.present? ? (df.columns - columns.map(&:name)) : []
363
375
  new_cols = columns.syncable
364
376
 
@@ -390,22 +402,24 @@ module EasyML
390
402
  { differing_columns: differing_columns, differences: differences }
391
403
  end
392
404
 
393
- def normalize(df = nil, split_ys: false, inference: false, all_columns: false, features: self.features, idx: nil)
394
- df = apply_features(df, features)
395
- df = drop_nulls(df)
405
+ def validate_input(df)
406
+ fields = missing_required_fields(df)
407
+ return fields.empty?, fields
408
+ end
409
+
410
+ def normalize(df = nil, split_ys: false, inference: false, all_columns: false, features: self.features)
396
411
  df = apply_missing_features(df, inference: inference)
412
+ df = drop_nulls(df)
397
413
  df = preprocessor.postprocess(df, inference: inference)
398
-
399
- # Learn will update columns, so if any features have been added
400
- # since the last time columns were learned, we should re-learn the schema
401
- learn(delete: false) if idx == 1 && needs_learn?(df)
414
+ df = apply_features(df, features)
415
+ learn unless inference # After applying features, we need to learn new statistics
416
+ df = preprocessor.postprocess(df, inference: inference, computed: true)
402
417
  df = apply_column_mask(df, inference: inference) unless all_columns
403
- raise_on_nulls(df) if inference
404
418
  df, = processed.split_features_targets(df, true, target) if split_ys
405
419
  df
406
420
  end
407
421
 
408
- def raise_on_nulls(df)
422
+ def missing_required_fields(df)
409
423
  desc_df = df.describe
410
424
 
411
425
  # Get the 'null_count' row
@@ -416,8 +430,10 @@ module EasyML
416
430
  null_count_row[col][0].to_i > 0
417
431
  end
418
432
 
419
- if columns_with_nulls.any?
420
- raise "Null values found in columns: #{columns_with_nulls.join(", ")}"
433
+ # This is a history class, because this only occurs on prediction
434
+ required_columns = columns.current.required.map(&:name)
435
+ required_columns.select do |col|
436
+ columns_with_nulls.include?(col) || df.columns.map(&:to_s).exclude?(col.to_s)
421
437
  end
422
438
  end
423
439
 
@@ -487,7 +503,7 @@ module EasyML
487
503
  end
488
504
 
489
505
  def preprocessing_steps
490
- return if columns.nil? || (columns.respond_to?(:empty?) && columns.empty?)
506
+ return {} if columns.nil? || (columns.respond_to?(:empty?) && columns.empty?)
491
507
  return @preprocessing_steps if @preprocessing_steps.present?
492
508
 
493
509
  training = standardize_preprocessing_steps(:training)
@@ -515,7 +531,7 @@ module EasyML
515
531
  end
516
532
 
517
533
  def drop_cols
518
- @drop_cols ||= preloaded_columns.select(&:hidden).flat_map(&:columns)
534
+ @drop_cols ||= preloaded_columns.select(&:hidden).flat_map(&:aliases)
519
535
  end
520
536
 
521
537
  def drop_if_null
@@ -552,10 +568,14 @@ module EasyML
552
568
  df[column_mask(df, inference: inference)]
553
569
  end
554
570
 
555
- def apply_missing_features(df, inference: false)
571
+ def apply_missing_features(df, inference: false, include_one_hots: false)
556
572
  return df unless inference
557
573
 
558
574
  missing_features = (col_order(inference: inference) - df.columns).compact
575
+ unless include_one_hots
576
+ missing_features -= columns.one_hots.flat_map(&:virtual_columns) unless include_one_hots
577
+ missing_features += columns.one_hots.map(&:name) - df.columns
578
+ end
559
579
  df.with_columns(missing_features.map { |f| Polars.lit(nil).alias(f) })
560
580
  end
561
581
 
@@ -661,9 +681,9 @@ module EasyML
661
681
  def normalize_all
662
682
  processed.cleanup
663
683
 
664
- SPLIT_ORDER.each_with_index do |segment, idx|
684
+ SPLIT_ORDER.each do |segment|
665
685
  df = raw.read(segment)
666
- processed_df = normalize(df, all_columns: true, idx: idx)
686
+ processed_df = normalize(df, all_columns: true)
667
687
  processed.save(segment, processed_df)
668
688
  end
669
689
  @normalized = true
@@ -687,8 +707,9 @@ module EasyML
687
707
  end
688
708
 
689
709
  def fit
690
- preprocessor.fit(raw.train(all_columns: true))
691
- self.preprocessor_statistics = preprocessor.statistics
710
+ computed_statistics = columns.where(is_computed: true).reduce({}) { |h, c| h.tap { h[c.name] = c.statistics.dig("processed") } }
711
+ preprocessor.fit(raw.train(all_columns: true), computed_statistics)
712
+ update(preprocessor_statistics: preprocessor.statistics)
692
713
  end
693
714
 
694
715
  # log_method :fit, "Learning statistics", verbose: true
@@ -701,7 +722,6 @@ module EasyML
701
722
  return unless force || should_split?
702
723
 
703
724
  cleanup
704
- features = self.features.ordered.load
705
725
  splitter.split(datasource) do |train_df, valid_df, test_df|
706
726
  [:train, :valid, :test].zip([train_df, valid_df, test_df]).each do |segment, df|
707
727
  raw.save(segment, df)
@@ -55,6 +55,7 @@ module EasyML
55
55
 
56
56
  has_many :events, as: :eventable, class_name: "EasyML::Event", dependent: :destroy
57
57
  attr_accessor :schema, :columns, :num_rows, :is_syncing
58
+ belongs_to :dataset, class_name: "EasyML::Dataset", optional: true, dependent: :destroy
58
59
 
59
60
  add_configuration_attributes :schema, :columns, :num_rows, :polars_args, :verbose, :is_syncing
60
61
  DATASOURCE_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
@@ -165,6 +165,13 @@ module EasyML
165
165
  end
166
166
  end
167
167
 
168
+ def computes_columns
169
+ unless adapter.respond_to?(:computes_columns)
170
+ raise "Feature #{feature_class} must declare which columns it computes using the :computes_columns method"
171
+ end
172
+ adapter.computes_columns
173
+ end
174
+
168
175
  def build_batches
169
176
  if batchable?
170
177
  batch
@@ -239,7 +246,7 @@ module EasyML
239
246
 
240
247
  # Transform a single batch, used for testing the user's feature implementation
241
248
  def transform_batch(df = nil, batch_args = {})
242
- if df.present?
249
+ if df.is_a?(Polars::DataFrame)
243
250
  actually_transform_batch(df)
244
251
  else
245
252
  actually_transform_batch(build_batch(get_batch_args(**batch_args)))
@@ -296,8 +303,8 @@ module EasyML
296
303
  end
297
304
 
298
305
  def actually_transform_batch(df)
299
- return nil unless df.present?
300
- return df if adapter.respond_to?(:fit) && feature_store.empty?
306
+ return nil unless df.is_a?(Polars::DataFrame)
307
+ return df if !adapter.respond_to?(:transform) && feature_store.empty?
301
308
 
302
309
  result = adapter.transform(df, self)
303
310
  update!(applied_at: Time.current)
@@ -17,6 +17,7 @@
17
17
  # is_training :boolean
18
18
  # created_at :datetime not null
19
19
  # updated_at :datetime not null
20
+ # slug :string not null
20
21
  #
21
22
  require_relative "models/hyperparameters"
22
23
 
@@ -66,6 +67,7 @@ module EasyML
66
67
  after_initialize :bump_version, if: -> { new_record? }
67
68
  after_initialize :set_defaults, if: -> { new_record? }
68
69
  before_save :save_model_file, if: -> { is_fit? && !is_history_class? && model_changed? && !@skip_save_model_file }
70
+ before_validation :set_slug, if: :name_changed?
69
71
 
70
72
  VALID_TASKS = %i[regression classification].freeze
71
73
 
@@ -91,6 +93,7 @@ module EasyML
91
93
  }
92
94
  validates :model_type, inclusion: { in: MODEL_NAMES }
93
95
  validates :dataset_id, presence: true
96
+ validates :slug, presence: true, uniqueness: true
94
97
  validate :validate_metrics_allowed
95
98
  before_save :set_root_dir
96
99
 
@@ -189,6 +192,7 @@ module EasyML
189
192
  evaluator: evaluator,
190
193
  model: self,
191
194
  dataset: dataset,
195
+ metrics: metrics,
192
196
  }.compact
193
197
  tuner.merge!(extra_params)
194
198
  tuner_instance = EasyML::Core::Tuner.new(tuner)
@@ -307,7 +311,6 @@ module EasyML
307
311
 
308
312
  dataset.refresh
309
313
  adapter.fit(tuning: tuning, x_train: x_train, y_train: y_train, x_valid: x_valid, y_valid: y_valid, &progress_block)
310
- @is_fit = true
311
314
  end
312
315
 
313
316
  def batch_args
@@ -334,11 +337,8 @@ module EasyML
334
337
 
335
338
  def fit_in_batches(tuning: false, batch_size: nil, batch_overlap: nil, batch_key: nil, checkpoint_dir: Rails.root.join("tmp", "xgboost_checkpoints"), &progress_block)
336
339
  adapter.fit_in_batches(tuning: tuning, batch_size: batch_size, batch_overlap: batch_overlap, batch_key: batch_key, checkpoint_dir: checkpoint_dir, &progress_block)
337
- @is_fit = true
338
340
  end
339
341
 
340
- attr_accessor :is_fit
341
-
342
342
  def is_fit?
343
343
  model_file = get_model_file
344
344
  return true if model_file.present? && model_file.fit?
@@ -447,6 +447,21 @@ module EasyML
447
447
  )
448
448
  end
449
449
 
450
+ include Rails.application.routes.mounted_helpers
451
+
452
+ def api_fields
453
+ {
454
+ url: EasyML::Engine.routes.url_helpers.predictions_path,
455
+ method: "POST",
456
+ data: {
457
+ model: slug,
458
+ input: dataset.columns.api_inputs.sort_by_required.map(&:to_api).each_with_object({}) do |field, hash|
459
+ hash[field[:name]] = field.except(:name)
460
+ end,
461
+ },
462
+ }
463
+ end
464
+
450
465
  class CannotdeployError < StandardError
451
466
  end
452
467
 
@@ -606,6 +621,12 @@ module EasyML
606
621
  errors.add(:metrics,
607
622
  "don't know how to handle #{"metrics".pluralize(unknown_metrics)} #{unknown_metrics.join(", ")}, use EasyML::Core::ModelEvaluator.register(:name, Evaluator, :regression|:classification)")
608
623
  end
624
+
625
+ def set_slug
626
+ if slug.nil? && name.present?
627
+ self.slug = name.gsub(/\s/, "_").downcase
628
+ end
629
+ end
609
630
  end
610
631
  end
611
632
 
@@ -22,6 +22,7 @@
22
22
  # history_ended_at :datetime
23
23
  # history_user_id :integer
24
24
  # snapshot_id :string
25
+ # slug :string
25
26
  #
26
27
  module EasyML
27
28
  class ModelHistory < ActiveRecord::Base
@@ -158,6 +158,7 @@ module EasyML
158
158
  model: training_model,
159
159
  y_pred: y_pred,
160
160
  y_true: y_true,
161
+ dataset: training_model.dataset.test(all_columns: true),
161
162
  evaluator: evaluator,
162
163
  )
163
164
  metric_value = metrics[metric]
@@ -12,6 +12,8 @@ module EasyML
12
12
  inflect.acronym "EST"
13
13
  inflect.acronym "UTC"
14
14
  inflect.acronym "HTML"
15
+ inflect.acronym "API"
16
+ inflect.acronym "APIs"
15
17
  end
16
18
  end
17
19
  end
data/config/routes.rb CHANGED
@@ -11,6 +11,9 @@ EasyML::Engine.routes.draw do
11
11
  # Predictions API
12
12
  resources :predictions, only: [:create]
13
13
 
14
+ # API Documentation
15
+ get "api", to: "apis#show"
16
+
14
17
  resources :models, as: :easy_ml_models do
15
18
  member do
16
19
  post :train