easy_ml 0.2.0.pre.rc56 → 0.2.0.pre.rc57

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2eb7d933162cc05c64a1ea7c21c65f9c80283a1ae45f37226282c503607008f
4
- data.tar.gz: e25a1b7b1970753ae6f65917943607102ad52ef9f90831de0660563441448975
3
+ metadata.gz: e52412950fefc02e9b838930f132873c726440ebbc343159504d7d3287a39d05
4
+ data.tar.gz: 44ff18d1f1df78b542c8e536427189fce63d147e7e86623d219ed9b89c501ca7
5
5
  SHA512:
6
- metadata.gz: 6fc39e6b2838ab6242df1848411450764b0434b63bb7e4f1cb60151850e794f1d0a71a956b70b7ca78e159003efc6947d7dbaad9010c7c6899a1baeb8c7570b2
7
- data.tar.gz: f50ceecc6935fea0c1f82e5b76beaef2e6ee329087d0f7b7739d4a88b4738d9ed37d13acd47bf12d118092c0b66774772dab9a30f60e12c3854360329caacfa9
6
+ metadata.gz: 1e543781fb426a6fa7fe6ad6f5b7c924bdab38d88ac8ad7288db3a24f683661b3745a6f2176c993899a9f9737af7e54dfa59cc439a71739d3e2d2d2d75714621
7
+ data.tar.gz: 3f012c5a3126eec7a69c3c11dd45017f7c2ded7a2bfd5e6e70bcaa388000b19e50d19ed15dc6b47786f61b698cc081e915abade7ece544a3c8a14d0a8f5c4696
@@ -30,7 +30,6 @@ module EasyML
30
30
  validates :name, uniqueness: { scope: :dataset_id }
31
31
 
32
32
  before_save :ensure_valid_datatype
33
- after_create :set_date_column_if_date_splitter
34
33
  after_save :handle_date_column_change
35
34
  before_save :set_defaults
36
35
 
@@ -41,6 +40,18 @@ module EasyML
41
40
  scope :datetime, -> { where(datatype: "datetime") }
42
41
  scope :date_column, -> { where(is_date_column: true) }
43
42
 
43
+ def columns
44
+ [name].concat(virtual_columns)
45
+ end
46
+
47
+ def virtual_columns
48
+ if one_hot?
49
+ allowed_categories.map { |cat| "#{name}_#{cat}" }
50
+ else
51
+ []
52
+ end
53
+ end
54
+
44
55
  def datatype=(dtype)
45
56
  write_attribute(:datatype, dtype)
46
57
  write_attribute(:polars_datatype, dtype)
@@ -88,9 +99,11 @@ module EasyML
88
99
  end
89
100
 
90
101
  def allowed_categories
91
- return nil unless one_hot?
102
+ return [] unless one_hot?
103
+ stats = dataset.preprocessor.statistics
104
+ return [] if stats.nil? || stats.blank?
92
105
 
93
- dataset.preprocessor.statistics.dup.to_h.dig(name.to_sym, :allowed_categories).sort.concat(["other"])
106
+ stats.dup.to_h.dig(name.to_sym, :allowed_categories).sort.concat(["other"])
94
107
  end
95
108
 
96
109
  def date_column?
@@ -1,15 +1,15 @@
1
1
  module EasyML
2
2
  module ColumnList
3
- def sync(only_new: false)
3
+ def sync(delete: true)
4
4
  return unless dataset.schema.present?
5
5
 
6
6
  EasyML::Column.transaction do
7
7
  col_names = syncable
8
8
  existing_columns = where(name: col_names)
9
9
  import_new(col_names, existing_columns)
10
+ update_existing(existing_columns)
10
11
 
11
- if !only_new
12
- update_existing(existing_columns)
12
+ if delete
13
13
  delete_missing(existing_columns)
14
14
  end
15
15
 
@@ -272,10 +272,10 @@ module EasyML
272
272
  raw.split_at.present? && raw.split_at < datasource.last_updated_at
273
273
  end
274
274
 
275
- def learn(only_new: false)
275
+ def learn(delete: true)
276
276
  learn_schema
277
277
  learn_statistics
278
- columns.sync(only_new: only_new)
278
+ columns.sync(delete: delete)
279
279
  end
280
280
 
281
281
  def refreshing
@@ -398,7 +398,7 @@ module EasyML
398
398
 
399
399
  # Learn will update columns, so if any features have been added
400
400
  # since the last time columns were learned, we should re-learn the schema
401
- learn(only_new: true) if idx == 1 && needs_learn?(df)
401
+ learn(delete: false) if idx == 1 && needs_learn?(df)
402
402
  df = apply_column_mask(df, inference: inference) unless all_columns
403
403
  raise_on_nulls(df) if inference
404
404
  df, = processed.split_features_targets(df, true, target) if split_ys
@@ -515,7 +515,7 @@ module EasyML
515
515
  end
516
516
 
517
517
  def drop_cols
518
- @drop_cols ||= preloaded_columns.select(&:hidden).map(&:name)
518
+ @drop_cols ||= preloaded_columns.select(&:hidden).flat_map(&:columns)
519
519
  end
520
520
 
521
521
  def drop_if_null
@@ -354,15 +354,16 @@ module EasyML
354
354
  dataset.decode_labels(ys, col: col)
355
355
  end
356
356
 
357
- def evaluate(y_pred: nil, y_true: nil, x_true: nil, evaluator: nil)
357
+ def evaluate(y_pred: nil, y_true: nil, x_true: nil, evaluator: nil, dataset: nil)
358
358
  evaluator ||= self.evaluator
359
359
  if y_pred.nil?
360
360
  inputs = default_evaluation_inputs
361
361
  y_pred = inputs[:y_pred]
362
362
  y_true = inputs[:y_true]
363
363
  x_true = inputs[:x_true]
364
+ dataset = inputs[:dataset]
364
365
  end
365
- EasyML::Core::ModelEvaluator.evaluate(model: self, y_pred: y_pred, y_true: y_true, x_true: x_true, evaluator: evaluator)
366
+ EasyML::Core::ModelEvaluator.evaluate(model: self, y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset, evaluator: evaluator)
366
367
  end
367
368
 
368
369
  def evaluator
@@ -524,11 +525,13 @@ module EasyML
524
525
 
525
526
  def default_evaluation_inputs
526
527
  x_true, y_true = dataset.test(split_ys: true)
528
+ ds = dataset.test(all_columns: true)
527
529
  y_pred = predict(x_true)
528
530
  {
529
531
  x_true: x_true,
530
532
  y_true: y_true,
531
533
  y_pred: y_pred,
534
+ dataset: ds,
532
535
  }
533
536
  end
534
537
 
@@ -32,7 +32,7 @@ module EasyML
32
32
  false
33
33
  end
34
34
 
35
- def validation_dataset
35
+ def test_dataset
36
36
  if tuner.present?
37
37
  [tuner.x_true, tuner.y_true]
38
38
  else
@@ -46,11 +46,12 @@ module EasyML
46
46
  log_frequency = 10
47
47
  if epoch % log_frequency == 0
48
48
  model.adapter.external_model = booster
49
- x_true, y_true = validation_dataset
49
+ x_true, y_true = test_dataset
50
50
  @preprocessed ||= model.preprocess(x_true)
51
51
  y_pred = model.predict(@preprocessed)
52
+ dataset = model.dataset.test(all_columns: true)
52
53
 
53
- metrics = model.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true)
54
+ metrics = model.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset)
54
55
  Wandb.log(metrics)
55
56
  end
56
57
 
@@ -32,7 +32,7 @@ module EasyML
32
32
  end
33
33
 
34
34
  # Instance methods that evaluators must implement
35
- def evaluate(y_pred: nil, y_true: nil, x_true: nil)
35
+ def evaluate(y_pred: nil, y_true: nil, x_true: nil, dataset: nil)
36
36
  raise NotImplementedError, "#{self.class} must implement #evaluate"
37
37
  end
38
38
 
@@ -5,7 +5,7 @@ module EasyML
5
5
  class AccuracyScore
6
6
  include BaseEvaluator
7
7
 
8
- def evaluate(y_pred:, y_true:, x_true: nil)
8
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
9
9
  y_pred = Numo::Int32.cast(y_pred)
10
10
  y_true = Numo::Int32.cast(y_true)
11
11
  y_pred.eq(y_true).count_true.to_f / y_pred.size
@@ -23,7 +23,7 @@ module EasyML
23
23
  class PrecisionScore
24
24
  include BaseEvaluator
25
25
 
26
- def evaluate(y_pred:, y_true:, x_true: nil)
26
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
27
27
  y_pred = Numo::Int32.cast(y_pred)
28
28
  y_true = Numo::Int32.cast(y_true)
29
29
  true_positives = (y_pred.eq(1) & y_true.eq(1)).count_true
@@ -45,7 +45,7 @@ module EasyML
45
45
  class RecallScore
46
46
  include BaseEvaluator
47
47
 
48
- def evaluate(y_pred:, y_true:, x_true: nil)
48
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
49
49
  y_pred = Numo::Int32.cast(y_pred)
50
50
  y_true = Numo::Int32.cast(y_true)
51
51
  true_positives = (y_pred.eq(1) & y_true.eq(1)).count_true
@@ -65,9 +65,9 @@ module EasyML
65
65
  class F1Score
66
66
  include BaseEvaluator
67
67
 
68
- def evaluate(y_pred:, y_true:, x_true: nil)
69
- precision = PrecisionScore.new.evaluate(y_pred: y_pred, y_true: y_true)
70
- recall = RecallScore.new.evaluate(y_pred: y_pred, y_true: y_true)
68
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
69
+ precision = PrecisionScore.new.evaluate(y_pred: y_pred, y_true: y_true, dataset: dataset)
70
+ recall = RecallScore.new.evaluate(y_pred: y_pred, y_true: y_true, dataset: dataset)
71
71
  return 0 unless (precision + recall) > 0
72
72
 
73
73
  2 * (precision * recall) / (precision + recall)
@@ -85,7 +85,7 @@ module EasyML
85
85
  class AUC
86
86
  include BaseEvaluator
87
87
 
88
- def evaluate(y_pred:, y_true:, x_true: nil)
88
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
89
89
  y_pred = Numo::DFloat.cast(y_pred)
90
90
  y_true = Numo::Int32.cast(y_true)
91
91
 
@@ -132,8 +132,8 @@ module EasyML
132
132
  class ROC_AUC
133
133
  include BaseEvaluator
134
134
 
135
- def evaluate(y_pred:, y_true:, x_true: nil)
136
- AUC.new.evaluate(y_pred: y_pred, y_true: y_true)
135
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
136
+ AUC.new.evaluate(y_pred: y_pred, y_true: y_true, dataset: dataset)
137
137
  end
138
138
 
139
139
  def description
@@ -5,7 +5,7 @@ module EasyML
5
5
  class MeanAbsoluteError
6
6
  include BaseEvaluator
7
7
 
8
- def evaluate(y_pred:, y_true:, x_true: nil)
8
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
9
9
  (Numo::DFloat.cast(y_pred) - Numo::DFloat.cast(y_true)).abs.mean
10
10
  end
11
11
 
@@ -21,7 +21,7 @@ module EasyML
21
21
  class MeanSquaredError
22
22
  include BaseEvaluator
23
23
 
24
- def evaluate(y_pred:, y_true:, x_true: nil)
24
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
25
25
  ((Numo::DFloat.cast(y_pred) - Numo::DFloat.cast(y_true)) ** 2).mean
26
26
  end
27
27
 
@@ -37,7 +37,7 @@ module EasyML
37
37
  class RootMeanSquaredError
38
38
  include BaseEvaluator
39
39
 
40
- def evaluate(y_pred:, y_true:, x_true: nil)
40
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
41
41
  Math.sqrt(((Numo::DFloat.cast(y_pred) - Numo::DFloat.cast(y_true)) ** 2).mean)
42
42
  end
43
43
 
@@ -61,7 +61,7 @@ module EasyML
61
61
  "maximize"
62
62
  end
63
63
 
64
- def evaluate(y_pred:, y_true:, x_true: nil)
64
+ def evaluate(y_pred:, y_true:, x_true: nil, dataset: nil)
65
65
  y_true = Numo::DFloat.cast(y_true)
66
66
  y_pred = Numo::DFloat.cast(y_pred)
67
67
 
@@ -98,13 +98,21 @@ module EasyML
98
98
  end
99
99
  end
100
100
 
101
- def evaluate(model:, y_pred:, y_true:, x_true: nil, evaluator: nil)
101
+ def evaluate(model:, y_pred:, y_true:, x_true: nil, evaluator: nil, dataset: nil)
102
102
  y_pred = normalize_input(y_pred)
103
103
  y_true = normalize_input(y_true)
104
104
  check_size(y_pred, y_true)
105
105
 
106
106
  metrics_results = {}
107
107
 
108
+ if x_true.nil?
109
+ x_true = model.dataset.test
110
+ end
111
+
112
+ if dataset.nil?
113
+ dataset = model.dataset.test(all_columns: true)
114
+ end
115
+
108
116
  model.metrics.each do |metric|
109
117
  evaluator_class = get(metric.to_sym)
110
118
  next unless evaluator_class
@@ -115,6 +123,7 @@ module EasyML
115
123
  y_pred: y_pred,
116
124
  y_true: y_true,
117
125
  x_true: x_true,
126
+ dataset: dataset,
118
127
  )
119
128
  end
120
129
 
@@ -124,7 +133,7 @@ module EasyML
124
133
  raise "Unknown evaluator: #{evaluator}" unless evaluator_class
125
134
 
126
135
  evaluator_instance = evaluator_class.new
127
- response = evaluator_instance.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true)
136
+ response = evaluator_instance.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset)
128
137
 
129
138
  if response.is_a?(Hash)
130
139
  metrics_results.merge!(response)
@@ -145,6 +154,9 @@ module EasyML
145
154
  def normalize_input(input)
146
155
  case input
147
156
  when Array
157
+ if input.first.class == TrueClass || input.first.class == FalseClass
158
+ input = input.map { |value| value ? 1.0 : 0.0 }
159
+ end
148
160
  Numo::DFloat.cast(input)
149
161
  when Polars::DataFrame
150
162
  if input.columns.count > 1
@@ -152,7 +164,10 @@ module EasyML
152
164
  end
153
165
 
154
166
  normalize_input(input[input.columns.first])
155
- when Polars::Series, Array
167
+ when Polars::Series
168
+ if input.dtype == Polars::Boolean
169
+ input = input.cast(Polars::Int64)
170
+ end
156
171
  Numo::DFloat.cast(input)
157
172
  else
158
173
  raise ArgumentError, "Don't know how to evaluate model with y_pred type #{input.class}"
@@ -8,7 +8,7 @@ module EasyML
8
8
  :metrics, :objective, :n_trials, :direction, :evaluator,
9
9
  :study, :results, :adapter, :tune_started_at, :x_true, :y_true,
10
10
  :project_name, :job, :current_run, :trial_enumerator, :progress_block,
11
- :tuner_job
11
+ :tuner_job, :dataset
12
12
 
13
13
  def initialize(options = {})
14
14
  @model = options[:model]
@@ -77,6 +77,7 @@ module EasyML
77
77
  x_true, y_true = model.dataset.test(split_ys: true)
78
78
  self.x_true = x_true
79
79
  self.y_true = y_true
80
+ self.dataset = model.dataset.test(all_columns: true)
80
81
  adapter.tune_started_at = tune_started_at
81
82
  adapter.y_true = y_true
82
83
  adapter.x_true = x_true
@@ -96,14 +97,6 @@ module EasyML
96
97
  run_metrics = tune_once
97
98
  result = calculate_result(run_metrics)
98
99
  @results.push(result)
99
-
100
- params = {
101
- hyperparameters: model.hyperparameters.to_h,
102
- value: result,
103
- status: :success,
104
- }.compact
105
-
106
- @tuner_run.update!(params)
107
100
  @study.tell(@current_trial, result)
108
101
  rescue StandardError => e
109
102
  @tuner_run.update!(status: :failed, hyperparameters: {})
@@ -138,14 +131,27 @@ module EasyML
138
131
  )
139
132
  self.current_run = @tuner_run
140
133
 
141
- adapter.run_trial(@current_trial) do |model|
142
- model.fit(tuning: true, &progress_block)
143
- y_pred = model.predict(x_true)
144
- model.metrics = metrics
145
- metrics = model.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true)
146
- puts metrics
147
- metrics
134
+ model = adapter.run_trial(@current_trial) do |model|
135
+ model.tap do
136
+ model.fit(tuning: true, &progress_block)
137
+ end
148
138
  end
139
+
140
+ y_pred = model.predict(x_true)
141
+ model.metrics = metrics
142
+ metrics = model.evaluate(y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset)
143
+ metric = metrics.symbolize_keys.dig(model.evaluator[:metric].to_sym)
144
+
145
+ puts metrics
146
+
147
+ params = {
148
+ hyperparameters: model.hyperparameters.to_h,
149
+ value: metric,
150
+ status: :success,
151
+ }.compact
152
+
153
+ @tuner_run.update!(params)
154
+ metrics
149
155
  end
150
156
 
151
157
  private
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc56"
4
+ VERSION = "0.2.0-rc57"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc56
4
+ version: 0.2.0.pre.rc57
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger