easy_ml 0.2.0.pre.rc40 → 0.2.0.pre.rc43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +11 -9
- data/app/controllers/easy_ml/settings_controller.rb +1 -4
- data/app/frontend/pages/SettingsPage.tsx +1 -80
- data/app/jobs/easy_ml/batch_job.rb +45 -1
- data/app/jobs/easy_ml/compute_feature_job.rb +68 -4
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +30 -0
- data/app/models/easy_ml/dataset.rb +23 -22
- data/app/models/easy_ml/dataset_history.rb +1 -6
- data/app/models/easy_ml/datasources/polars_datasource.rb +4 -18
- data/app/models/easy_ml/event.rb +2 -1
- data/app/models/easy_ml/event_context.rb +58 -0
- data/app/models/easy_ml/feature.rb +43 -14
- data/app/models/easy_ml/model.rb +4 -7
- data/app/models/easy_ml/model_file.rb +17 -48
- data/app/models/easy_ml/splitter_history.rb +16 -0
- data/app/serializers/easy_ml/prediction_serializer.rb +6 -1
- data/config/initializers/zhong.rb +4 -0
- data/lib/easy_ml/data/date_converter.rb +1 -0
- data/lib/easy_ml/data/polars_reader.rb +17 -4
- data/lib/easy_ml/data/statistics_learner.rb +1 -1
- data/lib/easy_ml/engine.rb +22 -0
- data/lib/easy_ml/pending_migrations.rb +19 -0
- data/lib/easy_ml/predict.rb +25 -12
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +39 -157
- data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_features.rb.tt +13 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +4 -2
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +22 -20
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +26 -24
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +12 -10
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +21 -19
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_event_contexts.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +16 -14
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +10 -8
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +27 -25
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +13 -11
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +28 -26
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +13 -11
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +70 -66
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +6 -4
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +6 -4
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +11 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +34 -30
- data/lib/easy_ml/railtie/templates/migration/drop_path_from_easy_ml_model_files.rb.tt +11 -0
- data/lib/easy_ml/version.rb +1 -1
- data/lib/easy_ml.rb +1 -0
- data/public/easy_ml/assets/.vite/manifest.json +2 -2
- data/public/easy_ml/assets/assets/Application-zpGA_Q9c.css +1 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-DF5SSkYi.js → Application.tsx-jPsqOyb0.js} +87 -97
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-jPsqOyb0.js.map +1 -0
- metadata +11 -19
- data/public/easy_ml/assets/assets/Application-Cu7lNJmG.css +0 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DF5SSkYi.js.map +0 -1
data/app/models/easy_ml/feature.rb CHANGED

```diff
@@ -17,6 +17,7 @@
 #  refresh_every   :bigint
 #  created_at      :datetime         not null
 #  updated_at      :datetime         not null
+#  workflow_status :string
 #
 module EasyML
   class Feature < ActiveRecord::Base
@@ -24,6 +25,11 @@ module EasyML
     include Historiographer::Silent
     historiographer_mode :snapshot_only

+    enum workflow_status: {
+      analyzing: "analyzing",
+      ready: "ready",
+      failed: "failed",
+    }
     class << self
       def compute_sha(feature_class)
         require "digest"
@@ -135,13 +141,22 @@ module EasyML
       adapter.respond_to?(:batch) || config.dig(:batch_size).present?
     end

+    def primary_key
+      pkey = config.dig(:primary_key)
+      if pkey.is_a?(Array)
+        pkey
+      else
+        [pkey]
+      end
+    end
+
     def numeric_primary_key?
       if primary_key.nil?
         return false unless should_be_batchable?
         raise "Couldn't find primary key for feature #{feature_class}, check your feature class"
       end

-      dataset.raw.data(limit: 1, select: primary_key)[primary_key].to_a.flat_map(
+      dataset.raw.data(limit: 1, select: primary_key)[primary_key].to_a.flat_map { |h| h.respond_to?(:values) ? h.values : h }.all? do |value|
         case value
         when String then value.match?(/\A[-+]?\d+(\.\d+)?\z/)
         else
@@ -171,22 +186,25 @@ module EasyML
         unless primary_key.present?
           raise "Couldn't find primary key for feature #{feature_class}, check your feature class"
         end
-        df = reader.query(select:
+        df = reader.query(select: primary_key)
       rescue => e
         raise "Couldn't find primary key #{primary_key.first} for feature #{feature_class}: #{e.message}"
       end
       return [] if df.nil?

       min_id = df[primary_key.first].min
-      max_id = df[primary_key.
+      max_id = df[primary_key.last].max
     end

-    (min_id..max_id).step(batch_size).map do |batch_start|
+    (min_id..max_id).step(batch_size).map.with_index do |batch_start, idx|
       batch_end = [batch_start + batch_size, max_id + 1].min - 1
       {
         feature_id: id,
         batch_start: batch_start,
         batch_end: batch_end,
+        batch_number: feature_position,
+        subbatch_number: idx,
+        parent_batch_id: Random.uuid,
       }
     end
   end
@@ -196,13 +214,16 @@ module EasyML
     end

     def fit(features: [self], async: false)
-
+      ordered_features = features.sort_by(&:feature_position)
+      jobs = ordered_features.map(&:build_batches)
+
       if async
-        EasyML::ComputeFeatureJob.
+        EasyML::ComputeFeatureJob.enqueue_ordered_batches(jobs)
       else
-        jobs.each do |job|
+        jobs.flatten.each do |job|
           EasyML::ComputeFeatureJob.perform(nil, job)
         end
+        features.each(&:after_fit) unless features.any?(&:failed?)
       end
     end

@@ -266,13 +287,11 @@ module EasyML
         batch_df = adapter.fit(df, self, batch_args)
       end
     end
-
-
-
-
-
-      }.compact
-      update!(updates)
+    if batch_df.present?
+      store(batch_df)
+    else
+      "Feature #{feature_class}#fit should return a dataframe, received #{batch_df.class}"
+    end
     batch_df
   end

@@ -335,6 +354,7 @@ module EasyML
     def apply_defaults
       self.name ||= self.feature_class.demodulize.titleize
       self.version ||= 1
+      self.workflow_status ||= :ready
     end

     def needs_columns
@@ -371,6 +391,15 @@ module EasyML
       (should_be_batchable? ? 10_000 : nil)
     end

+    def after_fit
+      updates = {
+        applied_at: Time.current,
+        needs_fit: false,
+        workflow_status: :ready,
+      }.compact
+      update!(updates)
+    end
+
     private

     def bulk_update_positions(features)
```
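The batch fields added to `build_batches` are easiest to see in isolation: the primary-key range is split into fixed-size windows and `with_index` numbers the sub-batches. A minimal standalone sketch of that windowing arithmetic (the `min_id`, `max_id`, and `batch_size` values below are made-up examples, not gem defaults):

```ruby
# Illustrative batch-window arithmetic mirroring Feature#build_batches.
min_id = 1
max_id = 25_000
batch_size = 10_000

batches = (min_id..max_id).step(batch_size).map.with_index do |batch_start, idx|
  batch_end = [batch_start + batch_size, max_id + 1].min - 1
  { batch_start: batch_start, batch_end: batch_end, subbatch_number: idx }
end

p batches
# => three windows covering 1..10_000, 10_001..20_000, and 20_001..25_000,
#    numbered 0, 1, 2 via subbatch_number
```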
data/app/models/easy_ml/model.rb CHANGED

```diff
@@ -250,6 +250,7 @@ module EasyML
       bump_version(force: true)
       path = model_file.full_path(version)
       full_path = adapter.save_model_file(path)
+      puts "saving model to #{full_path}"
       model_file.upload(full_path)

       model_file.save
@@ -266,6 +267,7 @@ module EasyML
     end

     def cleanup
+      puts "keeping files #{files_to_keep}"
       get_model_file&.cleanup(files_to_keep)
     end

@@ -488,13 +490,9 @@ module EasyML
     end

     def root_dir
-
+      relative_dir = read_attribute(:root_dir) || default_root_dir

-
-        EasyML::Engine.root_dir.join(persisted).to_s
-      else
-        default_root_dir
-      end
+      EasyML::Engine.root_dir.join(relative_dir).to_s
     end

     def default_root_dir
@@ -544,7 +542,6 @@ module EasyML

     def new_model_file!
       build_model_file(
-        root_dir: root_dir,
         model: self,
         s3_bucket: EasyML::Configuration.s3_bucket,
         s3_region: EasyML::Configuration.s3_region,
```
data/app/models/easy_ml/model_file.rb CHANGED

```diff
@@ -23,7 +23,7 @@ module EasyML
     belongs_to :model, class_name: "EasyML::Model"

     include EasyML::Concerns::Configurable
-    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key
+    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key

     def synced_file
       EasyML::Support::SyncedFile.new(
@@ -37,6 +37,21 @@ module EasyML
       )
     end

+    def root_dir
+      Pathname.new(model.root_dir)
+    end
+
+    def model_root
+      File.expand_path("..", root_dir.to_s)
+    end
+
+    def full_path(filename = nil)
+      filename = self.filename if filename.nil?
+      return nil if filename.nil?
+
+      root_dir.join(filename).to_s
+    end
+
     def exist?
       fit?
     end
@@ -54,33 +69,7 @@ module EasyML

     def upload(path)
       synced_file.upload(path)
-
-    end
-
-    def set_path(path)
-      path = get_full_path(path)
-      basename = Pathname.new(path).basename.to_s
-      unless path.start_with?(full_dir)
-        new_path = File.join(full_dir, basename).to_s
-        FileUtils.mkdir_p(Pathname.new(new_path).dirname.to_s)
-        FileUtils.cp(path, new_path)
-        path = new_path
-      end
-      self.filename = basename
-      self.path = get_relative_path(path)
-    end
-
-    def get_full_path(path)
-      path = path.to_s
-
-      path = Rails.root.join(path) unless path.match?(Regexp.new(Rails.root.to_s))
-      path
-    end
-
-    def get_relative_path(path)
-      path = path.to_s
-      path = path.to_s.split(Rails.root.to_s).last
-      path.to_s.split("/")[0..-2].reject(&:empty?).join("/")
+      update(filename: Pathname.new(path).basename.to_s)
     end

     def download
@@ -94,26 +83,6 @@ module EasyML
       Digest::SHA256.file(full_path).hexdigest
     end

-    def full_path(filename = nil)
-      filename = self.filename if filename.nil?
-      return nil if filename.nil?
-      return nil if relative_dir.nil?
-
-      Rails.root.join(relative_dir, filename).to_s
-    end
-
-    def relative_dir
-      root_dir.to_s.gsub(Regexp.new(Rails.root.to_s), "").gsub!(%r{^/}, "")
-    end
-
-    def full_dir
-      Rails.root.join(relative_dir).to_s
-    end
-
-    def model_root
-      File.expand_path("..", full_dir)
-    end
-
     def cleanup!
       [model_root].each do |dir|
         EasyML::Support::FileRotate.new(dir, []).cleanup(extension_allowlist)
```
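The relocated helpers resolve paths relative to the model's directory rather than `Rails.root`. A minimal sketch of that resolution, using made-up directory and file names in place of a real model's `root_dir` and stored filename:

```ruby
require "pathname"

# Illustrative only: mirrors ModelFile#full_path and #model_root.
root_dir = Pathname.new("/srv/app/easy_ml/my_model/files") # stand-in for model.root_dir
filename = "model-20250103.json"                           # stand-in for the stored filename

full_path  = root_dir.join(filename).to_s          # file inside the model's directory
model_root = File.expand_path("..", root_dir.to_s) # parent directory walked by cleanup!

puts full_path  # => "/srv/app/easy_ml/my_model/files/model-20250103.json"
puts model_root # => "/srv/app/easy_ml/my_model"
```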
data/app/models/easy_ml/splitter_history.rb CHANGED

```diff
@@ -1,3 +1,19 @@
+# == Schema Information
+#
+# Table name: easy_ml_splitter_histories
+#
+#  id                 :bigint           not null, primary key
+#  splitter_id        :integer          not null
+#  splitter_type      :string           not null
+#  configuration      :json
+#  dataset_id         :integer          not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#  history_started_at :datetime         not null
+#  history_ended_at   :datetime
+#  history_user_id    :integer
+#  snapshot_id        :string
+#
 module EasyML
   class SplitterHistory < ActiveRecord::Base
     self.table_name = "easy_ml_splitter_histories"
```
data/app/serializers/easy_ml/prediction_serializer.rb CHANGED

```diff
@@ -5,7 +5,12 @@ module EasyML
     include JSONAPI::Serializer

     attribute :prediction do |object|
-      object.prediction_value
+      case object.prediction_value
+      when Hash
+        object.prediction_value.symbolize_keys.dig(:value)
+      when Numeric
+        object.prediction_value
+      end
     end

     attributes :id,
```
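The new branching can be approximated in plain Ruby: hash-valued predictions expose their `:value` entry, bare numerics pass through, and anything else falls out as nil. A standalone sketch (the example hash and its keys are invented; the serializer itself uses ActiveSupport's `symbolize_keys`):

```ruby
# Plain-Ruby stand-in for the serializer's prediction attribute block.
def prediction_for(prediction_value)
  case prediction_value
  when Hash
    prediction_value.transform_keys(&:to_sym)[:value]
  when Numeric
    prediction_value
  end
end

p prediction_for({ "value" => 0.87, "label" => "churn" }) # => 0.87 (example hash)
p prediction_for(42.0)                                    # => 42.0
```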
data/lib/easy_ml/data/date_converter.rb CHANGED

```diff
@@ -3,6 +3,7 @@ module EasyML
   module DateConverter
     COMMON_DATE_FORMATS = [
       "%Y-%m-%dT%H:%M:%S.%6N",   # e.g., "2021-01-01T00:00:00.000000"
+      "%Y-%m-%d %H:%M:%S.%L Z",  # e.g., "2025-01-03 23:04:49.492 Z"
       "%Y-%m-%d %H:%M:%S.%L",    # e.g., "2021-01-01 00:01:36.000"
       "%Y-%m-%d %H:%M:%S.%L",    # e.g., "2021-01-01 00:01:36.000"
       "%Y-%m-%d %H:%M:%S",       # e.g., "2021-01-01 00:01:36"
```
data/lib/easy_ml/data/polars_reader.rb CHANGED

```diff
@@ -196,15 +196,22 @@ module EasyML
         polars_args[:dtypes].merge!(dtypes)
       end
       ext = Pathname.new(file).extname.gsub(/\./, "")
+      date_cols = []
       case ext
       when "csv"
-        filtered_args = filter_polars_args(Polars.method(:read_csv))
-        filtered_args.merge!(
+        filtered_args, date_cols = filter_polars_args(Polars.method(:read_csv))
+        filtered_args.merge!(
+          infer_schema_length: 1_000_000,
+          null_values: ["\\N", "\\\\N", "NULL"],
+        )
         df = Polars.read_csv(file, **filtered_args)
       when "parquet"
-        filtered_args = filter_polars_args(Polars.method(:read_parquet))
+        filtered_args, date_cols = filter_polars_args(Polars.method(:read_parquet))
         df = Polars.read_parquet(file, **filtered_args)
       end
+      date_cols.each do |col|
+        df = EasyML::Data::DateConverter.maybe_convert_date(df, col)
+      end
       df
     end

@@ -214,7 +221,13 @@ module EasyML

     def filter_polars_args(method)
       supported_params = method.parameters.map { |_, name| name }
-      polars_args.select { |k, _| supported_params.include?(k) }
+      filtered = polars_args.select { |k, _| supported_params.include?(k) }
+
+      # Filter out any datetime columns, and use maybe_convert_date to convert later
+      date_cols = (filtered[:dtypes] || {}).select { |k, v| v.class == Polars::Datetime }.keys
+      filtered[:dtypes] = (filtered[:dtypes] || {}).reject { |k, v| v.class == Polars::Datetime }.compact.to_h
+      filtered = filtered.select { |k, _| supported_params.include?(k) }
+      return filtered, date_cols
     end

     def csv_files
```
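The dtype-splitting idea behind the new `filter_polars_args` can be shown without Polars: datetime-typed columns are pulled out of the dtypes hash so they can be converted after the file is read. A standalone sketch, with plain Ruby classes standing in for Polars dtypes (the column names and stand-in classes are invented):

```ruby
# Stand-ins for Polars::Datetime and a non-datetime dtype, illustration only.
DatetimeDtype = Class.new
StringDtype   = Class.new

dtypes = { "created_at" => DatetimeDtype.new, "name" => StringDtype.new }

date_cols = dtypes.select { |_k, v| v.is_a?(DatetimeDtype) }.keys
dtypes    = dtypes.reject { |_k, v| v.is_a?(DatetimeDtype) }

p date_cols        # => ["created_at"]  (converted later, per column)
p dtypes.keys      # => ["name"]        (passed through to the reader)
```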
data/lib/easy_ml/data/statistics_learner.rb CHANGED

```diff
@@ -59,7 +59,7 @@ module EasyML::Data
       stats[col].merge!(most_frequent_value: series.mode.sort.to_a&.first)
       if field_type == :categorical
         stats[col].merge!(
-          unique_count: series.n_unique,
+          unique_count: series.cast(:str).n_unique,
           counts: Hash[series.value_counts.to_hashes.map(&:values)],
         )
       end
```
data/lib/easy_ml/engine.rb CHANGED

```diff
@@ -1,5 +1,6 @@
 require "aws-sdk"
 require "awesome_print"
+require "rails/all"
 require "inertia_rails"
 require "jsonapi/serializer"
 require "numo/narray"
@@ -68,6 +69,16 @@ module EasyML
       end
     end

+    initializer "easy_ml.check_pending_migrations" do
+      if defined?(Rails::Server)
+        config.after_initialize do
+          if EasyML.pending_migrations?
+            puts "\e[33mWARNING: You have pending EasyML migrations. Run 'rails generate easy_ml:migration' to add them.\e[0m"
+          end
+        end
+      end
+    end
+
     initializer "easy_ml.active_job_config" do
       resque_initializer = File.expand_path("config/initializers/resque.rb", root)
       require resque_initializer if File.exist?(resque_initializer)
@@ -77,6 +88,17 @@ module EasyML
       end
     end

+    initializer "easy_ml.configure_secrets" do
+      EasyML::Configuration.configure do |config|
+        raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
+        raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
+
+        config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
+        config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
+        config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
+      end
+    end
+
     initializer "easy_ml.setup_generators" do |app|
       generators_path = EasyML::Engine.root.join("lib/easy_ml/railtie/generators")
       generators_dirs = Dir[File.join(generators_path, "**", "*.rb")]
```
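The new `easy_ml.configure_secrets` initializer raises at boot unless the S3 credentials are present in the environment, so a host app needs them set before Rails starts (for example via the shell or a dotenv file). A hedged sketch of the expected environment; every value below is a placeholder:

```ruby
# Environment the "easy_ml.configure_secrets" initializer reads at boot.
# Placeholder values for illustration only.
ENV["S3_ACCESS_KEY_ID"]     ||= "AKIA-placeholder"
ENV["S3_SECRET_ACCESS_KEY"] ||= "placeholder-secret"
ENV["WANDB_API_KEY"]        ||= "placeholder-wandb-key" # optional; only read if present
```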
data/lib/easy_ml/pending_migrations.rb ADDED

```diff
@@ -0,0 +1,19 @@
+module EasyML
+  def self.pending_migrations?
+    return false unless defined?(ActiveRecord)
+
+    # Get all migration files from our templates
+    template_dir = File.expand_path("../railtie/generators/templates/migration", __dir__)
+    template_migrations = Dir.glob(File.join(template_dir, "*.tt")).map do |f|
+      File.basename(f, ".tt").sub(/^create_/, "")
+    end
+
+    # Get all existing migrations
+    existing_migrations = Dir.glob(Rails.root.join("db/migrate/*_*.rb")).map do |f|
+      File.basename(f).sub(/^\d+_create_/, "").sub(/\.rb$/, "")
+    end
+
+    # Check if any template migrations are not in existing migrations
+    (template_migrations - existing_migrations).any?
+  end
+end
```
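The check reduces to a set difference: template basenames (with their `create_` prefix stripped) that have no matching file under `db/migrate` are considered pending. A standalone sketch of that comparison, using invented migration names:

```ruby
# Illustrative only: the set-difference logic behind EasyML.pending_migrations?
template_migrations = ["easy_ml_event_contexts", "easy_ml_features"] # from the gem's templates
existing_migrations = ["easy_ml_features"]                           # already generated in the app

pending = template_migrations - existing_migrations
p pending       # => ["easy_ml_event_contexts"]
p pending.any?  # => true, so the boot-time warning would fire
```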
data/lib/easy_ml/predict.rb CHANGED

```diff
@@ -10,25 +10,38 @@ module EasyML
       @models = {}
     end

-    def self.predict(model_name, df)
+    def self.predict(model_name, df, serialize: false)
       if df.is_a?(Hash)
         df = Polars::DataFrame.new(df)
       end
-      raw_input = df.to_hashes
+      raw_input = df.to_hashes
       df = instance.normalize(model_name, df)
+      normalized_input = df.to_hashes
       preds = instance.predict(model_name, df)
       current_version = instance.get_model(model_name)

-
-
-
-
-
-
-
-
-
-
+      output = preds.zip(raw_input, normalized_input).map do |pred, raw, norm|
+        EasyML::Prediction.create!(
+          model: current_version.model,
+          model_history: current_version,
+          prediction_type: current_version.model.task,
+          prediction_value: pred,
+          raw_input: raw,
+          normalized_input: norm,
+        )
+      end
+
+      output = if output.is_a?(Array) && output.count == 1
+          output.first
+        else
+          output
+        end
+
+      if serialize
+        EasyML::PredictionSerializer.new(output).serializable_hash
+      else
+        output
+      end
     end

     def self.train(model_name, tuner: nil, evaluator: nil)
```
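The class-level `predict` now records each prediction and accepts a `serialize:` keyword. A usage sketch, assuming an app with a deployed model named "my_model" and a single-row input hash whose column name ("annual_revenue") is invented for illustration:

```ruby
# Usage sketch only: model name and column are placeholders.
result = EasyML::Predict.predict(
  "my_model",
  { "annual_revenue" => [120_000] }, # Hash inputs are wrapped in a Polars::DataFrame
  serialize: true,                   # new keyword: returns PredictionSerializer output
)
```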