easy_ml 0.2.0.pre.rc39 → 0.2.0.pre.rc41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/Rakefile +11 -9
  3. data/app/controllers/easy_ml/application_controller.rb +1 -1
  4. data/app/frontend/components/dataset/splitters/DateSplitter.tsx +4 -4
  5. data/app/frontend/components/dataset/splitters/types.ts +3 -3
  6. data/app/frontend/pages/NewDatasetPage.tsx +1 -1
  7. data/app/helpers/easy_ml/application_helper.rb +2 -2
  8. data/app/jobs/easy_ml/compute_feature_job.rb +54 -1
  9. data/app/models/concerns/easy_ml/dataframe_serialization.rb +30 -0
  10. data/app/models/easy_ml/dataset.rb +23 -22
  11. data/app/models/easy_ml/dataset_history.rb +1 -6
  12. data/app/models/easy_ml/datasources/polars_datasource.rb +4 -18
  13. data/app/models/easy_ml/event.rb +2 -1
  14. data/app/models/easy_ml/event_context.rb +58 -0
  15. data/app/models/easy_ml/feature.rb +40 -11
  16. data/app/models/easy_ml/model.rb +0 -1
  17. data/app/models/easy_ml/model_file.rb +7 -3
  18. data/app/models/easy_ml/splitter_history.rb +16 -0
  19. data/config/initializers/zhong.rb +4 -0
  20. data/lib/easy_ml/data/date_converter.rb +1 -0
  21. data/lib/easy_ml/data/polars_reader.rb +17 -4
  22. data/lib/easy_ml/data/statistics_learner.rb +1 -1
  23. data/lib/easy_ml/engine.rb +12 -1
  24. data/lib/easy_ml/pending_migrations.rb +19 -0
  25. data/lib/easy_ml/predict.rb +1 -3
  26. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +38 -157
  27. data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_features.rb.tt +8 -0
  28. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +4 -2
  29. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +22 -20
  30. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +5 -3
  31. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +26 -24
  32. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +5 -3
  33. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +12 -10
  34. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +21 -19
  35. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_event_contexts.rb.tt +14 -0
  36. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +16 -14
  37. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +10 -8
  38. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +27 -25
  39. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +5 -3
  40. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +13 -11
  41. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +5 -3
  42. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +28 -26
  43. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +13 -11
  44. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +70 -67
  45. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +6 -4
  46. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +6 -4
  47. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +11 -9
  48. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +34 -30
  49. data/lib/easy_ml/version.rb +1 -1
  50. data/lib/easy_ml.rb +1 -0
  51. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  52. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-BRRjHz4-.js → Application.tsx-DF5SSkYi.js} +2 -2
  53. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-BRRjHz4-.js.map → Application.tsx-DF5SSkYi.js.map} +1 -1
  54. metadata +9 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a53f346d534b4333dcc9f8880c9d0fd4d14acf7a596be57caf42ea789490e4e4
-  data.tar.gz: 912b118a2c82f2397afce24d39a5c556d98fc58a310647de7232851f7ee606b4
+  metadata.gz: 03333c45a1103bf7e75446a0c54d6799b62e64646abc1ab2abac123a206d1424
+  data.tar.gz: cb5ba985a5b8e5fd136b92e5ca5f65162d5189f06ba91bdd6e6763a69f5fbe56
 SHA512:
-  metadata.gz: 4d641839d982e782d5921cc7086ef583f3ae0f5901446d0c132deacb5a7101b420428cbbb52f58b0a4f9a94fb3ed6cd7f72cb1705eb2c7c3c4f938c4a7e9702d
-  data.tar.gz: b19d313ece15cb343138f4d6e036947ba764708b80695ef6d7b502c45e3e34662dec959d8e1c1f423109f89ac87101b3383d64b80da3d32cc7e9bee7b30c6f5c
+  metadata.gz: ddd88ca06fecf8366a7e3fa370b2f78e0e73ebc4c29fefcdd6cd0b208286710d503b767d6fabcf9af5eb40105bb0ffe63ddd773278b0cc9379750dfb4763d87f
+  data.tar.gz: 0c3b8dfdb6d293692439a1818dca5fe1974e27039895e54f08e492bb621c563a77cc4e0921f1df58603526a0b95354741684052aa9f1df8928e5b54de6f8caac
data/Rakefile CHANGED
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require "sprockets/railtie"
 require "bundler/gem_tasks"
 require "rspec/core/rake_task"
 
@@ -20,16 +19,19 @@ require_relative "lib/easy_ml"
 # Load the annotate tasks
 require "annotate/annotate_models"
 
+require "combustion"
+Combustion.path = "spec/internal"
+Combustion::Application.configure_for_combustion
 task :environment do
-  require "combustion"
-  require "sprockets"
-  Combustion.path = "spec/internal"
-  Combustion.initialize! :active_record do |config|
-    config.assets = ActiveSupport::OrderedOptions.new # Stub to avoid errors
-    config.assets.enabled = false # Set false since assets are handled by Vite
-  end
-  EasyML::Engine.eager_load!
+  Combustion::Application.initialize!
+
+  # Reset migrations paths so we can keep the migrations in the project root,
+  # not the Rails root
+  migrations_paths = ["spec/internal/db/migrate"]
+  ActiveRecord::Tasks::DatabaseTasks.migrations_paths = migrations_paths
+  ActiveRecord::Migrator.migrations_paths = migrations_paths
 end
+Combustion::Application.load_tasks
 
 namespace :easy_ml do
   task annotate_models: :environment do
data/app/controllers/easy_ml/application_controller.rb CHANGED
@@ -12,7 +12,7 @@ module EasyML
     before_action :hot_reload
 
     def hot_reload
-      return unless Rails.env.development? && ENV["EASY_ML_DEMO_APP"]
+      return unless Rails.env.development? && ENV["EASY_ML_DEV"]
 
       Dir[EasyML::Engine.root.join("lib/**/*")].select { |f| Pathname.new(f).extname == ".rb" }.each do |file|
         load file
data/app/frontend/components/dataset/splitters/DateSplitter.tsx CHANGED
@@ -12,14 +12,14 @@ export function DateSplitter({ attributes, columns, onChange }: DateSplitterProp
   return (
     <div className="space-y-4">
       <div>
-        <label htmlFor="date_column" className="block text-sm font-medium text-gray-700">
+        <label htmlFor="date_col" className="block text-sm font-medium text-gray-700">
          Date Column
        </label>
        <SearchableSelect
-          id="date_column"
-          value={attributes.date_column}
+          id="date_col"
+          value={attributes.date_col}
          options={columns.map(col => ({ value: col, label: col }))}
-          onChange={(value) => onChange({ ...attributes, date_column: value })}
+          onChange={(value) => onChange({ ...attributes, date_col: value })}
          placeholder="Select date column"
        />
      </div>
data/app/frontend/components/dataset/splitters/types.ts CHANGED
@@ -18,7 +18,7 @@ export type SplitterType =
   | 'leave_p_out';
 
 export interface DateSplitConfig {
-  date_column: string;
+  date_col: string;
   months_test: number;
   months_valid: number;
 }
@@ -81,7 +81,7 @@ export interface ValidationResult {
 
 // Validation functions for each splitter type
 export const validateDateSplitter = (config: DateSplitConfig): ValidationResult => {
-  if (!config.date_column) {
+  if (!config.date_col) {
     return { isValid: false, error: "Please select a date column" };
   }
   if (!config.months_test || config.months_test <= 0) {
@@ -108,7 +108,7 @@ export const validateRandomSplitter = (config: RandomSplitConfig): ValidationRes
 };
 
 export const validatePredefinedSplitter = (config: PredefinedSplitConfig): ValidationResult => {
-  if (!config.files || config.files.length === 0) {
+  if (!config.train_files || config.train_files.length === 0) {
     return { isValid: false, error: "Please select at least one file for splitting" };
   }
   return { isValid: true };
data/app/frontend/pages/NewDatasetPage.tsx CHANGED
@@ -30,7 +30,7 @@ export default function NewDatasetPage({ constants, datasources }: NewDatasetFor
     switch (type) {
       case 'date':
         const dateConfig: DateSplitConfig = {
-          date_column: '',
+          date_col: '',
           months_test: 2,
           months_valid: 2
         };
data/app/helpers/easy_ml/application_helper.rb CHANGED
@@ -3,8 +3,8 @@
 module EasyML
   module ApplicationHelper
     # Override: Returns the engine assets manifest.
-    def vite_manifest
-      ViteRuby.new(EasyML::Engine.root).manifest
+    def easy_ml_manifest
+      ViteRuby.new(root: EasyML::Engine.root).manifest
     end
 
     def prod_script_tags
data/app/jobs/easy_ml/compute_feature_job.rb CHANGED
@@ -1,12 +1,43 @@
 module EasyML
   class ComputeFeatureJob < BatchJob
+    extend EasyML::DataframeSerialization
+
     @queue = :easy_ml
 
     def self.perform(batch_id, options = {})
+      puts "processing batch_id #{batch_id}"
       options.symbolize_keys!
       feature_id = options.dig(:feature_id)
       feature = EasyML::Feature.find(feature_id)
-      feature.fit_batch(options)
+      dataset = feature.dataset
+
+      # Check if any feature has failed before proceeding
+      if dataset.features.any? { |f| f.workflow_status == "failed" }
+        puts "Aborting feature computation due to previous feature failure"
+        return
+      end
+
+      begin
+        feature.fit_batch(options.merge!(batch_id: batch_id))
+      rescue => e
+        puts "Error computing feature: #{e.message}"
+        EasyML::Feature.transaction do
+          return if dataset.reload.workflow_status == :failed
+
+          puts "Logging error"
+          feature.update(workflow_status: :failed)
+          dataset.update(workflow_status: :failed)
+          build_error_with_context(dataset, e, batch_id, feature)
+        end
+      end
+    end
+
+    def self.build_error_with_context(dataset, error, batch_id, feature)
+      error = EasyML::Event.handle_error(dataset, error)
+      batch = feature.build_batch(batch_id: batch_id)
+
+      # Convert any dataframes in the context to serialized form
+      error.create_context(context: batch)
     end
 
     def self.after_batch_hook(batch_id, *args)
@@ -15,5 +46,27 @@ module EasyML
       dataset = EasyML::Feature.find_by(id: feature_ids.first).dataset
       dataset.after_fit_features
     end
+
+    def self.feature_fully_processed?(feature)
+    end
+
+    private
+
+    def self.remove_remaining_batch_jobs(batch_id)
+      # Remove all remaining jobs in the batch
+      while (jobs = Resque.peek(:easy_ml, 0, 1000)).any?
+        jobs.each do |job|
+          if job["args"][0] == batch_id
+            Resque.dequeue(self, *job["args"])
+          end
+        end
+
+        # Break if we've processed all jobs (no more jobs match our batch_id)
+        break unless jobs.any? { |job| job["args"][0] == batch_id }
+      end
+    end
   end
 end
+
+# If any feature fails, the entire batch fails
+# If any feature fails, the RELATED batches should fail
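
Taken together, the new guard in perform means that once any feature in a dataset is marked failed, every later batch job for that dataset becomes a no-op. A hypothetical illustration (ids and batch name invented):

    # Sketch only: mark one feature failed, then run a batch for a sibling
    # feature in the same dataset.
    feature = EasyML::Feature.find(42)
    feature.update(workflow_status: :failed)

    # The guard at the top of perform now short-circuits before fit_batch:
    EasyML::ComputeFeatureJob.perform("batch-abc", { "feature_id" => 43 })
    # prints "Aborting feature computation due to previous feature failure" and returns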
data/app/models/concerns/easy_ml/dataframe_serialization.rb ADDED
@@ -0,0 +1,30 @@
+module EasyML
+  module DataframeSerialization
+    extend ActiveSupport::Concern
+
+    def serialize_dataframe(df)
+      return unless df
+      JSON.parse(df.write_json)
+    end
+
+    def deserialize_dataframe(df_data)
+      return unless df_data.present? && df_data.key?("columns")
+
+      columns = df_data["columns"].map do |col|
+        dtype = case col["datatype"]
+          when Hash
+            if col["datatype"]["Datetime"]
+              Polars::Datetime.new(col["datatype"]["Datetime"][0].downcase.to_sym).class
+            else
+              Polars::Utf8
+            end
+          else
+            Polars.const_get(col["datatype"])
+          end
+        Polars::Series.new(col["name"], col["values"], dtype: dtype)
+      end
+
+      Polars::DataFrame.new(columns)
+    end
+  end
+end
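
A rough usage sketch of the new concern (the wrapper class is invented; assumes ActiveSupport and the polars-df gem are loaded):

    require "active_support/all"
    require "polars-df"

    # Hypothetical host class; any object can mix the concern in.
    class DataframeWrapper
      include EasyML::DataframeSerialization
    end

    wrapper  = DataframeWrapper.new
    df       = Polars::DataFrame.new({ "id" => [1, 2, 3], "name" => ["a", "b", "c"] })
    payload  = wrapper.serialize_dataframe(df)   # plain Hash, safe to persist in a json/jsonb column
    restored = wrapper.deserialize_dataframe(payload)
    restored.shape == df.shape                   # => true; dtypes are rebuilt from the "datatype" field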
data/app/models/easy_ml/dataset.rb CHANGED
@@ -102,13 +102,9 @@ module EasyML
     end
 
     def root_dir
-      persisted = read_attribute(:root_dir)
+      relative_dir = read_attribute(:root_dir) || default_root_dir
 
-      if persisted.present? && !persisted.blank?
-        EasyML::Engine.root_dir.join(persisted).to_s
-      else
-        default_root_dir
-      end
+      EasyML::Engine.root_dir.join(relative_dir).to_s
     end
 
     def destructively_cleanup!
@@ -219,8 +215,11 @@ module EasyML
     end
 
     def after_fit_features
-      features.update_all(needs_fit: false, fit_at: Time.current)
       unlock!
+      reload
+      return if failed?
+
+      features.update_all(needs_fit: false, fit_at: Time.current)
       actually_refresh
     end
 
@@ -281,22 +280,24 @@ module EasyML
     end
 
     def refreshing
-      return false if is_history_class?
-      unlock! unless analyzing?
-
-      lock_dataset do
-        update(workflow_status: "analyzing")
-        fully_reload
-        yield
-      ensure
-        unlock!
-      end
-    rescue => e
-      update(workflow_status: "failed")
-      e.backtrace.grep(/easy_ml/).each do |line|
-        puts line
+      begin
+        return false if is_history_class?
+        unlock! unless analyzing?
+
+        lock_dataset do
+          update(workflow_status: "analyzing")
+          fully_reload
+          yield
+        ensure
+          unlock!
+        end
+      rescue => e
+        update(workflow_status: "failed")
+        e.backtrace.grep(/easy_ml/).each do |line|
+          puts line
+        end
+        raise e
       end
-      raise e
     end
 
     def unlock!
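
The simplified Dataset#root_dir above always resolves the stored value against the engine root. A hypothetical example (path invented; assumes root_dir is a plain column holding a relative path):

    dataset = EasyML::Dataset.new(root_dir: "datasets/my_dataset")
    dataset.root_dir
    # => EasyML::Engine.root_dir.join("datasets/my_dataset").to_s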
data/app/models/easy_ml/dataset_history.rb CHANGED
@@ -30,17 +30,12 @@ module EasyML
     self.table_name = "easy_ml_dataset_histories"
     include Historiographer::History
 
-    has_many :columns,
-             ->(dataset_history) { where(snapshot_id: dataset_history.snapshot_id) },
+    has_many :columns, ->(dataset_history) { where(snapshot_id: dataset_history.snapshot_id) },
              class_name: "EasyML::ColumnHistory",
              foreign_key: "dataset_id",
              primary_key: "dataset_id",
              extend: EasyML::ColumnList
 
-    def root_dir
-      read_attribute(:root_dir)
-    end
-
     def fit
       false
     end
data/app/models/easy_ml/datasources/polars_datasource.rb CHANGED
@@ -1,6 +1,8 @@
 module EasyML
   module Datasources
     class PolarsDatasource < BaseDatasource
+      include EasyML::DataframeSerialization
+
       validates :df, presence: true
       add_configuration_attributes :df
 
@@ -58,7 +60,7 @@ module EasyML
         return unless df
 
         datasource.configuration = (datasource.configuration || {}).merge(
-          "df" => JSON.parse(df.write_json),
+          "df" => serialize_dataframe(df),
         )
       end
 
@@ -66,23 +68,7 @@ module EasyML
         return unless datasource.configuration&.key?("df")
 
         df_data = datasource.configuration["df"]
-        return unless df_data.present? && df_data.key?("columns")
-
-        columns = df_data["columns"].map do |col|
-          dtype = case col["datatype"]
-            when Hash
-              if col["datatype"]["Datetime"]
-                Polars::Datetime.new(col["datatype"]["Datetime"][0].downcase.to_sym).class
-              else
-                Polars::Utf8
-              end
-            else
-              Polars.const_get(col["datatype"])
-            end
-          Polars::Series.new(col["name"], col["values"], dtype: dtype)
-        end
-
-        datasource.df = Polars::DataFrame.new(columns)
+        datasource.df = deserialize_dataframe(df_data)
       end
     end
   end
data/app/models/easy_ml/event.rb CHANGED
@@ -19,6 +19,7 @@ module EasyML
     STATUSES = %w[started success failed].freeze
 
     belongs_to :eventable, polymorphic: true, optional: true
+    has_one :context, dependent: :destroy, class_name: "EasyML::EventContext"
     validates :name, presence: true
     validates :status, presence: true, inclusion: { in: STATUSES }
@@ -51,8 +52,8 @@ module EasyML
           error = e
         end
       end
-      create_event(model, "failed", error)
       Rails.logger.error("#{self.class.name} failed: #{error.message}")
+      create_event(model, "failed", error)
     end
 
     def self.format_stacktrace(error)
data/app/models/easy_ml/event_context.rb ADDED
@@ -0,0 +1,58 @@
+# == Schema Information
+#
+# Table name: easy_ml_event_contexts
+#
+#  id         :bigint           not null, primary key
+#  event_id   :bigint           not null
+#  context    :jsonb            not null
+#  created_at :datetime         not null
+#  updated_at :datetime         not null
+#
+module EasyML
+  class EventContext < ActiveRecord::Base
+    include EasyML::DataframeSerialization
+
+    self.table_name = "easy_ml_event_contexts"
+
+    belongs_to :event
+
+    validates :context, presence: true
+    validates :event, presence: true
+
+    def context=(new_context)
+      write_attribute(:context, serialize_context(new_context))
+      @context = new_context
+    end
+
+    def context
+      @context ||= deserialize_context(read_attribute(:context))
+    end
+
+    private
+
+    def serialize_context(new_context)
+      case new_context
+      when Hash
+        self.format = :json
+        new_context.to_json
+      when YAML
+        self.format = :yaml
+        new_context.to_yaml
+      when Polars::DataFrame
+        self.format = :dataframe
+        serialize_dataframe(new_context)
+      end
+    end
+
+    def deserialize_context(context)
+      case format.to_sym
+      when :json
+        JSON.parse(context)
+      when :yaml
+        YAML.safe_load(context)
+      when :dataframe
+        deserialize_dataframe(context)
+      end
+    end
+  end
+end
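
A hypothetical console session against the new model (attribute values invented; assumes the create_easy_ml_event_contexts migration bundled in this release has run):

    event = EasyML::Event.create!(name: "compute_feature", status: "failed")

    # Hashes are stored with format :json...
    EasyML::EventContext.create!(event: event, context: { "batch_id" => "batch-abc" })

    # ...while Polars dataframes go through DataframeSerialization, so the
    # failing batch itself can be attached to the event for debugging.
    df = Polars::DataFrame.new({ "id" => [1, 2, 3] })
    EasyML::EventContext.create!(event: event, context: df)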
data/app/models/easy_ml/feature.rb CHANGED
@@ -17,6 +17,7 @@
 #  refresh_every   :bigint
 #  created_at      :datetime         not null
 #  updated_at      :datetime         not null
+#  workflow_status :string
 #
 module EasyML
   class Feature < ActiveRecord::Base
@@ -24,6 +25,11 @@ module EasyML
     include Historiographer::Silent
     historiographer_mode :snapshot_only
 
+    enum workflow_status: {
+      analyzing: "analyzing",
+      ready: "ready",
+      failed: "failed",
+    }
     class << self
       def compute_sha(feature_class)
         require "digest"
@@ -135,13 +141,22 @@ module EasyML
       adapter.respond_to?(:batch) || config.dig(:batch_size).present?
     end
 
+    def primary_key
+      pkey = config.dig(:primary_key)
+      if pkey.is_a?(Array)
+        pkey
+      else
+        [pkey]
+      end
+    end
+
    def numeric_primary_key?
      if primary_key.nil?
        return false unless should_be_batchable?
        raise "Couldn't find primary key for feature #{feature_class}, check your feature class"
      end
 
-      dataset.raw.data(limit: 1, select: primary_key)[primary_key].to_a.flat_map(&:values).all? do |value|
+      dataset.raw.data(limit: 1, select: primary_key)[primary_key].to_a.flat_map { |h| h.respond_to?(:values) ? h.values : h }.all? do |value|
        case value
        when String then value.match?(/\A[-+]?\d+(\.\d+)?\z/)
        else
@@ -171,14 +186,14 @@ module EasyML
        unless primary_key.present?
          raise "Couldn't find primary key for feature #{feature_class}, check your feature class"
        end
-        df = reader.query(select: [primary_key.first])
+        df = reader.query(select: primary_key)
      rescue => e
        raise "Couldn't find primary key #{primary_key.first} for feature #{feature_class}: #{e.message}"
      end
      return [] if df.nil?
 
      min_id = df[primary_key.first].min
-      max_id = df[primary_key.first].max
+      max_id = df[primary_key.last].max
    end
 
    (min_id..max_id).step(batch_size).map do |batch_start|
@@ -196,7 +211,11 @@ module EasyML
    end
 
    def fit(features: [self], async: false)
-      jobs = features.flat_map(&:build_batches)
+      # Sort features by position to ensure they're processed in order
+      features.update_all(workflow_status: :analyzing)
+      ordered_features = features.sort_by(&:feature_position)
+      jobs = ordered_features.flat_map(&:build_batches)
+
      if async
        EasyML::ComputeFeatureJob.enqueue_batch(jobs)
      else
@@ -266,13 +285,11 @@ module EasyML
          batch_df = adapter.fit(df, self, batch_args)
        end
      end
-      raise "Feature #{feature_class}#fit must return a dataframe" unless batch_df.present?
-      store(batch_df)
-      updates = {
-        applied_at: Time.current,
-        needs_fit: false,
-      }.compact
-      update!(updates)
+      if batch_df.present?
+        store(batch_df)
+      else
+        "Feature #{feature_class}#fit should return a dataframe, received #{batch_df.class}"
+      end
      batch_df
    end
 
@@ -335,6 +352,7 @@ module EasyML
    def apply_defaults
      self.name ||= self.feature_class.demodulize.titleize
      self.version ||= 1
+      self.workflow_status ||= :ready
    end
 
    def needs_columns
@@ -371,6 +389,17 @@ module EasyML
      (should_be_batchable? ? 10_000 : nil)
    end
 
+    def after_fit
+      updates = {
+        applied_at: Time.current,
+        needs_fit: false,
+      }.compact
+      update!(updates)
+    end
+
+    def fully_processed?
+    end
+
    private
 
    def bulk_update_positions(features)
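
The new Feature#primary_key accessor normalizes the configured key to an array so single and composite keys share one code path. A standalone illustration of that normalization (example values invented):

    # Mirrors the array-wrapping Feature#primary_key performs on config[:primary_key].
    def normalize_primary_key(pkey)
      pkey.is_a?(Array) ? pkey : [pkey]
    end

    normalize_primary_key("order_id")                 # => ["order_id"]
    normalize_primary_key(["company_id", "order_id"]) # => ["company_id", "order_id"]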
data/app/models/easy_ml/model.rb CHANGED
@@ -544,7 +544,6 @@ module EasyML
 
     def new_model_file!
       build_model_file(
-        root_dir: root_dir,
         model: self,
         s3_bucket: EasyML::Configuration.s3_bucket,
         s3_region: EasyML::Configuration.s3_region,
data/app/models/easy_ml/model_file.rb CHANGED
@@ -23,7 +23,7 @@ module EasyML
     belongs_to :model, class_name: "EasyML::Model"
 
     include EasyML::Concerns::Configurable
-    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key, :root_dir
+    add_configuration_attributes :s3_bucket, :s3_prefix, :s3_region, :s3_access_key_id, :s3_secret_access_key
 
     def synced_file
       EasyML::Support::SyncedFile.new(
@@ -33,10 +33,14 @@ module EasyML
         s3_region: s3_region,
         s3_access_key_id: s3_access_key_id,
         s3_secret_access_key: s3_secret_access_key,
-        root_dir: root_dir,
+        root_dir: full_dir,
       )
     end
 
+    def root_dir
+      model.root_dir
+    end
+
     def exist?
       fit?
     end
@@ -103,7 +107,7 @@ module EasyML
     end
 
     def relative_dir
-      root_dir.to_s.gsub(Regexp.new(Rails.root.to_s), "").gsub!(%r{^/}, "")
+      root_dir.to_s.gsub(Regexp.new(Rails.root.to_s), "").gsub(%r{^/}, "")
     end
 
     def full_dir
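
The relative_dir change swaps gsub! for gsub, which matters because gsub! returns nil when nothing matches. Plain Ruby shows the difference:

    "models/easy_ml".gsub!(%r{^/}, "")   # => nil (no leading "/" to strip)
    "/models/easy_ml".gsub!(%r{^/}, "")  # => "models/easy_ml"
    "models/easy_ml".gsub(%r{^/}, "")    # => "models/easy_ml" (always a String)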
data/app/models/easy_ml/splitter_history.rb CHANGED
@@ -1,3 +1,19 @@
+# == Schema Information
+#
+# Table name: easy_ml_splitter_histories
+#
+#  id                 :bigint           not null, primary key
+#  splitter_id        :integer          not null
+#  splitter_type      :string           not null
+#  configuration      :json
+#  dataset_id         :integer          not null
+#  created_at         :datetime         not null
+#  updated_at         :datetime         not null
+#  history_started_at :datetime         not null
+#  history_ended_at   :datetime
+#  history_user_id    :integer
+#  snapshot_id        :string
+#
 module EasyML
   class SplitterHistory < ActiveRecord::Base
     self.table_name = "easy_ml_splitter_histories"
data/config/initializers/zhong.rb CHANGED
@@ -7,5 +7,9 @@ if %w[zhong:start].include?(ARGV.first)
     every 1.hour, "cleanup" do
       EasyML::CleanJob.perform_later
     end
+
+    every 1.hour, "cleanup" do
+      EasyML::ScheduleRetrainingJob.perform_later
+    end
   end
 end
data/lib/easy_ml/data/date_converter.rb CHANGED
@@ -3,6 +3,7 @@ module EasyML
   module DateConverter
     COMMON_DATE_FORMATS = [
       "%Y-%m-%dT%H:%M:%S.%6N", # e.g., "2021-01-01T00:00:00.000000"
+      "%Y-%m-%d %H:%M:%S.%L Z", # e.g., "2025-01-03 23:04:49.492 Z"
       "%Y-%m-%d %H:%M:%S.%L", # e.g., "2021-01-01 00:01:36.000"
       "%Y-%m-%d %H:%M:%S.%L", # e.g., "2021-01-01 00:01:36.000"
       "%Y-%m-%d %H:%M:%S", # e.g., "2021-01-01 00:01:36"