easy_ml 0.2.0.pre.rc71 → 0.2.0.pre.rc75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/datasets_controller.rb +33 -0
  3. data/app/controllers/easy_ml/datasources_controller.rb +7 -0
  4. data/app/controllers/easy_ml/models_controller.rb +46 -0
  5. data/app/frontend/components/DatasetCard.tsx +212 -0
  6. data/app/frontend/components/ModelCard.tsx +114 -29
  7. data/app/frontend/components/StackTrace.tsx +13 -0
  8. data/app/frontend/components/dataset/FeatureConfigPopover.tsx +10 -7
  9. data/app/frontend/components/datasets/UploadDatasetButton.tsx +51 -0
  10. data/app/frontend/components/models/DownloadModelModal.tsx +90 -0
  11. data/app/frontend/components/models/UploadModelModal.tsx +212 -0
  12. data/app/frontend/components/models/index.ts +2 -0
  13. data/app/frontend/pages/DatasetsPage.tsx +36 -130
  14. data/app/frontend/pages/DatasourcesPage.tsx +22 -2
  15. data/app/frontend/pages/ModelsPage.tsx +37 -11
  16. data/app/frontend/types/dataset.ts +1 -2
  17. data/app/frontend/types.ts +1 -1
  18. data/app/jobs/easy_ml/reaper.rb +55 -0
  19. data/app/jobs/easy_ml/training_job.rb +1 -1
  20. data/app/models/easy_ml/column/imputers/base.rb +4 -0
  21. data/app/models/easy_ml/column/imputers/clip.rb +5 -3
  22. data/app/models/easy_ml/column/imputers/imputer.rb +11 -13
  23. data/app/models/easy_ml/column/imputers/mean.rb +7 -3
  24. data/app/models/easy_ml/column/imputers/null_imputer.rb +3 -0
  25. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +5 -1
  26. data/app/models/easy_ml/column/imputers.rb +3 -1
  27. data/app/models/easy_ml/column/lineage/base.rb +5 -1
  28. data/app/models/easy_ml/column/lineage/computed_by_feature.rb +1 -1
  29. data/app/models/easy_ml/column/lineage/preprocessed.rb +1 -1
  30. data/app/models/easy_ml/column/lineage/raw_dataset.rb +1 -1
  31. data/app/models/easy_ml/column/selector.rb +4 -0
  32. data/app/models/easy_ml/column.rb +79 -63
  33. data/app/models/easy_ml/column_history.rb +28 -28
  34. data/app/models/easy_ml/column_list/imputer.rb +23 -0
  35. data/app/models/easy_ml/column_list.rb +39 -26
  36. data/app/models/easy_ml/dataset/learner/base.rb +34 -0
  37. data/app/models/easy_ml/dataset/learner/eager/boolean.rb +10 -0
  38. data/app/models/easy_ml/dataset/learner/eager/categorical.rb +51 -0
  39. data/app/models/easy_ml/dataset/learner/eager/query.rb +37 -0
  40. data/app/models/easy_ml/dataset/learner/eager.rb +43 -0
  41. data/app/models/easy_ml/dataset/learner/lazy/boolean.rb +13 -0
  42. data/app/models/easy_ml/dataset/learner/lazy/categorical.rb +10 -0
  43. data/app/models/easy_ml/dataset/learner/lazy/datetime.rb +19 -0
  44. data/app/models/easy_ml/dataset/learner/lazy/null.rb +17 -0
  45. data/app/models/easy_ml/dataset/learner/lazy/numeric.rb +19 -0
  46. data/app/models/easy_ml/dataset/learner/lazy/query.rb +69 -0
  47. data/app/models/easy_ml/dataset/learner/lazy/string.rb +19 -0
  48. data/app/models/easy_ml/dataset/learner/lazy.rb +51 -0
  49. data/app/models/easy_ml/dataset/learner/query.rb +25 -0
  50. data/app/models/easy_ml/dataset/learner.rb +100 -0
  51. data/app/models/easy_ml/dataset.rb +150 -36
  52. data/app/models/easy_ml/dataset_history.rb +1 -0
  53. data/app/models/easy_ml/datasource.rb +9 -0
  54. data/app/models/easy_ml/event.rb +5 -7
  55. data/app/models/easy_ml/export/column.rb +27 -0
  56. data/app/models/easy_ml/export/dataset.rb +37 -0
  57. data/app/models/easy_ml/export/datasource.rb +12 -0
  58. data/app/models/easy_ml/export/feature.rb +24 -0
  59. data/app/models/easy_ml/export/model.rb +40 -0
  60. data/app/models/easy_ml/export/retraining_job.rb +20 -0
  61. data/app/models/easy_ml/export/splitter.rb +14 -0
  62. data/app/models/easy_ml/feature.rb +21 -0
  63. data/app/models/easy_ml/import/column.rb +35 -0
  64. data/app/models/easy_ml/import/dataset.rb +148 -0
  65. data/app/models/easy_ml/import/feature.rb +36 -0
  66. data/app/models/easy_ml/import/model.rb +136 -0
  67. data/app/models/easy_ml/import/retraining_job.rb +29 -0
  68. data/app/models/easy_ml/import/splitter.rb +34 -0
  69. data/app/models/easy_ml/lineage.rb +44 -0
  70. data/app/models/easy_ml/model.rb +101 -37
  71. data/app/models/easy_ml/model_file.rb +6 -0
  72. data/app/models/easy_ml/models/xgboost/evals_callback.rb +7 -7
  73. data/app/models/easy_ml/models/xgboost.rb +33 -9
  74. data/app/models/easy_ml/retraining_job.rb +8 -1
  75. data/app/models/easy_ml/retraining_run.rb +7 -5
  76. data/app/models/easy_ml/splitter.rb +8 -0
  77. data/app/models/lineage_history.rb +6 -0
  78. data/app/serializers/easy_ml/column_serializer.rb +7 -1
  79. data/app/serializers/easy_ml/dataset_serializer.rb +2 -1
  80. data/app/serializers/easy_ml/lineage_serializer.rb +9 -0
  81. data/config/routes.rb +14 -1
  82. data/lib/easy_ml/core/tuner/adapters/base_adapter.rb +3 -3
  83. data/lib/easy_ml/core/tuner.rb +13 -12
  84. data/lib/easy_ml/data/polars_column.rb +149 -100
  85. data/lib/easy_ml/data/polars_reader.rb +8 -5
  86. data/lib/easy_ml/data/polars_schema.rb +56 -0
  87. data/lib/easy_ml/data/splits/file_split.rb +20 -2
  88. data/lib/easy_ml/data/splits/split.rb +10 -1
  89. data/lib/easy_ml/data.rb +1 -0
  90. data/lib/easy_ml/deep_compact.rb +19 -0
  91. data/lib/easy_ml/engine.rb +1 -0
  92. data/lib/easy_ml/feature_store.rb +2 -6
  93. data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +6 -0
  94. data/lib/easy_ml/railtie/templates/migration/add_extra_metadata_to_columns.rb.tt +9 -0
  95. data/lib/easy_ml/railtie/templates/migration/add_raw_schema_to_datasets.rb.tt +9 -0
  96. data/lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_easy_ml_model_names.rb.tt +8 -0
  97. data/lib/easy_ml/railtie/templates/migration/create_easy_ml_lineages.rb.tt +24 -0
  98. data/lib/easy_ml/railtie/templates/migration/remove_evaluator_from_retraining_jobs.rb.tt +7 -0
  99. data/lib/easy_ml/railtie/templates/migration/update_preprocessing_steps_to_jsonb.rb.tt +18 -0
  100. data/lib/easy_ml/timing.rb +34 -0
  101. data/lib/easy_ml/version.rb +1 -1
  102. data/lib/easy_ml.rb +2 -0
  103. data/public/easy_ml/assets/.vite/manifest.json +2 -2
  104. data/public/easy_ml/assets/assets/Application-Q7L6ioxr.css +1 -0
  105. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Rrzo4ecT.js +522 -0
  106. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Rrzo4ecT.js.map +1 -0
  107. metadata +53 -12
  108. data/app/models/easy_ml/column/learners/base.rb +0 -103
  109. data/app/models/easy_ml/column/learners/boolean.rb +0 -11
  110. data/app/models/easy_ml/column/learners/categorical.rb +0 -51
  111. data/app/models/easy_ml/column/learners/datetime.rb +0 -19
  112. data/app/models/easy_ml/column/learners/null.rb +0 -22
  113. data/app/models/easy_ml/column/learners/numeric.rb +0 -33
  114. data/app/models/easy_ml/column/learners/string.rb +0 -15
  115. data/public/easy_ml/assets/assets/Application-BbFobaXt.css +0 -1
  116. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CibZcrBc.js +0 -489
  117. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-CibZcrBc.js.map +0 -1
@@ -0,0 +1,148 @@
1
module EasyML
  module Import
    # Creates or updates an EasyML::Dataset, together with its datasource, splitter,
    # columns and features, from a configuration shaped like { "dataset" => { ... } }.
    #
    # NOTE: the original file placed a bare `private` above the helper methods.
    # `private` does not apply to `def self.` methods, so they were public all
    # along (and `validate` is called from EasyML::Import::Model). The keyword is
    # dropped here to stop it from suggesting otherwise; visibility is unchanged.
    class Dataset
      # Keys accepted inside the "dataset" section: every EasyML::Dataset column not
      # listed in EasyML::Export::Dataset::UNCONFIGURABLE_COLUMNS, plus the nested
      # :columns, :features, :splitter and :datasource sections.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Dataset.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Dataset::UNCONFIGURABLE_COLUMNS.map(&:to_sym) +
                            [:columns, :features, :splitter, :datasource]
      end

      # Builds or updates a dataset from +json_config+.
      #
      # @param json_config [Hash, String] configuration hash or its JSON encoding
      # @param action [Symbol, nil] :create or :update; a :create request whose
      #   dataset name already exists is turned into an update of that dataset
      # @param dataset [EasyML::Dataset, nil] target dataset, required for :update
      # @return [EasyML::Dataset]
      # @raise [ArgumentError] on a missing action/target or a malformed configuration
      def self.from_config(json_config, action: nil, dataset: nil)
        raise ArgumentError, "Target dataset must be specified" if action == :update && dataset.nil?

        config = json_config.is_a?(String) ? JSON.parse(json_config) : json_config
        raise ArgumentError, "Dataset configuration must be a hash or JSON object" unless config.is_a?(Hash)

        # Work on an indifferent-access deep copy: the nested sections are removed
        # with #delete below, which must not mutate the caller's hash, and symbol
        # keys must be accepted just like string keys.
        config = config.deep_dup.with_indifferent_access
        dataset_config = config["dataset"]
        raise ArgumentError, "Configuration must contain a \"dataset\" section" unless dataset_config.is_a?(Hash)

        # Extract configs for related models
        datasource_config = dataset_config.delete("datasource")
        splitter_config = dataset_config.delete("splitter")
        columns_config = dataset_config.delete("columns") || []
        features_config = dataset_config.delete("features") || []

        if action == :create
          # Importing onto an existing name updates that dataset instead of duplicating it.
          dataset = EasyML::Dataset.find_by(name: dataset_config["name"])
          action = dataset.present? ? :update : :create
        end
        raise ArgumentError, "Action must be specified" unless action.present?

        case action
        when :create
          create_dataset(dataset_config, datasource_config, splitter_config, columns_config, features_config)
        when :update
          update_dataset(dataset, dataset_config, columns_config, features_config)
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # Creates the datasource (found or created by name), the dataset, and then
      # its splitter, columns and features.
      #
      # @return [EasyML::Dataset] the newly created dataset
      # @raise [ArgumentError] when no datasource section was supplied
      def self.create_dataset(dataset_config, datasource_config, splitter_config, columns_config, features_config)
        if datasource_config.blank?
          raise ArgumentError, "Datasource configuration is required to create a dataset"
        end

        datasource = EasyML::Datasource.find_or_create_by(name: datasource_config["name"]) do |ds|
          ds.assign_attributes(datasource_config)
        end
        # Also applies the config when the datasource already existed.
        datasource.update!(datasource_config)

        dataset = EasyML::Dataset.create!(dataset_config.merge(datasource: datasource))

        EasyML::Splitter.from_config(splitter_config, dataset) if splitter_config.present?

        columns_config.each do |column_config|
          EasyML::Column.from_config(column_config, dataset, action: :create)
        end

        features_config.each do |feature_config|
          EasyML::Feature.from_config(feature_config, dataset, action: :create)
        end

        dataset
      end

      # Applies the configuration to an existing dataset. The dataset keeps its
      # name and datasource. A refresh is enqueued when any existing column's
      # drop_if_null value is changed by the import.
      #
      # @return [EasyML::Dataset] the updated dataset
      def self.update_dataset(dataset, dataset_config, columns_config, features_config)
        dataset.update!(dataset_config.except("name", "datasource"))

        needs_refresh = false

        columns_config.each do |column_config|
          existing_column = dataset.columns.find_by(name: column_config["name"])

          if existing_column
            new_drop_if_null = column_config["drop_if_null"]
            # A nil value means the config does not mention drop_if_null at all.
            needs_refresh ||= !new_drop_if_null.nil? && existing_column.drop_if_null != new_drop_if_null
          end

          EasyML::Column.from_config(column_config, dataset, action: :update)
        end

        features_config.each do |feature_config|
          EasyML::Feature.from_config(feature_config, dataset, action: :update)
        end

        dataset.refresh_async if needs_refresh

        dataset
      end

      # Checks the keys of a dataset section and delegates the nested sections to
      # their own importers.
      #
      # @param dataset_config [Hash] string- or symbol-keyed dataset section
      # @return [Hash] indifferent-access version of the validated section
      # @raise [ArgumentError] on unknown keys or wrongly typed nested sections
      def self.validate(dataset_config)
        raise ArgumentError, "Dataset configuration must be a hash" unless dataset_config.is_a?(Hash)

        # The lookups below use symbol keys; without this conversion a plain
        # string-keyed hash would skip every nested validation.
        dataset_config = dataset_config.with_indifferent_access

        extra_keys = dataset_config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid dataset keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        if dataset_config[:splitter].present?
          dataset_config[:splitter] = EasyML::Import::Splitter.validate(dataset_config[:splitter])
        end

        validate_collection(dataset_config[:columns], "Columns", "column", EasyML::Import::Column)
        validate_collection(dataset_config[:features], "Features", "feature", EasyML::Import::Feature)

        dataset_config
      end

      # Ensures +entries+ is an array of hashes and validates each with +importer+.
      def self.validate_collection(entries, plural, singular, importer)
        return unless entries.present?
        raise ArgumentError, "#{plural} configuration must be an array" unless entries.is_a?(Array)

        entries.each_with_index do |entry, idx|
          unless entry.is_a?(Hash)
            raise ArgumentError, "Each #{singular} configuration must be a hash, at index #{idx}"
          end
          importer.validate(entry, idx)
        end
      end
    end
  end
end
@@ -0,0 +1,36 @@
1
module EasyML
  module Import
    # Creates or updates the features of a dataset from an exported feature config.
    class Feature
      # Keys accepted in a feature config: every EasyML::Feature column that is not
      # listed in EasyML::Export::Feature::UNCONFIGURABLE_COLUMNS.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Feature.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Feature::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
      end

      # Applies one feature config to +dataset+.
      #
      # @param config [Hash] feature attributes; "name" identifies the feature
      # @param dataset [#features] owner whose features are created or updated
      # @param action [Symbol] :create always creates; :update updates the feature
      #   with the same name, or creates it when the dataset has none
      # @return the created or updated feature
      # @raise [ArgumentError] when +action+ is neither :create nor :update
      def self.from_config(config, dataset, action: :create)
        case action
        when :create
          dataset.features.create!(config)
        when :update
          # Only the update path needs the lookup; doing it here keeps :create and
          # invalid actions from issuing a query whose result is never used.
          existing_feature = dataset.features.find_by(name: config["name"])
          # Features can be added during update, unlike columns
          return dataset.features.create!(config) unless existing_feature

          existing_feature.update!(config)
          existing_feature
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # Rejects feature configs that contain keys outside +permitted_keys+.
      #
      # @param config [Hash] feature config
      # @param idx [Integer] position of the config in its list, used in the error
      # @return [Hash] the config, unchanged
      # @raise [ArgumentError] when unknown keys are present
      def self.validate(config, idx)
        extra_keys = config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid keys in feature config at index #{idx}: #{extra_keys.join(", ")}" unless extra_keys.empty?
        config
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
module EasyML
  module Import
    # Creates or updates an EasyML::Model (optionally with its dataset, retraining
    # job and weights) from a configuration shaped like { "model" => { ... } }.
    #
    # NOTE: the original file placed a bare `private` above the helper methods.
    # `private` does not apply to `def self.` methods, so they were public all
    # along; the keyword is dropped here and visibility is unchanged.
    class Model
      # Keys accepted inside the "model" section: configurable EasyML::Model columns,
      # :weights, the model's configuration attributes, and the nested sections.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= EasyML::Model.columns.map(&:name).map(&:to_sym) -
                            EasyML::Export::Model::UNCONFIGURABLE_COLUMNS.map(&:to_sym) +
                            [:weights] +
                            EasyML::Model.configuration_attributes.map(&:to_sym) +
                            [:dataset, :splitter, :retraining_job]
      end

      # Builds or updates a model from +json_config+.
      #
      # @param json_config [Hash, String] configuration hash or its JSON encoding
      # @param action [Symbol] :create or :update
      # @param model [EasyML::Model, nil] target model, required for :update
      # @param include_dataset [Boolean] whether a nested "dataset" section is imported
      # @param dataset [EasyML::Dataset, nil] dataset to attach when none is imported
      # @return [EasyML::Model]
      # @raise [ArgumentError] on missing arguments or an invalid configuration
      def self.from_config(json_config, action: nil, model: nil, include_dataset: true, dataset: nil)
        raise ArgumentError, "Action must be specified" unless action.present?
        raise ArgumentError, "Target model must be specified" if action == :update && model.nil?
        raise ArgumentError, "Dataset must be specified when creating a model" if action == :create && !include_dataset && dataset.nil?

        # validate parses the input and returns an indifferent-access deep copy, so
        # use that copy instead of normalising the input a second time and
        # discarding the validated result.
        config = validate(json_config)
        model_config = config["model"]

        # Config variables would skip custom setters, so better to manually merge
        configuration = model_config.delete("configuration")
        model_config.merge!(configuration) if configuration.present?

        case action
        when :create
          create_model(model_config, include_dataset: include_dataset, dataset: dataset)
        when :update
          update_model(model, model_config, include_dataset: include_dataset)
        else
          raise ArgumentError, "Invalid action: #{action}. Must be :create or :update"
        end
      end

      # Creates a model. When a model with the configured name already exists the
      # new one is saved under a "<name> (Revision N)" name instead.
      #
      # @return [EasyML::Model]
      def self.create_model(model_config, include_dataset:, dataset:)
        model_dataset = if include_dataset && model_config["dataset"].present?
            dataset_config = { "dataset" => model_config.delete("dataset") }
            EasyML::Import::Dataset.from_config(dataset_config, action: :create)
          else
            dataset
          end

        model = EasyML::Model.new(model_config.except("weights", "dataset", "retraining_job"))
        model.dataset = model_dataset

        model_name = model_config["name"]
        model.name = generate_unique_name(model_name) if EasyML::Model.exists?(name: model_name)
        model.save!

        apply_retraining_job(model, model_config["retraining_job"])
        apply_weights(model, model_config["weights"])

        model
      end

      # Updates an existing model in place; its name is preserved.
      #
      # @return [EasyML::Model]
      def self.update_model(model, model_config, include_dataset:)
        if include_dataset && model_config["dataset"].present?
          dataset_config = { "dataset" => model_config.delete("dataset") }
          EasyML::Import::Dataset.from_config(dataset_config, action: :update, dataset: model.dataset)
        end

        model.update!(model_config.except("name", "weights", "dataset", "retraining_job"))

        apply_retraining_job(model, model_config["retraining_job"])
        apply_weights(model, model_config["weights"])

        model
      end

      # Imports the retraining job section, if any, and attaches it to the model.
      def self.apply_retraining_job(model, job_config)
        return unless job_config.present?

        model.retraining_job = EasyML::RetrainingJob.from_config(job_config, model)
        model.save!
        model.reload
      end

      # Stores the weights section, if any, on the model and runs model.import.
      def self.apply_weights(model, weights)
        return unless weights.present?

        model.update!(weights: weights)
        model.import
      end

      # Parses and checks a model configuration.
      #
      # @param json_config [Hash, String]
      # @return [ActiveSupport::HashWithIndifferentAccess] validated deep copy
      # @raise [ArgumentError] on a malformed configuration or unknown keys
      def self.validate(json_config)
        config = json_config.is_a?(String) ? JSON.parse(json_config) : json_config
        raise ArgumentError, "Model configuration must be a hash or JSON object" unless config.is_a?(Hash)

        config = config.deep_dup.with_indifferent_access

        # Validate root keys: must have only "model"
        extra_keys = config.keys.map(&:to_sym) - [:model]
        raise ArgumentError, "Invalid root keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        model_config = config[:model]
        raise ArgumentError, "Configuration must contain a \"model\" section" unless model_config.is_a?(Hash)

        # Validate that model_config does not contain keys that are unconfigurable
        extra_keys = model_config.keys.map(&:to_sym) - permitted_keys
        raise ArgumentError, "Invalid model keys: #{extra_keys.join(", ")}" unless extra_keys.empty?

        # Delegate nested validations to individual importers
        if model_config["dataset"].present?
          model_config["dataset"] = EasyML::Import::Dataset.validate(model_config["dataset"])
        end

        if model_config["retraining_job"].present?
          model_config["retraining_job"] = EasyML::Import::RetrainingJob.validate(model_config["retraining_job"])
        end

        config
      end

      # Returns "<base_name> (Revision N)" where N is one more than the highest
      # revision number already used for exactly this base name.
      #
      # @param base_name [String]
      # @return [String]
      def self.generate_unique_name(base_name)
        # The LIKE clause is only a coarse prefilter (wildcard characters inside
        # base_name can over-match); the anchored pattern decides what counts.
        revision_pattern = /\A#{Regexp.escape(base_name)} \(Revision (\d+)\)\z/

        revision = EasyML::Model.where("name LIKE ?", "#{base_name} (Revision %)")
                                .pluck(:name)
                                .map { |name| name[revision_pattern, 1]&.to_i }
                                .compact
                                .max || 0

        "#{base_name} (Revision #{revision + 1})"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module EasyML
  module Import
    # Applies an exported retraining-job config to a model's retraining job.
    class RetrainingJob
      # Retraining-job columns that a config may set: all EasyML::RetrainingJob
      # columns minus EasyML::Export::RetrainingJob::UNCONFIGURABLE_COLUMNS.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= begin
          job_columns = EasyML::RetrainingJob.columns.map(&:name).map(&:to_sym)
          locked_columns = EasyML::Export::RetrainingJob::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
          job_columns - locked_columns
        end
      end

      # Updates the job returned by model.get_retraining_job with +config+.
      #
      # @return the updated retraining job
      def self.from_config(config, model)
        job = model.get_retraining_job
        job.update!(config)
        job
      end

      # @param config [Hash, nil] retraining-job section of an import config
      # @return [Hash, nil] the config unchanged, or nil when it is blank
      # @raise [ArgumentError] when the config is not a hash or has unknown keys
      def self.validate(config)
        return nil unless config.present?
        raise ArgumentError, "Retraining job configuration must be a hash" unless config.is_a?(Hash)

        unknown_keys = config.keys.map(&:to_sym) - permitted_keys
        return config if unknown_keys.empty?

        raise ArgumentError, "Invalid retraining job keys: #{unknown_keys.join(", ")}"
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module EasyML
  module Import
    # Applies an exported splitter config to a dataset.
    class Splitter
      # Splitter columns that a config may set: all EasyML::Splitter columns minus
      # EasyML::Export::Splitter::UNCONFIGURABLE_COLUMNS.
      #
      # @return [Array<Symbol>]
      def self.permitted_keys
        @permitted_keys ||= begin
          splitter_columns = EasyML::Splitter.columns.map(&:name).map(&:to_sym)
          locked_columns = EasyML::Export::Splitter::UNCONFIGURABLE_COLUMNS.map(&:to_sym)
          splitter_columns - locked_columns
        end
      end

      # Updates the dataset's splitter when it has one, otherwise creates it.
      #
      # @return the splitter, or nil when +config+ is blank
      def self.from_config(config, dataset)
        return nil unless config.present?
        return dataset.create_splitter!(config) unless dataset.splitter.present?

        dataset.splitter.update!(config)
        dataset.splitter
      end

      # @param config [Hash, nil] splitter section of an import config
      # @return [Hash, nil] the config unchanged, or nil when it is blank
      # @raise [ArgumentError] when the config is not a hash or has unknown keys
      def self.validate(config)
        return nil unless config.present?
        raise ArgumentError, "Splitter configuration must be a hash" unless config.is_a?(Hash)

        unknown_keys = config.keys.map(&:to_sym) - permitted_keys
        return config if unknown_keys.empty?

        raise ArgumentError, "Invalid splitter keys: #{unknown_keys.join(", ")}"
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
# == Schema Information
#
# Table name: easy_ml_lineages
#
#  id          :bigint           not null, primary key
#  column_id   :bigint           not null
#  key         :string           not null
#  description :string
#  occurred_at :datetime
#  created_at  :datetime         not null
#  updated_at  :datetime         not null
#
module EasyML
  # One lineage entry (key, description, occurred_at) recorded for a column.
  class Lineage < ActiveRecord::Base
    belongs_to :column

    class << self
      # Builds the lineage records for +column+ from the entries produced by
      # EasyML::Column::Lineage: a new record for every learned key that has no
      # row yet, and the existing rows with their occurred_at/description
      # refreshed from the learned entry with the same key.
      #
      # Nothing is saved here; the records are only built/assigned and returned.
      #
      # @param column [EasyML::Column]
      # @return [Array<EasyML::Lineage>] new (unsaved) records first, then the
      #   existing records
      def learn(column)
        # Kept in a local: this is per-call data and must not live in a
        # class-level instance variable shared between calls.
        learned = EasyML::Column::Lineage.new(column).lineage

        # Load the existing rows once instead of one exists? query per entry.
        existing_lineage = where(column_id: column.id).to_a
        existing_keys = existing_lineage.map { |record| record.key.to_s }

        missing_lineage = learned.reject { |entry| existing_keys.include?(entry[:key].to_s) }.map do |entry|
          EasyML::Lineage.new(
            column_id: column.id,
            key: entry[:key],
            occurred_at: entry[:occurred_at],
            description: entry[:description],
          )
        end

        existing_lineage.each do |record|
          matching_entry = learned.detect { |entry| entry[:key].to_s == record.key.to_s }
          # A row whose key is no longer learned is returned untouched instead of
          # raising NoMethodError on nil.
          next unless matching_entry

          record.assign_attributes(
            occurred_at: matching_entry[:occurred_at],
            description: matching_entry[:description],
          )
        end

        # Return the records themselves: assign_attributes' return value is not
        # the record, so it must not be collected with map.
        missing_lineage.concat(existing_lineage)
      end
    end
  end
end
@@ -45,7 +45,7 @@ module EasyML
45
45
  MODEL_NAMES = MODEL_OPTIONS.keys.freeze
46
46
  MODEL_CONSTANTS = MODEL_OPTIONS.values.map(&:constantize)
47
47
 
48
- add_configuration_attributes :task, :objective, :hyperparameters, :evaluator, :callbacks, :metrics
48
+ add_configuration_attributes :task, :objective, :hyperparameters, :callbacks, :metrics
49
49
  MODEL_CONSTANTS.flat_map(&:configuration_attributes).each do |attribute|
50
50
  add_configuration_attributes attribute
51
51
  end
@@ -53,10 +53,10 @@ module EasyML
53
53
  belongs_to :dataset
54
54
  belongs_to :model_file, class_name: "EasyML::ModelFile", foreign_key: "model_file_id", optional: true
55
55
 
56
- has_one :retraining_job, class_name: "EasyML::RetrainingJob"
56
+ has_one :retraining_job, class_name: "EasyML::RetrainingJob", dependent: :destroy
57
57
  accepts_nested_attributes_for :retraining_job
58
- has_many :retraining_runs, class_name: "EasyML::RetrainingRun"
59
- has_many :deploys, class_name: "EasyML::Deploy"
58
+ has_many :retraining_runs, class_name: "EasyML::RetrainingRun", dependent: :destroy
59
+ has_many :deploys, class_name: "EasyML::Deploy", dependent: :destroy
60
60
 
61
61
  scope :deployed, -> { EasyML::ModelHistory.deployed }
62
62
 
@@ -110,6 +110,13 @@ module EasyML
110
110
  is_training == true
111
111
  end
112
112
 
113
# Stops an in-progress training: kills the training job through EasyML::Reaper,
# resets the training flags, marks the most recent retraining run (if any) as
# aborted, and releases the model lock.
def abort!
  EasyML::Reaper.kill(EasyML::TrainingJob, id)
  update(is_training: false, status: :ready)
  # There may be no run yet; without the safe navigation a nil here raised
  # before unlock! and left the model locked.
  get_retraining_job.retraining_runs.last&.update(status: :aborted)
  unlock!
end
119
+
113
120
  def train(async: true)
114
121
  pending_run # Ensure we update the pending job before enqueuing in background so UI updates properly
115
122
  update(is_training: true)
@@ -120,26 +127,41 @@ module EasyML
120
127
  end
121
128
  end
122
129
 
130
# True when at least one of this model's retraining runs has status :success.
def trained?
  successful_runs = retraining_runs.where(status: :success)
  successful_runs.exists?
end
133
+
134
# True when the model has a non-blank inference_version.
def deployed?
  !inference_version.blank?
end
137
+
138
# Hands +weights+ to the adapter together with the model file, then calls
# save_model_file.
#
# @raise [ArgumentError] when the model has no model_type
def weights=(weights)
  if model_type.blank?
    raise ArgumentError, "Cannot set weights on model without type"
  end

  target_file = get_model_file
  adapter.set_weights(target_file, weights)
  save_model_file
end
145
+
146
# Returns whatever the adapter reports as the weights of the current model file.
def weights
  model_adapter = adapter
  model_adapter.weights(get_model_file)
end
149
+
123
150
  def get_retraining_job
124
- if retraining_job
125
- self.evaluator = retraining_job.evaluator
126
- evaluator = self.evaluator.symbolize_keys
127
- else
128
- default_eval = Core::ModelEvaluator.default_evaluator(task)
129
- self.evaluator = default_eval
130
- evaluator = default_eval
131
- end
151
+ return retraining_job if retraining_job.present?
132
152
 
133
- retraining_job || create_retraining_job(
134
- model: self,
135
- active: false,
136
- evaluator: evaluator,
137
- metric: evaluator[:metric],
138
- direction: evaluator[:direction],
139
- threshold: evaluator[:threshold],
140
- frequency: "month",
141
- at: { hour: 0, day_of_month: 1 },
142
- )
153
+ evaluator = Core::ModelEvaluator.default_evaluator(task).symbolize_keys
154
+
155
+ method = persisted? ? :create_retraining_job : :build_retraining_job
156
+
157
+ send(method,
158
+ model: self,
159
+ active: false,
160
+ metric: evaluator[:metric],
161
+ direction: evaluator[:direction],
162
+ threshold: evaluator[:threshold],
163
+ frequency: "month",
164
+ at: { hour: 0, day_of_month: 1 })
143
165
  end
144
166
 
145
167
  def pending_run
@@ -147,6 +169,15 @@ module EasyML
147
169
  job.retraining_runs.find_or_create_by(status: "pending", model: self)
148
170
  end
149
171
 
172
# Inside the model lock, takes the pending run and wraps a block in its
# wrap_training that yields this model together with its hyperparameters hash.
def import
  lock_model do
    pending_run.wrap_training { [self, hyperparameters.to_h] }
  end
end
180
+
150
181
  def actually_train(&progress_block)
151
182
  lock_model do
152
183
  run = pending_run
@@ -186,6 +217,20 @@ module EasyML
186
217
  "training:#{self.name}:#{self.id}"
187
218
  end
188
219
 
220
# Builds the adapter's hyperparameter object from +hyperparameters+ and caches
# it in @hypers. Does nothing while the model has no model_type.
def hyperparameters=(hyperparameters)
  if model_type.present?
    @hypers = adapter.build_hyperparameters(hyperparameters)
  end
end
225
+
226
# Returns the cached @hypers, building it from @hyperparameters through the
# adapter the first time it is needed.
def hyperparameters
  return @hypers if @hypers

  @hypers = adapter.build_hyperparameters(@hyperparameters)
end
229
+
230
# Returns the cached @cbs, building it from @callbacks through the adapter the
# first time it is needed.
def callbacks
  return @cbs if @cbs

  @cbs = adapter.build_callbacks(@callbacks)
end
233
+
189
234
  def hyperparameter_search(&progress_block)
190
235
  tuner = retraining_job.tuner_config.symbolize_keys
191
236
  extra_params = {
@@ -232,16 +277,11 @@ module EasyML
232
277
  alias_method :latest_version, :inference_version
233
278
  alias_method :deployed, :inference_version
234
279
 
235
- def hyperparameters
236
- @hypers ||= adapter.build_hyperparameters(@hyperparameters)
237
- end
238
-
239
- def callbacks
240
- @cbs ||= adapter.build_callbacks(@callbacks)
241
- end
242
-
243
280
  def predict(xs)
244
281
  load_model!
282
+ unless xs.is_a?(XGBoost::DMatrix)
283
+ xs = dataset.normalize(xs, inference: true)
284
+ end
245
285
  adapter.predict(xs)
246
286
  end
247
287
 
@@ -309,7 +349,7 @@ module EasyML
309
349
  def fit(tuning: false, x_train: nil, y_train: nil, x_valid: nil, y_valid: nil, &progress_block)
310
350
  return fit_in_batches(**batch_args.merge!(tuning: tuning), &progress_block) if fit_in_batches?
311
351
 
312
- dataset.refresh
352
+ dataset.refresh if dataset.reload.needs_refresh?
313
353
  adapter.fit(tuning: tuning, x_train: x_train, y_train: y_train, x_valid: x_valid, y_valid: y_valid, &progress_block)
314
354
  end
315
355
 
@@ -354,6 +394,10 @@ module EasyML
354
394
  dataset.decode_labels(ys, col: col)
355
395
  end
356
396
 
397
# The evaluator of the model's retraining job, falling back to
# default_evaluator when there is no job or the job has no evaluator.
def evaluator
  job = get_retraining_job
  configured = job.evaluator unless job.nil?
  configured || default_evaluator
end
400
+
357
401
  def evaluate(y_pred: nil, y_true: nil, x_true: nil, evaluator: nil, dataset: nil)
358
402
  evaluator ||= self.evaluator
359
403
  if y_pred.nil?
@@ -366,10 +410,6 @@ module EasyML
366
410
  EasyML::Core::ModelEvaluator.evaluate(model: self, y_pred: y_pred, y_true: y_true, x_true: x_true, dataset: dataset, evaluator: evaluator)
367
411
  end
368
412
 
369
- def evaluator
370
- instance_variable_get(:@evaluator) || default_evaluator
371
- end
372
-
373
413
  def default_evaluator
374
414
  return nil unless task.present?
375
415
 
@@ -381,7 +421,7 @@ module EasyML
381
421
  end
382
422
 
383
423
  def evals
384
- last_run&.metrics || {}
424
+ (last_run&.metrics || {}).with_indifferent_access
385
425
  end
386
426
 
387
427
  def metric_accessor(metric)
@@ -536,6 +576,28 @@ module EasyML
536
576
  end
537
577
  end
538
578
 
579
# Names of model columns listed as not configurable.
# NOTE(review): the importers shown in this change subtract
# EasyML::Export::Model::UNCONFIGURABLE_COLUMNS from the permitted keys; confirm
# how this constant relates to that one.
# Frozen so that the shared list cannot be modified in place by a caller.
UNCONFIGURABLE_COLUMNS = %w[
  id
  dataset_id
  model_file_id
  root_dir
  file
  sha
  last_trained_at
  is_training
  created_at
  updated_at
  slug
].freeze
592
+
593
# Exports this model through EasyML::Export::Model.
#
# @param include_dataset [Boolean] forwarded to the exporter
def to_config(include_dataset: false)
  exporter = EasyML::Export::Model
  exporter.to_config(self, include_dataset: include_dataset)
end
596
+
597
# Imports a model through EasyML::Import::Model, forwarding every option as given.
def self.from_config(json_config, action: nil, model: nil, include_dataset: true, dataset: nil)
  options = { action: action, model: model, include_dataset: include_dataset, dataset: dataset }
  EasyML::Import::Model.from_config(json_config, **options)
end
600
+
539
601
  private
540
602
 
541
603
  def default_evaluation_inputs
@@ -615,6 +677,8 @@ module EasyML
615
677
  end
616
678
 
617
679
  def validate_metrics_allowed
680
+ set_defaults if metrics.nil? || metrics.empty?
681
+
618
682
  unknown_metrics = metrics.select { |metric| allowed_metrics.exclude?(metric) }
619
683
  return unless unknown_metrics.any?
620
684
 
@@ -624,7 +688,7 @@ module EasyML
624
688
 
625
689
  def set_slug
626
690
  if slug.nil? && name.present?
627
- self.slug = name.gsub(/\s/, "_").downcase
691
+ self.slug = name.gsub(/\s/, "_").gsub(/[^a-zA-Z0-9_]/, "").downcase
628
692
  end
629
693
  end
630
694
  end
@@ -97,5 +97,11 @@ module EasyML
97
97
  def extension_allowlist
98
98
  %w[bin model json]
99
99
  end
100
+
101
# Writes +content+ to full_path, creating the parent directories first, and
# then passes the path to upload.
def write(content)
  destination = full_path
  FileUtils.mkdir_p(File.dirname(destination))
  File.write(destination, content)
  upload(destination)
end
100
106
  end
101
107
  end