easy_ml 0.2.0.pre.rc40 → 0.2.0.pre.rc43
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Rakefile +11 -9
- data/app/controllers/easy_ml/settings_controller.rb +1 -4
- data/app/frontend/pages/SettingsPage.tsx +1 -80
- data/app/jobs/easy_ml/batch_job.rb +45 -1
- data/app/jobs/easy_ml/compute_feature_job.rb +68 -4
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +30 -0
- data/app/models/easy_ml/dataset.rb +23 -22
- data/app/models/easy_ml/dataset_history.rb +1 -6
- data/app/models/easy_ml/datasources/polars_datasource.rb +4 -18
- data/app/models/easy_ml/event.rb +2 -1
- data/app/models/easy_ml/event_context.rb +58 -0
- data/app/models/easy_ml/feature.rb +43 -14
- data/app/models/easy_ml/model.rb +4 -7
- data/app/models/easy_ml/model_file.rb +17 -48
- data/app/models/easy_ml/splitter_history.rb +16 -0
- data/app/serializers/easy_ml/prediction_serializer.rb +6 -1
- data/config/initializers/zhong.rb +4 -0
- data/lib/easy_ml/data/date_converter.rb +1 -0
- data/lib/easy_ml/data/polars_reader.rb +17 -4
- data/lib/easy_ml/data/statistics_learner.rb +1 -1
- data/lib/easy_ml/engine.rb +22 -0
- data/lib/easy_ml/pending_migrations.rb +19 -0
- data/lib/easy_ml/predict.rb +25 -12
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +39 -157
- data/lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_features.rb.tt +13 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_column_histories.rb.tt +4 -2
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_columns.rb.tt +22 -20
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_dataset_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasets.rb.tt +26 -24
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasource_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_datasources.rb.tt +12 -10
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_deploys.rb.tt +21 -19
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_event_contexts.rb.tt +14 -0
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_events.rb.tt +16 -14
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_feature_histories.rb.tt +10 -8
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_features.rb.tt +27 -25
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_file_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_files.rb.tt +13 -11
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_model_histories.rb.tt +5 -3
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_models.rb.tt +28 -26
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_predictions.rb.tt +13 -11
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_retraining_jobs.rb.tt +70 -66
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_settings.rb.tt +6 -4
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitter_histories.rb.tt +6 -4
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_splitters.rb.tt +11 -9
- data/lib/easy_ml/railtie/templates/migration/create_easy_ml_tuner_jobs.rb.tt +34 -30
- data/lib/easy_ml/railtie/templates/migration/drop_path_from_easy_ml_model_files.rb.tt +11 -0
- data/lib/easy_ml/version.rb +1 -1
- data/lib/easy_ml.rb +1 -0
- data/public/easy_ml/assets/.vite/manifest.json +2 -2
- data/public/easy_ml/assets/assets/Application-zpGA_Q9c.css +1 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-DF5SSkYi.js → Application.tsx-jPsqOyb0.js} +87 -97
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-jPsqOyb0.js.map +1 -0
- metadata +11 -19
- data/public/easy_ml/assets/assets/Application-Cu7lNJmG.css +0 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-DF5SSkYi.js.map +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fc7f7afffd4e61312ab0302fb20e40a0eed3dcde05e16e9ea9ed1f523cb8681
|
4
|
+
data.tar.gz: d39511d86a3e01bc4ececcc20e1f7d0a583ac2eb07c8752c4ca2766b38318343
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 81f8594d04fcaa7274e7fc1a20ff8f67bd03599988f43e8377d10734639eb3aa48b8516f0422c7ca496774c2922e1e6042105cea72a7bad01c3bf847a553f636
|
7
|
+
data.tar.gz: d5c7cf29044fe2ac184b88ae2b68f1b9c12550157c331f263eb2e3ee79225c40245736c2531610141138e58753ffeaa257145052c02aa6fbe7598cafb0699a78
|
data/Rakefile
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require "sprockets/railtie"
|
4
3
|
require "bundler/gem_tasks"
|
5
4
|
require "rspec/core/rake_task"
|
6
5
|
|
@@ -20,16 +19,19 @@ require_relative "lib/easy_ml"
|
|
20
19
|
# Load the annotate tasks
|
21
20
|
require "annotate/annotate_models"
|
22
21
|
|
22
|
+
require "combustion"
|
23
|
+
Combustion.path = "spec/internal"
|
24
|
+
Combustion::Application.configure_for_combustion
|
23
25
|
# Boots the Combustion test application and redirects ActiveRecord's
# migration lookup to the migrations kept under spec/internal.
task :environment do
  Combustion::Application.initialize!

  # Reset migrations paths so we can keep the migrations in the project root,
  # not the Rails root. The same array is handed to both ActiveRecord settings.
  migrations_paths = ["spec/internal/db/migrate"]
  [ActiveRecord::Tasks::DatabaseTasks, ActiveRecord::Migrator].each do |target|
    target.migrations_paths = migrations_paths
  end
end
|
34
|
+
Combustion::Application.load_tasks
|
33
35
|
|
34
36
|
namespace :easy_ml do
|
35
37
|
task annotate_models: :environment do
|
@@ -29,11 +29,10 @@ module EasyML
|
|
29
29
|
EasyML::Configuration.configure do |config|
|
30
30
|
config.storage = @settings.storage
|
31
31
|
config.timezone = @settings.timezone
|
32
|
-
config.s3_access_key_id = @settings.s3_access_key_id
|
33
|
-
config.s3_secret_access_key = @settings.s3_secret_access_key
|
34
32
|
config.s3_bucket = @settings.s3_bucket
|
35
33
|
config.s3_region = @settings.s3_region
|
36
34
|
config.s3_prefix = @settings.s3_prefix
|
35
|
+
config.wandb_api_key = @settings.wandb_api_key
|
37
36
|
end
|
38
37
|
flash.now[:notice] = "Settings saved."
|
39
38
|
render inertia: "pages/SettingsPage", props: {
|
@@ -47,8 +46,6 @@ module EasyML
|
|
47
46
|
params.require(:settings).permit(
|
48
47
|
:storage,
|
49
48
|
:timezone,
|
50
|
-
:s3_access_key_id,
|
51
|
-
:s3_secret_access_key,
|
52
49
|
:s3_bucket,
|
53
50
|
:s3_region,
|
54
51
|
:s3_prefix,
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import React, { useState } from 'react';
|
2
2
|
import { usePage } from '@inertiajs/react'
|
3
3
|
import { useInertiaForm } from 'use-inertia-form';
|
4
|
-
import { Settings2, Save, AlertCircle, Key,
|
4
|
+
import { Settings2, Save, AlertCircle, Key, Globe2, Database } from 'lucide-react';
|
5
5
|
import { PluginSettings } from '../components/settings/PluginSettings';
|
6
6
|
|
7
7
|
interface Settings {
|
@@ -9,9 +9,6 @@ interface Settings {
|
|
9
9
|
timezone: string;
|
10
10
|
s3_bucket: string;
|
11
11
|
s3_region: string;
|
12
|
-
s3_access_key_id: string;
|
13
|
-
s3_secret_access_key: string;
|
14
|
-
wandb_api_key: string;
|
15
12
|
}
|
16
13
|
}
|
17
14
|
|
@@ -88,7 +85,6 @@ export default function SettingsPage({ settings: initialSettings }: { settings:
|
|
88
85
|
<select
|
89
86
|
id="timezone"
|
90
87
|
value={formData.settings.timezone}
|
91
|
-
|
92
88
|
onChange={(e) => setFormData({
|
93
89
|
...formData,
|
94
90
|
settings: {
|
@@ -113,7 +109,6 @@ export default function SettingsPage({ settings: initialSettings }: { settings:
|
|
113
109
|
{/* S3 Configuration */}
|
114
110
|
<div className="space-y-4">
|
115
111
|
<div className="flex items-center gap-2 mb-4">
|
116
|
-
|
117
112
|
<Database className="w-5 h-5 text-gray-500" />
|
118
113
|
<h3 className="text-lg font-medium text-gray-900">S3 Configuration</h3>
|
119
114
|
</div>
|
@@ -162,80 +157,6 @@ export default function SettingsPage({ settings: initialSettings }: { settings:
|
|
162
157
|
</select>
|
163
158
|
</div>
|
164
159
|
</div>
|
165
|
-
|
166
|
-
<div className="bg-blue-50 rounded-lg p-4">
|
167
|
-
<div className="flex gap-2">
|
168
|
-
<AlertCircle className="w-5 h-5 text-blue-500 mt-0.5" />
|
169
|
-
<div>
|
170
|
-
<h4 className="text-sm font-medium text-blue-900">AWS Credentials</h4>
|
171
|
-
<p className="mt-1 text-sm text-blue-700">
|
172
|
-
These credentials will be used as default for all S3 operations. You can override them per datasource.
|
173
|
-
</p>
|
174
|
-
</div>
|
175
|
-
</div>
|
176
|
-
</div>
|
177
|
-
|
178
|
-
<div className="grid grid-cols-2 gap-6">
|
179
|
-
<div>
|
180
|
-
<label htmlFor="accessKeyId" className="block text-sm font-medium text-gray-700 mb-1">
|
181
|
-
Access Key ID
|
182
|
-
</label>
|
183
|
-
<div className="relative">
|
184
|
-
<Key className="absolute left-3 top-1/2 transform -translate-y-1/2 w-4 h-4 text-gray-400" />
|
185
|
-
<input
|
186
|
-
type="text"
|
187
|
-
id="accessKeyId"
|
188
|
-
value={formData.settings.s3_access_key_id}
|
189
|
-
onChange={(e) => setFormData({
|
190
|
-
...formData,
|
191
|
-
settings: {
|
192
|
-
...formData.settings,
|
193
|
-
s3_access_key_id: e.target.value
|
194
|
-
}
|
195
|
-
})}
|
196
|
-
className="mt-1 block w-full pl-9 rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
|
197
|
-
placeholder="AKIA..."
|
198
|
-
/>
|
199
|
-
</div>
|
200
|
-
</div>
|
201
|
-
|
202
|
-
<div>
|
203
|
-
<label htmlFor="secretAccessKey" className="block text-sm font-medium text-gray-700 mb-1">
|
204
|
-
Secret Access Key
|
205
|
-
</label>
|
206
|
-
<div className="relative">
|
207
|
-
<Key className="absolute left-3 top-1/2 transform -translate-y-1/2 w-4 h-4 text-gray-400" />
|
208
|
-
<input
|
209
|
-
type={showSecretKey ? 'text' : 'password'}
|
210
|
-
id="secretAccessKey"
|
211
|
-
value={formData.settings.s3_secret_access_key}
|
212
|
-
onChange={(e) => setFormData({
|
213
|
-
...formData,
|
214
|
-
settings: {
|
215
|
-
...formData.settings,
|
216
|
-
s3_secret_access_key: e.target.value
|
217
|
-
}
|
218
|
-
})}
|
219
|
-
className="mt-1 block w-full pl-9 pr-24 rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"
|
220
|
-
placeholder="Your secret key"
|
221
|
-
/>
|
222
|
-
<button
|
223
|
-
type="button"
|
224
|
-
onClick={() => setShowSecretKey(!showSecretKey)}
|
225
|
-
className="absolute right-2 top-1/2 transform -translate-y-1/2 text-sm text-gray-500 hover:text-gray-700"
|
226
|
-
>
|
227
|
-
{showSecretKey ? 'Hide' : 'Show'}
|
228
|
-
</button>
|
229
|
-
</div>
|
230
|
-
</div>
|
231
|
-
</div>
|
232
|
-
</div>
|
233
|
-
|
234
|
-
<div className="border-t border-gray-200 pt-8">
|
235
|
-
<PluginSettings
|
236
|
-
settings={formData.settings}
|
237
|
-
setData={(settings) => setFormData({ ...settings })}
|
238
|
-
/>
|
239
160
|
</div>
|
240
161
|
|
241
162
|
<div className="pt-6 border-t flex items-center justify-between">
|
@@ -12,7 +12,14 @@ module EasyML
|
|
12
12
|
# E.g. EasyML::ComputeFeatureBatchJob.enqueue_batch(features.map(&:id))
|
13
13
|
#
|
14
14
|
def enqueue_batch(args_list, batch_id = default_batch_id)
|
15
|
-
args_list = args_list.map
|
15
|
+
args_list = args_list.map do |arg|
|
16
|
+
arg = arg.is_a?(Array) ? arg : [arg]
|
17
|
+
arg.map do |arg|
|
18
|
+
arg.merge!(
|
19
|
+
batch_id: batch_id,
|
20
|
+
)
|
21
|
+
end
|
22
|
+
end
|
16
23
|
store_batch_arguments(batch_id, args_list)
|
17
24
|
|
18
25
|
args_list.each do |args|
|
@@ -22,8 +29,45 @@ module EasyML
|
|
22
29
|
batch_id
|
23
30
|
end
|
24
31
|
|
32
|
+
# Enqueues a list of batches so that they run one after another.
#
# The full argument list is stored under the parent batch id, every batch
# except the first is pushed (JSON-encoded, in order) onto the Redis list
# "batch:<parent_id>:remaining", and only the first batch is enqueued now.
#
# @param args_list [Array<Array<Hash>>] one array of job arguments per batch;
#   the parent batch id is read from it via get_parent_batch_id
# @return the result of enqueue_batch for the first batch, or nil when
#   args_list is nil or empty (nothing is stored or enqueued in that case)
def enqueue_ordered_batches(args_list)
  # Guard: an empty list previously crashed on `args_list[1..]` being nil.
  return if args_list.nil? || args_list.empty?

  parent_id = get_parent_batch_id(args_list)
  store_batch_arguments(parent_id, args_list)

  remaining_key = "batch:#{parent_id}:remaining"
  args_list.drop(1).each do |pending_batch|
    Resque.redis.rpush(remaining_key, pending_batch.to_json)
  end

  enqueue_batch(args_list.first)
end
|
45
|
+
|
46
|
+
# Pops the next pending batch for +parent_id+ and enqueues it on +caller+.
#
# @param caller [#enqueue_batch] object that receives the decoded batch
# @param parent_id [String] id used to build the "batch:<id>:remaining" key
# @return the result of caller.enqueue_batch, or nil when no batch is pending
def enqueue_next_batch(caller, parent_id)
  next_batch = Resque.redis.lpop("batch:#{parent_id}:remaining")
  # lpop yields nil once the list is drained; without this guard a nil
  # payload was handed to enqueue_batch.
  return if next_batch.nil?

  caller.enqueue_batch(Resque.decode(next_batch))
end
|
52
|
+
|
53
|
+
# True when at least one batch is still waiting under +parent_id+.
def next_batch?(parent_id)
  pending = batches_remaining(parent_id)
  pending > 0
end
|
56
|
+
|
57
|
+
# Length of the Redis list "batch:<parent_id>:remaining".
def batches_remaining(parent_id)
  remaining_key = "batch:#{parent_id}:remaining"
  Resque.redis.llen(remaining_key)
end
|
60
|
+
|
61
|
+
# Deletes the Redis list "batch:<parent_id>:remaining".
def cleanup_batch(parent_id)
  remaining_key = "batch:#{parent_id}:remaining"
  Resque.redis.del(remaining_key)
end
|
64
|
+
|
25
65
|
private
|
26
66
|
|
67
|
+
# Reads :parent_batch_id from the first argument hash in +args_list+.
#
# @param args_list [Array] (possibly nested) array of argument hashes
# @return the :parent_batch_id of the first hash, or nil when the list is
#   empty or the key is absent
def get_parent_batch_id(args_list)
  # flatten already returns a new array, so no dup is needed; &. covers an
  # empty list, which previously raised NoMethodError on nil.
  args_list.flatten.first&.dig(:parent_batch_id)
end
|
70
|
+
|
27
71
|
# Store batch arguments in Redis
|
28
72
|
def store_batch_arguments(batch_id, args_list)
|
29
73
|
redis_key = "#{batch(batch_id)}:original_args"
|
@@ -1,19 +1,83 @@
|
|
1
1
|
module EasyML
  # Job that fits one batch of a single feature and, once a batch finishes,
  # either enqueues the next pending batch or finalizes the dataset.
  class ComputeFeatureJob < BatchJob
    extend EasyML::DataframeSerialization

    @queue = :easy_ml

    # Fits one batch for the feature named in +options+.
    #
    # @param batch_id [String] id of the batch this job belongs to
    # @param options [Hash] job arguments; must contain feature_id (string or
    #   symbol key). The hash is symbolized and extended in place.
    # @return [void]
    def self.perform(batch_id, options = {})
      options.symbolize_keys!
      feature_id = options.dig(:feature_id)
      feature = EasyML::Feature.find(feature_id)
      dataset = feature.dataset

      # Check if any feature has failed before proceeding
      if dataset.features.any? { |f| f.workflow_status.to_s == "failed" }
        puts "Aborting feature computation due to previous feature failure"
        return
      end

      begin
        # Statuses are compared via to_s so the check behaves the same whether
        # workflow_status is exposed as a String or a Symbol.
        feature.update(workflow_status: :analyzing) if feature.workflow_status.to_s == "ready"
        feature.fit_batch(options.merge!(batch_id: batch_id))
      rescue => e
        EasyML::Feature.transaction do
          # `next` leaves the block normally; `return` inside a transaction
          # block is deprecated/ambiguous across Rails versions.
          next if dataset.reload.workflow_status.to_s == "failed"

          feature.update(workflow_status: :failed)
          dataset.update(workflow_status: :failed)
          build_error_with_context(dataset, e, batch_id, feature)
        end
      end
    end

    # Records +error+ as an event on +dataset+ and attaches the failing batch
    # (as built by feature.build_batch) as the event's context.
    def self.build_error_with_context(dataset, error, batch_id, feature)
      error = EasyML::Event.handle_error(dataset, error)
      batch = feature.build_batch(batch_id: batch_id)

      # Convert any dataframes in the context to serialized form
      error.create_context(context: batch)
    end

    # Runs after every job of +batch_id+ has completed.
    #
    # On a failed feature, resets sibling features from analyzing to ready and
    # drops the remaining queued batches. Otherwise calls feature.after_fit and
    # either enqueues the next batch or finalizes the dataset.
    def self.after_batch_hook(batch_id, *args)
      puts "After batch!"
      batch_args = fetch_batch_arguments(batch_id).flatten.map(&:symbolize_keys)
      feature_ids = batch_args.pluck(:feature_id).uniq
      parent_id = batch_args.pluck(:parent_batch_id).first

      feature = EasyML::Feature.find_by(id: feature_ids.first)

      if feature.failed?
        # `dataset` was previously referenced here before being assigned,
        # which raised NameError; resolve it through the feature instead.
        feature.dataset.features.where(workflow_status: :analyzing).update_all(workflow_status: :ready)
        return BatchJob.cleanup_batch(parent_id)
      end

      feature.after_fit

      if BatchJob.next_batch?(parent_id)
        BatchJob.enqueue_next_batch(self, parent_id)
      else
        feature.dataset.after_fit_features
      end
    end

    # Dequeues every job on the :easy_ml queue whose first argument is
    # +batch_id+, re-peeking until a pass finds no matching job.
    #
    # NOTE(review): a bare `private` used to precede this method, but it has
    # no effect on `def self.` methods, so this has always been public. Use
    # private_class_method if it should be hidden — confirm no external
    # callers first.
    # NOTE(review): only the first 1000 queued jobs are inspected per pass.
    def self.remove_remaining_batch_jobs(batch_id)
      loop do
        jobs = Resque.peek(:easy_ml, 0, 1000)
        matching = jobs.select { |job| job["args"][0] == batch_id }
        # Stop once nothing (left) in the inspected window belongs to the batch.
        break if matching.empty?

        matching.each { |job| Resque.dequeue(self, *job["args"]) }
      end
    end
  end
end
|
81
|
+
|
82
|
+
# If any feature fails, the entire batch fails
|
83
|
+
# If any feature fails, the RELATED batches should fail
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module EasyML
  # Mixin that converts Polars data frames to and from the Hash structure
  # produced by parsing DataFrame#write_json.
  module DataframeSerialization
    extend ActiveSupport::Concern

    # @param df [Polars::DataFrame, nil]
    # @return [Hash, nil] parsed JSON form of the frame; nil when df is nil/false
    def serialize_dataframe(df)
      JSON.parse(df.write_json) if df
    end

    # Rebuilds a data frame from the Hash produced by serialize_dataframe.
    #
    # @param df_data [Hash, nil] expects a "columns" array whose entries carry
    #   "name", "datatype" and "values"
    # @return [Polars::DataFrame, nil] nil when df_data is blank or has no
    #   "columns" key
    def deserialize_dataframe(df_data)
      return if df_data.blank? || !df_data.key?("columns")

      series = df_data["columns"].map do |col|
        datatype = col["datatype"]
        dtype =
          if !datatype.is_a?(Hash)
            # Plain datatype names are resolved as Polars constants.
            Polars.const_get(datatype)
          elsif datatype["Datetime"]
            Polars::Datetime.new(datatype["Datetime"][0].downcase.to_sym).class
          else
            # Any other structured datatype falls back to Utf8.
            Polars::Utf8
          end

        Polars::Series.new(col["name"], col["values"], dtype: dtype)
      end

      Polars::DataFrame.new(series)
    end
  end
end
|
@@ -102,13 +102,9 @@ module EasyML
|
|
102
102
|
end
|
103
103
|
|
104
104
|
# Path of this dataset's root directory as a String: the stored root_dir
# attribute (or default_root_dir when none is stored) joined onto
# EasyML::Engine.root_dir.
def root_dir
  stored_dir = read_attribute(:root_dir)
  EasyML::Engine.root_dir.join(stored_dir || default_root_dir).to_s
end
|
113
109
|
|
114
110
|
def destructively_cleanup!
|
@@ -219,8 +215,11 @@ module EasyML
|
|
219
215
|
end
|
220
216
|
|
221
217
|
# Called once feature fitting is done: unlocks and reloads the dataset, then,
# unless it is marked failed, flags every feature as fit and refreshes.
def after_fit_features
  unlock!
  reload

  unless failed?
    features.update_all(needs_fit: false, fit_at: Time.current)
    actually_refresh
  end
end
|
226
225
|
|
@@ -281,22 +280,24 @@ module EasyML
|
|
281
280
|
end
|
282
281
|
|
283
282
|
# Runs the given block while the dataset is locked and marked "analyzing".
#
# Returns false immediately for history classes. The lock is always released
# after the block; on any error the dataset is marked "failed", the easy_ml
# frames of the backtrace are printed, and the error is re-raised.
def refreshing
  return false if is_history_class?

  unlock! unless analyzing?

  lock_dataset do
    update(workflow_status: "analyzing")
    fully_reload
    yield
  ensure
    unlock!
  end
rescue => e
  update(workflow_status: "failed")
  e.backtrace.grep(/easy_ml/).each { |line| puts line }
  raise e
end
|
301
302
|
|
302
303
|
def unlock!
|
@@ -30,17 +30,12 @@ module EasyML
|
|
30
30
|
self.table_name = "easy_ml_dataset_histories"
|
31
31
|
include Historiographer::History
|
32
32
|
|
33
|
-
has_many :columns,
|
34
|
-
->(dataset_history) { where(snapshot_id: dataset_history.snapshot_id) },
|
33
|
+
has_many :columns, ->(dataset_history) { where(snapshot_id: dataset_history.snapshot_id) },
|
35
34
|
class_name: "EasyML::ColumnHistory",
|
36
35
|
foreign_key: "dataset_id",
|
37
36
|
primary_key: "dataset_id",
|
38
37
|
extend: EasyML::ColumnList
|
39
38
|
|
40
|
-
def root_dir
|
41
|
-
read_attribute(:root_dir)
|
42
|
-
end
|
43
|
-
|
44
39
|
def fit
|
45
40
|
false
|
46
41
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module EasyML
|
2
2
|
module Datasources
|
3
3
|
class PolarsDatasource < BaseDatasource
|
4
|
+
include EasyML::DataframeSerialization
|
5
|
+
|
4
6
|
validates :df, presence: true
|
5
7
|
add_configuration_attributes :df
|
6
8
|
|
@@ -58,7 +60,7 @@ module EasyML
|
|
58
60
|
return unless df
|
59
61
|
|
60
62
|
datasource.configuration = (datasource.configuration || {}).merge(
|
61
|
-
"df" =>
|
63
|
+
"df" => serialize_dataframe(df),
|
62
64
|
)
|
63
65
|
end
|
64
66
|
|
@@ -66,23 +68,7 @@ module EasyML
|
|
66
68
|
return unless datasource.configuration&.key?("df")
|
67
69
|
|
68
70
|
df_data = datasource.configuration["df"]
|
69
|
-
|
70
|
-
|
71
|
-
columns = df_data["columns"].map do |col|
|
72
|
-
dtype = case col["datatype"]
|
73
|
-
when Hash
|
74
|
-
if col["datatype"]["Datetime"]
|
75
|
-
Polars::Datetime.new(col["datatype"]["Datetime"][0].downcase.to_sym).class
|
76
|
-
else
|
77
|
-
Polars::Utf8
|
78
|
-
end
|
79
|
-
else
|
80
|
-
Polars.const_get(col["datatype"])
|
81
|
-
end
|
82
|
-
Polars::Series.new(col["name"], col["values"], dtype: dtype)
|
83
|
-
end
|
84
|
-
|
85
|
-
datasource.df = Polars::DataFrame.new(columns)
|
71
|
+
datasource.df = deserialize_dataframe(df_data)
|
86
72
|
end
|
87
73
|
end
|
88
74
|
end
|
data/app/models/easy_ml/event.rb
CHANGED
@@ -19,6 +19,7 @@ module EasyML
|
|
19
19
|
STATUSES = %w[started success failed].freeze
|
20
20
|
|
21
21
|
belongs_to :eventable, polymorphic: true, optional: true
|
22
|
+
has_one :context, dependent: :destroy, class_name: "EasyML::EventContext"
|
22
23
|
|
23
24
|
validates :name, presence: true
|
24
25
|
validates :status, presence: true, inclusion: { in: STATUSES }
|
@@ -51,8 +52,8 @@ module EasyML
|
|
51
52
|
error = e
|
52
53
|
end
|
53
54
|
end
|
54
|
-
create_event(model, "failed", error)
|
55
55
|
Rails.logger.error("#{self.class.name} failed: #{error.message}")
|
56
|
+
create_event(model, "failed", error)
|
56
57
|
end
|
57
58
|
|
58
59
|
def self.format_stacktrace(error)
|
@@ -0,0 +1,58 @@
|
|
1
|
+
# == Schema Information
|
2
|
+
#
|
3
|
+
# Table name: easy_ml_event_contexts
|
4
|
+
#
|
5
|
+
# id :bigint not null, primary key
|
6
|
+
# event_id :bigint not null
|
7
|
+
# context :jsonb not null
|
8
|
+
# created_at :datetime not null
|
9
|
+
# updated_at :datetime not null
|
10
|
+
#
|
11
|
+
module EasyML
  # Stores arbitrary context (a Hash or a Polars data frame) alongside an
  # event. The value is serialized on assignment and the chosen encoding is
  # recorded in the `format` attribute so it can be decoded on read.
  #
  # NOTE(review): this class reads and writes a `format` attribute that the
  # schema annotation above does not list — confirm the column exists.
  class EventContext < ActiveRecord::Base
    include EasyML::DataframeSerialization

    self.table_name = "easy_ml_event_contexts"

    belongs_to :event

    validates :context, presence: true
    validates :event, presence: true

    # Serializes +new_context+ into the context column and caches the
    # original object for subsequent reads on this instance.
    def context=(new_context)
      write_attribute(:context, serialize_context(new_context))
      @context = new_context
    end

    # Returns the cached context, or decodes the stored value according to
    # `format`. Returns nil when nothing decodable is stored.
    def context
      @context ||= deserialize_context(read_attribute(:context))
    end

    private

    # Picks an encoding from the object's type and records it in `format`.
    # Unsupported types fall through and yield nil.
    def serialize_context(new_context)
      case new_context
      when Hash
        self.format = :json
        new_context.to_json
      when YAML
        # NOTE(review): YAML is a module, so `YAML === obj` is false for
        # ordinary objects and this branch looks unreachable. Intent is
        # unclear (YAML text? arbitrary objects?) — left as-is pending
        # confirmation.
        self.format = :yaml
        new_context.to_yaml
      when Polars::DataFrame
        self.format = :dataframe
        serialize_dataframe(new_context)
      end
    end

    # Decodes a stored context according to `format`.
    def deserialize_context(context)
      # A record without a stored context or format (e.g. a new record being
      # validated) previously crashed on `nil.to_sym`; report "no context".
      return if context.nil? || format.nil?

      case format.to_sym
      when :json
        JSON.parse(context)
      when :yaml
        YAML.safe_load(context)
      when :dataframe
        deserialize_dataframe(context)
      end
    end
  end
end
|