easy_ml 0.2.0.pre.rc100 → 0.2.0.pre.rc101
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/initializers/zhong.rb +6 -0
- data/lib/easy_ml/data/polars_column.rb +1 -0
- data/lib/easy_ml/data/synced_directory.rb +36 -23
- data/lib/easy_ml/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7f48937aea567de8e40bc34486c4ac945b860ca26654d8d3b06efa1c1d4a54f3
|
4
|
+
data.tar.gz: 1abb8bb2e3f3ba8bb9c228d7a9691e8906ababa523ad0d7155cdafbd3ec62396
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ef28fcb989d2934329e4da3c9a138d3fc7b4c9ae995d7ce021217f4507e24b17664d2dee9690b7a00105e804b41f490442e2b90f1d76f8c12d7ddca768ae43ba
|
7
|
+
data.tar.gz: 3ba6f95ca3a660540e81a49c5eba84f530b606d89dc499e05aa288d26b90802dfc74b0b7615360002f1f36c1255f41ea6871378ffa1e93abc147b4f2a5c6ab0c
|
@@ -8,6 +8,12 @@ if %w[zhong:start].include?(ARGV.first)
|
|
8
8
|
EasyML::CleanJob.perform_later
|
9
9
|
end
|
10
10
|
|
11
|
+
every 1.day, "refresh datasets" do
|
12
|
+
EasyML::Dataset.all.each do |dataset|
|
13
|
+
dataset.refresh_async
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
11
17
|
every 1.hour, "schedule retraining" do
|
12
18
|
EasyML::ScheduleRetrainingJob.perform_later
|
13
19
|
end
|
@@ -126,7 +126,7 @@ module EasyML
|
|
126
126
|
)
|
127
127
|
|
128
128
|
Rails.logger.info("Downloaded #{object.key} to #{local_file_path}")
|
129
|
-
if object.key.end_with?(".gz")
|
129
|
+
if object.key.end_with?(".gz") && !object.key.end_with?(".parquet.gz")
|
130
130
|
ungzipped_file_path = ungzip_file(local_file_path)
|
131
131
|
Rails.logger.info("Ungzipped to #{ungzipped_file_path}")
|
132
132
|
end
|
@@ -284,48 +284,61 @@ module EasyML
|
|
284
284
|
relative_path = Pathname.new(file_path).relative_path_from(Pathname.new(root_dir)).to_s
|
285
285
|
s3_key = s3_prefix.present? ? File.join(s3_prefix, File.basename(relative_path)) : relative_path
|
286
286
|
|
287
|
-
# Create a temporary gzipped version of the file
|
288
|
-
gzipped_file_path = "#{file_path}.gz"
|
289
|
-
|
290
287
|
begin
|
291
|
-
Rails.logger.info("
|
288
|
+
Rails.logger.info("Uploading #{file_path} to s3://#{s3_bucket}/#{s3_key}")
|
292
289
|
|
293
|
-
|
294
|
-
|
290
|
+
if file_path.end_with?(".parquet")
|
291
|
+
# Upload parquet files directly without compression
|
295
292
|
File.open(file_path, "rb") do |file|
|
296
|
-
|
293
|
+
s3.put_object(
|
294
|
+
bucket: s3_bucket,
|
295
|
+
key: s3_key,
|
296
|
+
body: file
|
297
|
+
)
|
298
|
+
end
|
299
|
+
Rails.logger.info("Successfully uploaded #{file_path} to s3://#{s3_bucket}/#{s3_key}")
|
300
|
+
else
|
301
|
+
# Create a temporary gzipped version of the file
|
302
|
+
gzipped_file_path = "#{file_path}.gz"
|
303
|
+
|
304
|
+
# Compress the file
|
305
|
+
Zlib::GzipWriter.open(gzipped_file_path) do |gz|
|
306
|
+
File.open(file_path, "rb") do |file|
|
307
|
+
gz.write(file.read)
|
308
|
+
end
|
297
309
|
end
|
298
|
-
end
|
299
310
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
311
|
+
# Upload the gzipped file
|
312
|
+
File.open(gzipped_file_path, "rb") do |file|
|
313
|
+
s3.put_object(
|
314
|
+
bucket: s3_bucket,
|
315
|
+
key: "#{s3_key}.gz",
|
316
|
+
body: file,
|
317
|
+
content_encoding: "gzip",
|
318
|
+
)
|
319
|
+
end
|
320
|
+
|
321
|
+
Rails.logger.info("Successfully uploaded #{file_path} to s3://#{s3_bucket}/#{s3_key}.gz")
|
309
322
|
|
310
|
-
|
323
|
+
# Clean up temporary gzipped file
|
324
|
+
File.delete(gzipped_file_path) if File.exist?(gzipped_file_path)
|
325
|
+
end
|
311
326
|
rescue Aws::S3::Errors::ServiceError, StandardError => e
|
312
327
|
Rails.logger.error("Failed to upload #{file_path}: #{e.message}")
|
313
328
|
raise e
|
314
|
-
ensure
|
315
|
-
# Clean up temporary gzipped file
|
316
|
-
File.delete(gzipped_file_path) if File.exist?(gzipped_file_path)
|
317
329
|
end
|
318
330
|
end
|
319
331
|
|
320
332
|
def should_upload?(file_path)
|
321
333
|
relative_path = Pathname.new(file_path).relative_path_from(Pathname.new(root_dir)).to_s
|
322
334
|
s3_key = s3_prefix.present? ? File.join(s3_prefix, relative_path) : relative_path
|
335
|
+
s3_key = "#{s3_key}.gz" unless file_path.end_with?(".parquet")
|
323
336
|
|
324
337
|
begin
|
325
338
|
# Check if file exists in S3
|
326
339
|
response = s3.head_object(
|
327
340
|
bucket: s3_bucket,
|
328
|
-
key:
|
341
|
+
key: s3_key,
|
329
342
|
)
|
330
343
|
|
331
344
|
# Compare modification times
|
data/lib/easy_ml/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc100
|
4
|
+
version: 0.2.0.pre.rc101
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-03-
|
10
|
+
date: 2025-03-10 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: activerecord
|