easy_ml 0.2.0.pre.rc76 → 0.2.0.pre.rc78

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. checksums.yaml +4 -4
  2. data/app/controllers/easy_ml/models_controller.rb +3 -2
  3. data/app/frontend/components/ModelForm.tsx +16 -0
  4. data/app/frontend/components/ScheduleModal.tsx +0 -2
  5. data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
  6. data/app/jobs/easy_ml/application_job.rb +1 -0
  7. data/app/jobs/easy_ml/batch_job.rb +47 -6
  8. data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
  9. data/app/jobs/easy_ml/reaper.rb +14 -10
  10. data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
  11. data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
  12. data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
  13. data/app/models/easy_ml/column/imputers/base.rb +1 -1
  14. data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -5
  15. data/app/models/easy_ml/column/imputers/today.rb +1 -1
  16. data/app/models/easy_ml/column/selector.rb +0 -8
  17. data/app/models/easy_ml/column.rb +1 -1
  18. data/app/models/easy_ml/dataset/learner/base.rb +2 -2
  19. data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
  20. data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
  21. data/app/models/easy_ml/dataset/refresh_reasons.rb +12 -0
  22. data/app/models/easy_ml/dataset.rb +29 -76
  23. data/app/models/easy_ml/datasource.rb +0 -6
  24. data/app/models/easy_ml/feature.rb +27 -38
  25. data/app/models/easy_ml/model.rb +20 -2
  26. data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
  27. data/app/models/easy_ml/models/xgboost.rb +52 -36
  28. data/app/models/easy_ml/retraining_run.rb +1 -1
  29. data/app/serializers/easy_ml/dataset_serializer.rb +1 -1
  30. data/app/serializers/easy_ml/model_serializer.rb +1 -0
  31. data/lib/easy_ml/core/tuner.rb +7 -4
  32. data/lib/easy_ml/data/dataset_manager/normalizer.rb +0 -0
  33. data/lib/easy_ml/data/dataset_manager/reader/base.rb +80 -0
  34. data/lib/easy_ml/data/dataset_manager/reader/batch.rb +106 -0
  35. data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +23 -0
  36. data/lib/easy_ml/data/dataset_manager/reader/file.rb +75 -0
  37. data/lib/easy_ml/data/dataset_manager/reader.rb +58 -0
  38. data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +67 -0
  39. data/lib/easy_ml/data/dataset_manager/writer/base.rb +139 -0
  40. data/lib/easy_ml/data/dataset_manager/writer/named.rb +14 -0
  41. data/lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb +15 -0
  42. data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +150 -0
  43. data/lib/easy_ml/data/dataset_manager/writer.rb +80 -0
  44. data/lib/easy_ml/data/dataset_manager.rb +140 -0
  45. data/lib/easy_ml/data/partition/boundaries.rb +60 -0
  46. data/lib/easy_ml/data/partition.rb +7 -0
  47. data/lib/easy_ml/data/polars_column.rb +19 -5
  48. data/lib/easy_ml/data/synced_directory.rb +1 -2
  49. data/lib/easy_ml/data.rb +2 -0
  50. data/lib/easy_ml/engine.rb +16 -14
  51. data/lib/easy_ml/feature_store.rb +21 -188
  52. data/lib/easy_ml/reasons.rb +41 -0
  53. data/lib/easy_ml/support/lockable.rb +1 -5
  54. data/lib/easy_ml/version.rb +1 -1
  55. data/lib/easy_ml.rb +1 -1
  56. data/public/easy_ml/assets/.vite/manifest.json +1 -1
  57. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
  58. data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
  59. metadata +24 -9
  60. data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
  61. data/lib/easy_ml/data/filter_extensions.rb +0 -31
  62. data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
  63. /data/app/models/{lineage_history.rb → easy_ml/lineage_history.rb} +0 -0
data/lib/easy_ml/data/dataset_manager/reader.rb
@@ -0,0 +1,58 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Reader
+         require_relative "reader/base"
+         require_relative "reader/file"
+         require_relative "reader/data_frame"
+
+         ADAPTERS = [
+           File,
+           DataFrame,
+         ]
+
+         def self.query(input, **kwargs, &block)
+           adapter(input).new(
+             kwargs.merge!(input: input), &block
+           ).query
+         end
+
+         def self.schema(input, **kwargs, &block)
+           adapter(input).new(
+             kwargs.merge!(input: input), &block
+           ).schema
+         end
+
+         def self.files(dir)
+           Dir.glob(::File.join(dir, "**/*.{parquet}"))
+         end
+
+         def self.sha(dir)
+           files = files(dir).sort
+
+           file_hashes = files.map do |file|
+             meta = Polars.read_parquet_schema(file)
+             row_count = Polars.scan_parquet(file).select(Polars.col("*").count).collect[0, 0]
+
+             Digest::SHA256.hexdigest([
+               meta.to_json,
+               row_count.to_s,
+             ].join("|"))
+           end
+
+           Digest::SHA256.hexdigest(file_hashes.join)
+         end
+
+         private
+
+         def self.adapter(input)
+           if input.is_a?(Polars::DataFrame) || input.is_a?(Polars::LazyFrame)
+             DataFrame
+           else
+             File
+           end
+         end
+       end
+     end
+   end
+ end
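
Reader is a small dispatcher: a Polars::DataFrame or Polars::LazyFrame routes to the DataFrame adapter, anything else (a directory or file path) to the File adapter. A minimal usage sketch, assuming the adapters accept a `lazy:` option as the Writer's own `query(lazy: true)` calls suggest:

  require "polars"

  # A path routes through the File adapter and reads the parquet files beneath it...
  lazy = EasyML::Data::DatasetManager::Reader.query("datasets/orders", lazy: true)

  # ...while an in-memory frame routes through the DataFrame adapter.
  df = Polars::DataFrame.new({ "id" => [1, 2, 3] })
  EasyML::Data::DatasetManager::Reader.query(df)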
data/lib/easy_ml/data/dataset_manager/writer/append_only.rb
@@ -0,0 +1,67 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         class AppendOnly < Base
+           attr_accessor :primary_key
+
+           def initialize(options)
+             super
+             @primary_key = options.dig(:primary_key)
+             raise "primary_key required for append_only writer" if primary_key.nil?
+             raise "filenames required: specify the prefix to use for unique new files" unless filenames.present?
+           end
+
+           def store
+             # If there are no existing files, just store as normal
+             return super if files.empty?
+
+             # Get existing data lazily
+             existing_keys = query(lazy: true)
+               .select(primary_key)
+               .collect[primary_key]
+               .to_a
+
+             # Convert input to lazy if it isn't already
+             input_data = df.is_a?(Polars::LazyFrame) ? df : df.lazy
+
+             # Filter out records that already exist
+             new_records = input_data.filter(
+               Polars.col(primary_key).is_in(existing_keys).not_
+             )
+
+             # If we have new records, store them
+             if new_records.clone.select(Polars.length).collect[0, 0] > 0
+               @df = new_records
+               store_to_unique_file
+             end
+           end
+
+           def compact
+             files = self.files
+             return if files.empty?
+
+             clear_unique_id
+
+             # Move the existing compacted parquet to a temp file, so it doesn't conflict with the write,
+             # but can still be queried
+             compacted_file = File.join(root_dir, "compacted.parquet")
+             if File.exist?(compacted_file)
+               tmp_file = File.join(root_dir, "compacted.orig.parquet")
+               FileUtils.mv(compacted_file, tmp_file)
+             end
+             files = self.files
+
+             compacted_file.tap do |target_file|
+               compacted_data = query(lazy: true).sort(primary_key)
+
+               safe_write(compacted_data, target_file)
+               FileUtils.rm(files)
+               clear_unique_id
+             end
+           end
+         end
+       end
+     end
+   end
+ end
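
A sketch of the append-only contract under illustrative names (`datasets/orders`, key `id`): re-storing a primary key that already exists is a no-op, so rows are never updated, only added.

  manager = EasyML::Data::DatasetManager.new(
    root_dir: "datasets/orders",  # illustrative path
    append_only: true,
    primary_key: "id",
    filenames: "orders",
  )

  manager.store(Polars::DataFrame.new({ "id" => [1, 2], "total" => [10, 20] }))
  manager.store(Polars::DataFrame.new({ "id" => [2, 3], "total" => [99, 30] }))
  # The second store writes only id=3; id=2 is filtered out, so the original
  # total=20 row survives. Compaction then merges the unique files, sorted by id.
  manager.compact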
data/lib/easy_ml/data/dataset_manager/writer/base.rb
@@ -0,0 +1,139 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         class Base
+           attr_accessor :filenames, :root_dir, :options, :append_only, :df
+
+           def initialize(options)
+             @root_dir = options.dig(:root_dir)
+             @filenames = options.dig(:filenames)
+             @append_only = options.dig(:append_only)
+             @options = options
+             @df = options.dig(:df)
+           end
+
+           def wipe
+             clear_unique_id
+             FileUtils.rm_rf(root_dir)
+           end
+
+           def store
+             store_to_unique_file
+           end
+
+           def compact
+             files = self.files
+
+             clear_unique_id
+             File.join(root_dir, "compacted.parquet").tap do |target_file|
+               safe_write(
+                 query(lazy: true),
+                 target_file
+               )
+               FileUtils.rm(files)
+             end
+             clear_unique_id
+           end
+
+           def unlock!
+             clear_all_keys
+           end
+
+           private
+
+           def files
+             DatasetManager.new(options).files
+           end
+
+           def query(**kwargs, &block)
+             DatasetManager.new(options).query(root_dir, **kwargs, &block)
+           end
+
+           def store_to_unique_file(subdir: nil)
+             safe_write(df, unique_path(subdir: subdir))
+           end
+
+           def acquire_lock(key, &block)
+             Support::Lockable.with_lock("#{key}:lock", wait_timeout: 2, &block)
+           end
+
+           def unique_path(subdir: nil)
+             filename = [filenames, unique_id(subdir: subdir), "parquet"].compact.join(".")
+
+             File.join(root_dir, subdir.to_s, filename)
+           end
+
+           def safe_write(df, path)
+             FileUtils.mkdir_p(File.dirname(path))
+             df.is_a?(Polars::LazyFrame) ? df.sink_parquet(path) : df.write_parquet(path)
+             path
+           end
+
+           def clear_all_keys
+             list_keys.each { |key| unlock_file(key) }
+           end
+
+           def unlock_file(key)
+             acquire_lock(key) do |suo|
+               suo.client.del(key)
+             end
+           end
+
+           def clear_unique_id(subdir: nil)
+             key = unique_id_key(subdir: subdir)
+             acquire_lock(key) do |suo|
+               suo.client.del(key)
+             end
+           end
+
+           def unique_id_key(subdir: nil)
+             File.join("dataset_managers", root_dir, subdir.to_s, "sequence")
+           end
+
+           def add_key(key)
+             keylist = unique_id_key(subdir: "keylist")
+
+             acquire_lock(keylist) do |suo|
+               suo.client.sadd(keylist, key)
+             end
+           end
+
+           def list_keys
+             keylist = unique_id_key(subdir: "keylist")
+
+             acquire_lock(keylist) do |suo|
+               if suo.client.type(keylist) == "set"
+                 suo.client.smembers(keylist)
+               else
+                 suo.client.del(keylist)
+                 []
+               end
+             end
+           end
+
+           def key_exists?(key)
+             keylist = unique_id_key(subdir: "keylist")
+
+             acquire_lock(keylist) do |suo|
+               suo.client.sismember(keylist, key)
+             end
+           end
+
+           def unique_id(subdir: nil)
+             key = unique_id_key(subdir: subdir)
+             add_key(key)
+
+             acquire_lock(key) do |suo|
+               redis = suo.client
+
+               seq = (redis.get(key) || "0").to_i
+               redis.set(key, (seq + 1).to_s)
+               seq + 1
+             end
+           end
+         end
+       end
+     end
+   end
+ end
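
Base's file naming is driven by a Redis-backed, per-directory sequence (via Support::Lockable), so concurrent writers get distinct paths instead of clobbering each other. A sketch of the resulting layout, assuming filenames = "file" and root_dir = "datasets/orders":

  # 1st store -> datasets/orders/file.1.parquet
  # 2nd store -> datasets/orders/file.2.parquet
  # compact   -> datasets/orders/compacted.parquet (parts deleted, sequence reset)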
data/lib/easy_ml/data/dataset_manager/writer/named.rb
@@ -0,0 +1,14 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         class Named < Base
+           def store(name)
+             clear_unique_id(subdir: name)
+             store_to_unique_file(subdir: name)
+           end
+         end
+       end
+     end
+   end
+ end
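
Named clears the sequence for its subdirectory before every write, so each name holds exactly one current file. A sketch with hypothetical split names (train_df and test_df are assumed Polars frames):

  manager = EasyML::Data::DatasetManager.new(
    root_dir: "datasets/splits",  # illustrative
    named: true,
    filenames: "split",
  )
  manager.store(train_df, "train")  # -> datasets/splits/train/split.1.parquet
  manager.store(test_df, "test")    # -> datasets/splits/test/split.1.parquet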
data/lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb
@@ -0,0 +1,15 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         class Partitioned < Base
+           class PartitionReasons < EasyML::Reasons
+             add_reason "Missing primary key", -> { primary_key.nil? }
+             add_reason "Df does not contain primary key", -> { df.columns.exclude?(primary_key) }
+             add_reason "Primary key is not numeric", -> { !numeric_primary_key? }
+           end
+         end
+       end
+     end
+   end
+ end
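
EasyML::Reasons (added in this release under lib/easy_ml/reasons.rb) pairs each message with a predicate; the lambdas reference writer methods like primary_key and df, so they are presumably evaluated against the writer instance. A sketch of how Partitioned consumes these checks, based on its own calls to none? and explain:

  reasons = PartitionReasons.new(writer)  # writer: a Writer::Partitioned
  if reasons.none?
    # safe to partition
  else
    puts reasons.explain  # e.g. "Missing primary key"
  end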
data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb
@@ -0,0 +1,150 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         class Partitioned < Base
+           require_relative "partitioned/partition_reasons"
+
+           attr_accessor :partition_size, :partition, :primary_key, :df
+
+           def initialize(options)
+             super
+             @partition_size = options.dig(:partition_size)
+             @partition = options.dig(:partition)
+             @primary_key = options.dig(:primary_key)
+
+             raise "filenames required: specify the prefix to use for unique new files" unless filenames.present?
+           end
+
+           def wipe
+             partitions.each do |partition|
+               FileUtils.rm_rf(File.join(root_dir, partition))
+             end
+             clear_all_keys
+           end
+
+           def store
+             unless can_partition?
+               puts cannot_partition_reasons.explain
+               return Base.new(options).store
+             end
+
+             store_each_partition
+           end
+
+           def compact
+             files = self.files
+             @df = query(lazy: true)
+
+             clear_unique_id(subdir: "compacted")
+             compact_each_partition.tap do
+               FileUtils.rm(files)
+               clear_unique_id
+             end
+           end
+
+           private
+
+           def partitions
+             Dir.glob(File.join(root_dir, "**/*")).map { |f| f.split("/").last }
+           end
+
+           def compact_each_partition
+             with_each_partition do |partition_df, _|
+               safe_write(
+                 partition_df.sort(Polars.col(primary_key)),
+                 unique_path(subdir: "compacted")
+               )
+             end
+           end
+
+           def with_each_partition(&block)
+             partition_boundaries.map do |partition|
+               partition_start = partition[:partition_start]
+               partition_end = partition[:partition_end]
+               partition_df = df.filter(Polars.col(primary_key).is_between(partition_start, partition_end))
+               num_rows = lazy? ? partition_df.select(Polars.length).collect[0, 0] : partition_df.shape[0]
+
+               # Skip empty partitions
+               next if num_rows == 0
+               yield partition_df, partition
+             end
+           end
+
+           def store_each_partition
+             with_each_partition do |partition_df, partition|
+               safe_write(
+                 partition_df,
+                 unique_path(subdir: partition[:partition])
+               )
+             end
+           end
+
+           def partition_boundaries
+             EasyML::Data::Partition::Boundaries.new(df, primary_key, partition_size).to_a
+           end
+
+           def cannot_partition_reasons
+             @cannot_partition_reasons ||= PartitionReasons.new(self)
+           end
+
+           def can_partition?
+             @partitioned ||= cannot_partition_reasons.none?
+           end
+
+           def lazy?
+             df.is_a?(Polars::LazyFrame)
+           end
+
+           def cast_primary_key
+             case dtype_primary_key
+             when Polars::Categorical
+               Polars.col(primary_key).cast(Polars::String)
+             else
+               Polars.col(primary_key)
+             end
+           end
+
+           def dtype_primary_key
+             @dtype_primary_key ||= schema[primary_key]
+           end
+
+           def schema
+             @schema ||= df.schema
+           end
+
+           def min_key
+             return @min_key if @min_key
+
+             if lazy?
+               @min_key = df.select(cast_primary_key).min.collect.to_a[0].dig(primary_key)
+             else
+               @min_key = df[primary_key].min
+             end
+           end
+
+           def max_key
+             return @max_key if @max_key
+
+             if lazy?
+               @max_key = df.select(cast_primary_key).max.collect.to_a[0].dig(primary_key)
+             else
+               @max_key = df[primary_key].max
+             end
+           end
+
+           def numeric_primary_key?
+             begin
+               # We are intentionally not using to_i, so it will raise an error for keys like "A1"
+               min = min_key.is_a?(String) ? Integer(min_key) : min_key
+               max = max_key.is_a?(String) ? Integer(max_key) : max_key
+               min.is_a?(Integer) && max.is_a?(Integer)
+             rescue ArgumentError
+               false
+             end
+           end
+         end
+       end
+     end
+   end
+ end
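
With a numeric primary key, store slices the frame on the key ranges produced by EasyML::Data::Partition::Boundaries and writes each slice to its own subdirectory; compact rewrites everything under compacted/. A layout sketch (partition numbering and key ranges illustrative, since Boundaries lives in the separate boundaries.rb file):

  manager = EasyML::Data::DatasetManager.new(
    root_dir: "datasets/events",  # illustrative
    primary_key: "id",
    partition_size: 10_000,
    filenames: "events",
  )
  manager.store(events_df)
  # datasets/events/1/events.1.parquet  (ids 1..10_000, say)
  # datasets/events/2/events.1.parquet  (ids 10_001..20_000)
  manager.compact
  # datasets/events/compacted/events.1.parquet, sorted by id; parts removed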
data/lib/easy_ml/data/dataset_manager/writer.rb
@@ -0,0 +1,80 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       class Writer
+         require_relative "writer/base"
+         require_relative "writer/partitioned"
+         require_relative "writer/append_only"
+         require_relative "writer/named"
+
+         ADAPTERS = [
+           Base,
+           Partitioned,
+           AppendOnly,
+           Named,
+         ]
+
+         attr_accessor :filenames, :root_dir, :partition,
+                       :primary_key, :options, :append_only, :named
+
+         def initialize(options)
+           @root_dir = options.dig(:root_dir)
+           @filenames = options.dig(:filenames)
+           @partition = options.dig(:partition) || (options.dig(:partition_size).present? && options.dig(:primary_key).present?)
+           @append_only = options.dig(:append_only)
+           @primary_key = options.dig(:primary_key)
+           @named = options.dig(:named) || false
+           @options = options
+         end
+
+         def unlock!
+           adapter_class.new(options).unlock!
+         end
+
+         def store(df, *args)
+           adapter_class.new(options.merge!(df: df)).store(*args)
+         end
+
+         def wipe
+           adapter_class.new(options).wipe
+         end
+
+         def compact
+           adapter_class.new(options).compact
+         end
+
+         def inspect
+           keys = %w(root_dir append_only partition primary_key)
+           attrs = keys.map { |k| "#{k}=#{send(k)}" unless send(k).nil? }.compact
+           "#<#{self.class.name} #{attrs.join(" ")}>"
+         end
+
+         private
+
+         def adapter_class
+           if partition?
+             Partitioned
+           elsif append_only?
+             AppendOnly
+           elsif named?
+             Named
+           else
+             Base
+           end
+         end
+
+         def named?
+           @named
+         end
+
+         def partition?
+           @partition
+         end
+
+         def append_only?
+           @append_only
+         end
+       end
+     end
+   end
+ end
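
Writer itself only routes to an adapter, with partitioning taking precedence over append-only, and append-only over named. The dispatch implied by adapter_class:

  # partition (or partition_size + primary_key) -> Partitioned
  # append_only: true                           -> AppendOnly
  # named: true                                 -> Named
  # otherwise                                   -> Base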
data/lib/easy_ml/data/dataset_manager.rb
@@ -0,0 +1,140 @@
+ module EasyML
+   module Data
+     class DatasetManager
+       require_relative "dataset_manager/writer"
+       require_relative "dataset_manager/reader"
+
+       attr_accessor :root_dir, :partition, :append_only, :filenames, :primary_key,
+                     :partition_size, :s3_bucket, :s3_prefix, :s3_access_key_id,
+                     :s3_secret_access_key, :polars_args, :source_of_truth,
+                     :options
+
+       def initialize(options = {})
+         @root_dir = options.dig(:root_dir)
+         @partition = options.dig(:partition) || (options.dig(:partition_size).present? && options.dig(:primary_key).present?)
+         @append_only = options.dig(:append_only) || false
+         @filenames = options.dig(:filenames) || "file"
+         @primary_key = options.dig(:primary_key)
+         @partition_size = options.dig(:partition_size) || nil
+         @s3_bucket = options.dig(:s3_bucket) || EasyML::Configuration.s3_bucket
+         @s3_prefix = options.dig(:s3_prefix) || nil
+         @s3_access_key_id = options.dig(:s3_access_key_id) || EasyML::Configuration.s3_access_key_id
+         @s3_secret_access_key = options.dig(:s3_secret_access_key) || EasyML::Configuration.s3_secret_access_key
+         @polars_args = options.dig(:polars_args) || {}
+         @source_of_truth = options.dig(:source_of_truth) || :local
+         @options = options
+
+         raise "primary_key required: how should we divide partitions?" if partition && primary_key.nil?
+         raise "partition_size required: specify number of rows in each partition" if partition && partition_size.nil?
+         raise "root_dir required: specify the root_dir of the dataset" unless root_dir.present?
+       end
+
+       def inspect
+         keys = %w(root append_only partition primary_key)
+         attrs = keys.map { |k| "#{k}=#{send(k)}" unless send(k).nil? }.compact
+         "#<#{self.class.name} #{attrs.join("\n\t")}>"
+       end
+
+       class << self
+         def query(input = nil, **kwargs, &block)
+           Reader.query(input, **kwargs, &block)
+         end
+
+         def schema(input = nil, **kwargs, &block)
+           Reader.schema(input, **kwargs, &block)
+         end
+
+         def num_rows
+           Reader.num_rows
+         end
+       end
+
+       def num_rows
+         Reader.num_rows(root_dir)
+       end
+
+       def query(input = nil, **kwargs, &block)
+         input = root_dir if input.nil?
+         DatasetManager.query(input, **kwargs, &block)
+       end
+
+       def schema(input = nil, **kwargs, &block)
+         input = root_dir if input.nil?
+         DatasetManager.schema(input, **kwargs, &block)
+       end
+
+       def sha
+         Reader.sha(root_dir)
+       end
+
+       # Transform CSV files into Parquet files, all of the same datatype.
+       # Learn datatypes of columns and store schema.
+       def normalize
+         Normalizer.normalize(root_dir)
+       end
+
+       def data
+         query
+       end
+
+       def unlock!
+         writer.unlock!
+       end
+
+       def compact
+         writer.compact
+       end
+
+       def store(df, *args)
+         writer.store(df, *args)
+       end
+
+       def cp(from, to)
+         writer.cp(from, to)
+       end
+
+       def empty?
+         files.empty? || query(limit: 1).empty?
+       end
+
+       def files
+         Reader.files(root_dir)
+       end
+
+       def wipe
+         writer.wipe
+       end
+
+       def upload
+         synced_directory.upload
+       end
+
+       def download
+         synced_directory.download
+       end
+
+       private
+
+       def root
+         root_dir.gsub(/^#{Rails.root.to_s}/, "")
+       end
+
+       def writer
+         Writer.new(options)
+       end
+
+       def synced_directory
+         @synced_dir ||= EasyML::Data::SyncedDirectory.new(
+           root_dir: root_dir,
+           source_of_truth: source_of_truth,
+           s3_bucket: s3_bucket,
+           s3_prefix: s3_prefix,
+           s3_access_key_id: s3_access_key_id,
+           s3_secret_access_key: s3_secret_access_key,
+           polars_args: polars_args,
+           cache_for: 0,
+         )
+       end
+     end
+   end
+ end
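
DatasetManager is the single entry point the rest of the gem talks to: construct it over a root_dir, then store, query, compact, files, and sha all operate on that directory's parquet files. A minimal end-to-end sketch with illustrative data, assuming query defaults to an eager read:

  require "polars"

  manager = EasyML::Data::DatasetManager.new(
    root_dir: "datasets/users",  # illustrative
    filenames: "users",
  )

  manager.store(Polars::DataFrame.new({ "id" => [1, 2], "name" => ["a", "b"] }))
  manager.store(Polars::DataFrame.new({ "id" => [3], "name" => ["c"] }))

  manager.files.length  # => 2 unique parquet files
  manager.query.shape   # => [3, 2]
  manager.compact       # merge into compacted.parquet
  manager.sha           # fingerprint built from each file's schema + row count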