easy_ml 0.2.0.pre.rc76 → 0.2.0.pre.rc78
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/controllers/easy_ml/models_controller.rb +3 -2
- data/app/frontend/components/ModelForm.tsx +16 -0
- data/app/frontend/components/ScheduleModal.tsx +0 -2
- data/app/frontend/components/dataset/PreprocessingConfig.tsx +7 -6
- data/app/jobs/easy_ml/application_job.rb +1 -0
- data/app/jobs/easy_ml/batch_job.rb +47 -6
- data/app/jobs/easy_ml/compute_feature_job.rb +10 -10
- data/app/jobs/easy_ml/reaper.rb +14 -10
- data/app/jobs/easy_ml/refresh_dataset_job.rb +2 -0
- data/app/jobs/easy_ml/sync_datasource_job.rb +1 -0
- data/app/models/concerns/easy_ml/dataframe_serialization.rb +1 -17
- data/app/models/easy_ml/column/imputers/base.rb +1 -1
- data/app/models/easy_ml/column/imputers/ordinal_encoder.rb +1 -5
- data/app/models/easy_ml/column/imputers/today.rb +1 -1
- data/app/models/easy_ml/column/selector.rb +0 -8
- data/app/models/easy_ml/column.rb +1 -1
- data/app/models/easy_ml/dataset/learner/base.rb +2 -2
- data/app/models/easy_ml/dataset/learner/eager.rb +3 -1
- data/app/models/easy_ml/dataset/learner/lazy.rb +4 -1
- data/app/models/easy_ml/dataset/refresh_reasons.rb +12 -0
- data/app/models/easy_ml/dataset.rb +29 -76
- data/app/models/easy_ml/datasource.rb +0 -6
- data/app/models/easy_ml/feature.rb +27 -38
- data/app/models/easy_ml/model.rb +20 -2
- data/app/models/easy_ml/models/xgboost/evals_callback.rb +3 -2
- data/app/models/easy_ml/models/xgboost.rb +52 -36
- data/app/models/easy_ml/retraining_run.rb +1 -1
- data/app/serializers/easy_ml/dataset_serializer.rb +1 -1
- data/app/serializers/easy_ml/model_serializer.rb +1 -0
- data/lib/easy_ml/core/tuner.rb +7 -4
- data/lib/easy_ml/data/dataset_manager/normalizer.rb +0 -0
- data/lib/easy_ml/data/dataset_manager/reader/base.rb +80 -0
- data/lib/easy_ml/data/dataset_manager/reader/batch.rb +106 -0
- data/lib/easy_ml/data/dataset_manager/reader/data_frame.rb +23 -0
- data/lib/easy_ml/data/dataset_manager/reader/file.rb +75 -0
- data/lib/easy_ml/data/dataset_manager/reader.rb +58 -0
- data/lib/easy_ml/data/dataset_manager/writer/append_only.rb +67 -0
- data/lib/easy_ml/data/dataset_manager/writer/base.rb +139 -0
- data/lib/easy_ml/data/dataset_manager/writer/named.rb +14 -0
- data/lib/easy_ml/data/dataset_manager/writer/partitioned/partition_reasons.rb +15 -0
- data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +150 -0
- data/lib/easy_ml/data/dataset_manager/writer.rb +80 -0
- data/lib/easy_ml/data/dataset_manager.rb +140 -0
- data/lib/easy_ml/data/partition/boundaries.rb +60 -0
- data/lib/easy_ml/data/partition.rb +7 -0
- data/lib/easy_ml/data/polars_column.rb +19 -5
- data/lib/easy_ml/data/synced_directory.rb +1 -2
- data/lib/easy_ml/data.rb +2 -0
- data/lib/easy_ml/engine.rb +16 -14
- data/lib/easy_ml/feature_store.rb +21 -188
- data/lib/easy_ml/reasons.rb +41 -0
- data/lib/easy_ml/support/lockable.rb +1 -5
- data/lib/easy_ml/version.rb +1 -1
- data/lib/easy_ml.rb +1 -1
- data/public/easy_ml/assets/.vite/manifest.json +1 -1
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-Bbf3mD_b.js +522 -0
- data/public/easy_ml/assets/assets/entrypoints/{Application.tsx-B1qLZuyu.js.map → Application.tsx-Bbf3mD_b.js.map} +1 -1
- metadata +24 -9
- data/app/models/easy_ml/datasources/polars_datasource.rb +0 -69
- data/lib/easy_ml/data/filter_extensions.rb +0 -31
- data/public/easy_ml/assets/assets/entrypoints/Application.tsx-B1qLZuyu.js +0 -522
- /data/app/models/{lineage_history.rb → easy_ml/lineage_history.rb} +0 -0
data/lib/easy_ml/data/partition/boundaries.rb ADDED

@@ -0,0 +1,60 @@
+module EasyML
+  module Data
+    module Partition
+      class Boundaries
+        attr_reader :df, :primary_key, :partition_size
+
+        def initialize(df, primary_key, partition_size)
+          @df = df
+          @primary_key = primary_key.is_a?(Array) ? primary_key.first : primary_key
+          @partition_size = partition_size
+        end
+
+        def inspect
+          "#<#{self.class.name.split("::").last} partition_size=#{partition_size} primary_key=#{primary_key}>"
+        end
+
+        def boundaries
+          return @boundaries if @boundaries
+
+          @boundaries = df.with_columns(
+            Polars.col(primary_key)
+              .truediv(partition_size)
+              .floor
+              .add(1)
+              .cast(Polars::Int64)
+              .alias("partition")
+          )
+          @boundaries = @boundaries.with_columns(
+            Polars.col("partition")
+              .sub(1)
+              .mul(partition_size)
+              .cast(Polars::Int64)
+              .alias("partition_start"),
+            Polars.col("partition")
+              .mul(partition_size)
+              .sub(1)
+              .cast(Polars::Int64)
+              .alias("partition_end")
+          )
+          # @boundaries = @boundaries.with_columns(
+          #   Polars.col(primary_key).is_between(Polars.col("partition_start"), Polars.col("partition_end")).select("partition")
+          # )
+        end
+
+        def to_a
+          is_lazy = df.is_a?(Polars::LazyFrame)
+          empty = is_lazy ? df.limit(1).collect.empty? : df.shape[0] == 0
+          return [] if empty
+
+          sorted = boundaries.select(["partition", "partition_start", "partition_end"]).unique.sort("partition")
+          array = (is_lazy ? sorted.collect.to_a : sorted.to_a).map(&:with_indifferent_access)
+          # For the last partition, set the end to the total number of rows (so we read the last row with is_between queries)
+          last_idx = array.size - 1
+          array[last_idx]["partition_end"] = is_lazy ? df.select(Polars.col(primary_key)).max.collect.to_a.first.dig(primary_key) : df[primary_key].max
+          array
+        end
+      end
+    end
+  end
+end
data/lib/easy_ml/data/polars_column.rb CHANGED

@@ -124,11 +124,25 @@ module EasyML
       # @param series [Polars::Series] The string series to analyze
       # @return [Symbol] One of :datetime, :text, or :categorical
       def determine_string_type(series)
-
-
-
-
-
+        # Try to parse as numeric first
+        begin
+          # Try integer first
+          series.cast(Polars::Int64)
+          return :numeric
+        rescue StandardError
+          begin
+            # Try float if integer fails
+            series.cast(Polars::Float64)
+            return :numeric
+          rescue StandardError
+            # If not numeric, check for datetime or categorical
+            if EasyML::Data::DateConverter.maybe_convert_date(Polars::DataFrame.new({ temp: series }),
+                                                              :temp)[:temp].dtype.is_a?(Polars::Datetime)
+              :datetime
+            else
+              categorical_or_text?(series)
+            end
+          end
         end
       end

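
A standalone sketch of the cast-probing idea the rewritten determine_string_type relies on (not the gem's API; the series name and values are invented): a strict cast of a string series to Int64 or Float64 raises unless every value parses, so a successful cast marks the column as numeric.

  series = Polars::Series.new("amount", ["1", "2", "3"])
  begin
    series.cast(Polars::Int64)
    type = :numeric
  rescue StandardError
    type = :unknown   # would fall through to the datetime/categorical checks
  end
  type # => :numeric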
data/lib/easy_ml/data/synced_directory.rb CHANGED

@@ -1,5 +1,3 @@
-require_relative "polars_reader"
-
 module EasyML
   module Data
     class SyncedDirectory
@@ -16,6 +14,7 @@ module EasyML
        @s3_region = options.dig(:s3_region) || EasyML::Configuration.s3_region
        @cache_for = options.dig(:cache_for)
        @polars_args = options.dig(:polars_args)
+       @source_of_truth = options.dig(:source_of_truth) || :remote
      end

      delegate :query, :data, :all_files, :files, :sha, to: :reader
data/lib/easy_ml/data.rb CHANGED
data/lib/easy_ml/engine.rb CHANGED
@@ -55,7 +55,7 @@ module EasyML
      Polars.enable_string_cache
    end

-    if %w[db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
+    if %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
      config.eager_load_paths = config.eager_load_paths.without(config.eager_load_paths.map(&:to_s).grep(/easy_ml/).map { |p| Pathname.new(p) })
    else
      config.after_initialize do
@@ -77,6 +77,21 @@ module EasyML
      end
    end

+    unless %w[db:create db:migrate db:migrate:status db:setup db:drop assets:precompile].include?(ARGV.first)
+      initializer "easy_ml.configure_secrets" do
+        EasyML::Configuration.configure do |config|
+          raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
+          raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
+
+          config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
+          config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
+          config.s3_region = ENV["S3_REGION"] ? ENV["S3_REGION"] : "us-east-1"
+          config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
+          config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
+        end
+      end
+    end
+
    initializer "easy_ml.check_pending_migrations" do
      if defined?(Rails::Server)
        config.after_initialize do
@@ -96,19 +111,6 @@ module EasyML
      end
    end

-    initializer "easy_ml.configure_secrets" do
-      EasyML::Configuration.configure do |config|
-        raise "S3_ACCESS_KEY_ID is missing. Set ENV['S3_ACCESS_KEY_ID']" unless ENV["S3_ACCESS_KEY_ID"]
-        raise "S3_SECRET_ACCESS_KEY is missing. Set ENV['S3_SECRET_ACCESS_KEY']" unless ENV["S3_SECRET_ACCESS_KEY"]
-
-        config.s3_access_key_id = ENV["S3_ACCESS_KEY_ID"]
-        config.s3_secret_access_key = ENV["S3_SECRET_ACCESS_KEY"]
-        config.s3_region = ENV["S3_REGION"] if ENV["S3_REGION"]
-        config.timezone = ENV["TIMEZONE"].present? ? ENV["TIMEZONE"] : "America/New_York"
-        config.wandb_api_key = ENV["WANDB_API_KEY"] if ENV["WANDB_API_KEY"]
-      end
-    end
-
    initializer "easy_ml.setup_generators" do |app|
      generators_path = EasyML::Engine.root.join("lib/easy_ml/railtie/generators")
      generators_dirs = Dir[File.join(generators_path, "**", "*.rb")]
data/lib/easy_ml/feature_store.rb CHANGED

@@ -1,69 +1,28 @@
 module EasyML
-  class FeatureStore
+  class FeatureStore < EasyML::Data::DatasetManager
     attr_reader :feature

     def initialize(feature)
       @feature = feature
-    end
-
-    def store(df)
-      primary_key = feature.primary_key&.first
-      return store_without_partitioning(df) unless df.columns.include?(primary_key)
-      return store_without_partitioning(df) unless primary_key
-
-      min_key = df[primary_key].min
-      max_key = df[primary_key].max
-      batch_size = feature.batch_size || 10_000
-
-      begin
-        # We are intentionally not using to_i, so it will raise an error for keys like "A1"
-        min_key = Integer(min_key) if min_key.is_a?(String)
-        max_key = Integer(max_key) if max_key.is_a?(String)
-      rescue ArgumentError
-        return store_without_partitioning(df)
-      end
-
-      # Only partition if we have integer keys where we can predict boundaries
-      return store_without_partitioning(df) unless min_key.is_a?(Integer) && max_key.is_a?(Integer)
-
-      partitions = compute_partition_boundaries(min_key, max_key, batch_size)
-      partitions.each do |partition_start|
-        partition_end = partition_start + batch_size - 1
-        partition_df = df.filter(
-          (Polars.col(primary_key) >= partition_start) &
-          (Polars.col(primary_key) <= partition_end)
-        )
-
-        next if partition_df.height == 0

-
+      datasource_config = feature&.dataset&.datasource&.configuration
+      if datasource_config
+        options = {
+          root_dir: feature_dir,
+          filenames: "feature",
+          append_only: false,
+          primary_key: feature.primary_key&.first,
+          partition_size: batch_size,
+          s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
+          s3_prefix: s3_prefix,
+          polars_args: datasource_config.dig("polars_args"),
+        }.compact
+        super(options)
+      else
+        super({ root_dir: "" })
       end
     end

-    def query(**kwargs)
-      query_all_partitions(**kwargs)
-    end
-
-    def empty?
-      list_partitions.empty?
-    end
-
-    def list_partitions
-      Dir.glob(File.join(feature_dir, "feature*.parquet")).sort
-    end
-
-    def wipe
-      FileUtils.rm_rf(feature_dir)
-    end
-
-    def upload_remote_files
-      synced_directory.upload
-    end
-
-    def download
-      synced_directory&.download
-    end
-
     def cp(old_version, new_version)
       old_dir = feature_dir_for_version(old_version)
       new_dir = feature_dir_for_version(new_version)
@@ -74,7 +33,7 @@ module EasyML
      files_to_cp = Dir.glob(Pathname.new(old_dir).join("**/*")).select { |f| File.file?(f) }

      files_to_cp.each do |file|
-        target_file = file.gsub(
+        target_file = file.gsub(old_dir, new_dir)
        FileUtils.mkdir_p(File.dirname(target_file))
        FileUtils.cp(file, target_file)
      end
@@ -82,77 +41,17 @@ module EasyML

     private

-    def
-
-      when :partitions
-        list_partitions.each do |partition|
-          FileUtils.rm(partition)
-        end
-      when :no_partitions
-        FileUtils.rm_rf(feature_path)
-      when :all
-        wipe
-      end
-    end
-
-    def store_without_partitioning(df)
-      lock_file do
-        cleanup(type: :partitions)
-        path = feature_path
-        safe_write(df, path)
-      end
-    end
-
-    def safe_write(df, path)
-      FileUtils.mkdir_p(File.dirname(path))
-      df.write_parquet(path)
-    end
-
-    def store_partition(partition_df, primary_key, partition_start)
-      lock_partition(partition_start) do
-        cleanup(type: :no_partitions)
-        path = partition_path(partition_start)
-
-        if File.exist?(path)
-          reader = EasyML::Data::PolarsReader.new
-          existing_df = reader.query([path])
-          preserved_records = existing_df.filter(
-            Polars.col(primary_key).is_in(partition_df[primary_key]).is_not
-          )
-          if preserved_records.shape[1] != partition_df.shape[1]
-            wipe
-          else
-            partition_df = Polars.concat([preserved_records, partition_df], how: "vertical")
-          end
-        end
-
-        safe_write(partition_df, path)
-      end
-    end
-
-    def query_all_partitions(**kwargs)
-      reader = EasyML::Data::PolarsReader.new
-      pattern = File.join(feature_dir, "feature*.parquet")
-      files = Dir.glob(pattern)
-
-      return Polars::DataFrame.new if files.empty?
-
-      reader.query(files, **kwargs)
-    end
-
-    def compute_partition_boundaries(min_key, max_key, batch_size)
-      start_partition = (min_key / batch_size.to_f).floor * batch_size
-      end_partition = (max_key / batch_size.to_f).floor * batch_size
-      (start_partition..end_partition).step(batch_size).to_a
+    def batch_size
+      @batch_size ||= feature.batch_size || 10_000
     end

     def feature_dir_for_version(version)
       File.join(
         Rails.root,
         "easy_ml/datasets",
-        feature
+        feature&.dataset&.name&.parameterize&.gsub("-", "_"),
         "features",
-        feature
+        feature&.name&.parameterize&.gsub("-", "_"),
         version.to_s
       )
     end
@@ -161,74 +60,8 @@ module EasyML
       feature_dir_for_version(feature.version)
     end

-    def feature_path
-      File.join(feature_dir, "feature.parquet")
-    end
-
-    def partition_path(partition_start)
-      File.join(feature_dir, "feature#{partition_start}.parquet")
-    end
-
     def s3_prefix
       File.join("datasets", feature_dir.split("datasets").last)
     end
-
-    def synced_directory
-      return unless feature.dataset&.datasource.present?
-
-      datasource_config = feature.dataset.datasource.configuration || {}
-      @synced_dir ||= EasyML::Data::SyncedDirectory.new(
-        root_dir: feature_dir,
-        s3_bucket: datasource_config.dig("s3_bucket") || EasyML::Configuration.s3_bucket,
-        s3_prefix: s3_prefix,
-        s3_access_key_id: EasyML::Configuration.s3_access_key_id,
-        s3_secret_access_key: EasyML::Configuration.s3_secret_access_key,
-        polars_args: datasource_config.dig("polars_args"),
-        cache_for: 0,
-      )
-    end
-
-    def lock_partition(partition_start)
-      Support::Lockable.with_lock(partition_lock_key(partition_start), wait_timeout: 2, stale_timeout: 60) do |client|
-        begin
-          yield client if block_given?
-        ensure
-          unlock_partition(partition_start)
-        end
-      end
-    end
-
-    def lock_file
-      Support::Lockable.with_lock(file_lock_key, wait_timeout: 2, stale_timeout: 60) do |client|
-        begin
-          yield client if block_given?
-        ensure
-          unlock_file
-        end
-      end
-    end
-
-    def unlock_partition(partition_start)
-      Support::Lockable.unlock!(partition_lock_key(partition_start))
-    end
-
-    def unlock_file
-      Support::Lockable.unlock!(file_lock_key)
-    end
-
-    def unlock_all_partitions
-      list_partitions.each do |partition_path|
-        partition_start = partition_path.match(/feature(\d+)\.parquet/)[1].to_i
-        unlock_partition(partition_start)
-      end
-    end
-
-    def partition_lock_key(partition_start)
-      "feature_store:#{feature.id}.partition.#{partition_start}"
-    end
-
-    def file_lock_key
-      "feature_store:#{feature.id}.file"
-    end
   end
 end
data/lib/easy_ml/reasons.rb ADDED

@@ -0,0 +1,41 @@
+module EasyML
+  class Reasons
+    def initialize(context)
+      @context = context
+      @reasons = {}
+    end
+
+    class << self
+      def add_reason(name, check)
+        @reasons ||= {}
+        key = name.to_s.downcase.gsub(/\s/, "_").to_sym
+        @reasons[key] = { name: name, check: check }
+      end
+
+      def reasons
+        @reasons ||= {}
+      end
+    end
+
+    def inspect
+      "#<#{self.class.name.split("::").last} checks=[#{self.class.reasons.map { |k, v| "#{v[:name]}" }.join(", ")}]>"
+    end
+
+    def none?(except: [])
+      check(except: except).none?
+    end
+
+    def check(except: [])
+      self.class.reasons.except(*except).select do |_, config|
+        @context.instance_exec(&config[:check])
+      end.map do |_, config|
+        config[:name]
+      end
+    end
+
+    def explain
+      reasons = check
+      reasons.any? ? reasons.join(", ") : :none
+    end
+  end
+end
data/lib/easy_ml/support/lockable.rb CHANGED

@@ -22,7 +22,7 @@ module EasyML
    end

    def self.lock_client(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
-      Suo::Client::Redis.new(
+      Suo::Client::Redis.new(key, {
        acquisition_timeout: wait_timeout,
        stale_lock_expiry: stale_timeout,
        resources: resources,
@@ -30,10 +30,6 @@ module EasyML
      })
    end

-    def self.prefixed_key(key)
-      "easy_ml:#{key}"
-    end
-
    # Execute a block with a Redis lock
    def self.with_lock(key, wait_timeout: 0.1, stale_timeout: 60 * 10, resources: 1)
      lock_key = nil
data/lib/easy_ml/version.rb CHANGED
data/lib/easy_ml.rb CHANGED
@@ -15,13 +15,13 @@ module EasyML
   class Error < StandardError; end

   require_relative "easy_ml/configuration"
+  require_relative "easy_ml/reasons"
   require_relative "easy_ml/deep_compact"
   require_relative "easy_ml/timing"
   require_relative "easy_ml/support"
   require_relative "easy_ml/core_ext"
   require_relative "easy_ml/logging"
   require_relative "easy_ml/data"
-  require_relative "easy_ml/data/filter_extensions"
   require_relative "easy_ml/evaluators"
   require_relative "easy_ml/features"
   require_relative "easy_ml/feature_store"