easy_ml 0.2.0.pre.rc90 → 0.2.0.pre.rc92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/easy_ml/column.rb +15 -15
- data/app/models/easy_ml/column_list.rb +5 -1
- data/app/models/easy_ml/dataset.rb +25 -3
- data/lib/easy_ml/data/dataset_manager/writer/partitioned.rb +1 -3
- data/lib/easy_ml/railtie/generators/migration/migration_generator.rb +1 -0
- data/lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_dataset_names.rb.tt +8 -0
- data/lib/easy_ml/timing.rb +1 -3
- data/lib/easy_ml/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 428c2f3e0ce95c3f41ea3e8c31a8cdbc3709a9bd4332d0736b6b2b4000677430
|
4
|
+
data.tar.gz: 2bad8c01bbb1a65f3135765b815c88b3ac405a48e9a97d30173f44ac5755c4b2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '08218be1a52ee2f871da922b176af2ca6555739af0bec44065bfc81dfdfcb84b7a27c3501b66915b5d07db4bce2427ee4324e38f30a3534564c1be3df1d991a2'
|
7
|
+
data.tar.gz: 2eb3d84ec7fc7be829ad27bf689c617905299110cc40d0185a81d94c3b165c2039168b36473806140c220a4cb7f91c91a905d611657bfc8205ff2f77c257edb9
|
data/app/models/easy_ml/column.rb
CHANGED
@@ -515,24 +515,24 @@ module EasyML
|
|
515
515
|
end
|
516
516
|
|
517
517
|
def cast_statement(df, df_col, expected_dtype)
|
518
|
-
expected_dtype = expected_dtype.class
|
518
|
+
expected_dtype = expected_dtype.is_a?(Polars::DataType) ? expected_dtype : expected_dtype.class
|
519
519
|
actual_type = df[df_col].dtype
|
520
520
|
|
521
521
|
cast_statement = case expected_dtype
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
522
|
+
when Polars::Boolean
|
523
|
+
case actual_type
|
524
|
+
when Polars::Boolean
|
525
|
+
Polars.col(df_col).cast(expected_dtype)
|
526
|
+
when Polars::String, Polars::Categorical
|
527
|
+
Polars.col(df_col).eq("true").cast(expected_dtype)
|
528
|
+
when Polars::Null
|
529
|
+
Polars.col(df_col)
|
530
|
+
else
|
531
|
+
raise "Unexpected dtype: #{actual_type} for column: #{df_col}"
|
532
|
+
end
|
533
|
+
else
|
534
|
+
Polars.col(df_col).cast(expected_dtype)
|
535
|
+
end
|
536
536
|
|
537
537
|
cast_statement.alias(df_col)
|
538
538
|
end
|
data/app/models/easy_ml/column_list.rb
CHANGED
@@ -104,7 +104,11 @@ module EasyML
|
|
104
104
|
expected_dtype = schema[df_col.to_sym]
|
105
105
|
db_col.cast_statement(df, df_col, expected_dtype)
|
106
106
|
end
|
107
|
-
|
107
|
+
begin
|
108
|
+
df = df.with_columns(cast_statements)
|
109
|
+
rescue => e
|
110
|
+
binding.pry
|
111
|
+
end
|
108
112
|
end
|
109
113
|
|
110
114
|
def cast(processed_or_raw)
|
data/app/models/easy_ml/dataset.rb
CHANGED
@@ -217,6 +217,7 @@ module EasyML
|
|
217
217
|
@processed = processed.cp(version)
|
218
218
|
save.tap do
|
219
219
|
features.each(&:bump_version)
|
220
|
+
EasyML::Feature.import(features.to_a, on_duplicate_key_update: [:version])
|
220
221
|
end
|
221
222
|
end
|
222
223
|
|
@@ -633,11 +634,16 @@ module EasyML
|
|
633
634
|
def apply_missing_columns(df, inference: false)
|
634
635
|
return df unless inference
|
635
636
|
|
636
|
-
|
637
|
+
required_cols = col_order(inference: inference).compact.uniq
|
637
638
|
columns.one_hots.each do |one_hot|
|
638
|
-
|
639
|
-
|
639
|
+
virtual_columns = one_hot.virtual_columns
|
640
|
+
if virtual_columns.all? { |vc| df.columns.include?(vc) }
|
641
|
+
required_cols -= virtual_columns
|
642
|
+
else
|
643
|
+
required_cols += [one_hot.name]
|
644
|
+
end
|
640
645
|
end
|
646
|
+
missing_columns = required_cols - df.columns
|
641
647
|
df.with_columns(missing_columns.map { |f| Polars.lit(nil).alias(f) })
|
642
648
|
end
|
643
649
|
|
@@ -894,5 +900,21 @@ module EasyML
|
|
894
900
|
def underscored_name
|
895
901
|
name.gsub(/\s{2,}/, " ").gsub(/\s/, "_").downcase
|
896
902
|
end
|
903
|
+
|
904
|
+
TIME_METHODS = %w(
|
905
|
+
refresh
|
906
|
+
prepare_features
|
907
|
+
refresh_datasource
|
908
|
+
split_data
|
909
|
+
fit
|
910
|
+
normalize_all
|
911
|
+
learn
|
912
|
+
learn_statistics
|
913
|
+
fit_features
|
914
|
+
)
|
915
|
+
include EasyML::Timing
|
916
|
+
TIME_METHODS.each do |method|
|
917
|
+
measure_method_timing method
|
918
|
+
end
|
897
919
|
end
|
898
920
|
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
class AddUniqueConstraintToDatasetNames < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
|
2
|
+
def change
|
3
|
+
if index_exists?(:easy_ml_datasets, :name)
|
4
|
+
remove_index :easy_ml_datasets, :name
|
5
|
+
end
|
6
|
+
add_index :easy_ml_datasets, :name, unique: true
|
7
|
+
end
|
8
|
+
end
|
data/lib/easy_ml/timing.rb
CHANGED
@@ -19,9 +19,7 @@ module EasyML
|
|
19
19
|
result = send(method_alias, *args, **kwargs, &block)
|
20
20
|
ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
21
21
|
elapsed = ending - starting
|
22
|
-
|
23
|
-
puts "#{method_name} took #{elapsed.round(2)} seconds"
|
24
|
-
end
|
22
|
+
puts "#{method_name} took #{elapsed} seconds"
|
25
23
|
# StatsD.measure("#{Rails.env}.#{prefix.present? ? "#{prefix}." : ""}#{method_name}.timing", elapsed)
|
26
24
|
result
|
27
25
|
end
|
data/lib/easy_ml/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: easy_ml
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0.pre.rc90
|
4
|
+
version: 0.2.0.pre.rc92
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Brett Shollenberger
|
@@ -803,6 +803,7 @@ files:
|
|
803
803
|
- lib/easy_ml/railtie/templates/migration/add_raw_schema_to_datasets.rb.tt
|
804
804
|
- lib/easy_ml/railtie/templates/migration/add_sha_to_datasources_datasets_and_columns.rb.tt
|
805
805
|
- lib/easy_ml/railtie/templates/migration/add_slug_to_easy_ml_models.rb.tt
|
806
|
+
- lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_dataset_names.rb.tt
|
806
807
|
- lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_easy_ml_model_names.rb.tt
|
807
808
|
- lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_dataset_histories.rb.tt
|
808
809
|
- lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_features.rb.tt
|