easy_ml 0.2.0.pre.rc90 → 0.2.0.pre.rc92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '08be0b67dba395b4aa3493a0a0fa6e5cde31246f299a14460590c8ced298d557'
4
- data.tar.gz: d72734f3d5045e1f3554eecadb641d141fbb89f02b3c6f0cb265e5588c6d2866
3
+ metadata.gz: 428c2f3e0ce95c3f41ea3e8c31a8cdbc3709a9bd4332d0736b6b2b4000677430
4
+ data.tar.gz: 2bad8c01bbb1a65f3135765b815c88b3ac405a48e9a97d30173f44ac5755c4b2
5
5
  SHA512:
6
- metadata.gz: a873a1cf9b00fd84dc0912392f4de8140eff18377c97e3d9464c7ce2d73a3a82c16f0ae34d09b095be290ca6cb287bc28bfa76864d1e003db99d308bc44413da
7
- data.tar.gz: 47bc5ff93e92e8d51b62dad043917ce3b110a4141aa9e404456f2934dbe4b5d198506af81af7ddda78b511b96a564abc6df62834e376b02a4d7a6859dafdecfc
6
+ metadata.gz: '08218be1a52ee2f871da922b176af2ca6555739af0bec44065bfc81dfdfcb84b7a27c3501b66915b5d07db4bce2427ee4324e38f30a3534564c1be3df1d991a2'
7
+ data.tar.gz: 2eb3d84ec7fc7be829ad27bf689c617905299110cc40d0185a81d94c3b165c2039168b36473806140c220a4cb7f91c91a905d611657bfc8205ff2f77c257edb9
@@ -515,24 +515,24 @@ module EasyML
515
515
  end
516
516
 
517
517
  def cast_statement(df, df_col, expected_dtype)
518
- expected_dtype = expected_dtype.class
518
+ expected_dtype = expected_dtype.is_a?(Polars::DataType) ? expected_dtype : expected_dtype.class
519
519
  actual_type = df[df_col].dtype
520
520
 
521
521
  cast_statement = case expected_dtype
522
- when Polars::Boolean
523
- case actual_type
524
- when Polars::Boolean
525
- Polars.col(df_col).cast(expected_dtype)
526
- when Polars::String, Polars::Categorical
527
- Polars.col(df_col).eq("true").cast(expected_dtype)
528
- when Polars::Null
529
- Polars.col(df_col)
530
- else
531
- raise "Unexpected dtype: #{actual_type} for column: #{df_col}"
532
- end
533
- else
534
- Polars.col(df_col).cast(expected_dtype)
535
- end
522
+ when Polars::Boolean
523
+ case actual_type
524
+ when Polars::Boolean
525
+ Polars.col(df_col).cast(expected_dtype)
526
+ when Polars::String, Polars::Categorical
527
+ Polars.col(df_col).eq("true").cast(expected_dtype)
528
+ when Polars::Null
529
+ Polars.col(df_col)
530
+ else
531
+ raise "Unexpected dtype: #{actual_type} for column: #{df_col}"
532
+ end
533
+ else
534
+ Polars.col(df_col).cast(expected_dtype)
535
+ end
536
536
 
537
537
  cast_statement.alias(df_col)
538
538
  end
@@ -104,7 +104,11 @@ module EasyML
104
104
  expected_dtype = schema[df_col.to_sym]
105
105
  db_col.cast_statement(df, df_col, expected_dtype)
106
106
  end
107
- df = df.with_columns(cast_statements)
107
+ begin
108
+ df = df.with_columns(cast_statements)
109
+ rescue => e
110
+ binding.pry
111
+ end
108
112
  end
109
113
 
110
114
  def cast(processed_or_raw)
@@ -217,6 +217,7 @@ module EasyML
217
217
  @processed = processed.cp(version)
218
218
  save.tap do
219
219
  features.each(&:bump_version)
220
+ EasyML::Feature.import(features.to_a, on_duplicate_key_update: [:version])
220
221
  end
221
222
  end
222
223
 
@@ -633,11 +634,16 @@ module EasyML
633
634
  def apply_missing_columns(df, inference: false)
634
635
  return df unless inference
635
636
 
636
- missing_columns = (col_order(inference: inference) - df.columns).compact.uniq
637
+ required_cols = col_order(inference: inference).compact.uniq
637
638
  columns.one_hots.each do |one_hot|
638
- missing_columns -= one_hot.virtual_columns
639
- missing_columns += [one_hot.name]
639
+ virtual_columns = one_hot.virtual_columns
640
+ if virtual_columns.all? { |vc| df.columns.include?(vc) }
641
+ required_cols -= virtual_columns
642
+ else
643
+ required_cols += [one_hot.name]
644
+ end
640
645
  end
646
+ missing_columns = required_cols - df.columns
641
647
  df.with_columns(missing_columns.map { |f| Polars.lit(nil).alias(f) })
642
648
  end
643
649
 
@@ -894,5 +900,21 @@ module EasyML
894
900
  def underscored_name
895
901
  name.gsub(/\s{2,}/, " ").gsub(/\s/, "_").downcase
896
902
  end
903
+
904
+ TIME_METHODS = %w(
905
+ refresh
906
+ prepare_features
907
+ refresh_datasource
908
+ split_data
909
+ fit
910
+ normalize_all
911
+ learn
912
+ learn_statistics
913
+ fit_features
914
+ )
915
+ include EasyML::Timing
916
+ TIME_METHODS.each do |method|
917
+ measure_method_timing method
918
+ end
897
919
  end
898
920
  end
@@ -17,9 +17,7 @@ module EasyML
17
17
  end
18
18
 
19
19
  def wipe
20
- folders.each do |folder|
21
- FileUtils.rm_rf(File.join(root_dir, folder))
22
- end
20
+ super
23
21
  clear_all_keys
24
22
  end
25
23
 
@@ -59,6 +59,7 @@ module EasyML
59
59
  add_pca_model_id_to_easy_ml_columns
60
60
  add_workflow_status_to_easy_ml_dataset_histories
61
61
  add_metadata_to_easy_ml_predictions
62
+ add_unique_constraint_to_dataset_names
62
63
  ].freeze
63
64
 
64
65
  # Specify the next migration number
@@ -0,0 +1,8 @@
1
+ class AddUniqueConstraintToDatasetNames < ActiveRecord::Migration[<%= ActiveRecord::Migration.current_version %>]
2
+ def change
3
+ if index_exists?(:easy_ml_datasets, :name)
4
+ remove_index :easy_ml_datasets, :name
5
+ end
6
+ add_index :easy_ml_datasets, :name, unique: true
7
+ end
8
+ end
@@ -19,9 +19,7 @@ module EasyML
19
19
  result = send(method_alias, *args, **kwargs, &block)
20
20
  ending = Process.clock_gettime(Process::CLOCK_MONOTONIC)
21
21
  elapsed = ending - starting
22
- 10.times do
23
- puts "#{method_name} took #{elapsed.round(2)} seconds"
24
- end
22
+ puts "#{method_name} took #{elapsed} seconds"
25
23
  # StatsD.measure("#{Rails.env}.#{prefix.present? ? "#{prefix}." : ""}#{method_name}.timing", elapsed)
26
24
  result
27
25
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc90"
4
+ VERSION = "0.2.0-rc92"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc90
4
+ version: 0.2.0.pre.rc92
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
@@ -803,6 +803,7 @@ files:
803
803
  - lib/easy_ml/railtie/templates/migration/add_raw_schema_to_datasets.rb.tt
804
804
  - lib/easy_ml/railtie/templates/migration/add_sha_to_datasources_datasets_and_columns.rb.tt
805
805
  - lib/easy_ml/railtie/templates/migration/add_slug_to_easy_ml_models.rb.tt
806
+ - lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_dataset_names.rb.tt
806
807
  - lib/easy_ml/railtie/templates/migration/add_unique_constraint_to_easy_ml_model_names.rb.tt
807
808
  - lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_dataset_histories.rb.tt
808
809
  - lib/easy_ml/railtie/templates/migration/add_workflow_status_to_easy_ml_features.rb.tt