easy_ml 0.2.0.pre.rc102 → 0.2.0.pre.rc103

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4c4736c4959fd2d08faac5dbe0c4525014edb8faa7e5b914875a0a84f58e53f2
4
- data.tar.gz: bbab12ed80cf8c3bd608388648cd8362d7f4b46408b135aaf79ef494dca7deed
3
+ metadata.gz: ef3f840cce99d7205957fbb39a6b319a45035624dce2e4e10f681383cb088abf
4
+ data.tar.gz: e25100f792ad48cfa4feab7eb652a2d6c49bfc6e28f3bcb97c8150f9bdd1bfc5
5
5
  SHA512:
6
- metadata.gz: 8dd7645d2b4da2d03a0c3fc1eaf9bcfdfd05ae31e9871782154ade2149ca4269ee5d78e6cb959d6f10e498cdbd427dfb958dc37c3b1208b0fe8885abac61dcad
7
- data.tar.gz: 6690b85ba40db78063ffe8fbf3b9302b82a52c3137f5a59d8ba8bb70838ebd931965968185b6031b90fa0b9d8f3192bcc1d2150e3cce43a5ad1959af738d180e
6
+ metadata.gz: 5f58395d392158d149db34ad5019a0e011164ca8d331846553e44e6564a291d88323ad0090c1c5ded60f696940b30949cba4e1a614fa9cd502e94372ef949707
7
+ data.tar.gz: 9497391351ad054308a985cc6b9e608f8dfef61be7417d66502cb11c26ca4f7825456b31aab010c016ac10feff791a8f7a01893743ecf11a26fabb9de7405b82
@@ -1028,7 +1028,6 @@ export function PreprocessingConfig({
1028
1028
  label: strategy.label
1029
1029
  })) || [])
1030
1030
  ]}
1031
- options={constants.preprocessing_strategies[selectedType]}
1032
1031
  />
1033
1032
 
1034
1033
  {renderStrategySpecificInfo('training')}
@@ -522,27 +522,32 @@ module EasyML
522
522
  EasyML::Import::Column.from_config(config, dataset, action: action)
523
523
  end
524
524
 
525
- def cast_statement(df, df_col, expected_dtype)
526
- expected_dtype = expected_dtype.is_a?(Polars::DataType) ? expected_dtype.class : expected_dtype
527
- actual_type = df[df_col].dtype
525
+ def cast_statement(series = nil)
526
+ expected_dtype = polars_datatype
527
+ actual_type = series&.dtype || expected_dtype
528
+
529
+ return Polars.col(name).cast(expected_dtype).alias(name) if expected_dtype == actual_type
528
530
 
529
531
  cast_statement = case expected_dtype.to_s
530
- when "Polars::Boolean"
532
+ when /Polars::List/
533
+ # we should start tracking polars args so we can know what type of list it is
534
+ Polars.col(name)
535
+ when /Polars::Boolean/
531
536
  case actual_type.to_s
532
- when "Polars::Boolean"
533
- Polars.col(df_col).cast(expected_dtype)
534
- when "Polars::Utf8", "Polars::Categorical", "Polars::String"
535
- Polars.col(df_col).eq("true").cast(expected_dtype)
536
- when "Polars::Null"
537
- Polars.col(df_col)
537
+ when /Polars::Boolean/, /Polars::Int/
538
+ Polars.col(name).cast(expected_dtype)
539
+ when /Polars::Utf/, /Polars::Categorical/, /Polars::String/
540
+ Polars.col(name).eq("true").cast(expected_dtype)
541
+ when /Polars::Null/
542
+ Polars.col(name)
538
543
  else
539
- raise "Unexpected dtype: #{actual_type} for column: #{df_col}"
544
+ raise "Unexpected dtype: #{actual_type} for column: #{name}"
540
545
  end
541
546
  else
542
- Polars.col(df_col).cast(expected_dtype)
547
+ Polars.col(name).cast(expected_dtype, strict: false)
543
548
  end
544
549
 
545
- cast_statement.alias(df_col)
550
+ cast_statement.alias(name)
546
551
  end
547
552
 
548
553
  def cast(value)
@@ -101,10 +101,10 @@ module EasyML
101
101
  end
102
102
  cast_statements = (df.columns & schema.keys.map(&:to_s)).map do |df_col|
103
103
  db_col = column_index[df_col]
104
- expected_dtype = schema[df_col.to_sym]
105
- db_col.cast_statement(df, df_col, expected_dtype)
104
+ db_col.cast_statement(df[df_col])
106
105
  end
107
106
  df = df.with_columns(cast_statements)
107
+ df
108
108
  end
109
109
 
110
110
  def cast(processed_or_raw)
@@ -10,7 +10,9 @@ module EasyML
10
10
  end
11
11
 
12
12
  def unique_count
13
- Polars.col(column.name).n_unique.alias("#{column.name}__unique_count")
13
+ Polars.col(column.name)
14
+ .cast(column.polars_datatype)
15
+ .n_unique.alias("#{column.name}__unique_count")
14
16
  end
15
17
  end
16
18
  end
@@ -5,11 +5,30 @@ module EasyML
5
5
  class Numeric < Query
6
6
  def train_query
7
7
  super.concat([
8
- Polars.col(column.name).mean.alias("#{column.name}__mean"),
9
- Polars.col(column.name).median.alias("#{column.name}__median"),
10
- Polars.col(column.name).min.alias("#{column.name}__min"),
11
- Polars.col(column.name).max.alias("#{column.name}__max"),
12
- Polars.col(column.name).std.alias("#{column.name}__std"),
8
+ Polars.col(column.name)
9
+ .cast(column.polars_datatype)
10
+ .mean
11
+ .alias("#{column.name}__mean"),
12
+
13
+ Polars.col(column.name)
14
+ .cast(column.polars_datatype)
15
+ .median
16
+ .alias("#{column.name}__median"),
17
+
18
+ Polars.col(column.name)
19
+ .cast(column.polars_datatype)
20
+ .min
21
+ .alias("#{column.name}__min"),
22
+
23
+ Polars.col(column.name)
24
+ .cast(column.polars_datatype)
25
+ .max
26
+ .alias("#{column.name}__max"),
27
+
28
+ Polars.col(column.name)
29
+ .cast(column.polars_datatype)
30
+ .std
31
+ .alias("#{column.name}__std"),
13
32
  ])
14
33
  end
15
34
  end
@@ -44,25 +44,37 @@ module EasyML
44
44
  end
45
45
 
46
46
  def null_count
47
- Polars.col(column.name).null_count.alias("#{column.name}__null_count")
47
+ Polars.col(column.name)
48
+ .cast(column.polars_datatype)
49
+ .null_count
50
+ .alias("#{column.name}__null_count")
48
51
  end
49
52
 
50
53
  def num_rows
51
- Polars.col(column.name).len.alias("#{column.name}__num_rows")
54
+ Polars.col(column.name)
55
+ .cast(column.polars_datatype)
56
+ .len
57
+ .alias("#{column.name}__num_rows")
52
58
  end
53
59
 
54
60
  def most_frequent_value
55
- Polars.col(column.name).filter(Polars.col(column.name).is_not_null).mode.first.alias("#{column.name}__most_frequent_value")
61
+ Polars.col(column.name)
62
+ .cast(column.polars_datatype)
63
+ .filter(Polars.col(column.name).is_not_null)
64
+ .mode
65
+ .first
66
+ .alias("#{column.name}__most_frequent_value")
56
67
  end
57
68
 
58
69
  def last_value
59
70
  return unless dataset.date_column.present?
60
71
 
61
72
  Polars.col(column.name)
62
- .sort_by(dataset.date_column.name, reverse: true, nulls_last: true)
63
- .filter(Polars.col(column.name).is_not_null)
64
- .first
65
- .alias("#{column.name}__last_value")
73
+ .cast(column.polars_datatype)
74
+ .sort_by(dataset.date_column.name, reverse: true, nulls_last: true)
75
+ .filter(Polars.col(column.name).is_not_null)
76
+ .first
77
+ .alias("#{column.name}__last_value")
66
78
  end
67
79
  end
68
80
  end
@@ -10,7 +10,10 @@ module EasyML
10
10
  end
11
11
 
12
12
  def unique_count
13
- Polars.col(column.name).cast(:str).n_unique.alias("#{column.name}__unique_count")
13
+ Polars.col(column.name)
14
+ .cast(Polars::String)
15
+ .n_unique
16
+ .alias("#{column.name}__unique_count")
14
17
  end
15
18
  end
16
19
  end
@@ -28,15 +28,15 @@ module EasyML
28
28
  )
29
29
  .select(queries).collect
30
30
  rescue => e
31
- problematic_query = queries.detect {
31
+ problematic_queries = queries.select { |query|
32
32
  begin
33
- dataset.send(type).send(split, all_columns: true, lazy: true).select(queries).collect
33
+ dataset.send(type).send(split, all_columns: true, lazy: true).select([query]).collect
34
34
  false
35
35
  rescue => e
36
36
  true
37
37
  end
38
38
  }
39
- raise "Query failed for column #{problematic_query}, likely wrong datatype"
39
+ raise "Query failed for queries... likely due to wrong column datatype: #{problematic_queries.join("\n")}"
40
40
  end
41
41
  end
42
42
 
@@ -64,4 +64,4 @@ module EasyML
64
64
  end
65
65
  end
66
66
  end
67
- end
67
+ end
@@ -498,7 +498,7 @@ module EasyML
498
498
  feature_cols -= [weights_col] if weights_col
499
499
 
500
500
  # Get features, labels and weights
501
- exploded = explode_embeddings(xs.select(feature_cols))
501
+ exploded = explode_embeddings(xs)
502
502
  feature_cols = exploded.columns
503
503
  features = lazy ? exploded.collect.to_numo : exploded.to_numo
504
504
 
@@ -175,7 +175,7 @@ module EasyML
175
175
  end
176
176
  combined_lazy_df = combined_lazy_df.with_columns(
177
177
  cast.map do |col, dtype|
178
- Polars.col(col).cast(dtype).alias(col)
178
+ Polars.col(col).cast(dtype, strict: false).alias(col)
179
179
  end
180
180
  )
181
181
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc102"
4
+ VERSION = "0.2.0-rc103"
5
5
 
6
6
  module Version
7
7
  end
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "entrypoints/Application.tsx": {
3
- "file": "assets/entrypoints/Application.tsx-CRS5bRgw.js",
3
+ "file": "assets/entrypoints/Application.tsx-gkZ77wo8.js",
4
4
  "name": "entrypoints/Application.tsx",
5
5
  "src": "entrypoints/Application.tsx",
6
6
  "isEntry": true,