easy_ml 0.2.0.pre.rc103 → 0.2.0.pre.rc105

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ef3f840cce99d7205957fbb39a6b319a45035624dce2e4e10f681383cb088abf
4
- data.tar.gz: e25100f792ad48cfa4feab7eb652a2d6c49bfc6e28f3bcb97c8150f9bdd1bfc5
3
+ metadata.gz: c0f70db7d7b59423ef34b5f3cd15d783ec22112977b44c1a5c7df390cabbc1c9
4
+ data.tar.gz: c985391e9231db8870c3a0dde14d41c9a0b75f0643b9ebb4ad30af7b57354230
5
5
  SHA512:
6
- metadata.gz: 5f58395d392158d149db34ad5019a0e011164ca8d331846553e44e6564a291d88323ad0090c1c5ded60f696940b30949cba4e1a614fa9cd502e94372ef949707
7
- data.tar.gz: 9497391351ad054308a985cc6b9e608f8dfef61be7417d66502cb11c26ca4f7825456b31aab010c016ac10feff791a8f7a01893743ecf11a26fabb9de7405b82
6
+ metadata.gz: d06176713f32434dc24b7250090a7c005b79a0c515d96e7ef35b67a28e9c5c6feb58ed2bb6a5136f98897c9ded66b8c9c5ee78018cb8056c50ef1f2491b26e0a
7
+ data.tar.gz: 8a049b2858d28e0f4eb7776a606a1e3375535c7ab0a13d41ed32a5c253b9fc979d9dcf705c0c7e99322340b8c0d3b21c38513885e79947347efd1b9bafb66347
@@ -528,6 +528,18 @@ module EasyML
528
528
 
529
529
  return Polars.col(name).cast(expected_dtype).alias(name) if expected_dtype == actual_type
530
530
 
531
+ if encoding.present?
532
+ encoding_cast = case encoding.to_sym
533
+ when :one_hot
534
+ Polars.col(series.name).cast(Polars::Boolean).alias(series.name)
535
+ when :ordinal
536
+ Polars.col(series.name).cast(Polars::Int64).alias(series.name)
537
+ when :embedding
538
+ Polars.col(series.name).alias(series.name)
539
+ end
540
+ return encoding_cast
541
+ end
542
+
531
543
  cast_statement = case expected_dtype.to_s
532
544
  when /Polars::List/
533
545
  # we should start tracking polars args so we can know what type of list it is
@@ -11,7 +11,7 @@ module EasyML
11
11
 
12
12
  def unique_count
13
13
  Polars.col(column.name)
14
- .cast(column.polars_datatype)
14
+ .cast(datatype)
15
15
  .n_unique.alias("#{column.name}__unique_count")
16
16
  end
17
17
  end
@@ -6,27 +6,27 @@ module EasyML
6
6
  def train_query
7
7
  super.concat([
8
8
  Polars.col(column.name)
9
- .cast(column.polars_datatype)
9
+ .cast(datatype)
10
10
  .mean
11
11
  .alias("#{column.name}__mean"),
12
12
 
13
13
  Polars.col(column.name)
14
- .cast(column.polars_datatype)
14
+ .cast(datatype)
15
15
  .median
16
16
  .alias("#{column.name}__median"),
17
17
 
18
18
  Polars.col(column.name)
19
- .cast(column.polars_datatype)
19
+ .cast(datatype)
20
20
  .min
21
21
  .alias("#{column.name}__min"),
22
22
 
23
23
  Polars.col(column.name)
24
- .cast(column.polars_datatype)
24
+ .cast(datatype)
25
25
  .max
26
26
  .alias("#{column.name}__max"),
27
27
 
28
28
  Polars.col(column.name)
29
- .cast(column.polars_datatype)
29
+ .cast(datatype)
30
30
  .std
31
31
  .alias("#{column.name}__std"),
32
32
  ])
@@ -33,6 +33,14 @@ module EasyML
33
33
  end
34
34
  end
35
35
 
36
+ def datatype
37
+ case column.polars_datatype.to_s
38
+ when /Polars::Categorical/ then Polars::String
39
+ else
40
+ column.polars_datatype
41
+ end
42
+ end
43
+
36
44
  private
37
45
 
38
46
  def full_dataset_query
@@ -45,21 +53,21 @@ module EasyML
45
53
 
46
54
  def null_count
47
55
  Polars.col(column.name)
48
- .cast(column.polars_datatype)
56
+ .cast(datatype)
49
57
  .null_count
50
58
  .alias("#{column.name}__null_count")
51
59
  end
52
60
 
53
61
  def num_rows
54
62
  Polars.col(column.name)
55
- .cast(column.polars_datatype)
63
+ .cast(datatype)
56
64
  .len
57
65
  .alias("#{column.name}__num_rows")
58
66
  end
59
67
 
60
68
  def most_frequent_value
61
69
  Polars.col(column.name)
62
- .cast(column.polars_datatype)
70
+ .cast(datatype)
63
71
  .filter(Polars.col(column.name).is_not_null)
64
72
  .mode
65
73
  .first
@@ -70,7 +78,7 @@ module EasyML
70
78
  return unless dataset.date_column.present?
71
79
 
72
80
  Polars.col(column.name)
73
- .cast(column.polars_datatype)
81
+ .cast(datatype)
74
82
  .sort_by(dataset.date_column.name, reverse: true, nulls_last: true)
75
83
  .filter(Polars.col(column.name).is_not_null)
76
84
  .first
@@ -498,7 +498,7 @@ module EasyML
498
498
  feature_cols -= [weights_col] if weights_col
499
499
 
500
500
  # Get features, labels and weights
501
- exploded = explode_embeddings(xs)
501
+ exploded = explode_embeddings(xs.select(feature_cols))
502
502
  feature_cols = exploded.columns
503
503
  features = lazy ? exploded.collect.to_numo : exploded.to_numo
504
504
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc103"
4
+ VERSION = "0.2.0-rc105"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc103
4
+ version: 0.2.0.pre.rc105
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-03-18 00:00:00.000000000 Z
10
+ date: 2025-03-19 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: activerecord