easy_ml 0.2.0.pre.rc104 → 0.2.0.pre.rc105

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 617edee53d32c1340b1a996a48e6a8a60d8cccff4345e27b2e5cf2ffc926c4ac
4
- data.tar.gz: 225c133b9365d62e579e39e862ef76efaf7759d1c40a59b89968441381c8c5ee
3
+ metadata.gz: c0f70db7d7b59423ef34b5f3cd15d783ec22112977b44c1a5c7df390cabbc1c9
4
+ data.tar.gz: c985391e9231db8870c3a0dde14d41c9a0b75f0643b9ebb4ad30af7b57354230
5
5
  SHA512:
6
- metadata.gz: 8a31abca5a4086eab323b1dfc112c05f89682bbe1fab0211ed111a0978cdc43f45dfd3ece03c6920001cfb4286d22b41e52421d280f02476e2feb63f0214eefd
7
- data.tar.gz: 1f109b18e911eee6f81fe797f80320cf37a06e5ae25e4eb76894ffa98dbc931a66b63264462d36ce58e29ab0a7b95e90d522457e53b4fc15a71ece5473d73389
6
+ metadata.gz: d06176713f32434dc24b7250090a7c005b79a0c515d96e7ef35b67a28e9c5c6feb58ed2bb6a5136f98897c9ded66b8c9c5ee78018cb8056c50ef1f2491b26e0a
7
+ data.tar.gz: 8a049b2858d28e0f4eb7776a606a1e3375535c7ab0a13d41ed32a5c253b9fc979d9dcf705c0c7e99322340b8c0d3b21c38513885e79947347efd1b9bafb66347
@@ -11,7 +11,7 @@ module EasyML
11
11
 
12
12
  def unique_count
13
13
  Polars.col(column.name)
14
- .cast(column.polars_datatype)
14
+ .cast(datatype)
15
15
  .n_unique.alias("#{column.name}__unique_count")
16
16
  end
17
17
  end
@@ -6,27 +6,27 @@ module EasyML
6
6
  def train_query
7
7
  super.concat([
8
8
  Polars.col(column.name)
9
- .cast(column.polars_datatype)
9
+ .cast(datatype)
10
10
  .mean
11
11
  .alias("#{column.name}__mean"),
12
12
 
13
13
  Polars.col(column.name)
14
- .cast(column.polars_datatype)
14
+ .cast(datatype)
15
15
  .median
16
16
  .alias("#{column.name}__median"),
17
17
 
18
18
  Polars.col(column.name)
19
- .cast(column.polars_datatype)
19
+ .cast(datatype)
20
20
  .min
21
21
  .alias("#{column.name}__min"),
22
22
 
23
23
  Polars.col(column.name)
24
- .cast(column.polars_datatype)
24
+ .cast(datatype)
25
25
  .max
26
26
  .alias("#{column.name}__max"),
27
27
 
28
28
  Polars.col(column.name)
29
- .cast(column.polars_datatype)
29
+ .cast(datatype)
30
30
  .std
31
31
  .alias("#{column.name}__std"),
32
32
  ])
@@ -33,6 +33,14 @@ module EasyML
33
33
  end
34
34
  end
35
35
 
36
+ def datatype
37
+ case column.polars_datatype.to_s
38
+ when /Polars::Categorical/ then Polars::String
39
+ else
40
+ column.polars_datatype
41
+ end
42
+ end
43
+
36
44
  private
37
45
 
38
46
  def full_dataset_query
@@ -45,21 +53,21 @@ module EasyML
45
53
 
46
54
  def null_count
47
55
  Polars.col(column.name)
48
- .cast(column.polars_datatype)
56
+ .cast(datatype)
49
57
  .null_count
50
58
  .alias("#{column.name}__null_count")
51
59
  end
52
60
 
53
61
  def num_rows
54
62
  Polars.col(column.name)
55
- .cast(column.polars_datatype)
63
+ .cast(datatype)
56
64
  .len
57
65
  .alias("#{column.name}__num_rows")
58
66
  end
59
67
 
60
68
  def most_frequent_value
61
69
  Polars.col(column.name)
62
- .cast(column.polars_datatype)
70
+ .cast(datatype)
63
71
  .filter(Polars.col(column.name).is_not_null)
64
72
  .mode
65
73
  .first
@@ -70,7 +78,7 @@ module EasyML
70
78
  return unless dataset.date_column.present?
71
79
 
72
80
  Polars.col(column.name)
73
- .cast(column.polars_datatype)
81
+ .cast(datatype)
74
82
  .sort_by(dataset.date_column.name, reverse: true, nulls_last: true)
75
83
  .filter(Polars.col(column.name).is_not_null)
76
84
  .first
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module EasyML
4
- VERSION = "0.2.0-rc104"
4
+ VERSION = "0.2.0-rc105"
5
5
 
6
6
  module Version
7
7
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: easy_ml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0.pre.rc104
4
+ version: 0.2.0.pre.rc105
5
5
  platform: ruby
6
6
  authors:
7
7
  - Brett Shollenberger
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-03-18 00:00:00.000000000 Z
10
+ date: 2025-03-19 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: activerecord